5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 //run filter on template
76 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { break; } runFilter(templateSeqs[i]); }
81 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
88 if (searchMethod == "distance") {
89 //read in all query seqs
90 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
92 vector<Sequence*> temp = templateSeqs;
93 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
95 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
97 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
99 if (m->control_pressed) { return 0; }
101 //run filter on template
102 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
104 string kmerDBNameLeft;
105 string kmerDBNameRight;
107 //generate the kmerdb to pass to maligner
108 if (searchMethod == "kmer") {
109 string templatePath = m->hasPath(templateFileName);
110 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
111 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
113 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
114 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
116 for (int i = 0; i < templateSeqs.size(); i++) {
118 if (m->control_pressed) { return 0; }
120 string leftFrag = templateSeqs[i]->getUnaligned();
121 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
123 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
124 databaseLeft->addSequence(leftTemp);
126 databaseLeft->generateDB();
127 databaseLeft->setNumSeqs(templateSeqs.size());
129 for (int i = 0; i < templateSeqs.size(); i++) {
130 if (m->control_pressed) { return 0; }
132 string rightFrag = templateSeqs[i]->getUnaligned();
133 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
135 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
136 databaseRight->addSequence(rightTemp);
138 databaseRight->generateDB();
139 databaseRight->setNumSeqs(templateSeqs.size());
143 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
144 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
145 bool needToGenerateLeft = true;
147 if(kmerFileTestLeft){
148 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
149 if (GoodFile) { needToGenerateLeft = false; }
152 if(needToGenerateLeft){
154 for (int i = 0; i < templateSeqs.size(); i++) {
156 if (m->control_pressed) { return 0; }
158 string leftFrag = templateSeqs[i]->getUnaligned();
159 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
161 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
162 databaseLeft->addSequence(leftTemp);
164 databaseLeft->generateDB();
167 databaseLeft->readKmerDB(kmerFileTestLeft);
169 kmerFileTestLeft.close();
171 databaseLeft->setNumSeqs(templateSeqs.size());
174 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
175 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
176 bool needToGenerateRight = true;
178 if(kmerFileTestRight){
179 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
180 if (GoodFile) { needToGenerateRight = false; }
183 if(needToGenerateRight){
185 for (int i = 0; i < templateSeqs.size(); i++) {
186 if (m->control_pressed) { return 0; }
188 string rightFrag = templateSeqs[i]->getUnaligned();
189 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
191 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
192 databaseRight->addSequence(rightTemp);
194 databaseRight->generateDB();
197 databaseRight->readKmerDB(kmerFileTestRight);
199 kmerFileTestRight.close();
201 databaseRight->setNumSeqs(templateSeqs.size());
203 }else if (searchMethod == "blast") {
206 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
208 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
209 databaseLeft->generateDB();
210 databaseLeft->setNumSeqs(templateSeqs.size());
216 catch(exception& e) {
217 m->errorOut(e, "ChimeraSlayer", "doprep");
221 //***************************************************************************************************************
222 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
225 //when template=self, the query file is sorted from most abundance to least abundant
226 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
227 vector<Sequence*> userTemplate;
229 int myAbund = priority[q->getName()];
231 for (int i = 0; i < templateSeqs.size(); i++) {
233 if (m->control_pressed) { return userTemplate; }
235 //have I reached a sequence with the same abundance as myself?
236 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
238 //if its am not chimeric add it
239 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { userTemplate.push_back(templateSeqs[i]); }
242 string kmerDBNameLeft;
243 string kmerDBNameRight;
245 //generate the kmerdb to pass to maligner
246 if (searchMethod == "kmer") {
247 string templatePath = m->hasPath(templateFileName);
248 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
249 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
251 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
252 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
254 for (int i = 0; i < userTemplate.size(); i++) {
256 if (m->control_pressed) { return userTemplate; }
258 string leftFrag = userTemplate[i]->getUnaligned();
259 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
261 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
262 databaseLeft->addSequence(leftTemp);
264 databaseLeft->generateDB();
265 databaseLeft->setNumSeqs(userTemplate.size());
267 for (int i = 0; i < userTemplate.size(); i++) {
268 if (m->control_pressed) { return userTemplate; }
270 string rightFrag = userTemplate[i]->getUnaligned();
271 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
273 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
274 databaseRight->addSequence(rightTemp);
276 databaseRight->generateDB();
277 databaseRight->setNumSeqs(userTemplate.size());
282 for (int i = 0; i < userTemplate.size(); i++) {
284 if (m->control_pressed) { return userTemplate; }
286 string leftFrag = userTemplate[i]->getUnaligned();
287 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
289 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
290 databaseLeft->addSequence(leftTemp);
292 databaseLeft->generateDB();
293 databaseLeft->setNumSeqs(userTemplate.size());
295 for (int i = 0; i < userTemplate.size(); i++) {
296 if (m->control_pressed) { return userTemplate; }
298 string rightFrag = userTemplate[i]->getUnaligned();
299 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
301 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
302 databaseRight->addSequence(rightTemp);
304 databaseRight->generateDB();
305 databaseRight->setNumSeqs(userTemplate.size());
307 }else if (searchMethod == "blast") {
310 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
312 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
313 databaseLeft->generateDB();
314 databaseLeft->setNumSeqs(userTemplate.size());
320 catch(exception& e) {
321 m->errorOut(e, "ChimeraSlayer", "getTemplate");
326 //***************************************************************************************************************
327 ChimeraSlayer::~ChimeraSlayer() {
329 if (templateFileName != "self") {
330 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
331 else if (searchMethod == "blast") { delete databaseLeft; }
334 //***************************************************************************************************************
335 void ChimeraSlayer::printHeader(ostream& out) {
336 m->mothurOutEndLine();
337 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
338 m->mothurOutEndLine();
340 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
342 //***************************************************************************************************************
343 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
345 Sequence* trim = NULL;
346 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
348 if (chimeraFlags == "yes") {
349 string chimeraFlag = "no";
350 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
352 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
355 if (chimeraFlag == "yes") {
356 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
357 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
358 outAcc << querySeq->getName() << endl;
360 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
363 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
364 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
366 string newAligned = trim->getAligned();
368 if (lengthLeft > lengthRight) { //trim right
369 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
371 for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
373 trim->setAligned(newAligned);
378 printBlock(chimeraResults[0], chimeraFlag, out);
381 out << querySeq->getName() << "\tno" << endl;
387 catch(exception& e) {
388 m->errorOut(e, "ChimeraSlayer", "print");
392 //***************************************************************************************************************
393 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
395 Sequence* trim = NULL;
398 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
399 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
402 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
404 string chimeraFlag = "no";
405 if (leftPiece.flag == "yes") {
407 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
409 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
412 if (rightPiece.flag == "yes") {
413 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
415 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
418 bool rightChimeric = false;
419 bool leftChimeric = false;
421 if (chimeraFlag == "yes") {
422 //which peice is chimeric or are both
423 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
424 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
426 if (rightChimeric || leftChimeric) {
427 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
428 outAcc << querySeq->getName() << endl;
430 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
433 string newAligned = trim->getAligned();
435 //right side is fine so keep that
436 if ((leftChimeric) && (!rightChimeric)) {
437 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
438 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
439 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
440 }else { //both sides are chimeric, keep longest piece
442 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
443 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
445 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
446 int length = lengthLeftLeft;
447 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
449 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
450 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
452 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
453 if (lengthRightRight > length) { longest = 4; }
455 if (longest == 1) { //leftleft
456 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457 }else if (longest == 2) { //leftright
458 //get rid of leftleft
459 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
461 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
462 }else if (longest == 3) { //rightleft
464 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
465 //get rid of rightright
466 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
469 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
470 //get rid of rightleft
471 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
475 trim->setAligned(newAligned);
481 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
484 out << querySeq->getName() << "\tno" << endl;
490 catch(exception& e) {
491 m->errorOut(e, "ChimeraSlayer", "print");
497 //***************************************************************************************************************
498 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
501 bool results = false;
502 string outAccString = "";
503 string outputString = "";
505 Sequence* trim = NULL;
508 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
509 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
513 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
515 string chimeraFlag = "no";
516 if (leftPiece.flag == "yes") {
518 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
520 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
523 if (rightPiece.flag == "yes") {
524 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
526 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
529 bool rightChimeric = false;
530 bool leftChimeric = false;
532 if (chimeraFlag == "yes") {
533 //which peice is chimeric or are both
534 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
535 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
537 if (rightChimeric || leftChimeric) {
538 cout << querySeq->getName() << "\tyes" << endl;
539 outAccString += querySeq->getName() + "\n";
542 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
544 //write to accnos file
545 int length = outAccString.length();
546 char* buf2 = new char[length];
547 memcpy(buf2, outAccString.c_str(), length);
549 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
553 string newAligned = trim->getAligned();
555 //right side is fine so keep that
556 if ((leftChimeric) && (!rightChimeric)) {
557 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
558 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
559 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
560 }else { //both sides are chimeric, keep longest piece
562 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
563 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
565 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
566 int length = lengthLeftLeft;
567 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
569 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
570 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
572 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
573 if (lengthRightRight > length) { longest = 4; }
575 if (longest == 1) { //leftleft
576 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
577 }else if (longest == 2) { //leftright
578 //get rid of leftleft
579 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
581 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
582 }else if (longest == 3) { //rightleft
584 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
585 //get rid of rightright
586 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
589 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
590 //get rid of rightleft
591 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
595 trim->setAligned(newAligned);
601 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
602 outputString += "\n";
604 //write to output file
605 int length = outputString.length();
606 char* buf = new char[length];
607 memcpy(buf, outputString.c_str(), length);
609 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
613 outputString += querySeq->getName() + "\tno\n";
615 //write to output file
616 int length = outputString.length();
617 char* buf = new char[length];
618 memcpy(buf, outputString.c_str(), length);
620 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
627 catch(exception& e) {
628 m->errorOut(e, "ChimeraSlayer", "print");
632 //***************************************************************************************************************
633 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
636 bool results = false;
637 string outAccString = "";
638 string outputString = "";
640 Sequence* trim = NULL;
641 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
643 if (chimeraFlags == "yes") {
644 string chimeraFlag = "no";
645 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
647 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
650 if (chimeraFlag == "yes") {
651 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
652 cout << querySeq->getName() << "\tyes" << endl;
653 outAccString += querySeq->getName() + "\n";
656 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
658 //write to accnos file
659 int length = outAccString.length();
660 char* buf2 = new char[length];
661 memcpy(buf2, outAccString.c_str(), length);
663 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
667 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
668 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
670 string newAligned = trim->getAligned();
671 if (lengthLeft > lengthRight) { //trim right
672 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
674 for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
676 trim->setAligned(newAligned);
681 outputString = getBlock(chimeraResults[0], chimeraFlag);
682 outputString += "\n";
684 //write to output file
685 int length = outputString.length();
686 char* buf = new char[length];
687 memcpy(buf, outputString.c_str(), length);
689 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
693 outputString += querySeq->getName() + "\tno\n";
695 //write to output file
696 int length = outputString.length();
697 char* buf = new char[length];
698 memcpy(buf, outputString.c_str(), length);
700 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
706 catch(exception& e) {
707 m->errorOut(e, "ChimeraSlayer", "print");
713 //***************************************************************************************************************
714 int ChimeraSlayer::getChimeras(Sequence* query) {
717 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
718 printResults.trimQuery = trimQuery;
721 printResults.flag = "no";
724 spotMap = runFilter(query);
725 printResults.spotMap = spotMap;
729 //you must create a template
730 vector<Sequence*> thisTemplate;
731 if (templateFileName != "self") { thisTemplate = templateSeqs; }
732 else { thisTemplate = getTemplate(query); } //fills this template and creates the databases
734 if (m->control_pressed) { return 0; }
736 if (thisTemplate.size() == 0) { return 0; } //not chimeric
738 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
739 Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
740 Slayer slayer(window, increment, minSim, divR, iters, minSNP);
742 if (templateFileName == "self") {
743 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
744 else if (searchMethod == "blast") { delete databaseLeft; }
747 if (m->control_pressed) { return 0; }
749 string chimeraFlag = maligner.getResults(query, decalc);
751 if (m->control_pressed) { return 0; }
753 vector<results> Results = maligner.getOutput();
755 //cout << query->getName() << endl;
756 //for (int i = 0; i < Results.size(); i++) {
757 //cout << Results[i].parent << '\t' << Results[i].regionStart << '\t' << Results[i].regionEnd << '\t' << Results[i].nastRegionStart << '\t' << Results[i].nastRegionEnd << '\t' << Results[i].queryToParent << '\t' << Results[i].queryToParentLocal << endl;
759 //cout << "done\n" << endl;
760 if (chimeraFlag == "yes") {
763 ChimeraReAligner realigner(thisTemplate, match, misMatch);
764 realigner.reAlign(query, Results);
767 //get sequence that were given from maligner results
768 vector<SeqDist> seqs;
769 map<string, float> removeDups;
770 map<string, float>::iterator itDup;
771 map<string, string> parentNameSeq;
772 map<string, string>::iterator itSeq;
773 for (int j = 0; j < Results.size(); j++) {
774 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
775 //only add if you are not a duplicate
776 itDup = removeDups.find(Results[j].parent);
777 if (itDup == removeDups.end()) { //this is not duplicate
778 removeDups[Results[j].parent] = dist;
779 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
780 }else if (dist > itDup->second) { //is this a stronger number for this parent
781 removeDups[Results[j].parent] = dist;
782 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
786 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
787 itSeq = parentNameSeq.find(itDup->first);
788 Sequence* seq = new Sequence(itDup->first, itSeq->second);
792 member.dist = itDup->second;
794 seqs.push_back(member);
797 //limit number of parents to explore - default 3
798 if (Results.size() > parents) {
800 sort(seqs.begin(), seqs.end(), compareSeqDist);
801 //prioritize larger more similiar sequence fragments
802 reverse(seqs.begin(), seqs.end());
804 for (int k = seqs.size()-1; k > (parents-1); k--) {
810 //put seqs into vector to send to slayer
811 vector<Sequence*> seqsForSlayer;
813 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
815 //mask then send to slayer...
817 decalc->setMask(seqMask);
820 decalc->runMask(query);
823 for (int k = 0; k < seqsForSlayer.size(); k++) {
824 decalc->runMask(seqsForSlayer[k]);
827 spotMap = decalc->getMaskMap();
830 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
833 chimeraFlags = slayer.getResults(query, seqsForSlayer);
834 if (m->control_pressed) { return 0; }
835 chimeraResults = slayer.getOutput();
838 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
840 printResults.spotMap = spotMap;
841 printResults.flag = chimeraFlags;
842 printResults.results = chimeraResults;
847 catch(exception& e) {
848 m->errorOut(e, "ChimeraSlayer", "getChimeras");
852 //***************************************************************************************************************
853 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
855 out << querySeq->getName() << '\t';
856 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
858 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
859 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
861 out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
864 catch(exception& e) {
865 m->errorOut(e, "ChimeraSlayer", "printBlock");
869 //***************************************************************************************************************
870 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
873 if ((leftChimeric) && (!rightChimeric)) { //print left
874 out << querySeq->getName() << '\t';
875 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
877 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
878 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
880 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
882 }else if ((!leftChimeric) && (rightChimeric)) { //print right
883 out << querySeq->getName() << '\t';
884 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
886 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
887 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
889 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
891 }else { //print both results
892 if (leftdata.flag == "yes") {
893 out << querySeq->getName() + "_LEFT" << '\t';
894 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
896 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
897 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
899 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
902 if (rightdata.flag == "yes") {
903 if (leftdata.flag == "yes") { out << endl; }
905 out << querySeq->getName() + "_RIGHT"<< '\t';
906 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
908 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
909 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
911 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
916 catch(exception& e) {
917 m->errorOut(e, "ChimeraSlayer", "printBlock");
921 //***************************************************************************************************************
922 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
927 if ((leftChimeric) && (!rightChimeric)) { //get left
928 out += querySeq->getName() + "\t";
929 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
931 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
932 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
934 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
936 }else if ((!leftChimeric) && (rightChimeric)) { //print right
937 out += querySeq->getName() + "\t";
938 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
940 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
941 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
943 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
945 }else { //print both results
947 if (leftdata.flag == "yes") {
948 out += querySeq->getName() + "_LEFT\t";
949 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
951 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
952 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
954 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
957 if (rightdata.flag == "yes") {
958 if (leftdata.flag == "yes") { out += "\n"; }
959 out += querySeq->getName() + "_RIGHT\t";
960 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
962 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
963 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
965 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
972 catch(exception& e) {
973 m->errorOut(e, "ChimeraSlayer", "getBlock");
977 //***************************************************************************************************************
978 string ChimeraSlayer::getBlock(data_struct data, string flag){
981 string outputString = "";
983 outputString += querySeq->getName() + "\t";
984 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
986 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
987 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
989 outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
993 catch(exception& e) {
994 m->errorOut(e, "ChimeraSlayer", "getBlock");
998 //***************************************************************************************************************/