5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
70 decalc = new DeCalculator();
72 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74 //run filter on template
75 for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i]; } templateSeqs.clear();
79 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
83 //***************************************************************************************************************
84 int ChimeraSlayer::doPrep() {
87 //read in all query seqs
88 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
90 vector<Sequence*> temp = templateSeqs;
91 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
93 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
95 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
97 if (m->control_pressed) { return 0; }
99 //run filter on template
100 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
102 string kmerDBNameLeft;
103 string kmerDBNameRight;
105 //generate the kmerdb to pass to maligner
106 if (searchMethod == "kmer") {
107 string templatePath = m->hasPath(templateFileName);
108 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
109 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
111 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
112 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
114 for (int i = 0; i < templateSeqs.size(); i++) {
116 if (m->control_pressed) { return 0; }
118 string leftFrag = templateSeqs[i]->getUnaligned();
119 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
121 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
122 databaseLeft->addSequence(leftTemp);
124 databaseLeft->generateDB();
125 databaseLeft->setNumSeqs(templateSeqs.size());
127 for (int i = 0; i < templateSeqs.size(); i++) {
128 if (m->control_pressed) { return 0; }
130 string rightFrag = templateSeqs[i]->getUnaligned();
131 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
133 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
134 databaseRight->addSequence(rightTemp);
136 databaseRight->generateDB();
137 databaseRight->setNumSeqs(templateSeqs.size());
141 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
142 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
143 bool needToGenerateLeft = true;
145 if(kmerFileTestLeft){
146 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
147 if (GoodFile) { needToGenerateLeft = false; }
150 if(needToGenerateLeft){
152 for (int i = 0; i < templateSeqs.size(); i++) {
154 if (m->control_pressed) { return 0; }
156 string leftFrag = templateSeqs[i]->getUnaligned();
157 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
159 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
160 databaseLeft->addSequence(leftTemp);
162 databaseLeft->generateDB();
165 databaseLeft->readKmerDB(kmerFileTestLeft);
167 kmerFileTestLeft.close();
169 databaseLeft->setNumSeqs(templateSeqs.size());
172 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
173 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
174 bool needToGenerateRight = true;
176 if(kmerFileTestRight){
177 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
178 if (GoodFile) { needToGenerateRight = false; }
181 if(needToGenerateRight){
183 for (int i = 0; i < templateSeqs.size(); i++) {
184 if (m->control_pressed) { return 0; }
186 string rightFrag = templateSeqs[i]->getUnaligned();
187 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
189 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
190 databaseRight->addSequence(rightTemp);
192 databaseRight->generateDB();
195 databaseRight->readKmerDB(kmerFileTestRight);
197 kmerFileTestRight.close();
199 databaseRight->setNumSeqs(templateSeqs.size());
201 }else if (searchMethod == "blast") {
204 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
206 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
207 databaseLeft->generateDB();
208 databaseLeft->setNumSeqs(templateSeqs.size());
214 catch(exception& e) {
215 m->errorOut(e, "ChimeraSlayer", "doprep");
219 //***************************************************************************************************************
220 int ChimeraSlayer::getTemplate(Sequence* q) {
223 string kmerDBNameLeft;
224 string kmerDBNameRight;
226 //generate the kmerdb to pass to maligner
227 if (searchMethod == "kmer") {
228 string templatePath = m->hasPath(templateFileName);
229 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
230 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
232 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
233 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
235 for (int i = 0; i < userTemplate.size(); i++) {
237 if (m->control_pressed) { return 0; }
239 string leftFrag = userTemplate[i]->getUnaligned();
240 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
242 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
243 databaseLeft->addSequence(leftTemp);
245 databaseLeft->generateDB();
246 databaseLeft->setNumSeqs(userTemplate.size());
248 for (int i = 0; i < userTemplate.size(); i++) {
249 if (m->control_pressed) { return 0; }
251 string rightFrag = userTemplate[i]->getUnaligned();
252 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
254 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
255 databaseRight->addSequence(rightTemp);
257 databaseRight->generateDB();
258 databaseRight->setNumSeqs(userTemplate.size());
263 for (int i = 0; i < userTemplate.size(); i++) {
265 if (m->control_pressed) { return 0; }
267 string leftFrag = userTemplate[i]->getUnaligned();
268 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
270 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
271 databaseLeft->addSequence(leftTemp);
273 databaseLeft->generateDB();
274 databaseLeft->setNumSeqs(userTemplate.size());
276 for (int i = 0; i < userTemplate.size(); i++) {
277 if (m->control_pressed) { return 0; }
279 string rightFrag = userTemplate[i]->getUnaligned();
280 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
282 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
283 databaseRight->addSequence(rightTemp);
285 databaseRight->generateDB();
286 databaseRight->setNumSeqs(userTemplate.size());
288 }else if (searchMethod == "blast") {
291 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
293 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return 0; } databaseLeft->addSequence(*userTemplate[i]); }
294 databaseLeft->generateDB();
295 databaseLeft->setNumSeqs(userTemplate.size());
301 catch(exception& e) {
302 m->errorOut(e, "ChimeraSlayer", "getTemplate");
307 //***************************************************************************************************************
308 ChimeraSlayer::~ChimeraSlayer() {
310 if (templateFileName != "self") {
311 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
312 else if (searchMethod == "blast") { delete databaseLeft; }
314 //delete userTemplate
315 for (int i = 0; i < userTemplate.size(); i++) {
316 delete userTemplate[i];
318 userTemplate.clear();
321 //***************************************************************************************************************
322 void ChimeraSlayer::printHeader(ostream& out) {
323 m->mothurOutEndLine();
324 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
325 m->mothurOutEndLine();
327 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
329 //***************************************************************************************************************
330 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
332 Sequence* trim = NULL;
333 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
335 if (chimeraFlags == "yes") {
336 string chimeraFlag = "no";
337 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
339 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
342 if (chimeraFlag == "yes") {
343 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
344 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
345 outAcc << querySeq->getName() << endl;
348 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
349 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
351 string newAligned = trim->getAligned();
353 if (lengthLeft > lengthRight) { //trim right
354 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
356 for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
358 trim->setAligned(newAligned);
363 printBlock(chimeraResults[0], chimeraFlag, out);
366 out << querySeq->getName() << "\tno" << endl;
367 if (templateFileName == "self") {
368 Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
370 userTemplate.push_back(temp);
377 catch(exception& e) {
378 m->errorOut(e, "ChimeraSlayer", "print");
382 //***************************************************************************************************************
383 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
385 Sequence* trim = NULL;
388 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
389 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
392 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
394 string chimeraFlag = "no";
395 if (leftPiece.flag == "yes") {
397 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
399 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
402 if (rightPiece.flag == "yes") {
403 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
405 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
408 bool rightChimeric = false;
409 bool leftChimeric = false;
411 if (chimeraFlag == "yes") {
412 //which peice is chimeric or are both
413 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
414 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
416 if (rightChimeric || leftChimeric) {
417 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
418 outAcc << querySeq->getName() << endl;
421 string newAligned = trim->getAligned();
423 //right side is fine so keep that
424 if ((leftChimeric) && (!rightChimeric)) {
425 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
426 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
427 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
428 }else { //both sides are chimeric, keep longest piece
430 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
431 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
433 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
434 int length = lengthLeftLeft;
435 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
437 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
438 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
440 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
441 if (lengthRightRight > length) { longest = 4; }
443 if (longest == 1) { //leftleft
444 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
445 }else if (longest == 2) { //leftright
446 //get rid of leftleft
447 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
449 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
450 }else if (longest == 3) { //rightleft
452 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
453 //get rid of rightright
454 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
458 //get rid of rightleft
459 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
463 trim->setAligned(newAligned);
469 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
472 out << querySeq->getName() << "\tno" << endl;
473 if (templateFileName == "self") {
474 Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
476 userTemplate.push_back(temp);
483 catch(exception& e) {
484 m->errorOut(e, "ChimeraSlayer", "print");
490 //***************************************************************************************************************
491 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
494 bool results = false;
495 string outAccString = "";
496 string outputString = "";
498 Sequence* trim = NULL;
501 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
502 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
506 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
508 string chimeraFlag = "no";
509 if (leftPiece.flag == "yes") {
511 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
513 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
516 if (rightPiece.flag == "yes") {
517 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
519 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
522 bool rightChimeric = false;
523 bool leftChimeric = false;
525 if (chimeraFlag == "yes") {
526 //which peice is chimeric or are both
527 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
528 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
530 if (rightChimeric || leftChimeric) {
531 cout << querySeq->getName() << "\tyes" << endl;
532 outAccString += querySeq->getName() + "\n";
535 //write to accnos file
536 int length = outAccString.length();
537 char* buf2 = new char[length];
538 memcpy(buf2, outAccString.c_str(), length);
540 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
544 string newAligned = trim->getAligned();
546 //right side is fine so keep that
547 if ((leftChimeric) && (!rightChimeric)) {
548 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
549 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
550 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
551 }else { //both sides are chimeric, keep longest piece
553 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
554 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
556 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
557 int length = lengthLeftLeft;
558 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
560 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
561 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
563 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
564 if (lengthRightRight > length) { longest = 4; }
566 if (longest == 1) { //leftleft
567 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
568 }else if (longest == 2) { //leftright
569 //get rid of leftleft
570 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
572 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
573 }else if (longest == 3) { //rightleft
575 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
576 //get rid of rightright
577 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
580 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
581 //get rid of rightleft
582 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
586 trim->setAligned(newAligned);
592 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
593 outputString += "\n";
595 //write to output file
596 int length = outputString.length();
597 char* buf = new char[length];
598 memcpy(buf, outputString.c_str(), length);
600 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
604 outputString += querySeq->getName() + "\tno\n";
606 //write to output file
607 int length = outputString.length();
608 char* buf = new char[length];
609 memcpy(buf, outputString.c_str(), length);
611 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
614 if (template == "self") {
615 Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
617 userTemplate.push_back(temp);
624 catch(exception& e) {
625 m->errorOut(e, "ChimeraSlayer", "print");
629 //***************************************************************************************************************
630 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
633 bool results = false;
634 string outAccString = "";
635 string outputString = "";
637 Sequence* trim = NULL;
638 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
640 if (chimeraFlags == "yes") {
641 string chimeraFlag = "no";
642 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
644 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
647 if (chimeraFlag == "yes") {
648 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
649 cout << querySeq->getName() << "\tyes" << endl;
650 outAccString += querySeq->getName() + "\n";
653 //write to accnos file
654 int length = outAccString.length();
655 char* buf2 = new char[length];
656 memcpy(buf2, outAccString.c_str(), length);
658 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
662 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
663 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
665 string newAligned = trim->getAligned();
666 if (lengthLeft > lengthRight) { //trim right
667 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
669 for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
671 trim->setAligned(newAligned);
676 outputString = getBlock(chimeraResults[0], chimeraFlag);
677 outputString += "\n";
679 //write to output file
680 int length = outputString.length();
681 char* buf = new char[length];
682 memcpy(buf, outputString.c_str(), length);
684 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
688 outputString += querySeq->getName() + "\tno\n";
690 //write to output file
691 int length = outputString.length();
692 char* buf = new char[length];
693 memcpy(buf, outputString.c_str(), length);
695 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
698 if (template == "self") {
699 Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
701 userTemplate.push_back(temp);
707 catch(exception& e) {
708 m->errorOut(e, "ChimeraSlayer", "print");
714 //***************************************************************************************************************
715 int ChimeraSlayer::getChimeras(Sequence* query) {
718 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
719 printResults.trimQuery = trimQuery;
722 printResults.flag = "no";
725 spotMap = runFilter(query);
726 printResults.spotMap = spotMap;
730 //you must create a template
731 vector<Sequence*> thisTemplate;
732 if (templateFileName != "self") { thisTemplate = templateSeqs; }
733 else { getTemplate(query); thisTemplate = userTemplate; } //fills this template and creates the databases
735 if (m->control_pressed) { return 0; }
737 if (thisTemplate.size() == 0) { return 0; } //not chimeric
739 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
740 Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
741 Slayer slayer(window, increment, minSim, divR, iters, minSNP);
743 if (templateFileName == "self") {
744 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
745 else if (searchMethod == "blast") { delete databaseLeft; }
748 if (m->control_pressed) { return 0; }
750 string chimeraFlag = maligner.getResults(query, decalc);
752 if (m->control_pressed) { return 0; }
754 vector<results> Results = maligner.getOutput();
757 ChimeraReAligner realigner(thisTemplate, match, misMatch);
758 realigner.reAlign(query, Results);
761 if (chimeraFlag == "yes") {
763 //get sequence that were given from maligner results
764 vector<SeqDist> seqs;
765 map<string, float> removeDups;
766 map<string, float>::iterator itDup;
767 map<string, string> parentNameSeq;
768 map<string, string>::iterator itSeq;
769 for (int j = 0; j < Results.size(); j++) {
770 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
771 //only add if you are not a duplicate
772 itDup = removeDups.find(Results[j].parent);
773 if (itDup == removeDups.end()) { //this is not duplicate
774 removeDups[Results[j].parent] = dist;
775 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
776 }else if (dist > itDup->second) { //is this a stronger number for this parent
777 removeDups[Results[j].parent] = dist;
778 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
782 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
783 itSeq = parentNameSeq.find(itDup->first);
784 Sequence* seq = new Sequence(itDup->first, itSeq->second);
788 member.dist = itDup->second;
790 seqs.push_back(member);
793 //limit number of parents to explore - default 3
794 if (Results.size() > parents) {
796 sort(seqs.begin(), seqs.end(), compareSeqDist);
797 //prioritize larger more similiar sequence fragments
798 reverse(seqs.begin(), seqs.end());
800 for (int k = seqs.size()-1; k > (parents-1); k--) {
806 //put seqs into vector to send to slayer
807 vector<Sequence*> seqsForSlayer;
809 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
811 //mask then send to slayer...
813 decalc->setMask(seqMask);
816 decalc->runMask(query);
819 for (int k = 0; k < seqsForSlayer.size(); k++) {
820 decalc->runMask(seqsForSlayer[k]);
823 spotMap = decalc->getMaskMap();
826 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
829 chimeraFlags = slayer.getResults(query, seqsForSlayer);
830 if (m->control_pressed) { return 0; }
831 chimeraResults = slayer.getOutput();
834 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
836 printResults.spotMap = spotMap;
837 printResults.flag = chimeraFlags;
838 printResults.results = chimeraResults;
843 catch(exception& e) {
844 m->errorOut(e, "ChimeraSlayer", "getChimeras");
848 //***************************************************************************************************************
849 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
851 out << querySeq->getName() << '\t';
852 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
854 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
855 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
857 out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
860 catch(exception& e) {
861 m->errorOut(e, "ChimeraSlayer", "printBlock");
865 //***************************************************************************************************************
866 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
869 if ((leftChimeric) && (!rightChimeric)) { //print left
870 out << querySeq->getName() << '\t';
871 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
873 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
874 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
876 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
878 }else if ((!leftChimeric) && (rightChimeric)) { //print right
879 out << querySeq->getName() << '\t';
880 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
882 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
883 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
885 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
887 }else { //print both results
888 if (leftdata.flag == "yes") {
889 out << querySeq->getName() + "_LEFT" << '\t';
890 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
892 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
893 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
895 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
898 if (rightdata.flag == "yes") {
899 if (leftdata.flag == "yes") { out << endl; }
901 out << querySeq->getName() + "_RIGHT"<< '\t';
902 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
904 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
905 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
907 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
912 catch(exception& e) {
913 m->errorOut(e, "ChimeraSlayer", "printBlock");
917 //***************************************************************************************************************
918 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
923 if ((leftChimeric) && (!rightChimeric)) { //get left
924 out += querySeq->getName() + "\t";
925 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
927 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
928 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
930 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
932 }else if ((!leftChimeric) && (rightChimeric)) { //print right
933 out += querySeq->getName() + "\t";
934 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
936 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
937 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
939 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
941 }else { //print both results
943 if (leftdata.flag == "yes") {
944 out += querySeq->getName() + "_LEFT\t";
945 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
947 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
948 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
950 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
953 if (rightdata.flag == "yes") {
954 if (leftdata.flag == "yes") { out += "\n"; }
955 out += querySeq->getName() + "_RIGHT\t";
956 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
958 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
959 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
961 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
968 catch(exception& e) {
969 m->errorOut(e, "ChimeraSlayer", "getBlock");
973 //***************************************************************************************************************
974 string ChimeraSlayer::getBlock(data_struct data, string flag){
977 string outputString = "";
979 outputString += querySeq->getName() + "\t";
980 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
982 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
983 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
985 outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
989 catch(exception& e) {
990 m->errorOut(e, "ChimeraSlayer", "getBlock");
994 //***************************************************************************************************************/