5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
42 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
46 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
72 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74 if (searchMethod == "distance") {
75 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
77 //run filter on template copying templateSeqs into filteredTemplateSeqs
78 for (int i = 0; i < templateSeqs.size(); i++) {
79 if (m->control_pressed) { break; }
81 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
83 filteredTemplateSeqs.push_back(newSeq);
88 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
92 //***************************************************************************************************************
93 int ChimeraSlayer::doPrep() {
95 if (searchMethod == "distance") {
96 //read in all query seqs
97 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
99 vector<Sequence*> temp = templateSeqs;
100 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
102 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
104 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
106 if (m->control_pressed) { return 0; }
108 //run filter on template copying templateSeqs into filteredTemplateSeqs
109 for (int i = 0; i < templateSeqs.size(); i++) {
110 if (m->control_pressed) { return 0; }
112 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
114 filteredTemplateSeqs.push_back(newSeq);
117 string kmerDBNameLeft;
118 string kmerDBNameRight;
120 //generate the kmerdb to pass to maligner
121 if (searchMethod == "kmer") {
122 string templatePath = m->hasPath(templateFileName);
123 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
124 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
126 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
127 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
129 for (int i = 0; i < templateSeqs.size(); i++) {
131 if (m->control_pressed) { return 0; }
133 string leftFrag = templateSeqs[i]->getUnaligned();
134 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
136 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
137 databaseLeft->addSequence(leftTemp);
139 databaseLeft->generateDB();
140 databaseLeft->setNumSeqs(templateSeqs.size());
142 for (int i = 0; i < templateSeqs.size(); i++) {
143 if (m->control_pressed) { return 0; }
145 string rightFrag = templateSeqs[i]->getUnaligned();
146 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
148 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
149 databaseRight->addSequence(rightTemp);
151 databaseRight->generateDB();
152 databaseRight->setNumSeqs(templateSeqs.size());
156 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
157 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
158 bool needToGenerateLeft = true;
160 if(kmerFileTestLeft){
161 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
162 if (GoodFile) { needToGenerateLeft = false; }
165 if(needToGenerateLeft){
167 for (int i = 0; i < templateSeqs.size(); i++) {
169 if (m->control_pressed) { return 0; }
171 string leftFrag = templateSeqs[i]->getUnaligned();
172 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
174 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
175 databaseLeft->addSequence(leftTemp);
177 databaseLeft->generateDB();
180 databaseLeft->readKmerDB(kmerFileTestLeft);
182 kmerFileTestLeft.close();
184 databaseLeft->setNumSeqs(templateSeqs.size());
187 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
188 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
189 bool needToGenerateRight = true;
191 if(kmerFileTestRight){
192 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
193 if (GoodFile) { needToGenerateRight = false; }
196 if(needToGenerateRight){
198 for (int i = 0; i < templateSeqs.size(); i++) {
199 if (m->control_pressed) { return 0; }
201 string rightFrag = templateSeqs[i]->getUnaligned();
202 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
204 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
205 databaseRight->addSequence(rightTemp);
207 databaseRight->generateDB();
210 databaseRight->readKmerDB(kmerFileTestRight);
212 kmerFileTestRight.close();
214 databaseRight->setNumSeqs(templateSeqs.size());
216 }else if (searchMethod == "blast") {
219 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
221 if (m->control_pressed) { return 0; }
223 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
224 databaseLeft->generateDB();
225 databaseLeft->setNumSeqs(templateSeqs.size());
231 catch(exception& e) {
232 m->errorOut(e, "ChimeraSlayer", "doprep");
236 //***************************************************************************************************************
237 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
240 //when template=self, the query file is sorted from most abundance to least abundant
241 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
242 vector<Sequence*> userTemplate;
244 int myAbund = priority[q.getName()];
246 for (int i = 0; i < templateSeqs.size(); i++) {
248 if (m->control_pressed) { return userTemplate; }
250 //have I reached a sequence with the same abundance as myself?
251 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
253 //if its am not chimeric add it
254 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
255 userTemplate.push_back(templateSeqs[i]);
256 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
260 //avoids nuisance error from formatdb for making blank blast database
261 if (userTemplate.size() == 0) {
265 string kmerDBNameLeft;
266 string kmerDBNameRight;
268 //generate the kmerdb to pass to maligner
269 if (searchMethod == "kmer") {
270 string templatePath = m->hasPath(templateFileName);
271 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
272 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
274 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
275 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
277 for (int i = 0; i < userTemplate.size(); i++) {
279 if (m->control_pressed) { return userTemplate; }
281 string leftFrag = userTemplate[i]->getUnaligned();
282 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
284 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
285 databaseLeft->addSequence(leftTemp);
287 databaseLeft->generateDB();
288 databaseLeft->setNumSeqs(userTemplate.size());
290 for (int i = 0; i < userTemplate.size(); i++) {
291 if (m->control_pressed) { return userTemplate; }
293 string rightFrag = userTemplate[i]->getUnaligned();
294 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
296 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
297 databaseRight->addSequence(rightTemp);
299 databaseRight->generateDB();
300 databaseRight->setNumSeqs(userTemplate.size());
305 for (int i = 0; i < userTemplate.size(); i++) {
307 if (m->control_pressed) { return userTemplate; }
309 string leftFrag = userTemplate[i]->getUnaligned();
310 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
312 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
313 databaseLeft->addSequence(leftTemp);
315 databaseLeft->generateDB();
316 databaseLeft->setNumSeqs(userTemplate.size());
318 for (int i = 0; i < userTemplate.size(); i++) {
319 if (m->control_pressed) { return userTemplate; }
321 string rightFrag = userTemplate[i]->getUnaligned();
322 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
324 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
325 databaseRight->addSequence(rightTemp);
327 databaseRight->generateDB();
328 databaseRight->setNumSeqs(userTemplate.size());
330 }else if (searchMethod == "blast") {
333 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
335 if (m->control_pressed) { return userTemplate; }
337 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
338 databaseLeft->generateDB();
339 databaseLeft->setNumSeqs(userTemplate.size());
345 catch(exception& e) {
346 m->errorOut(e, "ChimeraSlayer", "getTemplate");
351 //***************************************************************************************************************
352 ChimeraSlayer::~ChimeraSlayer() {
353 if (templateFileName != "self") {
354 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
355 else if (searchMethod == "blast") { delete databaseLeft; }
358 //***************************************************************************************************************
359 void ChimeraSlayer::printHeader(ostream& out) {
360 m->mothurOutEndLine();
361 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
362 m->mothurOutEndLine();
364 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
366 //***************************************************************************************************************
367 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
370 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
372 if (chimeraFlags == "yes") {
373 string chimeraFlag = "no";
374 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
376 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
379 if (chimeraFlag == "yes") {
380 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
381 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
382 outAcc << querySeq.getName() << endl;
384 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
387 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
388 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
390 string newAligned = trim.getAligned();
392 if (lengthLeft > lengthRight) { //trim right
393 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
395 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
397 trim.setAligned(newAligned);
402 printBlock(chimeraResults[0], chimeraFlag, out);
405 out << querySeq.getName() << "\tno" << endl;
411 catch(exception& e) {
412 m->errorOut(e, "ChimeraSlayer", "print");
416 //***************************************************************************************************************
417 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
422 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
423 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
426 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
428 string chimeraFlag = "no";
429 if (leftPiece.flag == "yes") {
431 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
433 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
436 if (rightPiece.flag == "yes") {
437 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
439 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
442 bool rightChimeric = false;
443 bool leftChimeric = false;
445 if (chimeraFlag == "yes") {
446 //which peice is chimeric or are both
447 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
448 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
450 if (rightChimeric || leftChimeric) {
451 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
452 outAcc << querySeq.getName() << endl;
454 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
457 string newAligned = trim.getAligned();
459 //right side is fine so keep that
460 if ((leftChimeric) && (!rightChimeric)) {
461 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
462 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
463 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
464 }else { //both sides are chimeric, keep longest piece
466 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
467 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
469 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
470 int length = lengthLeftLeft;
471 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
473 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
474 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
476 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
477 if (lengthRightRight > length) { longest = 4; }
479 if (longest == 1) { //leftleft
480 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
481 }else if (longest == 2) { //leftright
482 //get rid of leftleft
483 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
485 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
486 }else if (longest == 3) { //rightleft
488 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
489 //get rid of rightright
490 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
493 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
494 //get rid of rightleft
495 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
499 trim.setAligned(newAligned);
505 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
508 out << querySeq.getName() << "\tno" << endl;
514 catch(exception& e) {
515 m->errorOut(e, "ChimeraSlayer", "print");
521 //***************************************************************************************************************
522 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
525 bool results = false;
526 string outAccString = "";
527 string outputString = "";
532 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
533 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
537 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
539 string chimeraFlag = "no";
540 if (leftPiece.flag == "yes") {
542 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
544 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
547 if (rightPiece.flag == "yes") {
548 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
550 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
553 bool rightChimeric = false;
554 bool leftChimeric = false;
558 if (chimeraFlag == "yes") {
559 //which peice is chimeric or are both
560 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
561 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
563 if (rightChimeric || leftChimeric) {
564 cout << querySeq.getName() << "\tyes" << endl;
565 outAccString += querySeq.getName() + "\n";
568 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
570 //write to accnos file
571 int length = outAccString.length();
572 char* buf2 = new char[length];
573 memcpy(buf2, outAccString.c_str(), length);
575 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
579 string newAligned = trim.getAligned();
581 //right side is fine so keep that
582 if ((leftChimeric) && (!rightChimeric)) {
583 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
584 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
585 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586 }else { //both sides are chimeric, keep longest piece
588 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
589 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
591 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
592 int length = lengthLeftLeft;
593 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
595 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
596 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
598 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
599 if (lengthRightRight > length) { longest = 4; }
601 if (longest == 1) { //leftleft
602 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
603 }else if (longest == 2) { //leftright
604 //get rid of leftleft
605 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
607 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
608 }else if (longest == 3) { //rightleft
610 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
611 //get rid of rightright
612 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
615 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
616 //get rid of rightleft
617 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
621 trim.setAligned(newAligned);
627 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
628 outputString += "\n";
630 //write to output file
631 int length = outputString.length();
632 char* buf = new char[length];
633 memcpy(buf, outputString.c_str(), length);
635 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
639 outputString += querySeq.getName() + "\tno\n";
641 //write to output file
642 int length = outputString.length();
643 char* buf = new char[length];
644 memcpy(buf, outputString.c_str(), length);
646 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
653 catch(exception& e) {
654 m->errorOut(e, "ChimeraSlayer", "print");
658 //***************************************************************************************************************
659 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
662 bool results = false;
663 string outAccString = "";
664 string outputString = "";
667 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
669 if (chimeraFlags == "yes") {
670 string chimeraFlag = "no";
671 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
673 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
676 if (chimeraFlag == "yes") {
677 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
678 cout << querySeq.getName() << "\tyes" << endl;
679 outAccString += querySeq.getName() + "\n";
682 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
684 //write to accnos file
685 int length = outAccString.length();
686 char* buf2 = new char[length];
687 memcpy(buf2, outAccString.c_str(), length);
689 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
693 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
694 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
696 string newAligned = trim.getAligned();
697 if (lengthLeft > lengthRight) { //trim right
698 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
700 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
702 trim.setAligned(newAligned);
707 outputString = getBlock(chimeraResults[0], chimeraFlag);
708 outputString += "\n";
710 //write to output file
711 int length = outputString.length();
712 char* buf = new char[length];
713 memcpy(buf, outputString.c_str(), length);
715 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
719 outputString += querySeq.getName() + "\tno\n";
721 //write to output file
722 int length = outputString.length();
723 char* buf = new char[length];
724 memcpy(buf, outputString.c_str(), length);
726 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
732 catch(exception& e) {
733 m->errorOut(e, "ChimeraSlayer", "print");
739 //***************************************************************************************************************
740 int ChimeraSlayer::getChimeras(Sequence* query) {
743 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
744 printResults.trimQuery = trimQuery;
747 printResults.flag = "no";
751 //you must create a template
752 vector<Sequence*> thisTemplate;
753 vector<Sequence*> thisFilteredTemplate;
754 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
755 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
757 if (m->control_pressed) { return 0; }
758 if (thisTemplate.size() == 0) { return 0; } //not chimeric
760 //moved this out of maligner - 4/29/11
761 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
763 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
764 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
766 if (templateFileName == "self") {
767 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
768 else if (searchMethod == "blast") { delete databaseLeft; }
771 if (m->control_pressed) { return 0; }
773 string chimeraFlag = maligner.getResults(*query, decalc);
775 if (m->control_pressed) { return 0; }
777 vector<results> Results = maligner.getOutput();
779 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
781 if (chimeraFlag == "yes") {
784 vector<string> parents;
785 for (int i = 0; i < Results.size(); i++) {
786 parents.push_back(Results[i].parentAligned);
789 ChimeraReAligner realigner;
790 realigner.reAlign(query, parents);
794 // cout << query->getAligned() << endl;
795 //get sequence that were given from maligner results
796 vector<SeqCompare> seqs;
797 map<string, float> removeDups;
798 map<string, float>::iterator itDup;
799 map<string, string> parentNameSeq;
800 map<string, string>::iterator itSeq;
801 for (int j = 0; j < Results.size(); j++) {
803 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
804 //only add if you are not a duplicate
805 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
808 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
810 itDup = removeDups.find(Results[j].parent);
811 if (itDup == removeDups.end()) { //this is not duplicate
812 removeDups[Results[j].parent] = dist;
813 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
814 }else if (dist > itDup->second) { //is this a stronger number for this parent
815 removeDups[Results[j].parent] = dist;
816 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
823 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
824 itSeq = parentNameSeq.find(itDup->first);
825 Sequence seq(itDup->first, itSeq->second);
829 member.dist = itDup->second;
830 seqs.push_back(member);
833 //limit number of parents to explore - default 3
834 if (Results.size() > parents) {
836 sort(seqs.begin(), seqs.end(), compareSeqCompare);
837 //prioritize larger more similiar sequence fragments
838 reverse(seqs.begin(), seqs.end());
840 //for (int k = seqs.size()-1; k > (parents-1); k--) {
841 // delete seqs[k].seq;
846 //put seqs into vector to send to slayer
848 // cout << query->getAligned() << endl;
849 vector<Sequence> seqsForSlayer;
850 for (int k = 0; k < seqs.size(); k++) {
851 // cout << seqs[k].seq->getAligned() << endl;
852 seqsForSlayer.push_back(seqs[k].seq);
853 // cout << seqs[k].seq->getName() << endl;
856 if (m->control_pressed) { return 0; }
859 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
860 if (m->control_pressed) { return 0; }
861 chimeraResults = slayer.getOutput();
863 printResults.flag = chimeraFlags;
864 printResults.results = chimeraResults;
867 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
869 //cout << endl << endl;
872 catch(exception& e) {
873 m->errorOut(e, "ChimeraSlayer", "getChimeras");
877 //***************************************************************************************************************
878 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
880 out << querySeq.getName() << '\t';
881 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
883 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
884 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
886 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
889 catch(exception& e) {
890 m->errorOut(e, "ChimeraSlayer", "printBlock");
894 //***************************************************************************************************************
895 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
898 if ((leftChimeric) && (!rightChimeric)) { //print left
899 out << querySeq.getName() << '\t';
900 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
902 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
903 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
905 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
907 }else if ((!leftChimeric) && (rightChimeric)) { //print right
908 out << querySeq.getName() << '\t';
909 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
911 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
912 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
914 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
916 }else { //print both results
917 if (leftdata.flag == "yes") {
918 out << querySeq.getName() + "_LEFT" << '\t';
919 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
921 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
922 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
924 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
927 if (rightdata.flag == "yes") {
928 if (leftdata.flag == "yes") { out << endl; }
930 out << querySeq.getName() + "_RIGHT"<< '\t';
931 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
933 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
934 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
936 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
941 catch(exception& e) {
942 m->errorOut(e, "ChimeraSlayer", "printBlock");
946 //***************************************************************************************************************
947 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
952 if ((leftChimeric) && (!rightChimeric)) { //get left
953 out += querySeq.getName() + "\t";
954 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
956 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
957 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
959 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
961 }else if ((!leftChimeric) && (rightChimeric)) { //print right
962 out += querySeq.getName() + "\t";
963 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
965 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
966 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
968 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
970 }else { //print both results
972 if (leftdata.flag == "yes") {
973 out += querySeq.getName() + "_LEFT\t";
974 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
976 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
977 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
979 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
982 if (rightdata.flag == "yes") {
983 if (leftdata.flag == "yes") { out += "\n"; }
984 out += querySeq.getName() + "_RIGHT\t";
985 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
987 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
988 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
990 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
997 catch(exception& e) {
998 m->errorOut(e, "ChimeraSlayer", "getBlock");
1002 //***************************************************************************************************************
1003 string ChimeraSlayer::getBlock(data_struct data, string flag){
1006 string outputString = "";
1008 outputString += querySeq.getName() + "\t";
1009 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1011 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1012 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1014 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1016 return outputString;
1018 catch(exception& e) {
1019 m->errorOut(e, "ChimeraSlayer", "getBlock");
1023 //***************************************************************************************************************
1024 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1027 vector<Sequence> refSeqs;
1029 if (searchMethod == "distance") {
1030 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1031 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1033 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1035 }else if (searchMethod == "blast") {
1036 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1037 }else if (searchMethod == "kmer") {
1038 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1039 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1043 catch(exception& e) {
1044 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1048 //***************************************************************************************************************/
1049 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1052 vector<Sequence> refResults;
1054 //get parts of query
1055 string queryUnAligned = q.getUnaligned();
1056 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1057 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1058 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1059 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1060 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1062 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1063 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1066 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1067 // vector<int> smaller;
1068 // vector<int> larger;
1070 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1071 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1075 map<int, int>::iterator it;
1076 vector<int> mergedResults;
1079 // for (int i = 0; i < smaller.size(); i++) {
1080 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1082 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1084 //add left if you havent already
1085 it = seen.find(tempIndexesLeft[index]);
1086 if (it == seen.end()) {
1087 mergedResults.push_back(tempIndexesLeft[index]);
1088 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1091 //add right if you havent already
1092 it = seen.find(tempIndexesRight[index]);
1093 if (it == seen.end()) {
1094 mergedResults.push_back(tempIndexesRight[index]);
1095 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1101 for (int i = index; i < tempIndexesLeft.size(); i++) {
1102 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1104 //add right if you havent already
1105 it = seen.find(tempIndexesLeft[i]);
1106 if (it == seen.end()) {
1107 mergedResults.push_back(tempIndexesLeft[i]);
1108 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1112 for (int i = index; i < tempIndexesRight.size(); i++) {
1113 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1115 //add right if you havent already
1116 it = seen.find(tempIndexesRight[i]);
1117 if (it == seen.end()) {
1118 mergedResults.push_back(tempIndexesRight[i]);
1119 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1122 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1123 //cout << qname << endl;
1125 if (mergedResults.size() == 0) { numNoParents++; }
1127 for (int i = 0; i < mergedResults.size(); i++) {
1128 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1129 if (db[mergedResults[i]]->getName() != q.getName()) {
1130 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1131 refResults.push_back(temp);
1134 //cout << endl << endl;
1141 catch(exception& e) {
1142 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1146 //***************************************************************************************************************
1147 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1149 vector<Sequence> refResults;
1151 //get parts of query
1152 string queryUnAligned = q.getUnaligned();
1153 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1154 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1156 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1157 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1159 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1160 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1164 map<int, int>::iterator it;
1165 vector<int> mergedResults;
1168 // for (int i = 0; i < smaller.size(); i++) {
1169 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1171 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1173 //add left if you havent already
1174 it = seen.find(tempIndexesLeft[index]);
1175 if (it == seen.end()) {
1176 mergedResults.push_back(tempIndexesLeft[index]);
1177 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1180 //add right if you havent already
1181 it = seen.find(tempIndexesRight[index]);
1182 if (it == seen.end()) {
1183 mergedResults.push_back(tempIndexesRight[index]);
1184 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1190 for (int i = index; i < tempIndexesLeft.size(); i++) {
1191 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1193 //add right if you havent already
1194 it = seen.find(tempIndexesLeft[i]);
1195 if (it == seen.end()) {
1196 mergedResults.push_back(tempIndexesLeft[i]);
1197 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1201 for (int i = index; i < tempIndexesRight.size(); i++) {
1202 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1204 //add right if you havent already
1205 it = seen.find(tempIndexesRight[i]);
1206 if (it == seen.end()) {
1207 mergedResults.push_back(tempIndexesRight[i]);
1208 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1212 for (int i = 0; i < mergedResults.size(); i++) {
1213 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1214 if (db[mergedResults[i]]->getName() != q.getName()) {
1215 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1216 refResults.push_back(temp);
1227 catch(exception& e) {
1228 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1232 //***************************************************************************************************************