5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
44 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
48 //***************************************************************************************************************
49 //template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD
50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
51 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() {
54 fastafile = file; templateSeqs = readSeqs(fastafile);
55 templateFileName = temp;
78 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
80 if (searchMethod == "distance") {
81 //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
83 //run filter on template copying templateSeqs into filteredTemplateSeqs
84 for (int i = 0; i < templateSeqs.size(); i++) {
85 if (m->control_pressed) { break; }
87 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
89 filteredTemplateSeqs.push_back(newSeq);
94 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
98 //***************************************************************************************************************
100 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
101 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
103 fastafile = file; templateSeqs = readSeqs(fastafile);
104 templateFileName = temp;
123 blastlocation = blas;
127 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
129 if (searchMethod == "distance") {
130 //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
132 //run filter on template copying templateSeqs into filteredTemplateSeqs
133 for (int i = 0; i < templateSeqs.size(); i++) {
134 if (m->control_pressed) { break; }
136 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
138 filteredTemplateSeqs.push_back(newSeq);
142 catch(exception& e) {
143 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
147 //***************************************************************************************************************
148 int ChimeraSlayer::doPrep() {
150 if (searchMethod == "distance") {
151 //read in all query seqs
152 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
154 vector<Sequence*> temp = templateSeqs;
155 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
157 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
159 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
161 if (m->control_pressed) { return 0; }
163 //run filter on template copying templateSeqs into filteredTemplateSeqs
164 for (int i = 0; i < templateSeqs.size(); i++) {
165 if (m->control_pressed) { return 0; }
167 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
169 filteredTemplateSeqs.push_back(newSeq);
172 string kmerDBNameLeft;
173 string kmerDBNameRight;
175 //generate the kmerdb to pass to maligner
176 if (searchMethod == "kmer") {
177 string templatePath = m->hasPath(templateFileName);
178 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
179 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
181 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
182 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
184 for (int i = 0; i < templateSeqs.size(); i++) {
186 if (m->control_pressed) { return 0; }
188 string leftFrag = templateSeqs[i]->getUnaligned();
189 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
191 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
192 databaseLeft->addSequence(leftTemp);
194 databaseLeft->generateDB();
195 databaseLeft->setNumSeqs(templateSeqs.size());
197 for (int i = 0; i < templateSeqs.size(); i++) {
198 if (m->control_pressed) { return 0; }
200 string rightFrag = templateSeqs[i]->getUnaligned();
201 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
203 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
204 databaseRight->addSequence(rightTemp);
206 databaseRight->generateDB();
207 databaseRight->setNumSeqs(templateSeqs.size());
211 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
212 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
213 bool needToGenerateLeft = true;
215 if(kmerFileTestLeft){
216 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
217 if (GoodFile) { needToGenerateLeft = false; }
220 if(needToGenerateLeft){
222 for (int i = 0; i < templateSeqs.size(); i++) {
224 if (m->control_pressed) { return 0; }
226 string leftFrag = templateSeqs[i]->getUnaligned();
227 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
229 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
230 databaseLeft->addSequence(leftTemp);
232 databaseLeft->generateDB();
235 databaseLeft->readKmerDB(kmerFileTestLeft);
237 kmerFileTestLeft.close();
239 databaseLeft->setNumSeqs(templateSeqs.size());
242 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
243 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
244 bool needToGenerateRight = true;
246 if(kmerFileTestRight){
247 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
248 if (GoodFile) { needToGenerateRight = false; }
251 if(needToGenerateRight){
253 for (int i = 0; i < templateSeqs.size(); i++) {
254 if (m->control_pressed) { return 0; }
256 string rightFrag = templateSeqs[i]->getUnaligned();
257 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
259 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
260 databaseRight->addSequence(rightTemp);
262 databaseRight->generateDB();
265 databaseRight->readKmerDB(kmerFileTestRight);
267 kmerFileTestRight.close();
269 databaseRight->setNumSeqs(templateSeqs.size());
271 }else if (searchMethod == "blast") {
274 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
276 if (m->control_pressed) { return 0; }
278 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
279 databaseLeft->generateDB();
280 databaseLeft->setNumSeqs(templateSeqs.size());
286 catch(exception& e) {
287 m->errorOut(e, "ChimeraSlayer", "doprep");
291 //***************************************************************************************************************
292 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
295 //when template=self, the query file is sorted from most abundance to least abundant
296 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
297 vector<Sequence*> userTemplate;
299 int myAbund = priority[q.getName()];
301 for (int i = 0; i < templateSeqs.size(); i++) {
303 if (m->control_pressed) { return userTemplate; }
305 //have I reached a sequence with the same abundance as myself?
306 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
308 //if its am not chimeric add it
309 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
310 userTemplate.push_back(templateSeqs[i]);
311 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
315 //avoids nuisance error from formatdb for making blank blast database
316 if (userTemplate.size() == 0) {
320 string kmerDBNameLeft;
321 string kmerDBNameRight;
323 //generate the kmerdb to pass to maligner
324 if (searchMethod == "kmer") {
325 string templatePath = m->hasPath(templateFileName);
326 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
327 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
329 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
330 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
332 for (int i = 0; i < userTemplate.size(); i++) {
334 if (m->control_pressed) { return userTemplate; }
336 string leftFrag = userTemplate[i]->getUnaligned();
337 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
339 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
340 databaseLeft->addSequence(leftTemp);
342 databaseLeft->generateDB();
343 databaseLeft->setNumSeqs(userTemplate.size());
345 for (int i = 0; i < userTemplate.size(); i++) {
346 if (m->control_pressed) { return userTemplate; }
348 string rightFrag = userTemplate[i]->getUnaligned();
349 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
351 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
352 databaseRight->addSequence(rightTemp);
354 databaseRight->generateDB();
355 databaseRight->setNumSeqs(userTemplate.size());
360 for (int i = 0; i < userTemplate.size(); i++) {
362 if (m->control_pressed) { return userTemplate; }
364 string leftFrag = userTemplate[i]->getUnaligned();
365 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
367 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
368 databaseLeft->addSequence(leftTemp);
370 databaseLeft->generateDB();
371 databaseLeft->setNumSeqs(userTemplate.size());
373 for (int i = 0; i < userTemplate.size(); i++) {
374 if (m->control_pressed) { return userTemplate; }
376 string rightFrag = userTemplate[i]->getUnaligned();
377 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
379 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
380 databaseRight->addSequence(rightTemp);
382 databaseRight->generateDB();
383 databaseRight->setNumSeqs(userTemplate.size());
385 }else if (searchMethod == "blast") {
388 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
390 if (m->control_pressed) { return userTemplate; }
392 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
393 databaseLeft->generateDB();
394 databaseLeft->setNumSeqs(userTemplate.size());
400 catch(exception& e) {
401 m->errorOut(e, "ChimeraSlayer", "getTemplate");
406 //***************************************************************************************************************
407 ChimeraSlayer::~ChimeraSlayer() {
408 if (templateFileName != "self") {
409 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
410 else if (searchMethod == "blast") { delete databaseLeft; }
413 //***************************************************************************************************************
414 void ChimeraSlayer::printHeader(ostream& out) {
415 m->mothurOutEndLine();
416 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
417 m->mothurOutEndLine();
419 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
421 //***************************************************************************************************************
422 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
425 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
427 if (chimeraFlags == "yes") {
428 string chimeraFlag = "no";
429 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
431 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
434 if (chimeraFlag == "yes") {
435 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
436 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
437 outAcc << querySeq.getName() << endl;
439 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
442 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
443 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
445 string newAligned = trim.getAligned();
447 if (lengthLeft > lengthRight) { //trim right
448 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
450 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
452 trim.setAligned(newAligned);
457 printBlock(chimeraResults[0], chimeraFlag, out);
460 out << querySeq.getName() << "\tno" << endl;
466 catch(exception& e) {
467 m->errorOut(e, "ChimeraSlayer", "print");
471 //***************************************************************************************************************
472 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
477 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
478 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
481 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
483 string chimeraFlag = "no";
484 if (leftPiece.flag == "yes") {
486 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
488 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
491 if (rightPiece.flag == "yes") {
492 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
494 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
497 bool rightChimeric = false;
498 bool leftChimeric = false;
500 if (chimeraFlag == "yes") {
501 //which peice is chimeric or are both
502 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
503 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
505 if (rightChimeric || leftChimeric) {
506 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
507 outAcc << querySeq.getName() << endl;
509 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
512 string newAligned = trim.getAligned();
514 //right side is fine so keep that
515 if ((leftChimeric) && (!rightChimeric)) {
516 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
517 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
518 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
519 }else { //both sides are chimeric, keep longest piece
521 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
522 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
524 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
525 int length = lengthLeftLeft;
526 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
528 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
529 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
531 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
532 if (lengthRightRight > length) { longest = 4; }
534 if (longest == 1) { //leftleft
535 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
536 }else if (longest == 2) { //leftright
537 //get rid of leftleft
538 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
540 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
541 }else if (longest == 3) { //rightleft
543 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
544 //get rid of rightright
545 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
548 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
549 //get rid of rightleft
550 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
554 trim.setAligned(newAligned);
560 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
563 out << querySeq.getName() << "\tno" << endl;
569 catch(exception& e) {
570 m->errorOut(e, "ChimeraSlayer", "print");
576 //***************************************************************************************************************
577 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
580 bool results = false;
581 string outAccString = "";
582 string outputString = "";
587 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
588 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
592 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
594 string chimeraFlag = "no";
595 if (leftPiece.flag == "yes") {
597 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
599 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
602 if (rightPiece.flag == "yes") {
603 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
605 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
608 bool rightChimeric = false;
609 bool leftChimeric = false;
613 if (chimeraFlag == "yes") {
614 //which peice is chimeric or are both
615 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
616 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
618 if (rightChimeric || leftChimeric) {
619 cout << querySeq.getName() << "\tyes" << endl;
620 outAccString += querySeq.getName() + "\n";
623 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
625 //write to accnos file
626 int length = outAccString.length();
627 char* buf2 = new char[length];
628 memcpy(buf2, outAccString.c_str(), length);
630 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
634 string newAligned = trim.getAligned();
636 //right side is fine so keep that
637 if ((leftChimeric) && (!rightChimeric)) {
638 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
639 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
640 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
641 }else { //both sides are chimeric, keep longest piece
643 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
644 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
646 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
647 int length = lengthLeftLeft;
648 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
650 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
651 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
653 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
654 if (lengthRightRight > length) { longest = 4; }
656 if (longest == 1) { //leftleft
657 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
658 }else if (longest == 2) { //leftright
659 //get rid of leftleft
660 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
662 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
663 }else if (longest == 3) { //rightleft
665 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
666 //get rid of rightright
667 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
670 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
671 //get rid of rightleft
672 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
676 trim.setAligned(newAligned);
682 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
683 outputString += "\n";
685 //write to output file
686 int length = outputString.length();
687 char* buf = new char[length];
688 memcpy(buf, outputString.c_str(), length);
690 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
694 outputString += querySeq.getName() + "\tno\n";
696 //write to output file
697 int length = outputString.length();
698 char* buf = new char[length];
699 memcpy(buf, outputString.c_str(), length);
701 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
708 catch(exception& e) {
709 m->errorOut(e, "ChimeraSlayer", "print");
713 //***************************************************************************************************************
714 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
717 bool results = false;
718 string outAccString = "";
719 string outputString = "";
722 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
724 if (chimeraFlags == "yes") {
725 string chimeraFlag = "no";
726 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
728 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
731 if (chimeraFlag == "yes") {
732 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
733 cout << querySeq.getName() << "\tyes" << endl;
734 outAccString += querySeq.getName() + "\n";
737 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
739 //write to accnos file
740 int length = outAccString.length();
741 char* buf2 = new char[length];
742 memcpy(buf2, outAccString.c_str(), length);
744 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
748 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
749 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
751 string newAligned = trim.getAligned();
752 if (lengthLeft > lengthRight) { //trim right
753 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
755 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
757 trim.setAligned(newAligned);
762 outputString = getBlock(chimeraResults[0], chimeraFlag);
763 outputString += "\n";
765 //write to output file
766 int length = outputString.length();
767 char* buf = new char[length];
768 memcpy(buf, outputString.c_str(), length);
770 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
774 outputString += querySeq.getName() + "\tno\n";
776 //write to output file
777 int length = outputString.length();
778 char* buf = new char[length];
779 memcpy(buf, outputString.c_str(), length);
781 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
787 catch(exception& e) {
788 m->errorOut(e, "ChimeraSlayer", "print");
794 //***************************************************************************************************************
795 int ChimeraSlayer::getChimeras(Sequence* query) {
798 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
799 printResults.trimQuery = trimQuery;
802 printResults.flag = "no";
806 //you must create a template
807 vector<Sequence*> thisTemplate;
808 vector<Sequence*> thisFilteredTemplate;
809 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
810 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
812 if (m->control_pressed) { return 0; }
813 if (thisTemplate.size() == 0) { return 0; } //not chimeric
815 //moved this out of maligner - 4/29/11
816 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
818 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
819 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
821 if (templateFileName == "self") {
822 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
823 else if (searchMethod == "blast") { delete databaseLeft; }
826 if (m->control_pressed) { return 0; }
828 string chimeraFlag = maligner.getResults(*query, decalc);
830 if (m->control_pressed) { return 0; }
832 vector<results> Results = maligner.getOutput();
834 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
836 if (chimeraFlag == "yes") {
839 vector<string> parents;
840 for (int i = 0; i < Results.size(); i++) {
841 parents.push_back(Results[i].parentAligned);
844 ChimeraReAligner realigner;
845 realigner.reAlign(query, parents);
849 // cout << query->getAligned() << endl;
850 //get sequence that were given from maligner results
851 vector<SeqCompare> seqs;
852 map<string, float> removeDups;
853 map<string, float>::iterator itDup;
854 map<string, string> parentNameSeq;
855 map<string, string>::iterator itSeq;
856 for (int j = 0; j < Results.size(); j++) {
858 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
859 //only add if you are not a duplicate
860 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
863 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
865 itDup = removeDups.find(Results[j].parent);
866 if (itDup == removeDups.end()) { //this is not duplicate
867 removeDups[Results[j].parent] = dist;
868 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
869 }else if (dist > itDup->second) { //is this a stronger number for this parent
870 removeDups[Results[j].parent] = dist;
871 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
878 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
879 itSeq = parentNameSeq.find(itDup->first);
880 Sequence seq(itDup->first, itSeq->second);
884 member.dist = itDup->second;
885 seqs.push_back(member);
888 //limit number of parents to explore - default 3
889 if (Results.size() > parents) {
891 sort(seqs.begin(), seqs.end(), compareSeqCompare);
892 //prioritize larger more similiar sequence fragments
893 reverse(seqs.begin(), seqs.end());
895 //for (int k = seqs.size()-1; k > (parents-1); k--) {
896 // delete seqs[k].seq;
901 //put seqs into vector to send to slayer
903 // cout << query->getAligned() << endl;
904 vector<Sequence> seqsForSlayer;
905 for (int k = 0; k < seqs.size(); k++) {
906 // cout << seqs[k].seq->getAligned() << endl;
907 seqsForSlayer.push_back(seqs[k].seq);
908 // cout << seqs[k].seq->getName() << endl;
911 if (m->control_pressed) { return 0; }
914 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
915 if (m->control_pressed) { return 0; }
916 chimeraResults = slayer.getOutput();
918 printResults.flag = chimeraFlags;
919 printResults.results = chimeraResults;
922 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
924 //cout << endl << endl;
927 catch(exception& e) {
928 m->errorOut(e, "ChimeraSlayer", "getChimeras");
932 //***************************************************************************************************************
933 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
935 out << querySeq.getName() << '\t';
936 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
938 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
939 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
941 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
944 catch(exception& e) {
945 m->errorOut(e, "ChimeraSlayer", "printBlock");
949 //***************************************************************************************************************
950 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
953 if ((leftChimeric) && (!rightChimeric)) { //print left
954 out << querySeq.getName() << '\t';
955 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
957 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
958 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
960 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
962 }else if ((!leftChimeric) && (rightChimeric)) { //print right
963 out << querySeq.getName() << '\t';
964 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
966 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
967 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
969 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
971 }else { //print both results
972 if (leftdata.flag == "yes") {
973 out << querySeq.getName() + "_LEFT" << '\t';
974 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
976 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
977 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
979 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
982 if (rightdata.flag == "yes") {
983 if (leftdata.flag == "yes") { out << endl; }
985 out << querySeq.getName() + "_RIGHT"<< '\t';
986 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
988 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
989 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
991 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
996 catch(exception& e) {
997 m->errorOut(e, "ChimeraSlayer", "printBlock");
1001 //***************************************************************************************************************
1002 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
1007 if ((leftChimeric) && (!rightChimeric)) { //get left
1008 out += querySeq.getName() + "\t";
1009 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1011 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1012 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1014 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1016 }else if ((!leftChimeric) && (rightChimeric)) { //print right
1017 out += querySeq.getName() + "\t";
1018 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1020 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1021 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1023 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
1025 }else { //print both results
1027 if (leftdata.flag == "yes") {
1028 out += querySeq.getName() + "_LEFT\t";
1029 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1031 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1032 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1034 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1037 if (rightdata.flag == "yes") {
1038 if (leftdata.flag == "yes") { out += "\n"; }
1039 out += querySeq.getName() + "_RIGHT\t";
1040 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1042 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1043 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1045 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
1052 catch(exception& e) {
1053 m->errorOut(e, "ChimeraSlayer", "getBlock");
1057 //***************************************************************************************************************
1058 string ChimeraSlayer::getBlock(data_struct data, string flag){
1061 string outputString = "";
1063 outputString += querySeq.getName() + "\t";
1064 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1066 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1067 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1069 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1071 return outputString;
1073 catch(exception& e) {
1074 m->errorOut(e, "ChimeraSlayer", "getBlock");
1078 //***************************************************************************************************************
1079 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1082 vector<Sequence> refSeqs;
1084 if (searchMethod == "distance") {
1085 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1086 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1088 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1090 }else if (searchMethod == "blast") {
1091 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1092 }else if (searchMethod == "kmer") {
1093 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1094 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1098 catch(exception& e) {
1099 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1103 //***************************************************************************************************************/
1104 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1107 vector<Sequence> refResults;
1109 //get parts of query
1110 string queryUnAligned = q.getUnaligned();
1111 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1112 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1113 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1114 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1115 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1117 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1118 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1121 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1122 // vector<int> smaller;
1123 // vector<int> larger;
1125 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1126 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1130 map<int, int>::iterator it;
1131 vector<int> mergedResults;
1134 // for (int i = 0; i < smaller.size(); i++) {
1135 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1137 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1139 //add left if you havent already
1140 it = seen.find(tempIndexesLeft[index]);
1141 if (it == seen.end()) {
1142 mergedResults.push_back(tempIndexesLeft[index]);
1143 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1146 //add right if you havent already
1147 it = seen.find(tempIndexesRight[index]);
1148 if (it == seen.end()) {
1149 mergedResults.push_back(tempIndexesRight[index]);
1150 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1156 for (int i = index; i < tempIndexesLeft.size(); i++) {
1157 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1159 //add right if you havent already
1160 it = seen.find(tempIndexesLeft[i]);
1161 if (it == seen.end()) {
1162 mergedResults.push_back(tempIndexesLeft[i]);
1163 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1167 for (int i = index; i < tempIndexesRight.size(); i++) {
1168 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1170 //add right if you havent already
1171 it = seen.find(tempIndexesRight[i]);
1172 if (it == seen.end()) {
1173 mergedResults.push_back(tempIndexesRight[i]);
1174 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1177 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1178 //cout << qname << endl;
1180 if (mergedResults.size() == 0) { numNoParents++; }
1182 for (int i = 0; i < mergedResults.size(); i++) {
1183 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1184 if (db[mergedResults[i]]->getName() != q.getName()) {
1185 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1186 refResults.push_back(temp);
1189 //cout << endl << endl;
1196 catch(exception& e) {
1197 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1201 //***************************************************************************************************************
1202 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1204 vector<Sequence> refResults;
1206 //get parts of query
1207 string queryUnAligned = q.getUnaligned();
1208 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1209 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1211 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1212 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1214 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1215 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1219 map<int, int>::iterator it;
1220 vector<int> mergedResults;
1223 // for (int i = 0; i < smaller.size(); i++) {
1224 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1226 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1228 //add left if you havent already
1229 it = seen.find(tempIndexesLeft[index]);
1230 if (it == seen.end()) {
1231 mergedResults.push_back(tempIndexesLeft[index]);
1232 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1235 //add right if you havent already
1236 it = seen.find(tempIndexesRight[index]);
1237 if (it == seen.end()) {
1238 mergedResults.push_back(tempIndexesRight[index]);
1239 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1245 for (int i = index; i < tempIndexesLeft.size(); i++) {
1246 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1248 //add right if you havent already
1249 it = seen.find(tempIndexesLeft[i]);
1250 if (it == seen.end()) {
1251 mergedResults.push_back(tempIndexesLeft[i]);
1252 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1256 for (int i = index; i < tempIndexesRight.size(); i++) {
1257 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1259 //add right if you havent already
1260 it = seen.find(tempIndexesRight[i]);
1261 if (it == seen.end()) {
1262 mergedResults.push_back(tempIndexesRight[i]);
1263 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1267 for (int i = 0; i < mergedResults.size(); i++) {
1268 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1269 if (db[mergedResults[i]]->getName() != q.getName()) {
1270 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1271 refResults.push_back(temp);
1282 catch(exception& e) {
1283 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1287 //***************************************************************************************************************