5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
44 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
48 //***************************************************************************************************************
49 //template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD
50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
51 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera() {
54 fastafile = file; templateSeqs = readSeqs(fastafile);
55 templateFileName = temp;
78 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
80 if (searchMethod == "distance") {
81 //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
83 //run filter on template copying templateSeqs into filteredTemplateSeqs
84 for (int i = 0; i < templateSeqs.size(); i++) {
85 if (m->control_pressed) { break; }
87 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
89 filteredTemplateSeqs.push_back(newSeq);
94 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
98 //***************************************************************************************************************
100 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
101 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
103 fastafile = file; templateSeqs = readSeqs(fastafile);
104 templateFileName = temp;
123 blastlocation = blas;
127 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
129 if (searchMethod == "distance") {
130 //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
132 //run filter on template copying templateSeqs into filteredTemplateSeqs
133 for (int i = 0; i < templateSeqs.size(); i++) {
134 if (m->control_pressed) { break; }
136 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
138 filteredTemplateSeqs.push_back(newSeq);
142 catch(exception& e) {
143 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
147 //***************************************************************************************************************
148 int ChimeraSlayer::doPrep() {
150 if (searchMethod == "distance") {
151 //read in all query seqs
152 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
154 vector<Sequence*> temp = templateSeqs;
155 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
157 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
159 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
161 if (m->control_pressed) { return 0; }
163 //run filter on template copying templateSeqs into filteredTemplateSeqs
164 for (int i = 0; i < templateSeqs.size(); i++) {
165 if (m->control_pressed) { return 0; }
167 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
169 filteredTemplateSeqs.push_back(newSeq);
172 string kmerDBNameLeft;
173 string kmerDBNameRight;
175 //generate the kmerdb to pass to maligner
176 if (searchMethod == "kmer") {
177 string templatePath = m->hasPath(templateFileName);
178 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
179 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
181 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
182 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
184 for (int i = 0; i < templateSeqs.size(); i++) {
186 if (m->control_pressed) { return 0; }
188 string leftFrag = templateSeqs[i]->getUnaligned();
189 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
191 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
192 databaseLeft->addSequence(leftTemp);
194 databaseLeft->generateDB();
195 databaseLeft->setNumSeqs(templateSeqs.size());
197 for (int i = 0; i < templateSeqs.size(); i++) {
198 if (m->control_pressed) { return 0; }
200 string rightFrag = templateSeqs[i]->getUnaligned();
201 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
203 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
204 databaseRight->addSequence(rightTemp);
206 databaseRight->generateDB();
207 databaseRight->setNumSeqs(templateSeqs.size());
211 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
212 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
213 bool needToGenerateLeft = true;
215 if(kmerFileTestLeft){
216 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
217 if (GoodFile) { needToGenerateLeft = false; }
220 if(needToGenerateLeft){
222 for (int i = 0; i < templateSeqs.size(); i++) {
224 if (m->control_pressed) { return 0; }
226 string leftFrag = templateSeqs[i]->getUnaligned();
227 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
229 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
230 databaseLeft->addSequence(leftTemp);
232 databaseLeft->generateDB();
235 databaseLeft->readKmerDB(kmerFileTestLeft);
237 kmerFileTestLeft.close();
239 databaseLeft->setNumSeqs(templateSeqs.size());
242 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
243 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
244 bool needToGenerateRight = true;
246 if(kmerFileTestRight){
247 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
248 if (GoodFile) { needToGenerateRight = false; }
251 if(needToGenerateRight){
253 for (int i = 0; i < templateSeqs.size(); i++) {
254 if (m->control_pressed) { return 0; }
256 string rightFrag = templateSeqs[i]->getUnaligned();
257 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
259 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
260 databaseRight->addSequence(rightTemp);
262 databaseRight->generateDB();
265 databaseRight->readKmerDB(kmerFileTestRight);
267 kmerFileTestRight.close();
269 databaseRight->setNumSeqs(templateSeqs.size());
271 }else if (searchMethod == "blast") {
274 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
276 if (m->control_pressed) { return 0; }
278 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
279 databaseLeft->generateDB();
280 databaseLeft->setNumSeqs(templateSeqs.size());
286 catch(exception& e) {
287 m->errorOut(e, "ChimeraSlayer", "doprep");
291 //***************************************************************************************************************
292 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
295 //when template=self, the query file is sorted from most abundance to least abundant
296 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
297 vector<Sequence*> userTemplate;
299 int myAbund = priority[q.getName()];
301 for (int i = 0; i < templateSeqs.size(); i++) {
303 if (m->control_pressed) { return userTemplate; }
305 //have I reached a sequence with the same abundance as myself?
306 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
308 //if its am not chimeric add it
309 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
310 userTemplate.push_back(templateSeqs[i]);
311 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
315 //avoids nuisance error from formatdb for making blank blast database
316 if (userTemplate.size() == 0) {
320 string kmerDBNameLeft;
321 string kmerDBNameRight;
323 //generate the kmerdb to pass to maligner
324 if (searchMethod == "kmer") {
325 string templatePath = m->hasPath(templateFileName);
326 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
327 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
329 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
330 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
332 for (int i = 0; i < userTemplate.size(); i++) {
334 if (m->control_pressed) { return userTemplate; }
336 string leftFrag = userTemplate[i]->getUnaligned();
337 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
339 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
340 databaseLeft->addSequence(leftTemp);
342 databaseLeft->generateDB();
343 databaseLeft->setNumSeqs(userTemplate.size());
345 for (int i = 0; i < userTemplate.size(); i++) {
346 if (m->control_pressed) { return userTemplate; }
348 string rightFrag = userTemplate[i]->getUnaligned();
349 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
351 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
352 databaseRight->addSequence(rightTemp);
354 databaseRight->generateDB();
355 databaseRight->setNumSeqs(userTemplate.size());
360 for (int i = 0; i < userTemplate.size(); i++) {
362 if (m->control_pressed) { return userTemplate; }
364 string leftFrag = userTemplate[i]->getUnaligned();
365 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
367 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
368 databaseLeft->addSequence(leftTemp);
370 databaseLeft->generateDB();
371 databaseLeft->setNumSeqs(userTemplate.size());
373 for (int i = 0; i < userTemplate.size(); i++) {
374 if (m->control_pressed) { return userTemplate; }
376 string rightFrag = userTemplate[i]->getUnaligned();
377 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
379 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
380 databaseRight->addSequence(rightTemp);
382 databaseRight->generateDB();
383 databaseRight->setNumSeqs(userTemplate.size());
385 }else if (searchMethod == "blast") {
388 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
390 if (m->control_pressed) { return userTemplate; }
392 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
393 databaseLeft->generateDB();
394 databaseLeft->setNumSeqs(userTemplate.size());
400 catch(exception& e) {
401 m->errorOut(e, "ChimeraSlayer", "getTemplate");
406 //***************************************************************************************************************
407 ChimeraSlayer::~ChimeraSlayer() {
408 if (templateFileName != "self") {
409 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
410 else if (searchMethod == "blast") { delete databaseLeft; }
413 //***************************************************************************************************************
414 void ChimeraSlayer::printHeader(ostream& out) {
415 m->mothurOutEndLine();
416 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
417 m->mothurOutEndLine();
419 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
421 //***************************************************************************************************************
422 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
425 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
427 if (chimeraFlags == "yes") {
428 string chimeraFlag = "no";
429 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
431 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
434 if (chimeraFlag == "yes") {
435 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
436 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
437 outAcc << querySeq.getName() << endl;
439 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
442 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
443 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
445 string newAligned = trim.getAligned();
447 if (lengthLeft > lengthRight) { //trim right
448 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
450 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
452 trim.setAligned(newAligned);
457 printBlock(chimeraResults[0], chimeraFlag, out);
460 out << querySeq.getName() << "\tno" << endl;
466 catch(exception& e) {
467 m->errorOut(e, "ChimeraSlayer", "print");
471 //***************************************************************************************************************
472 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
477 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
478 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
481 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
483 string chimeraFlag = "no";
484 if (leftPiece.flag == "yes") {
486 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
488 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
491 if (rightPiece.flag == "yes") {
492 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
494 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
497 bool rightChimeric = false;
498 bool leftChimeric = false;
500 if (chimeraFlag == "yes") {
501 //which peice is chimeric or are both
502 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
503 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
505 if (rightChimeric || leftChimeric) {
506 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
507 outAcc << querySeq.getName() << endl;
509 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
512 string newAligned = trim.getAligned();
514 //right side is fine so keep that
515 if ((leftChimeric) && (!rightChimeric)) {
516 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
517 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
518 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
519 }else { //both sides are chimeric, keep longest piece
521 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
522 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
524 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
525 int length = lengthLeftLeft;
526 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
528 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
529 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
531 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
532 if (lengthRightRight > length) { longest = 4; }
534 if (longest == 1) { //leftleft
535 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
536 }else if (longest == 2) { //leftright
537 //get rid of leftleft
538 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
540 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
541 }else if (longest == 3) { //rightleft
543 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
544 //get rid of rightright
545 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
548 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
549 //get rid of rightleft
550 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
554 trim.setAligned(newAligned);
560 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
563 out << querySeq.getName() << "\tno" << endl;
569 catch(exception& e) {
570 m->errorOut(e, "ChimeraSlayer", "print");
576 //***************************************************************************************************************
577 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece, bool& chimFlag) {
580 bool results = false;
581 string outAccString = "";
582 string outputString = "";
588 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
589 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
593 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
595 string chimeraFlag = "no";
596 if (leftPiece.flag == "yes") {
598 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
600 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
603 if (rightPiece.flag == "yes") {
604 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
606 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
609 bool rightChimeric = false;
610 bool leftChimeric = false;
614 if (chimeraFlag == "yes") {
615 //which peice is chimeric or are both
616 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
617 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
619 if (rightChimeric || leftChimeric) {
620 cout << querySeq.getName() << "\tyes" << endl;
621 outAccString += querySeq.getName() + "\n";
624 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
626 //write to accnos file
627 int length = outAccString.length();
628 char* buf2 = new char[length];
629 memcpy(buf2, outAccString.c_str(), length);
631 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
636 string newAligned = trim.getAligned();
638 //right side is fine so keep that
639 if ((leftChimeric) && (!rightChimeric)) {
640 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
641 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
642 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
643 }else { //both sides are chimeric, keep longest piece
645 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
646 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
648 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
649 int length = lengthLeftLeft;
650 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
652 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
653 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
655 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
656 if (lengthRightRight > length) { longest = 4; }
658 if (longest == 1) { //leftleft
659 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
660 }else if (longest == 2) { //leftright
661 //get rid of leftleft
662 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
664 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
665 }else if (longest == 3) { //rightleft
667 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
668 //get rid of rightright
669 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
672 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
673 //get rid of rightleft
674 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
678 trim.setAligned(newAligned);
684 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
685 outputString += "\n";
687 //write to output file
688 int length = outputString.length();
689 char* buf = new char[length];
690 memcpy(buf, outputString.c_str(), length);
692 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
696 outputString += querySeq.getName() + "\tno\n";
698 //write to output file
699 int length = outputString.length();
700 char* buf = new char[length];
701 memcpy(buf, outputString.c_str(), length);
703 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
710 catch(exception& e) {
711 m->errorOut(e, "ChimeraSlayer", "print");
715 //***************************************************************************************************************
716 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
719 bool results = false;
720 string outAccString = "";
721 string outputString = "";
724 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
726 if (chimeraFlags == "yes") {
727 string chimeraFlag = "no";
728 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
730 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
733 if (chimeraFlag == "yes") {
734 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
735 cout << querySeq.getName() << "\tyes" << endl;
736 outAccString += querySeq.getName() + "\n";
739 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
741 //write to accnos file
742 int length = outAccString.length();
743 char* buf2 = new char[length];
744 memcpy(buf2, outAccString.c_str(), length);
746 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
750 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
751 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
753 string newAligned = trim.getAligned();
754 if (lengthLeft > lengthRight) { //trim right
755 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
757 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
759 trim.setAligned(newAligned);
764 outputString = getBlock(chimeraResults[0], chimeraFlag);
765 outputString += "\n";
767 //write to output file
768 int length = outputString.length();
769 char* buf = new char[length];
770 memcpy(buf, outputString.c_str(), length);
772 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
776 outputString += querySeq.getName() + "\tno\n";
778 //write to output file
779 int length = outputString.length();
780 char* buf = new char[length];
781 memcpy(buf, outputString.c_str(), length);
783 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
789 catch(exception& e) {
790 m->errorOut(e, "ChimeraSlayer", "print");
796 //***************************************************************************************************************
797 int ChimeraSlayer::getChimeras(Sequence* query) {
800 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
801 printResults.trimQuery = trimQuery;
804 printResults.flag = "no";
808 //you must create a template
809 vector<Sequence*> thisTemplate;
810 vector<Sequence*> thisFilteredTemplate;
811 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
812 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
814 if (m->control_pressed) { return 0; }
815 if (thisTemplate.size() == 0) { return 0; } //not chimeric
817 //moved this out of maligner - 4/29/11
818 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
820 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
821 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
823 if (templateFileName == "self") {
824 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
825 else if (searchMethod == "blast") { delete databaseLeft; }
828 if (m->control_pressed) { return 0; }
830 string chimeraFlag = maligner.getResults(*query, decalc);
832 if (m->control_pressed) { return 0; }
834 vector<results> Results = maligner.getOutput();
836 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
838 if (chimeraFlag == "yes") {
841 vector<string> parents;
842 for (int i = 0; i < Results.size(); i++) {
843 parents.push_back(Results[i].parentAligned);
846 ChimeraReAligner realigner;
847 realigner.reAlign(query, parents);
851 // cout << query->getAligned() << endl;
852 //get sequence that were given from maligner results
853 vector<SeqCompare> seqs;
854 map<string, float> removeDups;
855 map<string, float>::iterator itDup;
856 map<string, string> parentNameSeq;
857 map<string, string>::iterator itSeq;
858 for (int j = 0; j < Results.size(); j++) {
860 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
861 //only add if you are not a duplicate
862 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
865 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
867 itDup = removeDups.find(Results[j].parent);
868 if (itDup == removeDups.end()) { //this is not duplicate
869 removeDups[Results[j].parent] = dist;
870 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
871 }else if (dist > itDup->second) { //is this a stronger number for this parent
872 removeDups[Results[j].parent] = dist;
873 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
880 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
881 itSeq = parentNameSeq.find(itDup->first);
882 Sequence seq(itDup->first, itSeq->second);
886 member.dist = itDup->second;
887 seqs.push_back(member);
890 //limit number of parents to explore - default 3
891 if (Results.size() > parents) {
893 sort(seqs.begin(), seqs.end(), compareSeqCompare);
894 //prioritize larger more similiar sequence fragments
895 reverse(seqs.begin(), seqs.end());
897 //for (int k = seqs.size()-1; k > (parents-1); k--) {
898 // delete seqs[k].seq;
903 //put seqs into vector to send to slayer
905 // cout << query->getAligned() << endl;
906 vector<Sequence> seqsForSlayer;
907 for (int k = 0; k < seqs.size(); k++) {
908 // cout << seqs[k].seq->getAligned() << endl;
909 seqsForSlayer.push_back(seqs[k].seq);
910 // cout << seqs[k].seq->getName() << endl;
913 if (m->control_pressed) { return 0; }
916 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
917 if (m->control_pressed) { return 0; }
918 chimeraResults = slayer.getOutput();
920 printResults.flag = chimeraFlags;
921 printResults.results = chimeraResults;
924 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
926 //cout << endl << endl;
929 catch(exception& e) {
930 m->errorOut(e, "ChimeraSlayer", "getChimeras");
934 //***************************************************************************************************************
935 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
937 out << querySeq.getName() << '\t';
938 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
940 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
941 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
943 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
946 catch(exception& e) {
947 m->errorOut(e, "ChimeraSlayer", "printBlock");
951 //***************************************************************************************************************
952 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
955 if ((leftChimeric) && (!rightChimeric)) { //print left
956 out << querySeq.getName() << '\t';
957 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
959 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
960 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
962 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
964 }else if ((!leftChimeric) && (rightChimeric)) { //print right
965 out << querySeq.getName() << '\t';
966 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
968 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
969 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
971 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
973 }else { //print both results
974 if (leftdata.flag == "yes") {
975 out << querySeq.getName() + "_LEFT" << '\t';
976 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
978 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
979 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
981 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
984 if (rightdata.flag == "yes") {
985 if (leftdata.flag == "yes") { out << endl; }
987 out << querySeq.getName() + "_RIGHT"<< '\t';
988 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
990 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
991 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
993 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
998 catch(exception& e) {
999 m->errorOut(e, "ChimeraSlayer", "printBlock");
1003 //***************************************************************************************************************
1004 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
1009 if ((leftChimeric) && (!rightChimeric)) { //get left
1010 out += querySeq.getName() + "\t";
1011 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1013 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1014 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1016 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1018 }else if ((!leftChimeric) && (rightChimeric)) { //print right
1019 out += querySeq.getName() + "\t";
1020 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1022 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1023 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1025 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
1027 }else { //print both results
1029 if (leftdata.flag == "yes") {
1030 out += querySeq.getName() + "_LEFT\t";
1031 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1033 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1034 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1036 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1039 if (rightdata.flag == "yes") {
1040 if (leftdata.flag == "yes") { out += "\n"; }
1041 out += querySeq.getName() + "_RIGHT\t";
1042 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1044 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1045 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1047 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
1054 catch(exception& e) {
1055 m->errorOut(e, "ChimeraSlayer", "getBlock");
1059 //***************************************************************************************************************
1060 string ChimeraSlayer::getBlock(data_struct data, string flag){
1063 string outputString = "";
1065 outputString += querySeq.getName() + "\t";
1066 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1068 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1069 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1071 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1073 return outputString;
1075 catch(exception& e) {
1076 m->errorOut(e, "ChimeraSlayer", "getBlock");
1080 //***************************************************************************************************************
1081 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1084 vector<Sequence> refSeqs;
1086 if (searchMethod == "distance") {
1087 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1088 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1090 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1092 }else if (searchMethod == "blast") {
1093 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1094 }else if (searchMethod == "kmer") {
1095 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1096 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1100 catch(exception& e) {
1101 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1105 //***************************************************************************************************************/
1106 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1109 vector<Sequence> refResults;
1111 //get parts of query
1112 string queryUnAligned = q.getUnaligned();
1113 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1114 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1115 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1116 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1117 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1119 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1120 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1123 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1124 // vector<int> smaller;
1125 // vector<int> larger;
1127 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1128 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1132 map<int, int>::iterator it;
1133 vector<int> mergedResults;
1136 // for (int i = 0; i < smaller.size(); i++) {
1137 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1139 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1141 //add left if you havent already
1142 it = seen.find(tempIndexesLeft[index]);
1143 if (it == seen.end()) {
1144 mergedResults.push_back(tempIndexesLeft[index]);
1145 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1148 //add right if you havent already
1149 it = seen.find(tempIndexesRight[index]);
1150 if (it == seen.end()) {
1151 mergedResults.push_back(tempIndexesRight[index]);
1152 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1158 for (int i = index; i < tempIndexesLeft.size(); i++) {
1159 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1161 //add right if you havent already
1162 it = seen.find(tempIndexesLeft[i]);
1163 if (it == seen.end()) {
1164 mergedResults.push_back(tempIndexesLeft[i]);
1165 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1169 for (int i = index; i < tempIndexesRight.size(); i++) {
1170 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1172 //add right if you havent already
1173 it = seen.find(tempIndexesRight[i]);
1174 if (it == seen.end()) {
1175 mergedResults.push_back(tempIndexesRight[i]);
1176 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1179 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1180 //cout << qname << endl;
1182 if (mergedResults.size() == 0) { numNoParents++; }
1184 for (int i = 0; i < mergedResults.size(); i++) {
1185 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1186 if (db[mergedResults[i]]->getName() != q.getName()) {
1187 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1188 refResults.push_back(temp);
1191 //cout << endl << endl;
1198 catch(exception& e) {
1199 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1203 //***************************************************************************************************************
1204 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1206 vector<Sequence> refResults;
1208 //get parts of query
1209 string queryUnAligned = q.getUnaligned();
1210 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1211 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1213 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1214 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1216 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1217 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1221 map<int, int>::iterator it;
1222 vector<int> mergedResults;
1225 // for (int i = 0; i < smaller.size(); i++) {
1226 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1228 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1230 //add left if you havent already
1231 it = seen.find(tempIndexesLeft[index]);
1232 if (it == seen.end()) {
1233 mergedResults.push_back(tempIndexesLeft[index]);
1234 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1237 //add right if you havent already
1238 it = seen.find(tempIndexesRight[index]);
1239 if (it == seen.end()) {
1240 mergedResults.push_back(tempIndexesRight[index]);
1241 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1247 for (int i = index; i < tempIndexesLeft.size(); i++) {
1248 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1250 //add right if you havent already
1251 it = seen.find(tempIndexesLeft[i]);
1252 if (it == seen.end()) {
1253 mergedResults.push_back(tempIndexesLeft[i]);
1254 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1258 for (int i = index; i < tempIndexesRight.size(); i++) {
1259 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1261 //add right if you havent already
1262 it = seen.find(tempIndexesRight[i]);
1263 if (it == seen.end()) {
1264 mergedResults.push_back(tempIndexesRight[i]);
1265 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1269 for (int i = 0; i < mergedResults.size(); i++) {
1270 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1271 if (db[mergedResults[i]]->getName() != q.getName()) {
1272 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1273 refResults.push_back(temp);
1284 catch(exception& e) {
1285 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1289 //***************************************************************************************************************