5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 if (searchMethod == "distance") {
76 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78 //run filter on template copying templateSeqs into filteredTemplateSeqs
79 for (int i = 0; i < templateSeqs.size(); i++) {
80 if (m->control_pressed) { break; }
82 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
84 filteredTemplateSeqs.push_back(newSeq);
89 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
93 //***************************************************************************************************************
94 int ChimeraSlayer::doPrep() {
96 if (searchMethod == "distance") {
97 //read in all query seqs
98 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
100 vector<Sequence*> temp = templateSeqs;
101 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
103 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
105 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
107 if (m->control_pressed) { return 0; }
109 //run filter on template copying templateSeqs into filteredTemplateSeqs
110 for (int i = 0; i < templateSeqs.size(); i++) {
111 if (m->control_pressed) { return 0; }
113 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
115 filteredTemplateSeqs.push_back(newSeq);
118 string kmerDBNameLeft;
119 string kmerDBNameRight;
121 //generate the kmerdb to pass to maligner
122 if (searchMethod == "kmer") {
123 string templatePath = m->hasPath(templateFileName);
124 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
125 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
127 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
128 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
130 for (int i = 0; i < templateSeqs.size(); i++) {
132 if (m->control_pressed) { return 0; }
134 string leftFrag = templateSeqs[i]->getUnaligned();
135 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
137 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
138 databaseLeft->addSequence(leftTemp);
140 databaseLeft->generateDB();
141 databaseLeft->setNumSeqs(templateSeqs.size());
143 for (int i = 0; i < templateSeqs.size(); i++) {
144 if (m->control_pressed) { return 0; }
146 string rightFrag = templateSeqs[i]->getUnaligned();
147 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
149 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
150 databaseRight->addSequence(rightTemp);
152 databaseRight->generateDB();
153 databaseRight->setNumSeqs(templateSeqs.size());
157 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
158 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
159 bool needToGenerateLeft = true;
161 if(kmerFileTestLeft){
162 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
163 if (GoodFile) { needToGenerateLeft = false; }
166 if(needToGenerateLeft){
168 for (int i = 0; i < templateSeqs.size(); i++) {
170 if (m->control_pressed) { return 0; }
172 string leftFrag = templateSeqs[i]->getUnaligned();
173 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
175 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
176 databaseLeft->addSequence(leftTemp);
178 databaseLeft->generateDB();
181 databaseLeft->readKmerDB(kmerFileTestLeft);
183 kmerFileTestLeft.close();
185 databaseLeft->setNumSeqs(templateSeqs.size());
188 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
189 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
190 bool needToGenerateRight = true;
192 if(kmerFileTestRight){
193 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
194 if (GoodFile) { needToGenerateRight = false; }
197 if(needToGenerateRight){
199 for (int i = 0; i < templateSeqs.size(); i++) {
200 if (m->control_pressed) { return 0; }
202 string rightFrag = templateSeqs[i]->getUnaligned();
203 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
205 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
206 databaseRight->addSequence(rightTemp);
208 databaseRight->generateDB();
211 databaseRight->readKmerDB(kmerFileTestRight);
213 kmerFileTestRight.close();
215 databaseRight->setNumSeqs(templateSeqs.size());
217 }else if (searchMethod == "blast") {
220 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
222 if (m->control_pressed) { return 0; }
224 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
225 databaseLeft->generateDB();
226 databaseLeft->setNumSeqs(templateSeqs.size());
232 catch(exception& e) {
233 m->errorOut(e, "ChimeraSlayer", "doprep");
237 //***************************************************************************************************************
238 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
241 //when template=self, the query file is sorted from most abundance to least abundant
242 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
243 vector<Sequence*> userTemplate;
245 int myAbund = priority[q->getName()];
247 for (int i = 0; i < templateSeqs.size(); i++) {
249 if (m->control_pressed) { return userTemplate; }
251 //have I reached a sequence with the same abundance as myself?
252 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
254 //if its am not chimeric add it
255 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
256 userTemplate.push_back(templateSeqs[i]);
257 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
261 string kmerDBNameLeft;
262 string kmerDBNameRight;
264 //generate the kmerdb to pass to maligner
265 if (searchMethod == "kmer") {
266 string templatePath = m->hasPath(templateFileName);
267 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
268 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
270 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
271 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
273 for (int i = 0; i < userTemplate.size(); i++) {
275 if (m->control_pressed) { return userTemplate; }
277 string leftFrag = userTemplate[i]->getUnaligned();
278 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
280 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
281 databaseLeft->addSequence(leftTemp);
283 databaseLeft->generateDB();
284 databaseLeft->setNumSeqs(userTemplate.size());
286 for (int i = 0; i < userTemplate.size(); i++) {
287 if (m->control_pressed) { return userTemplate; }
289 string rightFrag = userTemplate[i]->getUnaligned();
290 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
292 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
293 databaseRight->addSequence(rightTemp);
295 databaseRight->generateDB();
296 databaseRight->setNumSeqs(userTemplate.size());
301 for (int i = 0; i < userTemplate.size(); i++) {
303 if (m->control_pressed) { return userTemplate; }
305 string leftFrag = userTemplate[i]->getUnaligned();
306 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
308 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
309 databaseLeft->addSequence(leftTemp);
311 databaseLeft->generateDB();
312 databaseLeft->setNumSeqs(userTemplate.size());
314 for (int i = 0; i < userTemplate.size(); i++) {
315 if (m->control_pressed) { return userTemplate; }
317 string rightFrag = userTemplate[i]->getUnaligned();
318 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
320 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
321 databaseRight->addSequence(rightTemp);
323 databaseRight->generateDB();
324 databaseRight->setNumSeqs(userTemplate.size());
326 }else if (searchMethod == "blast") {
329 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
331 if (m->control_pressed) { return userTemplate; }
333 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
334 databaseLeft->generateDB();
335 databaseLeft->setNumSeqs(userTemplate.size());
341 catch(exception& e) {
342 m->errorOut(e, "ChimeraSlayer", "getTemplate");
347 //***************************************************************************************************************
348 ChimeraSlayer::~ChimeraSlayer() {
350 if (templateFileName != "self") {
351 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
352 else if (searchMethod == "blast") { delete databaseLeft; }
355 //***************************************************************************************************************
356 void ChimeraSlayer::printHeader(ostream& out) {
357 m->mothurOutEndLine();
358 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
359 m->mothurOutEndLine();
361 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
363 //***************************************************************************************************************
364 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
366 Sequence* trim = NULL;
367 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
369 if (chimeraFlags == "yes") {
370 string chimeraFlag = "no";
371 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
373 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
376 if (chimeraFlag == "yes") {
377 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
378 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
379 outAcc << querySeq->getName() << endl;
381 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
384 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
385 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
387 string newAligned = trim->getAligned();
389 if (lengthLeft > lengthRight) { //trim right
390 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
392 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
394 trim->setAligned(newAligned);
399 printBlock(chimeraResults[0], chimeraFlag, out);
402 out << querySeq->getName() << "\tno" << endl;
408 catch(exception& e) {
409 m->errorOut(e, "ChimeraSlayer", "print");
413 //***************************************************************************************************************
414 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
416 Sequence* trim = NULL;
419 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
420 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
423 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
425 string chimeraFlag = "no";
426 if (leftPiece.flag == "yes") {
428 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
430 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
433 if (rightPiece.flag == "yes") {
434 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
436 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
439 bool rightChimeric = false;
440 bool leftChimeric = false;
442 if (chimeraFlag == "yes") {
443 //which peice is chimeric or are both
444 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
445 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
447 if (rightChimeric || leftChimeric) {
448 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
449 outAcc << querySeq->getName() << endl;
451 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
454 string newAligned = trim->getAligned();
456 //right side is fine so keep that
457 if ((leftChimeric) && (!rightChimeric)) {
458 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
459 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
460 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
461 }else { //both sides are chimeric, keep longest piece
463 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
464 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
466 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
467 int length = lengthLeftLeft;
468 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
470 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
471 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
473 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
474 if (lengthRightRight > length) { longest = 4; }
476 if (longest == 1) { //leftleft
477 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
478 }else if (longest == 2) { //leftright
479 //get rid of leftleft
480 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
482 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
483 }else if (longest == 3) { //rightleft
485 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
486 //get rid of rightright
487 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
490 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
491 //get rid of rightleft
492 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
496 trim->setAligned(newAligned);
502 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
505 out << querySeq->getName() << "\tno" << endl;
511 catch(exception& e) {
512 m->errorOut(e, "ChimeraSlayer", "print");
518 //***************************************************************************************************************
519 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
522 bool results = false;
523 string outAccString = "";
524 string outputString = "";
526 Sequence* trim = NULL;
529 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
530 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
534 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
536 string chimeraFlag = "no";
537 if (leftPiece.flag == "yes") {
539 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
541 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
544 if (rightPiece.flag == "yes") {
545 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
547 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
550 bool rightChimeric = false;
551 bool leftChimeric = false;
555 if (chimeraFlag == "yes") {
556 //which peice is chimeric or are both
557 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
558 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
560 if (rightChimeric || leftChimeric) {
561 cout << querySeq->getName() << "\tyes" << endl;
562 outAccString += querySeq->getName() + "\n";
565 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
567 //write to accnos file
568 int length = outAccString.length();
569 char* buf2 = new char[length];
570 memcpy(buf2, outAccString.c_str(), length);
572 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
576 string newAligned = trim->getAligned();
578 //right side is fine so keep that
579 if ((leftChimeric) && (!rightChimeric)) {
580 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
581 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
582 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
583 }else { //both sides are chimeric, keep longest piece
585 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
586 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
588 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
589 int length = lengthLeftLeft;
590 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
592 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
593 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
595 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
596 if (lengthRightRight > length) { longest = 4; }
598 if (longest == 1) { //leftleft
599 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
600 }else if (longest == 2) { //leftright
601 //get rid of leftleft
602 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
604 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
605 }else if (longest == 3) { //rightleft
607 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
608 //get rid of rightright
609 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
612 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
613 //get rid of rightleft
614 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
618 trim->setAligned(newAligned);
624 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
625 outputString += "\n";
627 //write to output file
628 int length = outputString.length();
629 char* buf = new char[length];
630 memcpy(buf, outputString.c_str(), length);
632 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
636 outputString += querySeq->getName() + "\tno\n";
638 //write to output file
639 int length = outputString.length();
640 char* buf = new char[length];
641 memcpy(buf, outputString.c_str(), length);
643 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
650 catch(exception& e) {
651 m->errorOut(e, "ChimeraSlayer", "print");
655 //***************************************************************************************************************
656 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
659 bool results = false;
660 string outAccString = "";
661 string outputString = "";
663 Sequence* trim = NULL;
664 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
666 if (chimeraFlags == "yes") {
667 string chimeraFlag = "no";
668 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
670 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
673 if (chimeraFlag == "yes") {
674 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
675 cout << querySeq->getName() << "\tyes" << endl;
676 outAccString += querySeq->getName() + "\n";
679 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
681 //write to accnos file
682 int length = outAccString.length();
683 char* buf2 = new char[length];
684 memcpy(buf2, outAccString.c_str(), length);
686 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
690 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
691 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
693 string newAligned = trim->getAligned();
694 if (lengthLeft > lengthRight) { //trim right
695 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
697 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
699 trim->setAligned(newAligned);
704 outputString = getBlock(chimeraResults[0], chimeraFlag);
705 outputString += "\n";
707 //write to output file
708 int length = outputString.length();
709 char* buf = new char[length];
710 memcpy(buf, outputString.c_str(), length);
712 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
716 outputString += querySeq->getName() + "\tno\n";
718 //write to output file
719 int length = outputString.length();
720 char* buf = new char[length];
721 memcpy(buf, outputString.c_str(), length);
723 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
729 catch(exception& e) {
730 m->errorOut(e, "ChimeraSlayer", "print");
736 //***************************************************************************************************************
737 int ChimeraSlayer::getChimeras(Sequence* query) {
740 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
741 printResults.trimQuery = trimQuery;
744 printResults.flag = "no";
748 //you must create a template
749 vector<Sequence*> thisTemplate;
750 vector<Sequence*> thisFilteredTemplate;
751 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
752 else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
754 if (m->control_pressed) { return 0; }
756 if (thisTemplate.size() == 0) { return 0; } //not chimeric
758 //moved this out of maligner - 4/29/11
759 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
761 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
762 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
764 if (templateFileName == "self") {
765 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
766 else if (searchMethod == "blast") { delete databaseLeft; }
769 if (m->control_pressed) { return 0; }
771 string chimeraFlag = maligner.getResults(query, decalc);
773 if (m->control_pressed) { return 0; }
775 vector<results> Results = maligner.getOutput();
777 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
779 if (chimeraFlag == "yes") {
782 vector<string> parents;
783 for (int i = 0; i < Results.size(); i++) {
784 parents.push_back(Results[i].parentAligned);
787 ChimeraReAligner realigner;
788 realigner.reAlign(query, parents);
792 // cout << query->getAligned() << endl;
793 //get sequence that were given from maligner results
794 vector<SeqDist> seqs;
795 map<string, float> removeDups;
796 map<string, float>::iterator itDup;
797 map<string, string> parentNameSeq;
798 map<string, string>::iterator itSeq;
799 for (int j = 0; j < Results.size(); j++) {
801 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
802 //only add if you are not a duplicate
803 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
806 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
808 itDup = removeDups.find(Results[j].parent);
809 if (itDup == removeDups.end()) { //this is not duplicate
810 removeDups[Results[j].parent] = dist;
811 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
812 }else if (dist > itDup->second) { //is this a stronger number for this parent
813 removeDups[Results[j].parent] = dist;
814 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
821 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
822 itSeq = parentNameSeq.find(itDup->first);
823 Sequence* seq = new Sequence(itDup->first, itSeq->second);
827 member.dist = itDup->second;
828 seqs.push_back(member);
831 //limit number of parents to explore - default 3
832 if (Results.size() > parents) {
834 sort(seqs.begin(), seqs.end(), compareSeqDist);
835 //prioritize larger more similiar sequence fragments
836 reverse(seqs.begin(), seqs.end());
838 for (int k = seqs.size()-1; k > (parents-1); k--) {
844 //put seqs into vector to send to slayer
846 // cout << query->getAligned() << endl;
847 vector<Sequence*> seqsForSlayer;
848 for (int k = 0; k < seqs.size(); k++) {
849 // cout << seqs[k].seq->getAligned() << endl;
850 seqsForSlayer.push_back(seqs[k].seq);
851 // cout << seqs[k].seq->getName() << endl;
854 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
857 chimeraFlags = slayer.getResults(query, seqsForSlayer);
858 if (m->control_pressed) { return 0; }
859 chimeraResults = slayer.getOutput();
861 printResults.flag = chimeraFlags;
862 printResults.results = chimeraResults;
865 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
867 //cout << endl << endl;
870 catch(exception& e) {
871 m->errorOut(e, "ChimeraSlayer", "getChimeras");
875 //***************************************************************************************************************
876 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
878 out << querySeq->getName() << '\t';
879 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
881 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
882 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
884 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
887 catch(exception& e) {
888 m->errorOut(e, "ChimeraSlayer", "printBlock");
892 //***************************************************************************************************************
893 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
896 if ((leftChimeric) && (!rightChimeric)) { //print left
897 out << querySeq->getName() << '\t';
898 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
900 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
901 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
903 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
905 }else if ((!leftChimeric) && (rightChimeric)) { //print right
906 out << querySeq->getName() << '\t';
907 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
909 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
910 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
912 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
914 }else { //print both results
915 if (leftdata.flag == "yes") {
916 out << querySeq->getName() + "_LEFT" << '\t';
917 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
919 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
920 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
922 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
925 if (rightdata.flag == "yes") {
926 if (leftdata.flag == "yes") { out << endl; }
928 out << querySeq->getName() + "_RIGHT"<< '\t';
929 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
931 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
932 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
934 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
939 catch(exception& e) {
940 m->errorOut(e, "ChimeraSlayer", "printBlock");
944 //***************************************************************************************************************
945 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
950 if ((leftChimeric) && (!rightChimeric)) { //get left
951 out += querySeq->getName() + "\t";
952 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
954 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
955 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
957 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
959 }else if ((!leftChimeric) && (rightChimeric)) { //print right
960 out += querySeq->getName() + "\t";
961 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
963 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
964 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
966 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
968 }else { //print both results
970 if (leftdata.flag == "yes") {
971 out += querySeq->getName() + "_LEFT\t";
972 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
974 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
975 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
977 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
980 if (rightdata.flag == "yes") {
981 if (leftdata.flag == "yes") { out += "\n"; }
982 out += querySeq->getName() + "_RIGHT\t";
983 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
985 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
986 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
988 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
995 catch(exception& e) {
996 m->errorOut(e, "ChimeraSlayer", "getBlock");
1000 //***************************************************************************************************************
1001 string ChimeraSlayer::getBlock(data_struct data, string flag){
1004 string outputString = "";
1006 outputString += querySeq->getName() + "\t";
1007 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1009 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1010 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1012 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1014 return outputString;
1016 catch(exception& e) {
1017 m->errorOut(e, "ChimeraSlayer", "getBlock");
1021 //***************************************************************************************************************
1022 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1025 vector<Sequence*> refSeqs;
1027 if (searchMethod == "distance") {
1028 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1029 Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1031 refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1033 }else if (searchMethod == "blast") {
1034 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1035 }else if (searchMethod == "kmer") {
1036 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1037 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1041 catch(exception& e) {
1042 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1046 //***************************************************************************************************************/
1047 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1050 vector<Sequence*> refResults;
1052 //get parts of query
1053 string queryUnAligned = q->getUnaligned();
1054 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1055 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1056 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1057 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1058 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1060 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1061 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1064 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1065 // vector<int> smaller;
1066 // vector<int> larger;
1068 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1069 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1073 map<int, int>::iterator it;
1074 vector<int> mergedResults;
1077 // for (int i = 0; i < smaller.size(); i++) {
1078 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1080 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1082 //add left if you havent already
1083 it = seen.find(tempIndexesLeft[index]);
1084 if (it == seen.end()) {
1085 mergedResults.push_back(tempIndexesLeft[index]);
1086 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1089 //add right if you havent already
1090 it = seen.find(tempIndexesRight[index]);
1091 if (it == seen.end()) {
1092 mergedResults.push_back(tempIndexesRight[index]);
1093 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1099 for (int i = index; i < tempIndexesLeft.size(); i++) {
1100 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1102 //add right if you havent already
1103 it = seen.find(tempIndexesLeft[i]);
1104 if (it == seen.end()) {
1105 mergedResults.push_back(tempIndexesLeft[i]);
1106 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1110 for (int i = index; i < tempIndexesRight.size(); i++) {
1111 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1113 //add right if you havent already
1114 it = seen.find(tempIndexesRight[i]);
1115 if (it == seen.end()) {
1116 mergedResults.push_back(tempIndexesRight[i]);
1117 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1120 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1121 //cout << qname << endl;
1123 for (int i = 0; i < mergedResults.size(); i++) {
1124 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1125 if (db[mergedResults[i]]->getName() != q->getName()) {
1126 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1127 refResults.push_back(temp);
1131 //cout << endl << endl;
1136 if (refResults.size() == 0) { m->mothurOut("[WARNING]: mothur found 0 potential parents, so we are not able to check " + q->getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
1140 catch(exception& e) {
1141 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1145 //***************************************************************************************************************
1146 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1148 vector<Sequence*> refResults;
1150 //get parts of query
1151 string queryUnAligned = q->getUnaligned();
1152 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1153 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1155 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1156 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1158 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1159 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1163 map<int, int>::iterator it;
1164 vector<int> mergedResults;
1167 // for (int i = 0; i < smaller.size(); i++) {
1168 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1170 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1172 //add left if you havent already
1173 it = seen.find(tempIndexesLeft[index]);
1174 if (it == seen.end()) {
1175 mergedResults.push_back(tempIndexesLeft[index]);
1176 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1179 //add right if you havent already
1180 it = seen.find(tempIndexesRight[index]);
1181 if (it == seen.end()) {
1182 mergedResults.push_back(tempIndexesRight[index]);
1183 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1189 for (int i = index; i < tempIndexesLeft.size(); i++) {
1190 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1192 //add right if you havent already
1193 it = seen.find(tempIndexesLeft[i]);
1194 if (it == seen.end()) {
1195 mergedResults.push_back(tempIndexesLeft[i]);
1196 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1200 for (int i = index; i < tempIndexesRight.size(); i++) {
1201 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1203 //add right if you havent already
1204 it = seen.find(tempIndexesRight[i]);
1205 if (it == seen.end()) {
1206 mergedResults.push_back(tempIndexesRight[i]);
1207 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1211 for (int i = 0; i < mergedResults.size(); i++) {
1212 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1213 if (db[mergedResults[i]]->getName() != q->getName()) {
1214 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1215 refResults.push_back(temp);
1226 catch(exception& e) {
1227 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1231 //***************************************************************************************************************