5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
41 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45 //***************************************************************************************************************
46 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
47 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
49 fastafile = file; templateSeqs = readSeqs(fastafile);
50 templateFileName = temp;
69 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
71 if (searchMethod == "distance") {
72 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74 //run filter on template copying templateSeqs into filteredTemplateSeqs
75 for (int i = 0; i < templateSeqs.size(); i++) {
76 if (m->control_pressed) { break; }
78 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
80 filteredTemplateSeqs.push_back(newSeq);
85 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
89 //***************************************************************************************************************
90 int ChimeraSlayer::doPrep() {
92 if (searchMethod == "distance") {
93 //read in all query seqs
94 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
96 vector<Sequence*> temp = templateSeqs;
97 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
99 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
101 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
103 if (m->control_pressed) { return 0; }
105 //run filter on template copying templateSeqs into filteredTemplateSeqs
106 for (int i = 0; i < templateSeqs.size(); i++) {
107 if (m->control_pressed) { return 0; }
109 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
111 filteredTemplateSeqs.push_back(newSeq);
114 string kmerDBNameLeft;
115 string kmerDBNameRight;
117 //generate the kmerdb to pass to maligner
118 if (searchMethod == "kmer") {
119 string templatePath = m->hasPath(templateFileName);
120 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
121 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
123 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
124 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
126 for (int i = 0; i < templateSeqs.size(); i++) {
128 if (m->control_pressed) { return 0; }
130 string leftFrag = templateSeqs[i]->getUnaligned();
131 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
133 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
134 databaseLeft->addSequence(leftTemp);
136 databaseLeft->generateDB();
137 databaseLeft->setNumSeqs(templateSeqs.size());
139 for (int i = 0; i < templateSeqs.size(); i++) {
140 if (m->control_pressed) { return 0; }
142 string rightFrag = templateSeqs[i]->getUnaligned();
143 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
145 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
146 databaseRight->addSequence(rightTemp);
148 databaseRight->generateDB();
149 databaseRight->setNumSeqs(templateSeqs.size());
153 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
154 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
155 bool needToGenerateLeft = true;
157 if(kmerFileTestLeft){
158 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
159 if (GoodFile) { needToGenerateLeft = false; }
162 if(needToGenerateLeft){
164 for (int i = 0; i < templateSeqs.size(); i++) {
166 if (m->control_pressed) { return 0; }
168 string leftFrag = templateSeqs[i]->getUnaligned();
169 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
171 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
172 databaseLeft->addSequence(leftTemp);
174 databaseLeft->generateDB();
177 databaseLeft->readKmerDB(kmerFileTestLeft);
179 kmerFileTestLeft.close();
181 databaseLeft->setNumSeqs(templateSeqs.size());
184 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
185 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
186 bool needToGenerateRight = true;
188 if(kmerFileTestRight){
189 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
190 if (GoodFile) { needToGenerateRight = false; }
193 if(needToGenerateRight){
195 for (int i = 0; i < templateSeqs.size(); i++) {
196 if (m->control_pressed) { return 0; }
198 string rightFrag = templateSeqs[i]->getUnaligned();
199 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
201 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
202 databaseRight->addSequence(rightTemp);
204 databaseRight->generateDB();
207 databaseRight->readKmerDB(kmerFileTestRight);
209 kmerFileTestRight.close();
211 databaseRight->setNumSeqs(templateSeqs.size());
213 }else if (searchMethod == "blast") {
216 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
218 if (m->control_pressed) { return 0; }
220 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
221 databaseLeft->generateDB();
222 databaseLeft->setNumSeqs(templateSeqs.size());
228 catch(exception& e) {
229 m->errorOut(e, "ChimeraSlayer", "doprep");
233 //***************************************************************************************************************
234 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
237 //when template=self, the query file is sorted from most abundance to least abundant
238 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
239 vector<Sequence*> userTemplate;
241 int myAbund = priority[q.getName()];
243 for (int i = 0; i < templateSeqs.size(); i++) {
245 if (m->control_pressed) { return userTemplate; }
247 //have I reached a sequence with the same abundance as myself?
248 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
250 //if its am not chimeric add it
251 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
252 userTemplate.push_back(templateSeqs[i]);
253 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
257 string kmerDBNameLeft;
258 string kmerDBNameRight;
260 //generate the kmerdb to pass to maligner
261 if (searchMethod == "kmer") {
262 string templatePath = m->hasPath(templateFileName);
263 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
264 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
266 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
267 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
269 for (int i = 0; i < userTemplate.size(); i++) {
271 if (m->control_pressed) { return userTemplate; }
273 string leftFrag = userTemplate[i]->getUnaligned();
274 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
276 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
277 databaseLeft->addSequence(leftTemp);
279 databaseLeft->generateDB();
280 databaseLeft->setNumSeqs(userTemplate.size());
282 for (int i = 0; i < userTemplate.size(); i++) {
283 if (m->control_pressed) { return userTemplate; }
285 string rightFrag = userTemplate[i]->getUnaligned();
286 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
288 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
289 databaseRight->addSequence(rightTemp);
291 databaseRight->generateDB();
292 databaseRight->setNumSeqs(userTemplate.size());
297 for (int i = 0; i < userTemplate.size(); i++) {
299 if (m->control_pressed) { return userTemplate; }
301 string leftFrag = userTemplate[i]->getUnaligned();
302 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
304 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
305 databaseLeft->addSequence(leftTemp);
307 databaseLeft->generateDB();
308 databaseLeft->setNumSeqs(userTemplate.size());
310 for (int i = 0; i < userTemplate.size(); i++) {
311 if (m->control_pressed) { return userTemplate; }
313 string rightFrag = userTemplate[i]->getUnaligned();
314 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
316 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
317 databaseRight->addSequence(rightTemp);
319 databaseRight->generateDB();
320 databaseRight->setNumSeqs(userTemplate.size());
322 }else if (searchMethod == "blast") {
325 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
327 if (m->control_pressed) { return userTemplate; }
329 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
330 databaseLeft->generateDB();
331 databaseLeft->setNumSeqs(userTemplate.size());
337 catch(exception& e) {
338 m->errorOut(e, "ChimeraSlayer", "getTemplate");
343 //***************************************************************************************************************
344 ChimeraSlayer::~ChimeraSlayer() {
345 if (templateFileName != "self") {
346 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
347 else if (searchMethod == "blast") { delete databaseLeft; }
350 //***************************************************************************************************************
351 void ChimeraSlayer::printHeader(ostream& out) {
352 m->mothurOutEndLine();
353 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
354 m->mothurOutEndLine();
356 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
358 //***************************************************************************************************************
359 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
362 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
364 if (chimeraFlags == "yes") {
365 string chimeraFlag = "no";
366 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
368 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
371 if (chimeraFlag == "yes") {
372 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
373 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
374 outAcc << querySeq.getName() << endl;
376 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
379 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
380 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
382 string newAligned = trim.getAligned();
384 if (lengthLeft > lengthRight) { //trim right
385 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
387 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
389 trim.setAligned(newAligned);
394 printBlock(chimeraResults[0], chimeraFlag, out);
397 out << querySeq.getName() << "\tno" << endl;
403 catch(exception& e) {
404 m->errorOut(e, "ChimeraSlayer", "print");
408 //***************************************************************************************************************
409 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
414 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
415 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
418 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
420 string chimeraFlag = "no";
421 if (leftPiece.flag == "yes") {
423 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
425 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
428 if (rightPiece.flag == "yes") {
429 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
431 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
434 bool rightChimeric = false;
435 bool leftChimeric = false;
437 if (chimeraFlag == "yes") {
438 //which peice is chimeric or are both
439 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
440 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
442 if (rightChimeric || leftChimeric) {
443 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
444 outAcc << querySeq.getName() << endl;
446 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
449 string newAligned = trim.getAligned();
451 //right side is fine so keep that
452 if ((leftChimeric) && (!rightChimeric)) {
453 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
454 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
455 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
456 }else { //both sides are chimeric, keep longest piece
458 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
459 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
461 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
462 int length = lengthLeftLeft;
463 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
465 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
466 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
468 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
469 if (lengthRightRight > length) { longest = 4; }
471 if (longest == 1) { //leftleft
472 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
473 }else if (longest == 2) { //leftright
474 //get rid of leftleft
475 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
477 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
478 }else if (longest == 3) { //rightleft
480 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
481 //get rid of rightright
482 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
485 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
486 //get rid of rightleft
487 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
491 trim.setAligned(newAligned);
497 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
500 out << querySeq.getName() << "\tno" << endl;
506 catch(exception& e) {
507 m->errorOut(e, "ChimeraSlayer", "print");
513 //***************************************************************************************************************
514 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
517 bool results = false;
518 string outAccString = "";
519 string outputString = "";
524 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
525 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
529 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
531 string chimeraFlag = "no";
532 if (leftPiece.flag == "yes") {
534 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
536 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
539 if (rightPiece.flag == "yes") {
540 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
542 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
545 bool rightChimeric = false;
546 bool leftChimeric = false;
550 if (chimeraFlag == "yes") {
551 //which peice is chimeric or are both
552 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
553 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
555 if (rightChimeric || leftChimeric) {
556 cout << querySeq.getName() << "\tyes" << endl;
557 outAccString += querySeq.getName() + "\n";
560 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
562 //write to accnos file
563 int length = outAccString.length();
564 char* buf2 = new char[length];
565 memcpy(buf2, outAccString.c_str(), length);
567 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
571 string newAligned = trim.getAligned();
573 //right side is fine so keep that
574 if ((leftChimeric) && (!rightChimeric)) {
575 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
576 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
577 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
578 }else { //both sides are chimeric, keep longest piece
580 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
581 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
583 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
584 int length = lengthLeftLeft;
585 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
587 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
588 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
590 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
591 if (lengthRightRight > length) { longest = 4; }
593 if (longest == 1) { //leftleft
594 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
595 }else if (longest == 2) { //leftright
596 //get rid of leftleft
597 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
599 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
600 }else if (longest == 3) { //rightleft
602 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
603 //get rid of rightright
604 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
607 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
608 //get rid of rightleft
609 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
613 trim.setAligned(newAligned);
619 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
620 outputString += "\n";
622 //write to output file
623 int length = outputString.length();
624 char* buf = new char[length];
625 memcpy(buf, outputString.c_str(), length);
627 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
631 outputString += querySeq.getName() + "\tno\n";
633 //write to output file
634 int length = outputString.length();
635 char* buf = new char[length];
636 memcpy(buf, outputString.c_str(), length);
638 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
645 catch(exception& e) {
646 m->errorOut(e, "ChimeraSlayer", "print");
650 //***************************************************************************************************************
651 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
654 bool results = false;
655 string outAccString = "";
656 string outputString = "";
659 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
661 if (chimeraFlags == "yes") {
662 string chimeraFlag = "no";
663 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
665 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
668 if (chimeraFlag == "yes") {
669 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
670 cout << querySeq.getName() << "\tyes" << endl;
671 outAccString += querySeq.getName() + "\n";
674 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
676 //write to accnos file
677 int length = outAccString.length();
678 char* buf2 = new char[length];
679 memcpy(buf2, outAccString.c_str(), length);
681 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
685 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
686 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
688 string newAligned = trim.getAligned();
689 if (lengthLeft > lengthRight) { //trim right
690 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
692 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
694 trim.setAligned(newAligned);
699 outputString = getBlock(chimeraResults[0], chimeraFlag);
700 outputString += "\n";
702 //write to output file
703 int length = outputString.length();
704 char* buf = new char[length];
705 memcpy(buf, outputString.c_str(), length);
707 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
711 outputString += querySeq.getName() + "\tno\n";
713 //write to output file
714 int length = outputString.length();
715 char* buf = new char[length];
716 memcpy(buf, outputString.c_str(), length);
718 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
724 catch(exception& e) {
725 m->errorOut(e, "ChimeraSlayer", "print");
731 //***************************************************************************************************************
732 int ChimeraSlayer::getChimeras(Sequence* query) {
735 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
736 printResults.trimQuery = trimQuery;
739 printResults.flag = "no";
743 //you must create a template
744 vector<Sequence*> thisTemplate;
745 vector<Sequence*> thisFilteredTemplate;
746 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
747 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
749 if (m->control_pressed) { return 0; }
751 if (thisTemplate.size() == 0) { return 0; } //not chimeric
753 //moved this out of maligner - 4/29/11
754 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
756 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
757 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
759 if (templateFileName == "self") {
760 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
761 else if (searchMethod == "blast") { delete databaseLeft; }
764 if (m->control_pressed) { return 0; }
766 string chimeraFlag = maligner.getResults(*query, decalc);
768 if (m->control_pressed) { return 0; }
770 vector<results> Results = maligner.getOutput();
772 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
774 if (chimeraFlag == "yes") {
777 vector<string> parents;
778 for (int i = 0; i < Results.size(); i++) {
779 parents.push_back(Results[i].parentAligned);
782 ChimeraReAligner realigner;
783 realigner.reAlign(query, parents);
787 // cout << query->getAligned() << endl;
788 //get sequence that were given from maligner results
789 vector<SeqCompare> seqs;
790 map<string, float> removeDups;
791 map<string, float>::iterator itDup;
792 map<string, string> parentNameSeq;
793 map<string, string>::iterator itSeq;
794 for (int j = 0; j < Results.size(); j++) {
796 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
797 //only add if you are not a duplicate
798 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
801 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
803 itDup = removeDups.find(Results[j].parent);
804 if (itDup == removeDups.end()) { //this is not duplicate
805 removeDups[Results[j].parent] = dist;
806 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
807 }else if (dist > itDup->second) { //is this a stronger number for this parent
808 removeDups[Results[j].parent] = dist;
809 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
816 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
817 itSeq = parentNameSeq.find(itDup->first);
818 Sequence seq(itDup->first, itSeq->second);
822 member.dist = itDup->second;
823 seqs.push_back(member);
826 //limit number of parents to explore - default 3
827 if (Results.size() > parents) {
829 sort(seqs.begin(), seqs.end(), compareSeqCompare);
830 //prioritize larger more similiar sequence fragments
831 reverse(seqs.begin(), seqs.end());
833 //for (int k = seqs.size()-1; k > (parents-1); k--) {
834 // delete seqs[k].seq;
839 //put seqs into vector to send to slayer
841 // cout << query->getAligned() << endl;
842 vector<Sequence> seqsForSlayer;
843 for (int k = 0; k < seqs.size(); k++) {
844 // cout << seqs[k].seq->getAligned() << endl;
845 seqsForSlayer.push_back(seqs[k].seq);
846 // cout << seqs[k].seq->getName() << endl;
849 if (m->control_pressed) { return 0; }
852 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
853 if (m->control_pressed) { return 0; }
854 chimeraResults = slayer.getOutput();
856 printResults.flag = chimeraFlags;
857 printResults.results = chimeraResults;
860 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
862 //cout << endl << endl;
865 catch(exception& e) {
866 m->errorOut(e, "ChimeraSlayer", "getChimeras");
870 //***************************************************************************************************************
871 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
873 out << querySeq.getName() << '\t';
874 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
876 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
877 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
879 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
882 catch(exception& e) {
883 m->errorOut(e, "ChimeraSlayer", "printBlock");
887 //***************************************************************************************************************
888 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
891 if ((leftChimeric) && (!rightChimeric)) { //print left
892 out << querySeq.getName() << '\t';
893 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
895 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
896 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
898 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
900 }else if ((!leftChimeric) && (rightChimeric)) { //print right
901 out << querySeq.getName() << '\t';
902 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
904 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
905 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
907 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
909 }else { //print both results
910 if (leftdata.flag == "yes") {
911 out << querySeq.getName() + "_LEFT" << '\t';
912 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
914 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
915 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
917 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
920 if (rightdata.flag == "yes") {
921 if (leftdata.flag == "yes") { out << endl; }
923 out << querySeq.getName() + "_RIGHT"<< '\t';
924 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
926 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
927 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
929 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
934 catch(exception& e) {
935 m->errorOut(e, "ChimeraSlayer", "printBlock");
939 //***************************************************************************************************************
940 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
945 if ((leftChimeric) && (!rightChimeric)) { //get left
946 out += querySeq.getName() + "\t";
947 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
949 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
950 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
952 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
954 }else if ((!leftChimeric) && (rightChimeric)) { //print right
955 out += querySeq.getName() + "\t";
956 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
958 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
959 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
961 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
963 }else { //print both results
965 if (leftdata.flag == "yes") {
966 out += querySeq.getName() + "_LEFT\t";
967 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
969 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
970 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
972 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
975 if (rightdata.flag == "yes") {
976 if (leftdata.flag == "yes") { out += "\n"; }
977 out += querySeq.getName() + "_RIGHT\t";
978 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
980 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
981 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
983 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
990 catch(exception& e) {
991 m->errorOut(e, "ChimeraSlayer", "getBlock");
995 //***************************************************************************************************************
996 string ChimeraSlayer::getBlock(data_struct data, string flag){
999 string outputString = "";
1001 outputString += querySeq.getName() + "\t";
1002 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1004 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1005 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1007 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1009 return outputString;
1011 catch(exception& e) {
1012 m->errorOut(e, "ChimeraSlayer", "getBlock");
1016 //***************************************************************************************************************
1017 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1020 vector<Sequence> refSeqs;
1022 if (searchMethod == "distance") {
1023 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1024 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1026 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1028 }else if (searchMethod == "blast") {
1029 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1030 }else if (searchMethod == "kmer") {
1031 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1032 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1036 catch(exception& e) {
1037 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1041 //***************************************************************************************************************/
1042 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1045 vector<Sequence> refResults;
1047 //get parts of query
1048 string queryUnAligned = q.getUnaligned();
1049 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1050 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1051 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1052 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1053 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1055 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1056 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1059 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1060 // vector<int> smaller;
1061 // vector<int> larger;
1063 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1064 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1068 map<int, int>::iterator it;
1069 vector<int> mergedResults;
1072 // for (int i = 0; i < smaller.size(); i++) {
1073 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1075 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1077 //add left if you havent already
1078 it = seen.find(tempIndexesLeft[index]);
1079 if (it == seen.end()) {
1080 mergedResults.push_back(tempIndexesLeft[index]);
1081 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1084 //add right if you havent already
1085 it = seen.find(tempIndexesRight[index]);
1086 if (it == seen.end()) {
1087 mergedResults.push_back(tempIndexesRight[index]);
1088 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1094 for (int i = index; i < tempIndexesLeft.size(); i++) {
1095 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1097 //add right if you havent already
1098 it = seen.find(tempIndexesLeft[i]);
1099 if (it == seen.end()) {
1100 mergedResults.push_back(tempIndexesLeft[i]);
1101 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1105 for (int i = index; i < tempIndexesRight.size(); i++) {
1106 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1108 //add right if you havent already
1109 it = seen.find(tempIndexesRight[i]);
1110 if (it == seen.end()) {
1111 mergedResults.push_back(tempIndexesRight[i]);
1112 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1115 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1116 //cout << qname << endl;
1118 for (int i = 0; i < mergedResults.size(); i++) {
1119 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1120 if (db[mergedResults[i]]->getName() != q.getName()) {
1121 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1122 refResults.push_back(temp);
1125 //cout << endl << endl;
1130 if (refResults.size() == 0) { m->mothurOut("[WARNING]: megablast found 0 potential parents, so we are not able to check " + q.getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
1134 catch(exception& e) {
1135 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1139 //***************************************************************************************************************
1140 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1142 vector<Sequence> refResults;
1144 //get parts of query
1145 string queryUnAligned = q.getUnaligned();
1146 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1147 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1149 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1150 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1152 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1153 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1157 map<int, int>::iterator it;
1158 vector<int> mergedResults;
1161 // for (int i = 0; i < smaller.size(); i++) {
1162 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1164 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1166 //add left if you havent already
1167 it = seen.find(tempIndexesLeft[index]);
1168 if (it == seen.end()) {
1169 mergedResults.push_back(tempIndexesLeft[index]);
1170 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1173 //add right if you havent already
1174 it = seen.find(tempIndexesRight[index]);
1175 if (it == seen.end()) {
1176 mergedResults.push_back(tempIndexesRight[index]);
1177 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1183 for (int i = index; i < tempIndexesLeft.size(); i++) {
1184 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1186 //add right if you havent already
1187 it = seen.find(tempIndexesLeft[i]);
1188 if (it == seen.end()) {
1189 mergedResults.push_back(tempIndexesLeft[i]);
1190 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1194 for (int i = index; i < tempIndexesRight.size(); i++) {
1195 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1197 //add right if you havent already
1198 it = seen.find(tempIndexesRight[i]);
1199 if (it == seen.end()) {
1200 mergedResults.push_back(tempIndexesRight[i]);
1201 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1205 for (int i = 0; i < mergedResults.size(); i++) {
1206 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1207 if (db[mergedResults[i]]->getName() != q.getName()) {
1208 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1209 refResults.push_back(temp);
1220 catch(exception& e) {
1221 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1225 //***************************************************************************************************************