5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
49 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
50 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas) : Chimera() {
52 fastafile = file; templateSeqs = readSeqs(fastafile);
53 templateFileName = temp;
74 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
76 if (searchMethod == "distance") {
77 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
79 //run filter on template copying templateSeqs into filteredTemplateSeqs
80 for (int i = 0; i < templateSeqs.size(); i++) {
81 if (m->control_pressed) { break; }
83 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
85 filteredTemplateSeqs.push_back(newSeq);
90 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
94 //***************************************************************************************************************
95 int ChimeraSlayer::doPrep() {
97 if (searchMethod == "distance") {
98 //read in all query seqs
99 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
101 vector<Sequence*> temp = templateSeqs;
102 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
104 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
106 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
108 if (m->control_pressed) { return 0; }
110 //run filter on template copying templateSeqs into filteredTemplateSeqs
111 for (int i = 0; i < templateSeqs.size(); i++) {
112 if (m->control_pressed) { return 0; }
114 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
116 filteredTemplateSeqs.push_back(newSeq);
119 string kmerDBNameLeft;
120 string kmerDBNameRight;
122 //generate the kmerdb to pass to maligner
123 if (searchMethod == "kmer") {
124 string templatePath = m->hasPath(templateFileName);
125 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
126 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
128 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
129 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
131 for (int i = 0; i < templateSeqs.size(); i++) {
133 if (m->control_pressed) { return 0; }
135 string leftFrag = templateSeqs[i]->getUnaligned();
136 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
138 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
139 databaseLeft->addSequence(leftTemp);
141 databaseLeft->generateDB();
142 databaseLeft->setNumSeqs(templateSeqs.size());
144 for (int i = 0; i < templateSeqs.size(); i++) {
145 if (m->control_pressed) { return 0; }
147 string rightFrag = templateSeqs[i]->getUnaligned();
148 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
150 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
151 databaseRight->addSequence(rightTemp);
153 databaseRight->generateDB();
154 databaseRight->setNumSeqs(templateSeqs.size());
158 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
159 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
160 bool needToGenerateLeft = true;
162 if(kmerFileTestLeft){
163 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
164 if (GoodFile) { needToGenerateLeft = false; }
167 if(needToGenerateLeft){
169 for (int i = 0; i < templateSeqs.size(); i++) {
171 if (m->control_pressed) { return 0; }
173 string leftFrag = templateSeqs[i]->getUnaligned();
174 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
176 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
177 databaseLeft->addSequence(leftTemp);
179 databaseLeft->generateDB();
182 databaseLeft->readKmerDB(kmerFileTestLeft);
184 kmerFileTestLeft.close();
186 databaseLeft->setNumSeqs(templateSeqs.size());
189 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
190 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
191 bool needToGenerateRight = true;
193 if(kmerFileTestRight){
194 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
195 if (GoodFile) { needToGenerateRight = false; }
198 if(needToGenerateRight){
200 for (int i = 0; i < templateSeqs.size(); i++) {
201 if (m->control_pressed) { return 0; }
203 string rightFrag = templateSeqs[i]->getUnaligned();
204 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
206 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
207 databaseRight->addSequence(rightTemp);
209 databaseRight->generateDB();
212 databaseRight->readKmerDB(kmerFileTestRight);
214 kmerFileTestRight.close();
216 databaseRight->setNumSeqs(templateSeqs.size());
218 }else if (searchMethod == "blast") {
221 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation);
223 if (m->control_pressed) { return 0; }
225 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
226 databaseLeft->generateDB();
227 databaseLeft->setNumSeqs(templateSeqs.size());
233 catch(exception& e) {
234 m->errorOut(e, "ChimeraSlayer", "doprep");
238 //***************************************************************************************************************
239 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
242 //when template=self, the query file is sorted from most abundance to least abundant
243 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
244 vector<Sequence*> userTemplate;
246 int myAbund = priority[q.getName()];
248 for (int i = 0; i < templateSeqs.size(); i++) {
250 if (m->control_pressed) { return userTemplate; }
252 //have I reached a sequence with the same abundance as myself?
253 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
255 //if its am not chimeric add it
256 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
257 userTemplate.push_back(templateSeqs[i]);
258 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
262 //avoids nuisance error from formatdb for making blank blast database
263 if (userTemplate.size() == 0) {
267 string kmerDBNameLeft;
268 string kmerDBNameRight;
270 //generate the kmerdb to pass to maligner
271 if (searchMethod == "kmer") {
272 string templatePath = m->hasPath(templateFileName);
273 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
274 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
276 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
277 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
279 for (int i = 0; i < userTemplate.size(); i++) {
281 if (m->control_pressed) { return userTemplate; }
283 string leftFrag = userTemplate[i]->getUnaligned();
284 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
286 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
287 databaseLeft->addSequence(leftTemp);
289 databaseLeft->generateDB();
290 databaseLeft->setNumSeqs(userTemplate.size());
292 for (int i = 0; i < userTemplate.size(); i++) {
293 if (m->control_pressed) { return userTemplate; }
295 string rightFrag = userTemplate[i]->getUnaligned();
296 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
298 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
299 databaseRight->addSequence(rightTemp);
301 databaseRight->generateDB();
302 databaseRight->setNumSeqs(userTemplate.size());
307 for (int i = 0; i < userTemplate.size(); i++) {
309 if (m->control_pressed) { return userTemplate; }
311 string leftFrag = userTemplate[i]->getUnaligned();
312 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
314 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
315 databaseLeft->addSequence(leftTemp);
317 databaseLeft->generateDB();
318 databaseLeft->setNumSeqs(userTemplate.size());
320 for (int i = 0; i < userTemplate.size(); i++) {
321 if (m->control_pressed) { return userTemplate; }
323 string rightFrag = userTemplate[i]->getUnaligned();
324 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
326 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
327 databaseRight->addSequence(rightTemp);
329 databaseRight->generateDB();
330 databaseRight->setNumSeqs(userTemplate.size());
332 }else if (searchMethod == "blast") {
335 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation);
337 if (m->control_pressed) { return userTemplate; }
339 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
340 databaseLeft->generateDB();
341 databaseLeft->setNumSeqs(userTemplate.size());
347 catch(exception& e) {
348 m->errorOut(e, "ChimeraSlayer", "getTemplate");
353 //***************************************************************************************************************
354 ChimeraSlayer::~ChimeraSlayer() {
355 if (templateFileName != "self") {
356 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
357 else if (searchMethod == "blast") { delete databaseLeft; }
360 //***************************************************************************************************************
361 void ChimeraSlayer::printHeader(ostream& out) {
362 m->mothurOutEndLine();
363 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
364 m->mothurOutEndLine();
366 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
368 //***************************************************************************************************************
369 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
372 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
374 if (chimeraFlags == "yes") {
375 string chimeraFlag = "no";
376 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
378 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
381 if (chimeraFlag == "yes") {
382 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
383 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
384 outAcc << querySeq.getName() << endl;
386 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
389 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
390 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
392 string newAligned = trim.getAligned();
394 if (lengthLeft > lengthRight) { //trim right
395 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
397 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
399 trim.setAligned(newAligned);
404 printBlock(chimeraResults[0], chimeraFlag, out);
407 out << querySeq.getName() << "\tno" << endl;
413 catch(exception& e) {
414 m->errorOut(e, "ChimeraSlayer", "print");
418 //***************************************************************************************************************
419 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
424 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
425 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
428 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
430 string chimeraFlag = "no";
431 if (leftPiece.flag == "yes") {
433 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
435 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
438 if (rightPiece.flag == "yes") {
439 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
441 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
444 bool rightChimeric = false;
445 bool leftChimeric = false;
447 if (chimeraFlag == "yes") {
448 //which peice is chimeric or are both
449 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
450 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
452 if (rightChimeric || leftChimeric) {
453 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
454 outAcc << querySeq.getName() << endl;
456 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
459 string newAligned = trim.getAligned();
461 //right side is fine so keep that
462 if ((leftChimeric) && (!rightChimeric)) {
463 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
464 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
465 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466 }else { //both sides are chimeric, keep longest piece
468 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
469 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
471 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
472 int length = lengthLeftLeft;
473 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
475 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
476 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
478 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
479 if (lengthRightRight > length) { longest = 4; }
481 if (longest == 1) { //leftleft
482 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
483 }else if (longest == 2) { //leftright
484 //get rid of leftleft
485 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
487 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
488 }else if (longest == 3) { //rightleft
490 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
491 //get rid of rightright
492 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
495 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
496 //get rid of rightleft
497 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
501 trim.setAligned(newAligned);
507 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
510 out << querySeq.getName() << "\tno" << endl;
516 catch(exception& e) {
517 m->errorOut(e, "ChimeraSlayer", "print");
523 //***************************************************************************************************************
524 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
527 bool results = false;
528 string outAccString = "";
529 string outputString = "";
534 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
535 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
539 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
541 string chimeraFlag = "no";
542 if (leftPiece.flag == "yes") {
544 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
546 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
549 if (rightPiece.flag == "yes") {
550 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
552 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
555 bool rightChimeric = false;
556 bool leftChimeric = false;
560 if (chimeraFlag == "yes") {
561 //which peice is chimeric or are both
562 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
563 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
565 if (rightChimeric || leftChimeric) {
566 cout << querySeq.getName() << "\tyes" << endl;
567 outAccString += querySeq.getName() + "\n";
570 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
572 //write to accnos file
573 int length = outAccString.length();
574 char* buf2 = new char[length];
575 memcpy(buf2, outAccString.c_str(), length);
577 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
581 string newAligned = trim.getAligned();
583 //right side is fine so keep that
584 if ((leftChimeric) && (!rightChimeric)) {
585 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
586 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
587 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
588 }else { //both sides are chimeric, keep longest piece
590 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
591 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
593 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
594 int length = lengthLeftLeft;
595 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
597 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
598 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
600 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
601 if (lengthRightRight > length) { longest = 4; }
603 if (longest == 1) { //leftleft
604 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
605 }else if (longest == 2) { //leftright
606 //get rid of leftleft
607 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
609 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
610 }else if (longest == 3) { //rightleft
612 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
613 //get rid of rightright
614 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
617 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
618 //get rid of rightleft
619 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
623 trim.setAligned(newAligned);
629 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
630 outputString += "\n";
632 //write to output file
633 int length = outputString.length();
634 char* buf = new char[length];
635 memcpy(buf, outputString.c_str(), length);
637 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
641 outputString += querySeq.getName() + "\tno\n";
643 //write to output file
644 int length = outputString.length();
645 char* buf = new char[length];
646 memcpy(buf, outputString.c_str(), length);
648 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
655 catch(exception& e) {
656 m->errorOut(e, "ChimeraSlayer", "print");
660 //***************************************************************************************************************
661 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
664 bool results = false;
665 string outAccString = "";
666 string outputString = "";
669 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
671 if (chimeraFlags == "yes") {
672 string chimeraFlag = "no";
673 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
675 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
678 if (chimeraFlag == "yes") {
679 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
680 cout << querySeq.getName() << "\tyes" << endl;
681 outAccString += querySeq.getName() + "\n";
684 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
686 //write to accnos file
687 int length = outAccString.length();
688 char* buf2 = new char[length];
689 memcpy(buf2, outAccString.c_str(), length);
691 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
695 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
696 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
698 string newAligned = trim.getAligned();
699 if (lengthLeft > lengthRight) { //trim right
700 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
702 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
704 trim.setAligned(newAligned);
709 outputString = getBlock(chimeraResults[0], chimeraFlag);
710 outputString += "\n";
712 //write to output file
713 int length = outputString.length();
714 char* buf = new char[length];
715 memcpy(buf, outputString.c_str(), length);
717 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
721 outputString += querySeq.getName() + "\tno\n";
723 //write to output file
724 int length = outputString.length();
725 char* buf = new char[length];
726 memcpy(buf, outputString.c_str(), length);
728 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
734 catch(exception& e) {
735 m->errorOut(e, "ChimeraSlayer", "print");
741 //***************************************************************************************************************
742 int ChimeraSlayer::getChimeras(Sequence* query) {
745 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
746 printResults.trimQuery = trimQuery;
749 printResults.flag = "no";
753 //you must create a template
754 vector<Sequence*> thisTemplate;
755 vector<Sequence*> thisFilteredTemplate;
756 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
757 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
759 if (m->control_pressed) { return 0; }
760 if (thisTemplate.size() == 0) { return 0; } //not chimeric
762 //moved this out of maligner - 4/29/11
763 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
765 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
766 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
768 if (templateFileName == "self") {
769 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
770 else if (searchMethod == "blast") { delete databaseLeft; }
773 if (m->control_pressed) { return 0; }
775 string chimeraFlag = maligner.getResults(*query, decalc);
777 if (m->control_pressed) { return 0; }
779 vector<results> Results = maligner.getOutput();
781 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
783 if (chimeraFlag == "yes") {
786 vector<string> parents;
787 for (int i = 0; i < Results.size(); i++) {
788 parents.push_back(Results[i].parentAligned);
791 ChimeraReAligner realigner;
792 realigner.reAlign(query, parents);
796 // cout << query->getAligned() << endl;
797 //get sequence that were given from maligner results
798 vector<SeqCompare> seqs;
799 map<string, float> removeDups;
800 map<string, float>::iterator itDup;
801 map<string, string> parentNameSeq;
802 map<string, string>::iterator itSeq;
803 for (int j = 0; j < Results.size(); j++) {
805 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
806 //only add if you are not a duplicate
807 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
810 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
812 itDup = removeDups.find(Results[j].parent);
813 if (itDup == removeDups.end()) { //this is not duplicate
814 removeDups[Results[j].parent] = dist;
815 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
816 }else if (dist > itDup->second) { //is this a stronger number for this parent
817 removeDups[Results[j].parent] = dist;
818 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
825 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
826 itSeq = parentNameSeq.find(itDup->first);
827 Sequence seq(itDup->first, itSeq->second);
831 member.dist = itDup->second;
832 seqs.push_back(member);
835 //limit number of parents to explore - default 3
836 if (Results.size() > parents) {
838 sort(seqs.begin(), seqs.end(), compareSeqCompare);
839 //prioritize larger more similiar sequence fragments
840 reverse(seqs.begin(), seqs.end());
842 //for (int k = seqs.size()-1; k > (parents-1); k--) {
843 // delete seqs[k].seq;
848 //put seqs into vector to send to slayer
850 // cout << query->getAligned() << endl;
851 vector<Sequence> seqsForSlayer;
852 for (int k = 0; k < seqs.size(); k++) {
853 // cout << seqs[k].seq->getAligned() << endl;
854 seqsForSlayer.push_back(seqs[k].seq);
855 // cout << seqs[k].seq->getName() << endl;
858 if (m->control_pressed) { return 0; }
861 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
862 if (m->control_pressed) { return 0; }
863 chimeraResults = slayer.getOutput();
865 printResults.flag = chimeraFlags;
866 printResults.results = chimeraResults;
869 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
871 //cout << endl << endl;
874 catch(exception& e) {
875 m->errorOut(e, "ChimeraSlayer", "getChimeras");
879 //***************************************************************************************************************
880 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
882 out << querySeq.getName() << '\t';
883 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
885 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
886 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
888 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
891 catch(exception& e) {
892 m->errorOut(e, "ChimeraSlayer", "printBlock");
896 //***************************************************************************************************************
897 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
900 if ((leftChimeric) && (!rightChimeric)) { //print left
901 out << querySeq.getName() << '\t';
902 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
904 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
905 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
907 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
909 }else if ((!leftChimeric) && (rightChimeric)) { //print right
910 out << querySeq.getName() << '\t';
911 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
913 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
914 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
916 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
918 }else { //print both results
919 if (leftdata.flag == "yes") {
920 out << querySeq.getName() + "_LEFT" << '\t';
921 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
923 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
924 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
926 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
929 if (rightdata.flag == "yes") {
930 if (leftdata.flag == "yes") { out << endl; }
932 out << querySeq.getName() + "_RIGHT"<< '\t';
933 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
935 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
936 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
938 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
943 catch(exception& e) {
944 m->errorOut(e, "ChimeraSlayer", "printBlock");
948 //***************************************************************************************************************
949 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
954 if ((leftChimeric) && (!rightChimeric)) { //get left
955 out += querySeq.getName() + "\t";
956 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
958 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
959 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
961 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
963 }else if ((!leftChimeric) && (rightChimeric)) { //print right
964 out += querySeq.getName() + "\t";
965 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
967 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
968 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
970 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
972 }else { //print both results
974 if (leftdata.flag == "yes") {
975 out += querySeq.getName() + "_LEFT\t";
976 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
978 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
979 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
981 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
984 if (rightdata.flag == "yes") {
985 if (leftdata.flag == "yes") { out += "\n"; }
986 out += querySeq.getName() + "_RIGHT\t";
987 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
989 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
990 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
992 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
999 catch(exception& e) {
1000 m->errorOut(e, "ChimeraSlayer", "getBlock");
1004 //***************************************************************************************************************
1005 string ChimeraSlayer::getBlock(data_struct data, string flag){
1008 string outputString = "";
1010 outputString += querySeq.getName() + "\t";
1011 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1013 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1014 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1016 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1018 return outputString;
1020 catch(exception& e) {
1021 m->errorOut(e, "ChimeraSlayer", "getBlock");
1025 //***************************************************************************************************************
1026 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1029 vector<Sequence> refSeqs;
1031 if (searchMethod == "distance") {
1032 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1033 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1035 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1037 }else if (searchMethod == "blast") {
1038 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1039 }else if (searchMethod == "kmer") {
1040 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1041 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1045 catch(exception& e) {
1046 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1050 //***************************************************************************************************************/
1051 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1054 vector<Sequence> refResults;
1056 //get parts of query
1057 string queryUnAligned = q.getUnaligned();
1058 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1059 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1060 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1061 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1062 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1064 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1065 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1068 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1069 // vector<int> smaller;
1070 // vector<int> larger;
1072 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1073 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1077 map<int, int>::iterator it;
1078 vector<int> mergedResults;
1081 // for (int i = 0; i < smaller.size(); i++) {
1082 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1084 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1086 //add left if you havent already
1087 it = seen.find(tempIndexesLeft[index]);
1088 if (it == seen.end()) {
1089 mergedResults.push_back(tempIndexesLeft[index]);
1090 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1093 //add right if you havent already
1094 it = seen.find(tempIndexesRight[index]);
1095 if (it == seen.end()) {
1096 mergedResults.push_back(tempIndexesRight[index]);
1097 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1103 for (int i = index; i < tempIndexesLeft.size(); i++) {
1104 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1106 //add right if you havent already
1107 it = seen.find(tempIndexesLeft[i]);
1108 if (it == seen.end()) {
1109 mergedResults.push_back(tempIndexesLeft[i]);
1110 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1114 for (int i = index; i < tempIndexesRight.size(); i++) {
1115 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1117 //add right if you havent already
1118 it = seen.find(tempIndexesRight[i]);
1119 if (it == seen.end()) {
1120 mergedResults.push_back(tempIndexesRight[i]);
1121 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1124 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1125 //cout << qname << endl;
1127 if (mergedResults.size() == 0) { numNoParents++; }
1129 for (int i = 0; i < mergedResults.size(); i++) {
1130 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1131 if (db[mergedResults[i]]->getName() != q.getName()) {
1132 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1133 refResults.push_back(temp);
1136 //cout << endl << endl;
1143 catch(exception& e) {
1144 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1148 //***************************************************************************************************************
1149 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1151 vector<Sequence> refResults;
1153 //get parts of query
1154 string queryUnAligned = q.getUnaligned();
1155 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1156 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1158 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1159 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1161 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1162 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1166 map<int, int>::iterator it;
1167 vector<int> mergedResults;
1170 // for (int i = 0; i < smaller.size(); i++) {
1171 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1173 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1175 //add left if you havent already
1176 it = seen.find(tempIndexesLeft[index]);
1177 if (it == seen.end()) {
1178 mergedResults.push_back(tempIndexesLeft[index]);
1179 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1182 //add right if you havent already
1183 it = seen.find(tempIndexesRight[index]);
1184 if (it == seen.end()) {
1185 mergedResults.push_back(tempIndexesRight[index]);
1186 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1192 for (int i = index; i < tempIndexesLeft.size(); i++) {
1193 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1195 //add right if you havent already
1196 it = seen.find(tempIndexesLeft[i]);
1197 if (it == seen.end()) {
1198 mergedResults.push_back(tempIndexesLeft[i]);
1199 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1203 for (int i = index; i < tempIndexesRight.size(); i++) {
1204 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1206 //add right if you havent already
1207 it = seen.find(tempIndexesRight[i]);
1208 if (it == seen.end()) {
1209 mergedResults.push_back(tempIndexesRight[i]);
1210 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1214 for (int i = 0; i < mergedResults.size(); i++) {
1215 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1216 if (db[mergedResults[i]]->getName() != q.getName()) {
1217 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1218 refResults.push_back(temp);
1229 catch(exception& e) {
1230 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1234 //***************************************************************************************************************