5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 //run filter on template
76 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { break; } runFilter(templateSeqs[i]); }
81 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
88 if (searchMethod == "distance") {
89 //read in all query seqs
90 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
92 vector<Sequence*> temp = templateSeqs;
93 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
95 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
97 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
99 if (m->control_pressed) { return 0; }
101 //run filter on template copying templateSeqs into filteredTemplateSeqs
102 for (int i = 0; i < templateSeqs.size(); i++) {
103 if (m->control_pressed) { return 0; }
105 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
107 filteredTemplateSeqs.push_back(newSeq);
110 string kmerDBNameLeft;
111 string kmerDBNameRight;
113 //generate the kmerdb to pass to maligner
114 if (searchMethod == "kmer") {
115 string templatePath = m->hasPath(templateFileName);
116 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
117 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
119 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
120 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
122 for (int i = 0; i < templateSeqs.size(); i++) {
124 if (m->control_pressed) { return 0; }
126 string leftFrag = templateSeqs[i]->getUnaligned();
127 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
129 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
130 databaseLeft->addSequence(leftTemp);
132 databaseLeft->generateDB();
133 databaseLeft->setNumSeqs(templateSeqs.size());
135 for (int i = 0; i < templateSeqs.size(); i++) {
136 if (m->control_pressed) { return 0; }
138 string rightFrag = templateSeqs[i]->getUnaligned();
139 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
141 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
142 databaseRight->addSequence(rightTemp);
144 databaseRight->generateDB();
145 databaseRight->setNumSeqs(templateSeqs.size());
149 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
150 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
151 bool needToGenerateLeft = true;
153 if(kmerFileTestLeft){
154 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
155 if (GoodFile) { needToGenerateLeft = false; }
158 if(needToGenerateLeft){
160 for (int i = 0; i < templateSeqs.size(); i++) {
162 if (m->control_pressed) { return 0; }
164 string leftFrag = templateSeqs[i]->getUnaligned();
165 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
167 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
168 databaseLeft->addSequence(leftTemp);
170 databaseLeft->generateDB();
173 databaseLeft->readKmerDB(kmerFileTestLeft);
175 kmerFileTestLeft.close();
177 databaseLeft->setNumSeqs(templateSeqs.size());
180 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
181 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
182 bool needToGenerateRight = true;
184 if(kmerFileTestRight){
185 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
186 if (GoodFile) { needToGenerateRight = false; }
189 if(needToGenerateRight){
191 for (int i = 0; i < templateSeqs.size(); i++) {
192 if (m->control_pressed) { return 0; }
194 string rightFrag = templateSeqs[i]->getUnaligned();
195 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
197 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
198 databaseRight->addSequence(rightTemp);
200 databaseRight->generateDB();
203 databaseRight->readKmerDB(kmerFileTestRight);
205 kmerFileTestRight.close();
207 databaseRight->setNumSeqs(templateSeqs.size());
209 }else if (searchMethod == "blast") {
212 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
214 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
215 databaseLeft->generateDB();
216 databaseLeft->setNumSeqs(templateSeqs.size());
222 catch(exception& e) {
223 m->errorOut(e, "ChimeraSlayer", "doprep");
227 //***************************************************************************************************************
228 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
231 //when template=self, the query file is sorted from most abundance to least abundant
232 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
233 vector<Sequence*> userTemplate;
235 int myAbund = priority[q->getName()];
237 for (int i = 0; i < templateSeqs.size(); i++) {
239 if (m->control_pressed) { return userTemplate; }
241 //have I reached a sequence with the same abundance as myself?
242 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
244 //if its am not chimeric add it
245 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
246 userTemplate.push_back(templateSeqs[i]);
247 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
251 string kmerDBNameLeft;
252 string kmerDBNameRight;
254 //generate the kmerdb to pass to maligner
255 if (searchMethod == "kmer") {
256 string templatePath = m->hasPath(templateFileName);
257 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
258 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
260 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
261 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
263 for (int i = 0; i < userTemplate.size(); i++) {
265 if (m->control_pressed) { return userTemplate; }
267 string leftFrag = userTemplate[i]->getUnaligned();
268 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
270 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
271 databaseLeft->addSequence(leftTemp);
273 databaseLeft->generateDB();
274 databaseLeft->setNumSeqs(userTemplate.size());
276 for (int i = 0; i < userTemplate.size(); i++) {
277 if (m->control_pressed) { return userTemplate; }
279 string rightFrag = userTemplate[i]->getUnaligned();
280 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
282 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
283 databaseRight->addSequence(rightTemp);
285 databaseRight->generateDB();
286 databaseRight->setNumSeqs(userTemplate.size());
291 for (int i = 0; i < userTemplate.size(); i++) {
293 if (m->control_pressed) { return userTemplate; }
295 string leftFrag = userTemplate[i]->getUnaligned();
296 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
298 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
299 databaseLeft->addSequence(leftTemp);
301 databaseLeft->generateDB();
302 databaseLeft->setNumSeqs(userTemplate.size());
304 for (int i = 0; i < userTemplate.size(); i++) {
305 if (m->control_pressed) { return userTemplate; }
307 string rightFrag = userTemplate[i]->getUnaligned();
308 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
310 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
311 databaseRight->addSequence(rightTemp);
313 databaseRight->generateDB();
314 databaseRight->setNumSeqs(userTemplate.size());
316 }else if (searchMethod == "blast") {
319 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
321 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
322 databaseLeft->generateDB();
323 databaseLeft->setNumSeqs(userTemplate.size());
329 catch(exception& e) {
330 m->errorOut(e, "ChimeraSlayer", "getTemplate");
335 //***************************************************************************************************************
336 ChimeraSlayer::~ChimeraSlayer() {
338 if (templateFileName != "self") {
339 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
340 else if (searchMethod == "blast") { delete databaseLeft; }
343 //***************************************************************************************************************
344 void ChimeraSlayer::printHeader(ostream& out) {
345 m->mothurOutEndLine();
346 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
347 m->mothurOutEndLine();
349 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
351 //***************************************************************************************************************
352 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
354 Sequence* trim = NULL;
355 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
357 if (chimeraFlags == "yes") {
358 string chimeraFlag = "no";
359 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
361 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
364 if (chimeraFlag == "yes") {
365 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
366 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
367 outAcc << querySeq->getName() << endl;
369 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
372 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
373 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
375 string newAligned = trim->getAligned();
377 if (lengthLeft > lengthRight) { //trim right
378 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
380 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
382 trim->setAligned(newAligned);
387 printBlock(chimeraResults[0], chimeraFlag, out);
390 out << querySeq->getName() << "\tno" << endl;
396 catch(exception& e) {
397 m->errorOut(e, "ChimeraSlayer", "print");
401 //***************************************************************************************************************
402 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
404 Sequence* trim = NULL;
407 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
408 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
411 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
413 string chimeraFlag = "no";
414 if (leftPiece.flag == "yes") {
416 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
418 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
421 if (rightPiece.flag == "yes") {
422 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
424 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
427 bool rightChimeric = false;
428 bool leftChimeric = false;
430 if (chimeraFlag == "yes") {
431 //which peice is chimeric or are both
432 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
433 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
435 if (rightChimeric || leftChimeric) {
436 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
437 outAcc << querySeq->getName() << endl;
439 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
442 string newAligned = trim->getAligned();
444 //right side is fine so keep that
445 if ((leftChimeric) && (!rightChimeric)) {
446 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
447 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
448 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449 }else { //both sides are chimeric, keep longest piece
451 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
452 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
454 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
455 int length = lengthLeftLeft;
456 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
458 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
459 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
461 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
462 if (lengthRightRight > length) { longest = 4; }
464 if (longest == 1) { //leftleft
465 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466 }else if (longest == 2) { //leftright
467 //get rid of leftleft
468 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
470 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
471 }else if (longest == 3) { //rightleft
473 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
474 //get rid of rightright
475 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
478 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
479 //get rid of rightleft
480 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
484 trim->setAligned(newAligned);
490 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
493 out << querySeq->getName() << "\tno" << endl;
499 catch(exception& e) {
500 m->errorOut(e, "ChimeraSlayer", "print");
506 //***************************************************************************************************************
507 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
510 bool results = false;
511 string outAccString = "";
512 string outputString = "";
514 Sequence* trim = NULL;
517 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
518 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
522 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
524 string chimeraFlag = "no";
525 if (leftPiece.flag == "yes") {
527 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
529 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
532 if (rightPiece.flag == "yes") {
533 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
535 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
538 bool rightChimeric = false;
539 bool leftChimeric = false;
541 if (chimeraFlag == "yes") {
542 //which peice is chimeric or are both
543 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
544 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
546 if (rightChimeric || leftChimeric) {
547 cout << querySeq->getName() << "\tyes" << endl;
548 outAccString += querySeq->getName() + "\n";
551 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
553 //write to accnos file
554 int length = outAccString.length();
555 char* buf2 = new char[length];
556 memcpy(buf2, outAccString.c_str(), length);
558 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
562 string newAligned = trim->getAligned();
564 //right side is fine so keep that
565 if ((leftChimeric) && (!rightChimeric)) {
566 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
567 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
568 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
569 }else { //both sides are chimeric, keep longest piece
571 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
572 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
574 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
575 int length = lengthLeftLeft;
576 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
578 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
579 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
581 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
582 if (lengthRightRight > length) { longest = 4; }
584 if (longest == 1) { //leftleft
585 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586 }else if (longest == 2) { //leftright
587 //get rid of leftleft
588 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
590 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
591 }else if (longest == 3) { //rightleft
593 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
594 //get rid of rightright
595 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
598 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
599 //get rid of rightleft
600 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
604 trim->setAligned(newAligned);
610 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
611 outputString += "\n";
613 //write to output file
614 int length = outputString.length();
615 char* buf = new char[length];
616 memcpy(buf, outputString.c_str(), length);
618 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
622 outputString += querySeq->getName() + "\tno\n";
624 //write to output file
625 int length = outputString.length();
626 char* buf = new char[length];
627 memcpy(buf, outputString.c_str(), length);
629 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
636 catch(exception& e) {
637 m->errorOut(e, "ChimeraSlayer", "print");
641 //***************************************************************************************************************
642 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
645 bool results = false;
646 string outAccString = "";
647 string outputString = "";
649 Sequence* trim = NULL;
650 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
652 if (chimeraFlags == "yes") {
653 string chimeraFlag = "no";
654 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
656 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
659 if (chimeraFlag == "yes") {
660 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
661 cout << querySeq->getName() << "\tyes" << endl;
662 outAccString += querySeq->getName() + "\n";
665 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
667 //write to accnos file
668 int length = outAccString.length();
669 char* buf2 = new char[length];
670 memcpy(buf2, outAccString.c_str(), length);
672 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
676 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
677 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
679 string newAligned = trim->getAligned();
680 if (lengthLeft > lengthRight) { //trim right
681 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
683 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
685 trim->setAligned(newAligned);
690 outputString = getBlock(chimeraResults[0], chimeraFlag);
691 outputString += "\n";
693 //write to output file
694 int length = outputString.length();
695 char* buf = new char[length];
696 memcpy(buf, outputString.c_str(), length);
698 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
702 outputString += querySeq->getName() + "\tno\n";
704 //write to output file
705 int length = outputString.length();
706 char* buf = new char[length];
707 memcpy(buf, outputString.c_str(), length);
709 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
715 catch(exception& e) {
716 m->errorOut(e, "ChimeraSlayer", "print");
722 //***************************************************************************************************************
723 int ChimeraSlayer::getChimeras(Sequence* query) {
726 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
727 printResults.trimQuery = trimQuery;
730 printResults.flag = "no";
734 //you must create a template
735 vector<Sequence*> thisTemplate;
736 vector<Sequence*> thisFilteredTemplate;
737 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
738 else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
740 if (m->control_pressed) { return 0; }
742 if (thisTemplate.size() == 0) { return 0; } //not chimeric
744 //moved this out of maligner - 4/29/11
745 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
747 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
748 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
750 if (templateFileName == "self") {
751 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
752 else if (searchMethod == "blast") { delete databaseLeft; }
755 if (m->control_pressed) { return 0; }
757 string chimeraFlag = maligner.getResults(query, decalc);
759 if (m->control_pressed) { return 0; }
761 vector<results> Results = maligner.getOutput();
763 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
765 if (chimeraFlag == "yes") {
768 vector<string> parents;
769 for (int i = 0; i < Results.size(); i++) {
770 parents.push_back(Results[i].parentAligned);
773 ChimeraReAligner realigner;
774 realigner.reAlign(query, parents);
778 //get sequence that were given from maligner results
779 vector<SeqDist> seqs;
780 map<string, float> removeDups;
781 map<string, float>::iterator itDup;
782 map<string, string> parentNameSeq;
783 map<string, string>::iterator itSeq;
784 for (int j = 0; j < Results.size(); j++) {
785 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
786 //only add if you are not a duplicate
787 itDup = removeDups.find(Results[j].parent);
788 if (itDup == removeDups.end()) { //this is not duplicate
789 removeDups[Results[j].parent] = dist;
790 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
791 }else if (dist > itDup->second) { //is this a stronger number for this parent
792 removeDups[Results[j].parent] = dist;
793 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
797 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
798 itSeq = parentNameSeq.find(itDup->first);
799 Sequence* seq = new Sequence(itDup->first, itSeq->second);
803 member.dist = itDup->second;
805 seqs.push_back(member);
808 //limit number of parents to explore - default 3
809 if (Results.size() > parents) {
811 sort(seqs.begin(), seqs.end(), compareSeqDist);
812 //prioritize larger more similiar sequence fragments
813 reverse(seqs.begin(), seqs.end());
815 for (int k = seqs.size()-1; k > (parents-1); k--) {
821 //put seqs into vector to send to slayer
822 vector<Sequence*> seqsForSlayer;
823 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
825 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
828 chimeraFlags = slayer.getResults(query, seqsForSlayer);
829 if (m->control_pressed) { return 0; }
830 chimeraResults = slayer.getOutput();
832 printResults.flag = chimeraFlags;
833 printResults.results = chimeraResults;
836 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
841 catch(exception& e) {
842 m->errorOut(e, "ChimeraSlayer", "getChimeras");
846 //***************************************************************************************************************
847 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
849 out << querySeq->getName() << '\t';
850 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
852 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
853 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
855 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
858 catch(exception& e) {
859 m->errorOut(e, "ChimeraSlayer", "printBlock");
863 //***************************************************************************************************************
864 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
867 if ((leftChimeric) && (!rightChimeric)) { //print left
868 out << querySeq->getName() << '\t';
869 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
871 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
872 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
874 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
876 }else if ((!leftChimeric) && (rightChimeric)) { //print right
877 out << querySeq->getName() << '\t';
878 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
880 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
881 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
883 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
885 }else { //print both results
886 if (leftdata.flag == "yes") {
887 out << querySeq->getName() + "_LEFT" << '\t';
888 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
890 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
891 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
893 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
896 if (rightdata.flag == "yes") {
897 if (leftdata.flag == "yes") { out << endl; }
899 out << querySeq->getName() + "_RIGHT"<< '\t';
900 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
902 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
903 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
905 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
910 catch(exception& e) {
911 m->errorOut(e, "ChimeraSlayer", "printBlock");
915 //***************************************************************************************************************
916 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
921 if ((leftChimeric) && (!rightChimeric)) { //get left
922 out += querySeq->getName() + "\t";
923 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
925 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
926 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
928 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
930 }else if ((!leftChimeric) && (rightChimeric)) { //print right
931 out += querySeq->getName() + "\t";
932 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
934 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
935 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
937 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
939 }else { //print both results
941 if (leftdata.flag == "yes") {
942 out += querySeq->getName() + "_LEFT\t";
943 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
945 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
946 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
948 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
951 if (rightdata.flag == "yes") {
952 if (leftdata.flag == "yes") { out += "\n"; }
953 out += querySeq->getName() + "_RIGHT\t";
954 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
956 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
957 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
959 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
966 catch(exception& e) {
967 m->errorOut(e, "ChimeraSlayer", "getBlock");
971 //***************************************************************************************************************
972 string ChimeraSlayer::getBlock(data_struct data, string flag){
975 string outputString = "";
977 outputString += querySeq->getName() + "\t";
978 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
980 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
981 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
983 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
987 catch(exception& e) {
988 m->errorOut(e, "ChimeraSlayer", "getBlock");
992 //***************************************************************************************************************
993 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
996 vector<Sequence*> refSeqs;
998 if (searchMethod == "distance") {
999 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1000 Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1002 refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1004 }else if (searchMethod == "blast") {
1005 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1006 }else if (searchMethod == "kmer") {
1007 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1008 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1012 catch(exception& e) {
1013 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1017 //***************************************************************************************************************/
1018 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1021 vector<Sequence*> refResults;
1023 //get parts of query
1024 string queryUnAligned = q->getUnaligned();
1025 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1026 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1028 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1029 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1031 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1032 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1033 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1034 vector<int> smaller;
1037 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1038 else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1042 map<int, int>::iterator it;
1043 vector<int> mergedResults;
1044 for (int i = 0; i < smaller.size(); i++) {
1045 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1047 //add left if you havent already
1048 it = seen.find(smaller[i]);
1049 if (it == seen.end()) {
1050 mergedResults.push_back(smaller[i]);
1051 seen[smaller[i]] = smaller[i];
1054 //add right if you havent already
1055 it = seen.find(larger[i]);
1056 if (it == seen.end()) {
1057 mergedResults.push_back(larger[i]);
1058 seen[larger[i]] = larger[i];
1062 for (int i = smaller.size(); i < larger.size(); i++) {
1063 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1065 //add right if you havent already
1066 it = seen.find(larger[i]);
1067 if (it == seen.end()) {
1068 mergedResults.push_back(larger[i]);
1069 seen[larger[i]] = larger[i];
1073 for (int i = 0; i < mergedResults.size(); i++) {
1074 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1075 if (db[mergedResults[i]]->getName() != q->getName()) {
1076 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1077 refResults.push_back(temp);
1087 catch(exception& e) {
1088 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1092 //***************************************************************************************************************
1093 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1095 vector<Sequence*> refResults;
1097 //get parts of query
1098 string queryUnAligned = q->getUnaligned();
1099 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1100 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1102 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1103 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1105 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1106 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1110 map<int, int>::iterator it;
1111 vector<int> mergedResults;
1112 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1114 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1116 //add left if you havent already
1117 it = seen.find(tempIndexesLeft[i]);
1118 if (it == seen.end()) {
1119 mergedResults.push_back(tempIndexesLeft[i]);
1120 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1123 //add right if you havent already
1124 it = seen.find(tempIndexesRight[i]);
1125 if (it == seen.end()) {
1126 mergedResults.push_back(tempIndexesRight[i]);
1127 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1131 //numWanted = mergedResults.size();
1133 //cout << q->getName() << endl;
1135 for (int i = 0; i < mergedResults.size(); i++) {
1136 //cout << db[mergedResults[i]]->getName() << endl;
1137 if (db[mergedResults[i]]->getName() != q->getName()) {
1138 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1139 refResults.push_back(temp);
1148 catch(exception& e) {
1149 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1153 //***************************************************************************************************************