5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 //run filter on template
76 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { break; } runFilter(templateSeqs[i]); }
81 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
88 if (searchMethod == "distance") {
89 //read in all query seqs
90 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
92 vector<Sequence*> temp = templateSeqs;
93 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
95 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
97 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
99 if (m->control_pressed) { return 0; }
101 //run filter on template copying templateSeqs into filteredTemplateSeqs
102 for (int i = 0; i < templateSeqs.size(); i++) {
103 if (m->control_pressed) { return 0; }
105 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
107 filteredTemplateSeqs.push_back(newSeq);
110 string kmerDBNameLeft;
111 string kmerDBNameRight;
113 //generate the kmerdb to pass to maligner
114 if (searchMethod == "kmer") {
115 string templatePath = m->hasPath(templateFileName);
116 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
117 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
119 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
120 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
122 for (int i = 0; i < templateSeqs.size(); i++) {
124 if (m->control_pressed) { return 0; }
126 string leftFrag = templateSeqs[i]->getUnaligned();
127 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
129 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
130 databaseLeft->addSequence(leftTemp);
132 databaseLeft->generateDB();
133 databaseLeft->setNumSeqs(templateSeqs.size());
135 for (int i = 0; i < templateSeqs.size(); i++) {
136 if (m->control_pressed) { return 0; }
138 string rightFrag = templateSeqs[i]->getUnaligned();
139 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
141 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
142 databaseRight->addSequence(rightTemp);
144 databaseRight->generateDB();
145 databaseRight->setNumSeqs(templateSeqs.size());
149 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
150 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
151 bool needToGenerateLeft = true;
153 if(kmerFileTestLeft){
154 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
155 if (GoodFile) { needToGenerateLeft = false; }
158 if(needToGenerateLeft){
160 for (int i = 0; i < templateSeqs.size(); i++) {
162 if (m->control_pressed) { return 0; }
164 string leftFrag = templateSeqs[i]->getUnaligned();
165 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
167 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
168 databaseLeft->addSequence(leftTemp);
170 databaseLeft->generateDB();
173 databaseLeft->readKmerDB(kmerFileTestLeft);
175 kmerFileTestLeft.close();
177 databaseLeft->setNumSeqs(templateSeqs.size());
180 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
181 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
182 bool needToGenerateRight = true;
184 if(kmerFileTestRight){
185 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
186 if (GoodFile) { needToGenerateRight = false; }
189 if(needToGenerateRight){
191 for (int i = 0; i < templateSeqs.size(); i++) {
192 if (m->control_pressed) { return 0; }
194 string rightFrag = templateSeqs[i]->getUnaligned();
195 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
197 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
198 databaseRight->addSequence(rightTemp);
200 databaseRight->generateDB();
203 databaseRight->readKmerDB(kmerFileTestRight);
205 kmerFileTestRight.close();
207 databaseRight->setNumSeqs(templateSeqs.size());
209 }else if (searchMethod == "blast") {
212 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
214 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
215 databaseLeft->generateDB();
216 databaseLeft->setNumSeqs(templateSeqs.size());
222 catch(exception& e) {
223 m->errorOut(e, "ChimeraSlayer", "doprep");
227 //***************************************************************************************************************
228 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
231 //when template=self, the query file is sorted from most abundance to least abundant
232 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
233 vector<Sequence*> userTemplate;
235 int myAbund = priority[q->getName()];
237 for (int i = 0; i < templateSeqs.size(); i++) {
239 if (m->control_pressed) { return userTemplate; }
241 //have I reached a sequence with the same abundance as myself?
242 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
244 //if its am not chimeric add it
245 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
246 userTemplate.push_back(templateSeqs[i]);
247 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
251 string kmerDBNameLeft;
252 string kmerDBNameRight;
254 //generate the kmerdb to pass to maligner
255 if (searchMethod == "kmer") {
256 string templatePath = m->hasPath(templateFileName);
257 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
258 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
260 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
261 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
263 for (int i = 0; i < userTemplate.size(); i++) {
265 if (m->control_pressed) { return userTemplate; }
267 string leftFrag = userTemplate[i]->getUnaligned();
268 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
270 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
271 databaseLeft->addSequence(leftTemp);
273 databaseLeft->generateDB();
274 databaseLeft->setNumSeqs(userTemplate.size());
276 for (int i = 0; i < userTemplate.size(); i++) {
277 if (m->control_pressed) { return userTemplate; }
279 string rightFrag = userTemplate[i]->getUnaligned();
280 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
282 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
283 databaseRight->addSequence(rightTemp);
285 databaseRight->generateDB();
286 databaseRight->setNumSeqs(userTemplate.size());
291 for (int i = 0; i < userTemplate.size(); i++) {
293 if (m->control_pressed) { return userTemplate; }
295 string leftFrag = userTemplate[i]->getUnaligned();
296 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
298 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
299 databaseLeft->addSequence(leftTemp);
301 databaseLeft->generateDB();
302 databaseLeft->setNumSeqs(userTemplate.size());
304 for (int i = 0; i < userTemplate.size(); i++) {
305 if (m->control_pressed) { return userTemplate; }
307 string rightFrag = userTemplate[i]->getUnaligned();
308 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
310 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
311 databaseRight->addSequence(rightTemp);
313 databaseRight->generateDB();
314 databaseRight->setNumSeqs(userTemplate.size());
316 }else if (searchMethod == "blast") {
319 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
321 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
322 databaseLeft->generateDB();
323 databaseLeft->setNumSeqs(userTemplate.size());
329 catch(exception& e) {
330 m->errorOut(e, "ChimeraSlayer", "getTemplate");
335 //***************************************************************************************************************
336 ChimeraSlayer::~ChimeraSlayer() {
338 if (templateFileName != "self") {
339 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
340 else if (searchMethod == "blast") { delete databaseLeft; }
343 //***************************************************************************************************************
344 void ChimeraSlayer::printHeader(ostream& out) {
345 m->mothurOutEndLine();
346 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
347 m->mothurOutEndLine();
349 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
351 //***************************************************************************************************************
352 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
354 Sequence* trim = NULL;
355 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
357 if (chimeraFlags == "yes") {
358 string chimeraFlag = "no";
359 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
361 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
364 if (chimeraFlag == "yes") {
365 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
366 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
367 outAcc << querySeq->getName() << endl;
369 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
372 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
373 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
375 string newAligned = trim->getAligned();
377 if (lengthLeft > lengthRight) { //trim right
378 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
380 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
382 trim->setAligned(newAligned);
387 printBlock(chimeraResults[0], chimeraFlag, out);
390 out << querySeq->getName() << "\tno" << endl;
396 catch(exception& e) {
397 m->errorOut(e, "ChimeraSlayer", "print");
401 //***************************************************************************************************************
402 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
404 Sequence* trim = NULL;
407 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
408 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
411 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
413 string chimeraFlag = "no";
414 if (leftPiece.flag == "yes") {
416 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
418 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
421 if (rightPiece.flag == "yes") {
422 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
424 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
427 bool rightChimeric = false;
428 bool leftChimeric = false;
430 if (chimeraFlag == "yes") {
431 //which peice is chimeric or are both
432 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
433 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
435 if (rightChimeric || leftChimeric) {
436 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
437 outAcc << querySeq->getName() << endl;
439 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
442 string newAligned = trim->getAligned();
444 //right side is fine so keep that
445 if ((leftChimeric) && (!rightChimeric)) {
446 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
447 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
448 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449 }else { //both sides are chimeric, keep longest piece
451 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
452 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
454 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
455 int length = lengthLeftLeft;
456 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
458 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
459 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
461 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
462 if (lengthRightRight > length) { longest = 4; }
464 if (longest == 1) { //leftleft
465 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466 }else if (longest == 2) { //leftright
467 //get rid of leftleft
468 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
470 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
471 }else if (longest == 3) { //rightleft
473 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
474 //get rid of rightright
475 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
478 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
479 //get rid of rightleft
480 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
484 trim->setAligned(newAligned);
490 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
493 out << querySeq->getName() << "\tno" << endl;
499 catch(exception& e) {
500 m->errorOut(e, "ChimeraSlayer", "print");
506 //***************************************************************************************************************
507 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
510 bool results = false;
511 string outAccString = "";
512 string outputString = "";
514 Sequence* trim = NULL;
517 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
518 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
522 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
524 string chimeraFlag = "no";
525 if (leftPiece.flag == "yes") {
527 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
529 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
532 if (rightPiece.flag == "yes") {
533 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
535 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
538 bool rightChimeric = false;
539 bool leftChimeric = false;
541 if (chimeraFlag == "yes") {
542 //which peice is chimeric or are both
543 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
544 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
546 if (rightChimeric || leftChimeric) {
547 cout << querySeq->getName() << "\tyes" << endl;
548 outAccString += querySeq->getName() + "\n";
551 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
553 //write to accnos file
554 int length = outAccString.length();
555 char* buf2 = new char[length];
556 memcpy(buf2, outAccString.c_str(), length);
558 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
562 string newAligned = trim->getAligned();
564 //right side is fine so keep that
565 if ((leftChimeric) && (!rightChimeric)) {
566 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
567 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
568 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
569 }else { //both sides are chimeric, keep longest piece
571 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
572 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
574 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
575 int length = lengthLeftLeft;
576 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
578 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
579 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
581 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
582 if (lengthRightRight > length) { longest = 4; }
584 if (longest == 1) { //leftleft
585 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586 }else if (longest == 2) { //leftright
587 //get rid of leftleft
588 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
590 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
591 }else if (longest == 3) { //rightleft
593 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
594 //get rid of rightright
595 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
598 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
599 //get rid of rightleft
600 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
604 trim->setAligned(newAligned);
610 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
611 outputString += "\n";
613 //write to output file
614 int length = outputString.length();
615 char* buf = new char[length];
616 memcpy(buf, outputString.c_str(), length);
618 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
622 outputString += querySeq->getName() + "\tno\n";
624 //write to output file
625 int length = outputString.length();
626 char* buf = new char[length];
627 memcpy(buf, outputString.c_str(), length);
629 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
636 catch(exception& e) {
637 m->errorOut(e, "ChimeraSlayer", "print");
641 //***************************************************************************************************************
642 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
645 bool results = false;
646 string outAccString = "";
647 string outputString = "";
649 Sequence* trim = NULL;
650 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
652 if (chimeraFlags == "yes") {
653 string chimeraFlag = "no";
654 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
656 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
659 if (chimeraFlag == "yes") {
660 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
661 cout << querySeq->getName() << "\tyes" << endl;
662 outAccString += querySeq->getName() + "\n";
665 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
667 //write to accnos file
668 int length = outAccString.length();
669 char* buf2 = new char[length];
670 memcpy(buf2, outAccString.c_str(), length);
672 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
676 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
677 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
679 string newAligned = trim->getAligned();
680 if (lengthLeft > lengthRight) { //trim right
681 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
683 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
685 trim->setAligned(newAligned);
690 outputString = getBlock(chimeraResults[0], chimeraFlag);
691 outputString += "\n";
693 //write to output file
694 int length = outputString.length();
695 char* buf = new char[length];
696 memcpy(buf, outputString.c_str(), length);
698 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
702 outputString += querySeq->getName() + "\tno\n";
704 //write to output file
705 int length = outputString.length();
706 char* buf = new char[length];
707 memcpy(buf, outputString.c_str(), length);
709 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
715 catch(exception& e) {
716 m->errorOut(e, "ChimeraSlayer", "print");
722 //***************************************************************************************************************
723 int ChimeraSlayer::getChimeras(Sequence* query) {
726 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
727 printResults.trimQuery = trimQuery;
730 printResults.flag = "no";
734 //you must create a template
735 vector<Sequence*> thisTemplate;
736 vector<Sequence*> thisFilteredTemplate;
737 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
738 else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
740 if (m->control_pressed) { return 0; }
742 if (thisTemplate.size() == 0) { return 0; } //not chimeric
744 //moved this out of maligner - 4/29/11
745 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
747 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
748 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
750 if (templateFileName == "self") {
751 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
752 else if (searchMethod == "blast") { delete databaseLeft; }
755 if (m->control_pressed) { return 0; }
757 string chimeraFlag = maligner.getResults(query, decalc);
759 if (m->control_pressed) { return 0; }
761 vector<results> Results = maligner.getOutput();
763 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
765 if (chimeraFlag == "yes") {
768 ChimeraReAligner realigner;
769 realigner.reAlign(query, Results);
772 //get sequence that were given from maligner results
773 vector<SeqDist> seqs;
774 map<string, float> removeDups;
775 map<string, float>::iterator itDup;
776 map<string, string> parentNameSeq;
777 map<string, string>::iterator itSeq;
778 for (int j = 0; j < Results.size(); j++) {
779 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
780 //only add if you are not a duplicate
781 itDup = removeDups.find(Results[j].parent);
782 if (itDup == removeDups.end()) { //this is not duplicate
783 removeDups[Results[j].parent] = dist;
784 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
785 }else if (dist > itDup->second) { //is this a stronger number for this parent
786 removeDups[Results[j].parent] = dist;
787 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
791 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
792 itSeq = parentNameSeq.find(itDup->first);
793 Sequence* seq = new Sequence(itDup->first, itSeq->second);
797 member.dist = itDup->second;
799 seqs.push_back(member);
802 //limit number of parents to explore - default 3
803 if (Results.size() > parents) {
805 sort(seqs.begin(), seqs.end(), compareSeqDist);
806 //prioritize larger more similiar sequence fragments
807 reverse(seqs.begin(), seqs.end());
809 for (int k = seqs.size()-1; k > (parents-1); k--) {
815 //put seqs into vector to send to slayer
816 vector<Sequence*> seqsForSlayer;
817 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
819 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
822 chimeraFlags = slayer.getResults(query, seqsForSlayer);
823 if (m->control_pressed) { return 0; }
824 chimeraResults = slayer.getOutput();
826 printResults.flag = chimeraFlags;
827 printResults.results = chimeraResults;
830 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
835 catch(exception& e) {
836 m->errorOut(e, "ChimeraSlayer", "getChimeras");
840 //***************************************************************************************************************
841 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
843 out << querySeq->getName() << '\t';
844 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
846 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
847 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
849 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
852 catch(exception& e) {
853 m->errorOut(e, "ChimeraSlayer", "printBlock");
857 //***************************************************************************************************************
858 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
861 if ((leftChimeric) && (!rightChimeric)) { //print left
862 out << querySeq->getName() << '\t';
863 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
865 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
866 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
868 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
870 }else if ((!leftChimeric) && (rightChimeric)) { //print right
871 out << querySeq->getName() << '\t';
872 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
874 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
875 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
877 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
879 }else { //print both results
880 if (leftdata.flag == "yes") {
881 out << querySeq->getName() + "_LEFT" << '\t';
882 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
884 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
885 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
887 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
890 if (rightdata.flag == "yes") {
891 if (leftdata.flag == "yes") { out << endl; }
893 out << querySeq->getName() + "_RIGHT"<< '\t';
894 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
896 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
897 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
899 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
904 catch(exception& e) {
905 m->errorOut(e, "ChimeraSlayer", "printBlock");
909 //***************************************************************************************************************
910 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
915 if ((leftChimeric) && (!rightChimeric)) { //get left
916 out += querySeq->getName() + "\t";
917 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
919 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
920 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
922 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
924 }else if ((!leftChimeric) && (rightChimeric)) { //print right
925 out += querySeq->getName() + "\t";
926 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
928 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
929 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
931 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
933 }else { //print both results
935 if (leftdata.flag == "yes") {
936 out += querySeq->getName() + "_LEFT\t";
937 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
939 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
940 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
942 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
945 if (rightdata.flag == "yes") {
946 if (leftdata.flag == "yes") { out += "\n"; }
947 out += querySeq->getName() + "_RIGHT\t";
948 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
950 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
951 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
953 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
960 catch(exception& e) {
961 m->errorOut(e, "ChimeraSlayer", "getBlock");
965 //***************************************************************************************************************
966 string ChimeraSlayer::getBlock(data_struct data, string flag){
969 string outputString = "";
971 outputString += querySeq->getName() + "\t";
972 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
974 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
975 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
977 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
981 catch(exception& e) {
982 m->errorOut(e, "ChimeraSlayer", "getBlock");
986 //***************************************************************************************************************
987 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
990 vector<Sequence*> refSeqs;
992 if (searchMethod == "distance") {
993 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
994 Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
996 refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted);
998 }else if (searchMethod == "blast") {
999 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1000 }else if (searchMethod == "kmer") {
1001 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1002 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1006 catch(exception& e) {
1007 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1011 //***************************************************************************************************************/
1012 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1015 vector<Sequence*> refResults;
1017 //get parts of query
1018 string queryUnAligned = q->getUnaligned();
1019 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1020 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1022 Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery);
1023 Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
1025 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
1026 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
1028 vector<int> smaller;
1031 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1032 else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1036 map<int, int>::iterator it;
1037 vector<int> mergedResults;
1038 for (int i = 0; i < smaller.size(); i++) {
1039 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1041 //add left if you havent already
1042 it = seen.find(smaller[i]);
1043 if (it == seen.end()) {
1044 mergedResults.push_back(smaller[i]);
1045 seen[smaller[i]] = smaller[i];
1048 //add right if you havent already
1049 it = seen.find(larger[i]);
1050 if (it == seen.end()) {
1051 mergedResults.push_back(larger[i]);
1052 seen[larger[i]] = larger[i];
1056 for (int i = smaller.size(); i < larger.size(); i++) {
1057 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1059 //add right if you havent already
1060 it = seen.find(larger[i]);
1061 if (it == seen.end()) {
1062 mergedResults.push_back(larger[i]);
1063 seen[larger[i]] = larger[i];
1067 for (int i = 0; i < mergedResults.size(); i++) {
1069 if (db[mergedResults[i]]->getName() != q->getName()) {
1070 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1071 refResults.push_back(temp);
1081 catch(exception& e) {
1082 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1086 //***************************************************************************************************************
1087 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1089 vector<Sequence*> refResults;
1091 //get parts of query
1092 string queryUnAligned = q->getUnaligned();
1093 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1094 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1096 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1097 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1099 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1100 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1104 map<int, int>::iterator it;
1105 vector<int> mergedResults;
1106 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1108 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1110 //add left if you havent already
1111 it = seen.find(tempIndexesLeft[i]);
1112 if (it == seen.end()) {
1113 mergedResults.push_back(tempIndexesLeft[i]);
1114 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1117 //add right if you havent already
1118 it = seen.find(tempIndexesRight[i]);
1119 if (it == seen.end()) {
1120 mergedResults.push_back(tempIndexesRight[i]);
1121 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1125 //numWanted = mergedResults.size();
1127 //cout << q->getName() << endl;
1129 for (int i = 0; i < mergedResults.size(); i++) {
1130 //cout << db[mergedResults[i]]->getName() << endl;
1131 if (db[mergedResults[i]]->getName() != q->getName()) {
1132 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1133 refResults.push_back(temp);
1142 catch(exception& e) {
1143 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1147 //***************************************************************************************************************