5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 //run filter on template
76 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { break; } runFilter(templateSeqs[i]); }
81 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
88 if (searchMethod == "distance") {
89 //read in all query seqs
90 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
92 vector<Sequence*> temp = templateSeqs;
93 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
95 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
97 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
99 if (m->control_pressed) { return 0; }
101 //run filter on template copying templateSeqs into filteredTemplateSeqs
102 for (int i = 0; i < templateSeqs.size(); i++) {
103 if (m->control_pressed) { return 0; }
105 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
107 filteredTemplateSeqs.push_back(newSeq);
110 string kmerDBNameLeft;
111 string kmerDBNameRight;
113 //generate the kmerdb to pass to maligner
114 if (searchMethod == "kmer") {
115 string templatePath = m->hasPath(templateFileName);
116 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
117 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
119 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
120 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
122 for (int i = 0; i < templateSeqs.size(); i++) {
124 if (m->control_pressed) { return 0; }
126 string leftFrag = templateSeqs[i]->getUnaligned();
127 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
129 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
130 databaseLeft->addSequence(leftTemp);
132 databaseLeft->generateDB();
133 databaseLeft->setNumSeqs(templateSeqs.size());
135 for (int i = 0; i < templateSeqs.size(); i++) {
136 if (m->control_pressed) { return 0; }
138 string rightFrag = templateSeqs[i]->getUnaligned();
139 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
141 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
142 databaseRight->addSequence(rightTemp);
144 databaseRight->generateDB();
145 databaseRight->setNumSeqs(templateSeqs.size());
149 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
150 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
151 bool needToGenerateLeft = true;
153 if(kmerFileTestLeft){
154 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
155 if (GoodFile) { needToGenerateLeft = false; }
158 if(needToGenerateLeft){
160 for (int i = 0; i < templateSeqs.size(); i++) {
162 if (m->control_pressed) { return 0; }
164 string leftFrag = templateSeqs[i]->getUnaligned();
165 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
167 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
168 databaseLeft->addSequence(leftTemp);
170 databaseLeft->generateDB();
173 databaseLeft->readKmerDB(kmerFileTestLeft);
175 kmerFileTestLeft.close();
177 databaseLeft->setNumSeqs(templateSeqs.size());
180 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
181 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
182 bool needToGenerateRight = true;
184 if(kmerFileTestRight){
185 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
186 if (GoodFile) { needToGenerateRight = false; }
189 if(needToGenerateRight){
191 for (int i = 0; i < templateSeqs.size(); i++) {
192 if (m->control_pressed) { return 0; }
194 string rightFrag = templateSeqs[i]->getUnaligned();
195 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
197 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
198 databaseRight->addSequence(rightTemp);
200 databaseRight->generateDB();
203 databaseRight->readKmerDB(kmerFileTestRight);
205 kmerFileTestRight.close();
207 databaseRight->setNumSeqs(templateSeqs.size());
209 }else if (searchMethod == "blast") {
212 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
214 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
215 databaseLeft->generateDB();
216 databaseLeft->setNumSeqs(templateSeqs.size());
222 catch(exception& e) {
223 m->errorOut(e, "ChimeraSlayer", "doprep");
227 //***************************************************************************************************************
228 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
231 //when template=self, the query file is sorted from most abundance to least abundant
232 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
233 vector<Sequence*> userTemplate;
235 int myAbund = priority[q->getName()];
237 for (int i = 0; i < templateSeqs.size(); i++) {
239 if (m->control_pressed) { return userTemplate; }
241 //have I reached a sequence with the same abundance as myself?
242 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
244 //if its am not chimeric add it
245 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
246 userTemplate.push_back(templateSeqs[i]);
247 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
251 string kmerDBNameLeft;
252 string kmerDBNameRight;
254 //generate the kmerdb to pass to maligner
255 if (searchMethod == "kmer") {
256 string templatePath = m->hasPath(templateFileName);
257 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
258 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
260 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
261 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
263 for (int i = 0; i < userTemplate.size(); i++) {
265 if (m->control_pressed) { return userTemplate; }
267 string leftFrag = userTemplate[i]->getUnaligned();
268 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
270 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
271 databaseLeft->addSequence(leftTemp);
273 databaseLeft->generateDB();
274 databaseLeft->setNumSeqs(userTemplate.size());
276 for (int i = 0; i < userTemplate.size(); i++) {
277 if (m->control_pressed) { return userTemplate; }
279 string rightFrag = userTemplate[i]->getUnaligned();
280 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
282 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
283 databaseRight->addSequence(rightTemp);
285 databaseRight->generateDB();
286 databaseRight->setNumSeqs(userTemplate.size());
291 for (int i = 0; i < userTemplate.size(); i++) {
293 if (m->control_pressed) { return userTemplate; }
295 string leftFrag = userTemplate[i]->getUnaligned();
296 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
298 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
299 databaseLeft->addSequence(leftTemp);
301 databaseLeft->generateDB();
302 databaseLeft->setNumSeqs(userTemplate.size());
304 for (int i = 0; i < userTemplate.size(); i++) {
305 if (m->control_pressed) { return userTemplate; }
307 string rightFrag = userTemplate[i]->getUnaligned();
308 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
310 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
311 databaseRight->addSequence(rightTemp);
313 databaseRight->generateDB();
314 databaseRight->setNumSeqs(userTemplate.size());
316 }else if (searchMethod == "blast") {
319 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
321 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
322 databaseLeft->generateDB();
323 databaseLeft->setNumSeqs(userTemplate.size());
329 catch(exception& e) {
330 m->errorOut(e, "ChimeraSlayer", "getTemplate");
335 //***************************************************************************************************************
336 ChimeraSlayer::~ChimeraSlayer() {
338 if (templateFileName != "self") {
339 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
340 else if (searchMethod == "blast") { delete databaseLeft; }
343 //***************************************************************************************************************
344 void ChimeraSlayer::printHeader(ostream& out) {
345 m->mothurOutEndLine();
346 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
347 m->mothurOutEndLine();
349 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
351 //***************************************************************************************************************
352 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
354 Sequence* trim = NULL;
355 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
357 if (chimeraFlags == "yes") {
358 string chimeraFlag = "no";
359 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
361 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
364 if (chimeraFlag == "yes") {
365 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
366 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
367 outAcc << querySeq->getName() << endl;
369 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
372 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
373 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
375 string newAligned = trim->getAligned();
377 if (lengthLeft > lengthRight) { //trim right
378 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
380 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
382 trim->setAligned(newAligned);
387 printBlock(chimeraResults[0], chimeraFlag, out);
390 out << querySeq->getName() << "\tno" << endl;
396 catch(exception& e) {
397 m->errorOut(e, "ChimeraSlayer", "print");
401 //***************************************************************************************************************
402 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
404 Sequence* trim = NULL;
407 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
408 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
411 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
413 string chimeraFlag = "no";
414 if (leftPiece.flag == "yes") {
416 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
418 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
421 if (rightPiece.flag == "yes") {
422 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
424 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
427 bool rightChimeric = false;
428 bool leftChimeric = false;
430 if (chimeraFlag == "yes") {
431 //which peice is chimeric or are both
432 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
433 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
435 if (rightChimeric || leftChimeric) {
436 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
437 outAcc << querySeq->getName() << endl;
439 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
442 string newAligned = trim->getAligned();
444 //right side is fine so keep that
445 if ((leftChimeric) && (!rightChimeric)) {
446 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
447 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
448 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449 }else { //both sides are chimeric, keep longest piece
451 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
452 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
454 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
455 int length = lengthLeftLeft;
456 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
458 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
459 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
461 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
462 if (lengthRightRight > length) { longest = 4; }
464 if (longest == 1) { //leftleft
465 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466 }else if (longest == 2) { //leftright
467 //get rid of leftleft
468 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
470 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
471 }else if (longest == 3) { //rightleft
473 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
474 //get rid of rightright
475 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
478 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
479 //get rid of rightleft
480 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
484 trim->setAligned(newAligned);
490 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
493 out << querySeq->getName() << "\tno" << endl;
499 catch(exception& e) {
500 m->errorOut(e, "ChimeraSlayer", "print");
506 //***************************************************************************************************************
507 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
510 bool results = false;
511 string outAccString = "";
512 string outputString = "";
514 Sequence* trim = NULL;
517 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
518 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
522 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
524 string chimeraFlag = "no";
525 if (leftPiece.flag == "yes") {
527 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
529 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
532 if (rightPiece.flag == "yes") {
533 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
535 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
538 bool rightChimeric = false;
539 bool leftChimeric = false;
541 if (chimeraFlag == "yes") {
542 //which peice is chimeric or are both
543 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
544 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
546 if (rightChimeric || leftChimeric) {
547 cout << querySeq->getName() << "\tyes" << endl;
548 outAccString += querySeq->getName() + "\n";
551 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
553 //write to accnos file
554 int length = outAccString.length();
555 char* buf2 = new char[length];
556 memcpy(buf2, outAccString.c_str(), length);
558 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
562 string newAligned = trim->getAligned();
564 //right side is fine so keep that
565 if ((leftChimeric) && (!rightChimeric)) {
566 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
567 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
568 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
569 }else { //both sides are chimeric, keep longest piece
571 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
572 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
574 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
575 int length = lengthLeftLeft;
576 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
578 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
579 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
581 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
582 if (lengthRightRight > length) { longest = 4; }
584 if (longest == 1) { //leftleft
585 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586 }else if (longest == 2) { //leftright
587 //get rid of leftleft
588 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
590 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
591 }else if (longest == 3) { //rightleft
593 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
594 //get rid of rightright
595 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
598 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
599 //get rid of rightleft
600 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
604 trim->setAligned(newAligned);
610 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
611 outputString += "\n";
613 //write to output file
614 int length = outputString.length();
615 char* buf = new char[length];
616 memcpy(buf, outputString.c_str(), length);
618 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
622 outputString += querySeq->getName() + "\tno\n";
624 //write to output file
625 int length = outputString.length();
626 char* buf = new char[length];
627 memcpy(buf, outputString.c_str(), length);
629 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
636 catch(exception& e) {
637 m->errorOut(e, "ChimeraSlayer", "print");
641 //***************************************************************************************************************
642 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
645 bool results = false;
646 string outAccString = "";
647 string outputString = "";
649 Sequence* trim = NULL;
650 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
652 if (chimeraFlags == "yes") {
653 string chimeraFlag = "no";
654 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
656 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
659 if (chimeraFlag == "yes") {
660 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
661 cout << querySeq->getName() << "\tyes" << endl;
662 outAccString += querySeq->getName() + "\n";
665 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
667 //write to accnos file
668 int length = outAccString.length();
669 char* buf2 = new char[length];
670 memcpy(buf2, outAccString.c_str(), length);
672 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
676 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
677 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
679 string newAligned = trim->getAligned();
680 if (lengthLeft > lengthRight) { //trim right
681 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
683 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
685 trim->setAligned(newAligned);
690 outputString = getBlock(chimeraResults[0], chimeraFlag);
691 outputString += "\n";
693 //write to output file
694 int length = outputString.length();
695 char* buf = new char[length];
696 memcpy(buf, outputString.c_str(), length);
698 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
702 outputString += querySeq->getName() + "\tno\n";
704 //write to output file
705 int length = outputString.length();
706 char* buf = new char[length];
707 memcpy(buf, outputString.c_str(), length);
709 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
715 catch(exception& e) {
716 m->errorOut(e, "ChimeraSlayer", "print");
722 //***************************************************************************************************************
723 int ChimeraSlayer::getChimeras(Sequence* query) {
726 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
727 printResults.trimQuery = trimQuery;
730 printResults.flag = "no";
734 //you must create a template
735 vector<Sequence*> thisTemplate;
736 vector<Sequence*> thisFilteredTemplate;
737 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
738 else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
740 if (m->control_pressed) { return 0; }
742 if (thisTemplate.size() == 0) { return 0; } //not chimeric
744 //moved this out of maligner - 4/29/11
745 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
747 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
748 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
750 if (templateFileName == "self") {
751 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
752 else if (searchMethod == "blast") { delete databaseLeft; }
755 if (m->control_pressed) { return 0; }
757 string chimeraFlag = maligner.getResults(query, decalc);
759 if (m->control_pressed) { return 0; }
761 vector<results> Results = maligner.getOutput();
763 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
765 if (chimeraFlag == "yes") {
768 vector<Sequence*> parents;
769 for (int i = 0; i < Results.size(); i++) {
770 cout << Results[i].parent << '\t' << Results[i].nastRegionStart << '\t' << Results[i].nastRegionEnd << endl;
771 Sequence* parent = new Sequence(Results[i].parent, Results[i].parentAligned);
773 parents.push_back(parent);
776 ChimeraReAligner realigner;
777 //realigner.reAlign(query, parents);
779 for (int i = 0; i < parents.size(); i++) { delete parents[i]; }
781 //query->printSequence(cout);
782 //get sequence that were given from maligner results
783 vector<SeqDist> seqs;
784 map<string, float> removeDups;
785 map<string, float>::iterator itDup;
786 map<string, string> parentNameSeq;
787 map<string, string>::iterator itSeq;
788 for (int j = 0; j < Results.size(); j++) {
789 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
790 //only add if you are not a duplicate
791 itDup = removeDups.find(Results[j].parent);
792 if (itDup == removeDups.end()) { //this is not duplicate
793 removeDups[Results[j].parent] = dist;
794 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
795 }else if (dist > itDup->second) { //is this a stronger number for this parent
796 removeDups[Results[j].parent] = dist;
797 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
801 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
802 itSeq = parentNameSeq.find(itDup->first);
803 Sequence* seq = new Sequence(itDup->first, itSeq->second);
807 member.dist = itDup->second;
809 seqs.push_back(member);
812 //limit number of parents to explore - default 3
813 if (Results.size() > parents) {
815 sort(seqs.begin(), seqs.end(), compareSeqDist);
816 //prioritize larger more similiar sequence fragments
817 reverse(seqs.begin(), seqs.end());
819 for (int k = seqs.size()-1; k > (parents-1); k--) {
825 //put seqs into vector to send to slayer
826 vector<Sequence*> seqsForSlayer;
827 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
829 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
832 chimeraFlags = slayer.getResults(query, seqsForSlayer);
833 if (m->control_pressed) { return 0; }
834 chimeraResults = slayer.getOutput();
836 printResults.flag = chimeraFlags;
837 printResults.results = chimeraResults;
840 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
845 catch(exception& e) {
846 m->errorOut(e, "ChimeraSlayer", "getChimeras");
850 //***************************************************************************************************************
851 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
853 out << querySeq->getName() << '\t';
854 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
856 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
857 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
859 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
862 catch(exception& e) {
863 m->errorOut(e, "ChimeraSlayer", "printBlock");
867 //***************************************************************************************************************
868 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
871 if ((leftChimeric) && (!rightChimeric)) { //print left
872 out << querySeq->getName() << '\t';
873 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
875 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
876 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
878 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
880 }else if ((!leftChimeric) && (rightChimeric)) { //print right
881 out << querySeq->getName() << '\t';
882 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
884 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
885 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
887 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
889 }else { //print both results
890 if (leftdata.flag == "yes") {
891 out << querySeq->getName() + "_LEFT" << '\t';
892 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
894 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
895 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
897 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
900 if (rightdata.flag == "yes") {
901 if (leftdata.flag == "yes") { out << endl; }
903 out << querySeq->getName() + "_RIGHT"<< '\t';
904 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
906 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
907 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
909 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
914 catch(exception& e) {
915 m->errorOut(e, "ChimeraSlayer", "printBlock");
919 //***************************************************************************************************************
920 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
925 if ((leftChimeric) && (!rightChimeric)) { //get left
926 out += querySeq->getName() + "\t";
927 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
929 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
930 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
932 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
934 }else if ((!leftChimeric) && (rightChimeric)) { //print right
935 out += querySeq->getName() + "\t";
936 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
938 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
939 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
941 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
943 }else { //print both results
945 if (leftdata.flag == "yes") {
946 out += querySeq->getName() + "_LEFT\t";
947 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
949 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
950 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
952 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
955 if (rightdata.flag == "yes") {
956 if (leftdata.flag == "yes") { out += "\n"; }
957 out += querySeq->getName() + "_RIGHT\t";
958 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
960 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
961 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
963 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
970 catch(exception& e) {
971 m->errorOut(e, "ChimeraSlayer", "getBlock");
975 //***************************************************************************************************************
976 string ChimeraSlayer::getBlock(data_struct data, string flag){
979 string outputString = "";
981 outputString += querySeq->getName() + "\t";
982 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
984 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
985 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
987 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
991 catch(exception& e) {
992 m->errorOut(e, "ChimeraSlayer", "getBlock");
996 //***************************************************************************************************************
997 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1000 vector<Sequence*> refSeqs;
1002 if (searchMethod == "distance") {
1003 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1004 Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1006 refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted);
1008 }else if (searchMethod == "blast") {
1009 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1010 }else if (searchMethod == "kmer") {
1011 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1012 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1016 catch(exception& e) {
1017 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1021 //***************************************************************************************************************/
1022 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1025 vector<Sequence*> refResults;
1027 //get parts of query
1028 string queryUnAligned = q->getUnaligned();
1029 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1030 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1032 Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery);
1033 Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
1035 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
1036 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
1038 vector<int> smaller;
1041 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1042 else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1046 map<int, int>::iterator it;
1047 vector<int> mergedResults;
1048 for (int i = 0; i < smaller.size(); i++) {
1049 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1051 //add left if you havent already
1052 it = seen.find(smaller[i]);
1053 if (it == seen.end()) {
1054 mergedResults.push_back(smaller[i]);
1055 seen[smaller[i]] = smaller[i];
1058 //add right if you havent already
1059 it = seen.find(larger[i]);
1060 if (it == seen.end()) {
1061 mergedResults.push_back(larger[i]);
1062 seen[larger[i]] = larger[i];
1066 for (int i = smaller.size(); i < larger.size(); i++) {
1067 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1069 //add right if you havent already
1070 it = seen.find(larger[i]);
1071 if (it == seen.end()) {
1072 mergedResults.push_back(larger[i]);
1073 seen[larger[i]] = larger[i];
1077 for (int i = 0; i < mergedResults.size(); i++) {
1078 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1079 if (db[mergedResults[i]]->getName() != q->getName()) {
1080 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1081 refResults.push_back(temp);
1091 catch(exception& e) {
1092 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1096 //***************************************************************************************************************
1097 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1099 vector<Sequence*> refResults;
1101 //get parts of query
1102 string queryUnAligned = q->getUnaligned();
1103 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1104 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1106 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1107 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1109 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1110 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1114 map<int, int>::iterator it;
1115 vector<int> mergedResults;
1116 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1118 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1120 //add left if you havent already
1121 it = seen.find(tempIndexesLeft[i]);
1122 if (it == seen.end()) {
1123 mergedResults.push_back(tempIndexesLeft[i]);
1124 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1127 //add right if you havent already
1128 it = seen.find(tempIndexesRight[i]);
1129 if (it == seen.end()) {
1130 mergedResults.push_back(tempIndexesRight[i]);
1131 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1135 //numWanted = mergedResults.size();
1137 //cout << q->getName() << endl;
1139 for (int i = 0; i < mergedResults.size(); i++) {
1140 //cout << db[mergedResults[i]]->getName() << endl;
1141 if (db[mergedResults[i]]->getName() != q->getName()) {
1142 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1143 refResults.push_back(temp);
1152 catch(exception& e) {
1153 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1157 //***************************************************************************************************************