5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 if (searchMethod == "distance") {
76 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78 //run filter on template copying templateSeqs into filteredTemplateSeqs
79 for (int i = 0; i < templateSeqs.size(); i++) {
80 if (m->control_pressed) { break; }
82 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
84 filteredTemplateSeqs.push_back(newSeq);
89 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
93 //***************************************************************************************************************
94 int ChimeraSlayer::doPrep() {
96 if (searchMethod == "distance") {
97 //read in all query seqs
98 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
100 vector<Sequence*> temp = templateSeqs;
101 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
103 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
105 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
107 if (m->control_pressed) { return 0; }
109 //run filter on template copying templateSeqs into filteredTemplateSeqs
110 for (int i = 0; i < templateSeqs.size(); i++) {
111 if (m->control_pressed) { return 0; }
113 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
115 filteredTemplateSeqs.push_back(newSeq);
118 string kmerDBNameLeft;
119 string kmerDBNameRight;
121 //generate the kmerdb to pass to maligner
122 if (searchMethod == "kmer") {
123 string templatePath = m->hasPath(templateFileName);
124 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
125 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
127 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
128 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
130 for (int i = 0; i < templateSeqs.size(); i++) {
132 if (m->control_pressed) { return 0; }
134 string leftFrag = templateSeqs[i]->getUnaligned();
135 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
137 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
138 databaseLeft->addSequence(leftTemp);
140 databaseLeft->generateDB();
141 databaseLeft->setNumSeqs(templateSeqs.size());
143 for (int i = 0; i < templateSeqs.size(); i++) {
144 if (m->control_pressed) { return 0; }
146 string rightFrag = templateSeqs[i]->getUnaligned();
147 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
149 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
150 databaseRight->addSequence(rightTemp);
152 databaseRight->generateDB();
153 databaseRight->setNumSeqs(templateSeqs.size());
157 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
158 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
159 bool needToGenerateLeft = true;
161 if(kmerFileTestLeft){
162 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
163 if (GoodFile) { needToGenerateLeft = false; }
166 if(needToGenerateLeft){
168 for (int i = 0; i < templateSeqs.size(); i++) {
170 if (m->control_pressed) { return 0; }
172 string leftFrag = templateSeqs[i]->getUnaligned();
173 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
175 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
176 databaseLeft->addSequence(leftTemp);
178 databaseLeft->generateDB();
181 databaseLeft->readKmerDB(kmerFileTestLeft);
183 kmerFileTestLeft.close();
185 databaseLeft->setNumSeqs(templateSeqs.size());
188 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
189 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
190 bool needToGenerateRight = true;
192 if(kmerFileTestRight){
193 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
194 if (GoodFile) { needToGenerateRight = false; }
197 if(needToGenerateRight){
199 for (int i = 0; i < templateSeqs.size(); i++) {
200 if (m->control_pressed) { return 0; }
202 string rightFrag = templateSeqs[i]->getUnaligned();
203 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
205 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
206 databaseRight->addSequence(rightTemp);
208 databaseRight->generateDB();
211 databaseRight->readKmerDB(kmerFileTestRight);
213 kmerFileTestRight.close();
215 databaseRight->setNumSeqs(templateSeqs.size());
217 }else if (searchMethod == "blast") {
220 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
222 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
223 databaseLeft->generateDB();
224 databaseLeft->setNumSeqs(templateSeqs.size());
230 catch(exception& e) {
231 m->errorOut(e, "ChimeraSlayer", "doprep");
235 //***************************************************************************************************************
236 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
239 //when template=self, the query file is sorted from most abundance to least abundant
240 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
241 vector<Sequence*> userTemplate;
243 int myAbund = priority[q->getName()];
245 for (int i = 0; i < templateSeqs.size(); i++) {
247 if (m->control_pressed) { return userTemplate; }
249 //have I reached a sequence with the same abundance as myself?
250 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
252 //if its am not chimeric add it
253 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
254 userTemplate.push_back(templateSeqs[i]);
255 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
259 string kmerDBNameLeft;
260 string kmerDBNameRight;
262 //generate the kmerdb to pass to maligner
263 if (searchMethod == "kmer") {
264 string templatePath = m->hasPath(templateFileName);
265 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
266 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
268 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
269 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
271 for (int i = 0; i < userTemplate.size(); i++) {
273 if (m->control_pressed) { return userTemplate; }
275 string leftFrag = userTemplate[i]->getUnaligned();
276 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
278 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
279 databaseLeft->addSequence(leftTemp);
281 databaseLeft->generateDB();
282 databaseLeft->setNumSeqs(userTemplate.size());
284 for (int i = 0; i < userTemplate.size(); i++) {
285 if (m->control_pressed) { return userTemplate; }
287 string rightFrag = userTemplate[i]->getUnaligned();
288 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
290 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
291 databaseRight->addSequence(rightTemp);
293 databaseRight->generateDB();
294 databaseRight->setNumSeqs(userTemplate.size());
299 for (int i = 0; i < userTemplate.size(); i++) {
301 if (m->control_pressed) { return userTemplate; }
303 string leftFrag = userTemplate[i]->getUnaligned();
304 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
306 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
307 databaseLeft->addSequence(leftTemp);
309 databaseLeft->generateDB();
310 databaseLeft->setNumSeqs(userTemplate.size());
312 for (int i = 0; i < userTemplate.size(); i++) {
313 if (m->control_pressed) { return userTemplate; }
315 string rightFrag = userTemplate[i]->getUnaligned();
316 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
318 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
319 databaseRight->addSequence(rightTemp);
321 databaseRight->generateDB();
322 databaseRight->setNumSeqs(userTemplate.size());
324 }else if (searchMethod == "blast") {
327 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
329 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
330 databaseLeft->generateDB();
331 databaseLeft->setNumSeqs(userTemplate.size());
337 catch(exception& e) {
338 m->errorOut(e, "ChimeraSlayer", "getTemplate");
343 //***************************************************************************************************************
344 ChimeraSlayer::~ChimeraSlayer() {
346 if (templateFileName != "self") {
347 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
348 else if (searchMethod == "blast") { delete databaseLeft; }
351 //***************************************************************************************************************
352 void ChimeraSlayer::printHeader(ostream& out) {
353 m->mothurOutEndLine();
354 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
355 m->mothurOutEndLine();
357 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
359 //***************************************************************************************************************
360 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
362 Sequence* trim = NULL;
363 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
365 if (chimeraFlags == "yes") {
366 string chimeraFlag = "no";
367 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
369 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
372 if (chimeraFlag == "yes") {
373 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
374 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
375 outAcc << querySeq->getName() << endl;
377 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
380 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
381 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
383 string newAligned = trim->getAligned();
385 if (lengthLeft > lengthRight) { //trim right
386 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
388 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
390 trim->setAligned(newAligned);
395 printBlock(chimeraResults[0], chimeraFlag, out);
398 out << querySeq->getName() << "\tno" << endl;
404 catch(exception& e) {
405 m->errorOut(e, "ChimeraSlayer", "print");
409 //***************************************************************************************************************
410 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
412 Sequence* trim = NULL;
415 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
416 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
419 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
421 string chimeraFlag = "no";
422 if (leftPiece.flag == "yes") {
424 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
426 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
429 if (rightPiece.flag == "yes") {
430 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
432 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
435 bool rightChimeric = false;
436 bool leftChimeric = false;
438 if (chimeraFlag == "yes") {
439 //which peice is chimeric or are both
440 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
441 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
443 if (rightChimeric || leftChimeric) {
444 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
445 outAcc << querySeq->getName() << endl;
447 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
450 string newAligned = trim->getAligned();
452 //right side is fine so keep that
453 if ((leftChimeric) && (!rightChimeric)) {
454 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
455 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
456 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457 }else { //both sides are chimeric, keep longest piece
459 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
460 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
462 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
463 int length = lengthLeftLeft;
464 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
466 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
467 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
469 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
470 if (lengthRightRight > length) { longest = 4; }
472 if (longest == 1) { //leftleft
473 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
474 }else if (longest == 2) { //leftright
475 //get rid of leftleft
476 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
478 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
479 }else if (longest == 3) { //rightleft
481 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
482 //get rid of rightright
483 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
486 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
487 //get rid of rightleft
488 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
492 trim->setAligned(newAligned);
498 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
501 out << querySeq->getName() << "\tno" << endl;
507 catch(exception& e) {
508 m->errorOut(e, "ChimeraSlayer", "print");
514 //***************************************************************************************************************
515 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
518 bool results = false;
519 string outAccString = "";
520 string outputString = "";
522 Sequence* trim = NULL;
525 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
526 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
530 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
532 string chimeraFlag = "no";
533 if (leftPiece.flag == "yes") {
535 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
537 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
540 if (rightPiece.flag == "yes") {
541 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
543 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
546 bool rightChimeric = false;
547 bool leftChimeric = false;
549 if (chimeraFlag == "yes") {
550 //which peice is chimeric or are both
551 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
552 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
554 if (rightChimeric || leftChimeric) {
555 // cout << querySeq->getName() << "\tyes" << endl;
556 outAccString += querySeq->getName() + "\n";
559 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
561 //write to accnos file
562 int length = outAccString.length();
563 char* buf2 = new char[length];
564 memcpy(buf2, outAccString.c_str(), length);
566 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
570 string newAligned = trim->getAligned();
572 //right side is fine so keep that
573 if ((leftChimeric) && (!rightChimeric)) {
574 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
575 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
576 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
577 }else { //both sides are chimeric, keep longest piece
579 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
580 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
582 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
583 int length = lengthLeftLeft;
584 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
586 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
587 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
589 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
590 if (lengthRightRight > length) { longest = 4; }
592 if (longest == 1) { //leftleft
593 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
594 }else if (longest == 2) { //leftright
595 //get rid of leftleft
596 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
598 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
599 }else if (longest == 3) { //rightleft
601 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
602 //get rid of rightright
603 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
606 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
607 //get rid of rightleft
608 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
612 trim->setAligned(newAligned);
618 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
619 outputString += "\n";
621 //write to output file
622 int length = outputString.length();
623 char* buf = new char[length];
624 memcpy(buf, outputString.c_str(), length);
626 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
630 outputString += querySeq->getName() + "\tno\n";
632 //write to output file
633 int length = outputString.length();
634 char* buf = new char[length];
635 memcpy(buf, outputString.c_str(), length);
637 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
644 catch(exception& e) {
645 m->errorOut(e, "ChimeraSlayer", "print");
649 //***************************************************************************************************************
650 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
653 bool results = false;
654 string outAccString = "";
655 string outputString = "";
657 Sequence* trim = NULL;
658 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
660 if (chimeraFlags == "yes") {
661 string chimeraFlag = "no";
662 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
664 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
667 if (chimeraFlag == "yes") {
668 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
669 cout << querySeq->getName() << "\tyes" << endl;
670 outAccString += querySeq->getName() + "\n";
673 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
675 //write to accnos file
676 int length = outAccString.length();
677 char* buf2 = new char[length];
678 memcpy(buf2, outAccString.c_str(), length);
680 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
684 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
685 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
687 string newAligned = trim->getAligned();
688 if (lengthLeft > lengthRight) { //trim right
689 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
691 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
693 trim->setAligned(newAligned);
698 outputString = getBlock(chimeraResults[0], chimeraFlag);
699 outputString += "\n";
701 //write to output file
702 int length = outputString.length();
703 char* buf = new char[length];
704 memcpy(buf, outputString.c_str(), length);
706 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
710 outputString += querySeq->getName() + "\tno\n";
712 //write to output file
713 int length = outputString.length();
714 char* buf = new char[length];
715 memcpy(buf, outputString.c_str(), length);
717 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
723 catch(exception& e) {
724 m->errorOut(e, "ChimeraSlayer", "print");
730 //***************************************************************************************************************
731 int ChimeraSlayer::getChimeras(Sequence* query) {
734 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
735 printResults.trimQuery = trimQuery;
738 printResults.flag = "no";
742 //you must create a template
743 vector<Sequence*> thisTemplate;
744 vector<Sequence*> thisFilteredTemplate;
745 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
746 else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
748 if (m->control_pressed) { return 0; }
750 if (thisTemplate.size() == 0) { return 0; } //not chimeric
752 //moved this out of maligner - 4/29/11
753 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
755 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
756 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
758 if (templateFileName == "self") {
759 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
760 else if (searchMethod == "blast") { delete databaseLeft; }
763 if (m->control_pressed) { return 0; }
765 string chimeraFlag = maligner.getResults(query, decalc);
767 if (m->control_pressed) { return 0; }
769 vector<results> Results = maligner.getOutput();
771 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
773 if (chimeraFlag == "yes") {
776 vector<string> parents;
777 for (int i = 0; i < Results.size(); i++) {
778 parents.push_back(Results[i].parentAligned);
781 ChimeraReAligner realigner;
782 realigner.reAlign(query, parents);
786 //get sequence that were given from maligner results
787 vector<SeqDist> seqs;
788 map<string, float> removeDups;
789 map<string, float>::iterator itDup;
790 map<string, string> parentNameSeq;
791 map<string, string>::iterator itSeq;
792 for (int j = 0; j < Results.size(); j++) {
793 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
794 //only add if you are not a duplicate
796 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
798 itDup = removeDups.find(Results[j].parent);
799 if (itDup == removeDups.end()) { //this is not duplicate
800 removeDups[Results[j].parent] = dist;
801 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
802 }else if (dist > itDup->second) { //is this a stronger number for this parent
803 removeDups[Results[j].parent] = dist;
804 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
811 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
812 itSeq = parentNameSeq.find(itDup->first);
813 Sequence* seq = new Sequence(itDup->first, itSeq->second);
817 member.dist = itDup->second;
818 seqs.push_back(member);
821 //limit number of parents to explore - default 3
822 if (Results.size() > parents) {
824 sort(seqs.begin(), seqs.end(), compareSeqDist);
825 //prioritize larger more similiar sequence fragments
826 reverse(seqs.begin(), seqs.end());
828 for (int k = seqs.size()-1; k > (parents-1); k--) {
834 //put seqs into vector to send to slayer
836 // cout << query->getAligned() << endl;
837 vector<Sequence*> seqsForSlayer;
838 for (int k = 0; k < seqs.size(); k++) {
839 // cout << seqs[k].seq->getAligned() << endl;
840 seqsForSlayer.push_back(seqs[k].seq);
844 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
847 chimeraFlags = slayer.getResults(query, seqsForSlayer);
848 if (m->control_pressed) { return 0; }
849 chimeraResults = slayer.getOutput();
851 printResults.flag = chimeraFlags;
852 printResults.results = chimeraResults;
855 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
857 //cout << endl << endl;
860 catch(exception& e) {
861 m->errorOut(e, "ChimeraSlayer", "getChimeras");
865 //***************************************************************************************************************
866 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
868 out << querySeq->getName() << '\t';
869 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
871 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
872 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
874 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
877 catch(exception& e) {
878 m->errorOut(e, "ChimeraSlayer", "printBlock");
882 //***************************************************************************************************************
883 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
886 if ((leftChimeric) && (!rightChimeric)) { //print left
887 out << querySeq->getName() << '\t';
888 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
890 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
891 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
893 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
895 }else if ((!leftChimeric) && (rightChimeric)) { //print right
896 out << querySeq->getName() << '\t';
897 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
899 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
900 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
902 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
904 }else { //print both results
905 if (leftdata.flag == "yes") {
906 out << querySeq->getName() + "_LEFT" << '\t';
907 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
909 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
910 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
912 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
915 if (rightdata.flag == "yes") {
916 if (leftdata.flag == "yes") { out << endl; }
918 out << querySeq->getName() + "_RIGHT"<< '\t';
919 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
921 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
922 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
924 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
929 catch(exception& e) {
930 m->errorOut(e, "ChimeraSlayer", "printBlock");
934 //***************************************************************************************************************
935 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
940 if ((leftChimeric) && (!rightChimeric)) { //get left
941 out += querySeq->getName() + "\t";
942 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
944 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
945 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
947 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
949 }else if ((!leftChimeric) && (rightChimeric)) { //print right
950 out += querySeq->getName() + "\t";
951 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
953 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
954 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
956 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
958 }else { //print both results
960 if (leftdata.flag == "yes") {
961 out += querySeq->getName() + "_LEFT\t";
962 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
964 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
965 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
967 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
970 if (rightdata.flag == "yes") {
971 if (leftdata.flag == "yes") { out += "\n"; }
972 out += querySeq->getName() + "_RIGHT\t";
973 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
975 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
976 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
978 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
985 catch(exception& e) {
986 m->errorOut(e, "ChimeraSlayer", "getBlock");
990 //***************************************************************************************************************
991 string ChimeraSlayer::getBlock(data_struct data, string flag){
994 string outputString = "";
996 outputString += querySeq->getName() + "\t";
997 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
999 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1000 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1002 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1004 return outputString;
1006 catch(exception& e) {
1007 m->errorOut(e, "ChimeraSlayer", "getBlock");
1011 //***************************************************************************************************************
1012 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1015 vector<Sequence*> refSeqs;
1017 if (searchMethod == "distance") {
1018 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1019 Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1021 refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1023 }else if (searchMethod == "blast") {
1024 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1025 }else if (searchMethod == "kmer") {
1026 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1027 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1031 catch(exception& e) {
1032 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1036 //***************************************************************************************************************/
1037 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1040 vector<Sequence*> refResults;
1042 //get parts of query
1043 string queryUnAligned = q->getUnaligned();
1044 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1045 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1046 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1047 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1048 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1050 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1051 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1052 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1053 vector<int> smaller;
1056 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1057 else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1061 map<int, int>::iterator it;
1062 vector<int> mergedResults;
1063 for (int i = 0; i < smaller.size(); i++) {
1064 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1066 //add left if you havent already
1067 it = seen.find(smaller[i]);
1068 if (it == seen.end()) {
1069 mergedResults.push_back(smaller[i]);
1070 seen[smaller[i]] = smaller[i];
1073 //add right if you havent already
1074 it = seen.find(larger[i]);
1075 if (it == seen.end()) {
1076 mergedResults.push_back(larger[i]);
1077 seen[larger[i]] = larger[i];
1081 for (int i = smaller.size(); i < larger.size(); i++) {
1082 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1084 //add right if you havent already
1085 it = seen.find(larger[i]);
1086 if (it == seen.end()) {
1087 mergedResults.push_back(larger[i]);
1088 seen[larger[i]] = larger[i];
1092 for (int i = 0; i < mergedResults.size(); i++) {
1093 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1094 if (db[mergedResults[i]]->getName() != q->getName()) {
1095 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1096 refResults.push_back(temp);
1102 // for(int i=0;i<refResults.size();i++){
1103 // cout << refResults[i]->getName() << endl;
1111 catch(exception& e) {
1112 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1116 //***************************************************************************************************************
1117 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1119 vector<Sequence*> refResults;
1121 //get parts of query
1122 string queryUnAligned = q->getUnaligned();
1123 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1124 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1126 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1127 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1129 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1130 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1134 map<int, int>::iterator it;
1135 vector<int> mergedResults;
1136 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1138 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1140 //add left if you havent already
1141 it = seen.find(tempIndexesLeft[i]);
1142 if (it == seen.end()) {
1143 mergedResults.push_back(tempIndexesLeft[i]);
1144 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1147 //add right if you havent already
1148 it = seen.find(tempIndexesRight[i]);
1149 if (it == seen.end()) {
1150 mergedResults.push_back(tempIndexesRight[i]);
1151 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1155 //numWanted = mergedResults.size();
1157 //cout << q->getName() << endl;
1159 for (int i = 0; i < mergedResults.size(); i++) {
1160 //cout << db[mergedResults[i]]->getName() << endl;
1161 if (db[mergedResults[i]]->getName() != q->getName()) {
1162 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1163 refResults.push_back(temp);
1172 catch(exception& e) {
1173 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1177 //***************************************************************************************************************