5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
44 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
48 //***************************************************************************************************************
50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
51 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() {
53 fastafile = file; templateSeqs = readSeqs(fastafile);
54 templateFileName = temp;
76 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78 if (searchMethod == "distance") {
79 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
81 //run filter on template copying templateSeqs into filteredTemplateSeqs
82 for (int i = 0; i < templateSeqs.size(); i++) {
83 if (m->control_pressed) { break; }
85 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
87 filteredTemplateSeqs.push_back(newSeq);
92 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
96 //***************************************************************************************************************
97 int ChimeraSlayer::doPrep() {
99 if (searchMethod == "distance") {
100 //read in all query seqs
101 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
103 vector<Sequence*> temp = templateSeqs;
104 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
106 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
108 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
110 if (m->control_pressed) { return 0; }
112 //run filter on template copying templateSeqs into filteredTemplateSeqs
113 for (int i = 0; i < templateSeqs.size(); i++) {
114 if (m->control_pressed) { return 0; }
116 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
118 filteredTemplateSeqs.push_back(newSeq);
121 string kmerDBNameLeft;
122 string kmerDBNameRight;
124 //generate the kmerdb to pass to maligner
125 if (searchMethod == "kmer") {
126 string templatePath = m->hasPath(templateFileName);
127 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
128 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
130 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
131 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
133 for (int i = 0; i < templateSeqs.size(); i++) {
135 if (m->control_pressed) { return 0; }
137 string leftFrag = templateSeqs[i]->getUnaligned();
138 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
140 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
141 databaseLeft->addSequence(leftTemp);
143 databaseLeft->generateDB();
144 databaseLeft->setNumSeqs(templateSeqs.size());
146 for (int i = 0; i < templateSeqs.size(); i++) {
147 if (m->control_pressed) { return 0; }
149 string rightFrag = templateSeqs[i]->getUnaligned();
150 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
152 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
153 databaseRight->addSequence(rightTemp);
155 databaseRight->generateDB();
156 databaseRight->setNumSeqs(templateSeqs.size());
160 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
161 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
162 bool needToGenerateLeft = true;
164 if(kmerFileTestLeft){
165 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
166 if (GoodFile) { needToGenerateLeft = false; }
169 if(needToGenerateLeft){
171 for (int i = 0; i < templateSeqs.size(); i++) {
173 if (m->control_pressed) { return 0; }
175 string leftFrag = templateSeqs[i]->getUnaligned();
176 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
178 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
179 databaseLeft->addSequence(leftTemp);
181 databaseLeft->generateDB();
184 databaseLeft->readKmerDB(kmerFileTestLeft);
186 kmerFileTestLeft.close();
188 databaseLeft->setNumSeqs(templateSeqs.size());
191 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
192 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
193 bool needToGenerateRight = true;
195 if(kmerFileTestRight){
196 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
197 if (GoodFile) { needToGenerateRight = false; }
200 if(needToGenerateRight){
202 for (int i = 0; i < templateSeqs.size(); i++) {
203 if (m->control_pressed) { return 0; }
205 string rightFrag = templateSeqs[i]->getUnaligned();
206 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
208 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
209 databaseRight->addSequence(rightTemp);
211 databaseRight->generateDB();
214 databaseRight->readKmerDB(kmerFileTestRight);
216 kmerFileTestRight.close();
218 databaseRight->setNumSeqs(templateSeqs.size());
220 }else if (searchMethod == "blast") {
223 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
225 if (m->control_pressed) { return 0; }
227 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
228 databaseLeft->generateDB();
229 databaseLeft->setNumSeqs(templateSeqs.size());
235 catch(exception& e) {
236 m->errorOut(e, "ChimeraSlayer", "doprep");
240 //***************************************************************************************************************
241 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
244 //when template=self, the query file is sorted from most abundance to least abundant
245 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
246 vector<Sequence*> userTemplate;
248 int myAbund = priority[q.getName()];
250 for (int i = 0; i < templateSeqs.size(); i++) {
252 if (m->control_pressed) { return userTemplate; }
254 //have I reached a sequence with the same abundance as myself?
255 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
257 //if its am not chimeric add it
258 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
259 userTemplate.push_back(templateSeqs[i]);
260 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
264 //avoids nuisance error from formatdb for making blank blast database
265 if (userTemplate.size() == 0) {
269 string kmerDBNameLeft;
270 string kmerDBNameRight;
272 //generate the kmerdb to pass to maligner
273 if (searchMethod == "kmer") {
274 string templatePath = m->hasPath(templateFileName);
275 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
276 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
278 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
279 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
281 for (int i = 0; i < userTemplate.size(); i++) {
283 if (m->control_pressed) { return userTemplate; }
285 string leftFrag = userTemplate[i]->getUnaligned();
286 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
288 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
289 databaseLeft->addSequence(leftTemp);
291 databaseLeft->generateDB();
292 databaseLeft->setNumSeqs(userTemplate.size());
294 for (int i = 0; i < userTemplate.size(); i++) {
295 if (m->control_pressed) { return userTemplate; }
297 string rightFrag = userTemplate[i]->getUnaligned();
298 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
300 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
301 databaseRight->addSequence(rightTemp);
303 databaseRight->generateDB();
304 databaseRight->setNumSeqs(userTemplate.size());
309 for (int i = 0; i < userTemplate.size(); i++) {
311 if (m->control_pressed) { return userTemplate; }
313 string leftFrag = userTemplate[i]->getUnaligned();
314 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
316 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
317 databaseLeft->addSequence(leftTemp);
319 databaseLeft->generateDB();
320 databaseLeft->setNumSeqs(userTemplate.size());
322 for (int i = 0; i < userTemplate.size(); i++) {
323 if (m->control_pressed) { return userTemplate; }
325 string rightFrag = userTemplate[i]->getUnaligned();
326 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
328 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
329 databaseRight->addSequence(rightTemp);
331 databaseRight->generateDB();
332 databaseRight->setNumSeqs(userTemplate.size());
334 }else if (searchMethod == "blast") {
337 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
339 if (m->control_pressed) { return userTemplate; }
341 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
342 databaseLeft->generateDB();
343 databaseLeft->setNumSeqs(userTemplate.size());
349 catch(exception& e) {
350 m->errorOut(e, "ChimeraSlayer", "getTemplate");
355 //***************************************************************************************************************
356 ChimeraSlayer::~ChimeraSlayer() {
357 if (templateFileName != "self") {
358 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
359 else if (searchMethod == "blast") { delete databaseLeft; }
362 //***************************************************************************************************************
363 void ChimeraSlayer::printHeader(ostream& out) {
364 m->mothurOutEndLine();
365 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
366 m->mothurOutEndLine();
368 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
370 //***************************************************************************************************************
371 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
374 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
376 if (chimeraFlags == "yes") {
377 string chimeraFlag = "no";
378 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
380 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
383 if (chimeraFlag == "yes") {
384 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
385 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
386 outAcc << querySeq.getName() << endl;
388 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
391 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
392 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
394 string newAligned = trim.getAligned();
396 if (lengthLeft > lengthRight) { //trim right
397 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
399 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
401 trim.setAligned(newAligned);
406 printBlock(chimeraResults[0], chimeraFlag, out);
409 out << querySeq.getName() << "\tno" << endl;
415 catch(exception& e) {
416 m->errorOut(e, "ChimeraSlayer", "print");
420 //***************************************************************************************************************
421 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
426 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
427 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
430 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
432 string chimeraFlag = "no";
433 if (leftPiece.flag == "yes") {
435 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
437 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
440 if (rightPiece.flag == "yes") {
441 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
443 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
446 bool rightChimeric = false;
447 bool leftChimeric = false;
449 if (chimeraFlag == "yes") {
450 //which peice is chimeric or are both
451 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
452 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
454 if (rightChimeric || leftChimeric) {
455 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
456 outAcc << querySeq.getName() << endl;
458 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
461 string newAligned = trim.getAligned();
463 //right side is fine so keep that
464 if ((leftChimeric) && (!rightChimeric)) {
465 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
466 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
467 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
468 }else { //both sides are chimeric, keep longest piece
470 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
471 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
473 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
474 int length = lengthLeftLeft;
475 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
477 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
478 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
480 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
481 if (lengthRightRight > length) { longest = 4; }
483 if (longest == 1) { //leftleft
484 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
485 }else if (longest == 2) { //leftright
486 //get rid of leftleft
487 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
489 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
490 }else if (longest == 3) { //rightleft
492 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
493 //get rid of rightright
494 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
497 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
498 //get rid of rightleft
499 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
503 trim.setAligned(newAligned);
509 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
512 out << querySeq.getName() << "\tno" << endl;
518 catch(exception& e) {
519 m->errorOut(e, "ChimeraSlayer", "print");
525 //***************************************************************************************************************
526 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
529 bool results = false;
530 string outAccString = "";
531 string outputString = "";
536 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
537 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
541 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
543 string chimeraFlag = "no";
544 if (leftPiece.flag == "yes") {
546 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
548 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
551 if (rightPiece.flag == "yes") {
552 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
554 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
557 bool rightChimeric = false;
558 bool leftChimeric = false;
562 if (chimeraFlag == "yes") {
563 //which peice is chimeric or are both
564 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
565 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
567 if (rightChimeric || leftChimeric) {
568 cout << querySeq.getName() << "\tyes" << endl;
569 outAccString += querySeq.getName() + "\n";
572 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
574 //write to accnos file
575 int length = outAccString.length();
576 char* buf2 = new char[length];
577 memcpy(buf2, outAccString.c_str(), length);
579 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
583 string newAligned = trim.getAligned();
585 //right side is fine so keep that
586 if ((leftChimeric) && (!rightChimeric)) {
587 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
588 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
589 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
590 }else { //both sides are chimeric, keep longest piece
592 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
593 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
595 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
596 int length = lengthLeftLeft;
597 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
599 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
600 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
602 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
603 if (lengthRightRight > length) { longest = 4; }
605 if (longest == 1) { //leftleft
606 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
607 }else if (longest == 2) { //leftright
608 //get rid of leftleft
609 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
611 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
612 }else if (longest == 3) { //rightleft
614 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
615 //get rid of rightright
616 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
619 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
620 //get rid of rightleft
621 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
625 trim.setAligned(newAligned);
631 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
632 outputString += "\n";
634 //write to output file
635 int length = outputString.length();
636 char* buf = new char[length];
637 memcpy(buf, outputString.c_str(), length);
639 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
643 outputString += querySeq.getName() + "\tno\n";
645 //write to output file
646 int length = outputString.length();
647 char* buf = new char[length];
648 memcpy(buf, outputString.c_str(), length);
650 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
657 catch(exception& e) {
658 m->errorOut(e, "ChimeraSlayer", "print");
662 //***************************************************************************************************************
663 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
666 bool results = false;
667 string outAccString = "";
668 string outputString = "";
671 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
673 if (chimeraFlags == "yes") {
674 string chimeraFlag = "no";
675 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
677 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
680 if (chimeraFlag == "yes") {
681 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
682 cout << querySeq.getName() << "\tyes" << endl;
683 outAccString += querySeq.getName() + "\n";
686 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
688 //write to accnos file
689 int length = outAccString.length();
690 char* buf2 = new char[length];
691 memcpy(buf2, outAccString.c_str(), length);
693 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
697 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
698 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
700 string newAligned = trim.getAligned();
701 if (lengthLeft > lengthRight) { //trim right
702 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
704 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
706 trim.setAligned(newAligned);
711 outputString = getBlock(chimeraResults[0], chimeraFlag);
712 outputString += "\n";
714 //write to output file
715 int length = outputString.length();
716 char* buf = new char[length];
717 memcpy(buf, outputString.c_str(), length);
719 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
723 outputString += querySeq.getName() + "\tno\n";
725 //write to output file
726 int length = outputString.length();
727 char* buf = new char[length];
728 memcpy(buf, outputString.c_str(), length);
730 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
736 catch(exception& e) {
737 m->errorOut(e, "ChimeraSlayer", "print");
743 //***************************************************************************************************************
744 int ChimeraSlayer::getChimeras(Sequence* query) {
747 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
748 printResults.trimQuery = trimQuery;
751 printResults.flag = "no";
755 //you must create a template
756 vector<Sequence*> thisTemplate;
757 vector<Sequence*> thisFilteredTemplate;
758 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
759 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
761 if (m->control_pressed) { return 0; }
762 if (thisTemplate.size() == 0) { return 0; } //not chimeric
764 //moved this out of maligner - 4/29/11
765 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
767 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
768 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
770 if (templateFileName == "self") {
771 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
772 else if (searchMethod == "blast") { delete databaseLeft; }
775 if (m->control_pressed) { return 0; }
777 string chimeraFlag = maligner.getResults(*query, decalc);
779 if (m->control_pressed) { return 0; }
781 vector<results> Results = maligner.getOutput();
783 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
785 if (chimeraFlag == "yes") {
788 vector<string> parents;
789 for (int i = 0; i < Results.size(); i++) {
790 parents.push_back(Results[i].parentAligned);
793 ChimeraReAligner realigner;
794 realigner.reAlign(query, parents);
798 // cout << query->getAligned() << endl;
799 //get sequence that were given from maligner results
800 vector<SeqCompare> seqs;
801 map<string, float> removeDups;
802 map<string, float>::iterator itDup;
803 map<string, string> parentNameSeq;
804 map<string, string>::iterator itSeq;
805 for (int j = 0; j < Results.size(); j++) {
807 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
808 //only add if you are not a duplicate
809 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
812 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
814 itDup = removeDups.find(Results[j].parent);
815 if (itDup == removeDups.end()) { //this is not duplicate
816 removeDups[Results[j].parent] = dist;
817 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
818 }else if (dist > itDup->second) { //is this a stronger number for this parent
819 removeDups[Results[j].parent] = dist;
820 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
827 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
828 itSeq = parentNameSeq.find(itDup->first);
829 Sequence seq(itDup->first, itSeq->second);
833 member.dist = itDup->second;
834 seqs.push_back(member);
837 //limit number of parents to explore - default 3
838 if (Results.size() > parents) {
840 sort(seqs.begin(), seqs.end(), compareSeqCompare);
841 //prioritize larger more similiar sequence fragments
842 reverse(seqs.begin(), seqs.end());
844 //for (int k = seqs.size()-1; k > (parents-1); k--) {
845 // delete seqs[k].seq;
850 //put seqs into vector to send to slayer
852 // cout << query->getAligned() << endl;
853 vector<Sequence> seqsForSlayer;
854 for (int k = 0; k < seqs.size(); k++) {
855 // cout << seqs[k].seq->getAligned() << endl;
856 seqsForSlayer.push_back(seqs[k].seq);
857 // cout << seqs[k].seq->getName() << endl;
860 if (m->control_pressed) { return 0; }
863 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
864 if (m->control_pressed) { return 0; }
865 chimeraResults = slayer.getOutput();
867 printResults.flag = chimeraFlags;
868 printResults.results = chimeraResults;
871 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
873 //cout << endl << endl;
876 catch(exception& e) {
877 m->errorOut(e, "ChimeraSlayer", "getChimeras");
881 //***************************************************************************************************************
882 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
884 out << querySeq.getName() << '\t';
885 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
887 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
888 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
890 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
893 catch(exception& e) {
894 m->errorOut(e, "ChimeraSlayer", "printBlock");
898 //***************************************************************************************************************
899 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
902 if ((leftChimeric) && (!rightChimeric)) { //print left
903 out << querySeq.getName() << '\t';
904 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
906 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
907 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
909 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
911 }else if ((!leftChimeric) && (rightChimeric)) { //print right
912 out << querySeq.getName() << '\t';
913 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
915 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
916 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
918 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
920 }else { //print both results
921 if (leftdata.flag == "yes") {
922 out << querySeq.getName() + "_LEFT" << '\t';
923 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
925 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
926 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
928 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
931 if (rightdata.flag == "yes") {
932 if (leftdata.flag == "yes") { out << endl; }
934 out << querySeq.getName() + "_RIGHT"<< '\t';
935 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
937 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
938 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
940 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
945 catch(exception& e) {
946 m->errorOut(e, "ChimeraSlayer", "printBlock");
950 //***************************************************************************************************************
951 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
956 if ((leftChimeric) && (!rightChimeric)) { //get left
957 out += querySeq.getName() + "\t";
958 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
960 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
961 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
963 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
965 }else if ((!leftChimeric) && (rightChimeric)) { //print right
966 out += querySeq.getName() + "\t";
967 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
969 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
970 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
972 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
974 }else { //print both results
976 if (leftdata.flag == "yes") {
977 out += querySeq.getName() + "_LEFT\t";
978 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
980 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
981 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
983 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
986 if (rightdata.flag == "yes") {
987 if (leftdata.flag == "yes") { out += "\n"; }
988 out += querySeq.getName() + "_RIGHT\t";
989 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
991 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
992 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
994 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
1001 catch(exception& e) {
1002 m->errorOut(e, "ChimeraSlayer", "getBlock");
1006 //***************************************************************************************************************
1007 string ChimeraSlayer::getBlock(data_struct data, string flag){
1010 string outputString = "";
1012 outputString += querySeq.getName() + "\t";
1013 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1015 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1016 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1018 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1020 return outputString;
1022 catch(exception& e) {
1023 m->errorOut(e, "ChimeraSlayer", "getBlock");
1027 //***************************************************************************************************************
1028 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1031 vector<Sequence> refSeqs;
1033 if (searchMethod == "distance") {
1034 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1035 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1037 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1039 }else if (searchMethod == "blast") {
1040 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1041 }else if (searchMethod == "kmer") {
1042 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1043 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1047 catch(exception& e) {
1048 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1052 //***************************************************************************************************************/
1053 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1056 vector<Sequence> refResults;
1058 //get parts of query
1059 string queryUnAligned = q.getUnaligned();
1060 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1061 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1062 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1063 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1064 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1066 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1067 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1070 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1071 // vector<int> smaller;
1072 // vector<int> larger;
1074 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1075 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1079 map<int, int>::iterator it;
1080 vector<int> mergedResults;
1083 // for (int i = 0; i < smaller.size(); i++) {
1084 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1086 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1088 //add left if you havent already
1089 it = seen.find(tempIndexesLeft[index]);
1090 if (it == seen.end()) {
1091 mergedResults.push_back(tempIndexesLeft[index]);
1092 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1095 //add right if you havent already
1096 it = seen.find(tempIndexesRight[index]);
1097 if (it == seen.end()) {
1098 mergedResults.push_back(tempIndexesRight[index]);
1099 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1105 for (int i = index; i < tempIndexesLeft.size(); i++) {
1106 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1108 //add right if you havent already
1109 it = seen.find(tempIndexesLeft[i]);
1110 if (it == seen.end()) {
1111 mergedResults.push_back(tempIndexesLeft[i]);
1112 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1116 for (int i = index; i < tempIndexesRight.size(); i++) {
1117 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1119 //add right if you havent already
1120 it = seen.find(tempIndexesRight[i]);
1121 if (it == seen.end()) {
1122 mergedResults.push_back(tempIndexesRight[i]);
1123 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1126 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1127 //cout << qname << endl;
1129 if (mergedResults.size() == 0) { numNoParents++; }
1131 for (int i = 0; i < mergedResults.size(); i++) {
1132 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1133 if (db[mergedResults[i]]->getName() != q.getName()) {
1134 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1135 refResults.push_back(temp);
1138 //cout << endl << endl;
1145 catch(exception& e) {
1146 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1150 //***************************************************************************************************************
1151 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1153 vector<Sequence> refResults;
1155 //get parts of query
1156 string queryUnAligned = q.getUnaligned();
1157 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1158 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1160 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1161 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1163 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1164 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1168 map<int, int>::iterator it;
1169 vector<int> mergedResults;
1172 // for (int i = 0; i < smaller.size(); i++) {
1173 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1175 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1177 //add left if you havent already
1178 it = seen.find(tempIndexesLeft[index]);
1179 if (it == seen.end()) {
1180 mergedResults.push_back(tempIndexesLeft[index]);
1181 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1184 //add right if you havent already
1185 it = seen.find(tempIndexesRight[index]);
1186 if (it == seen.end()) {
1187 mergedResults.push_back(tempIndexesRight[index]);
1188 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1194 for (int i = index; i < tempIndexesLeft.size(); i++) {
1195 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1197 //add right if you havent already
1198 it = seen.find(tempIndexesLeft[i]);
1199 if (it == seen.end()) {
1200 mergedResults.push_back(tempIndexesLeft[i]);
1201 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1205 for (int i = index; i < tempIndexesRight.size(); i++) {
1206 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1208 //add right if you havent already
1209 it = seen.find(tempIndexesRight[i]);
1210 if (it == seen.end()) {
1211 mergedResults.push_back(tempIndexesRight[i]);
1212 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1216 for (int i = 0; i < mergedResults.size(); i++) {
1217 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1218 if (db[mergedResults[i]]->getName() != q.getName()) {
1219 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1220 refResults.push_back(temp);
1231 catch(exception& e) {
1232 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1236 //***************************************************************************************************************