5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
42 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
46 //***************************************************************************************************************
47 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
48 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
50 fastafile = file; templateSeqs = readSeqs(fastafile);
51 templateFileName = temp;
71 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
73 if (searchMethod == "distance") {
74 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
76 //run filter on template copying templateSeqs into filteredTemplateSeqs
77 for (int i = 0; i < templateSeqs.size(); i++) {
78 if (m->control_pressed) { break; }
80 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
82 filteredTemplateSeqs.push_back(newSeq);
87 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
91 //***************************************************************************************************************
92 int ChimeraSlayer::doPrep() {
94 if (searchMethod == "distance") {
95 //read in all query seqs
96 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
98 vector<Sequence*> temp = templateSeqs;
99 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
101 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
103 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
105 if (m->control_pressed) { return 0; }
107 //run filter on template copying templateSeqs into filteredTemplateSeqs
108 for (int i = 0; i < templateSeqs.size(); i++) {
109 if (m->control_pressed) { return 0; }
111 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
113 filteredTemplateSeqs.push_back(newSeq);
116 string kmerDBNameLeft;
117 string kmerDBNameRight;
119 //generate the kmerdb to pass to maligner
120 if (searchMethod == "kmer") {
121 string templatePath = m->hasPath(templateFileName);
122 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
123 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
125 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
126 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
128 for (int i = 0; i < templateSeqs.size(); i++) {
130 if (m->control_pressed) { return 0; }
132 string leftFrag = templateSeqs[i]->getUnaligned();
133 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
135 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
136 databaseLeft->addSequence(leftTemp);
138 databaseLeft->generateDB();
139 databaseLeft->setNumSeqs(templateSeqs.size());
141 for (int i = 0; i < templateSeqs.size(); i++) {
142 if (m->control_pressed) { return 0; }
144 string rightFrag = templateSeqs[i]->getUnaligned();
145 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
147 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
148 databaseRight->addSequence(rightTemp);
150 databaseRight->generateDB();
151 databaseRight->setNumSeqs(templateSeqs.size());
155 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
156 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
157 bool needToGenerateLeft = true;
159 if(kmerFileTestLeft){
160 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
161 if (GoodFile) { needToGenerateLeft = false; }
164 if(needToGenerateLeft){
166 for (int i = 0; i < templateSeqs.size(); i++) {
168 if (m->control_pressed) { return 0; }
170 string leftFrag = templateSeqs[i]->getUnaligned();
171 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
173 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
174 databaseLeft->addSequence(leftTemp);
176 databaseLeft->generateDB();
179 databaseLeft->readKmerDB(kmerFileTestLeft);
181 kmerFileTestLeft.close();
183 databaseLeft->setNumSeqs(templateSeqs.size());
186 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
187 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
188 bool needToGenerateRight = true;
190 if(kmerFileTestRight){
191 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
192 if (GoodFile) { needToGenerateRight = false; }
195 if(needToGenerateRight){
197 for (int i = 0; i < templateSeqs.size(); i++) {
198 if (m->control_pressed) { return 0; }
200 string rightFrag = templateSeqs[i]->getUnaligned();
201 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
203 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
204 databaseRight->addSequence(rightTemp);
206 databaseRight->generateDB();
209 databaseRight->readKmerDB(kmerFileTestRight);
211 kmerFileTestRight.close();
213 databaseRight->setNumSeqs(templateSeqs.size());
215 }else if (searchMethod == "blast") {
218 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
220 if (m->control_pressed) { return 0; }
222 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
223 databaseLeft->generateDB();
224 databaseLeft->setNumSeqs(templateSeqs.size());
230 catch(exception& e) {
231 m->errorOut(e, "ChimeraSlayer", "doprep");
235 //***************************************************************************************************************
236 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
239 //when template=self, the query file is sorted from most abundance to least abundant
240 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
241 vector<Sequence*> userTemplate;
243 int myAbund = priority[q.getName()];
245 for (int i = 0; i < templateSeqs.size(); i++) {
247 if (m->control_pressed) { return userTemplate; }
249 //have I reached a sequence with the same abundance as myself?
250 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
252 //if its am not chimeric add it
253 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
254 userTemplate.push_back(templateSeqs[i]);
255 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
259 string kmerDBNameLeft;
260 string kmerDBNameRight;
262 //generate the kmerdb to pass to maligner
263 if (searchMethod == "kmer") {
264 string templatePath = m->hasPath(templateFileName);
265 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
266 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
268 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
269 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
271 for (int i = 0; i < userTemplate.size(); i++) {
273 if (m->control_pressed) { return userTemplate; }
275 string leftFrag = userTemplate[i]->getUnaligned();
276 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
278 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
279 databaseLeft->addSequence(leftTemp);
281 databaseLeft->generateDB();
282 databaseLeft->setNumSeqs(userTemplate.size());
284 for (int i = 0; i < userTemplate.size(); i++) {
285 if (m->control_pressed) { return userTemplate; }
287 string rightFrag = userTemplate[i]->getUnaligned();
288 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
290 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
291 databaseRight->addSequence(rightTemp);
293 databaseRight->generateDB();
294 databaseRight->setNumSeqs(userTemplate.size());
299 for (int i = 0; i < userTemplate.size(); i++) {
301 if (m->control_pressed) { return userTemplate; }
303 string leftFrag = userTemplate[i]->getUnaligned();
304 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
306 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
307 databaseLeft->addSequence(leftTemp);
309 databaseLeft->generateDB();
310 databaseLeft->setNumSeqs(userTemplate.size());
312 for (int i = 0; i < userTemplate.size(); i++) {
313 if (m->control_pressed) { return userTemplate; }
315 string rightFrag = userTemplate[i]->getUnaligned();
316 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
318 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
319 databaseRight->addSequence(rightTemp);
321 databaseRight->generateDB();
322 databaseRight->setNumSeqs(userTemplate.size());
324 }else if (searchMethod == "blast") {
327 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
329 if (m->control_pressed) { return userTemplate; }
331 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
332 databaseLeft->generateDB();
333 databaseLeft->setNumSeqs(userTemplate.size());
339 catch(exception& e) {
340 m->errorOut(e, "ChimeraSlayer", "getTemplate");
345 //***************************************************************************************************************
346 ChimeraSlayer::~ChimeraSlayer() {
347 if (templateFileName != "self") {
348 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
349 else if (searchMethod == "blast") { delete databaseLeft; }
352 //***************************************************************************************************************
353 void ChimeraSlayer::printHeader(ostream& out) {
354 m->mothurOutEndLine();
355 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
356 m->mothurOutEndLine();
358 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
360 //***************************************************************************************************************
361 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
364 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
366 if (chimeraFlags == "yes") {
367 string chimeraFlag = "no";
368 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
370 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
373 if (chimeraFlag == "yes") {
374 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
375 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
376 outAcc << querySeq.getName() << endl;
378 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
381 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
382 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
384 string newAligned = trim.getAligned();
386 if (lengthLeft > lengthRight) { //trim right
387 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
389 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
391 trim.setAligned(newAligned);
396 printBlock(chimeraResults[0], chimeraFlag, out);
399 out << querySeq.getName() << "\tno" << endl;
405 catch(exception& e) {
406 m->errorOut(e, "ChimeraSlayer", "print");
410 //***************************************************************************************************************
411 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
416 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
417 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
420 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
422 string chimeraFlag = "no";
423 if (leftPiece.flag == "yes") {
425 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
427 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
430 if (rightPiece.flag == "yes") {
431 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
433 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
436 bool rightChimeric = false;
437 bool leftChimeric = false;
439 if (chimeraFlag == "yes") {
440 //which peice is chimeric or are both
441 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
442 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
444 if (rightChimeric || leftChimeric) {
445 m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
446 outAcc << querySeq.getName() << endl;
448 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
451 string newAligned = trim.getAligned();
453 //right side is fine so keep that
454 if ((leftChimeric) && (!rightChimeric)) {
455 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
456 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
457 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
458 }else { //both sides are chimeric, keep longest piece
460 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
461 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
463 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
464 int length = lengthLeftLeft;
465 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
467 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
468 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
470 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
471 if (lengthRightRight > length) { longest = 4; }
473 if (longest == 1) { //leftleft
474 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
475 }else if (longest == 2) { //leftright
476 //get rid of leftleft
477 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
479 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
480 }else if (longest == 3) { //rightleft
482 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
483 //get rid of rightright
484 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
487 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
488 //get rid of rightleft
489 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
493 trim.setAligned(newAligned);
499 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
502 out << querySeq.getName() << "\tno" << endl;
508 catch(exception& e) {
509 m->errorOut(e, "ChimeraSlayer", "print");
515 //***************************************************************************************************************
516 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
519 bool results = false;
520 string outAccString = "";
521 string outputString = "";
526 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
527 trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned);
531 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
533 string chimeraFlag = "no";
534 if (leftPiece.flag == "yes") {
536 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
538 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
541 if (rightPiece.flag == "yes") {
542 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
544 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
547 bool rightChimeric = false;
548 bool leftChimeric = false;
552 if (chimeraFlag == "yes") {
553 //which peice is chimeric or are both
554 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
555 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
557 if (rightChimeric || leftChimeric) {
558 cout << querySeq.getName() << "\tyes" << endl;
559 outAccString += querySeq.getName() + "\n";
562 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
564 //write to accnos file
565 int length = outAccString.length();
566 char* buf2 = new char[length];
567 memcpy(buf2, outAccString.c_str(), length);
569 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
573 string newAligned = trim.getAligned();
575 //right side is fine so keep that
576 if ((leftChimeric) && (!rightChimeric)) {
577 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
578 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
579 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
580 }else { //both sides are chimeric, keep longest piece
582 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
583 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
585 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
586 int length = lengthLeftLeft;
587 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
589 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
590 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
592 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
593 if (lengthRightRight > length) { longest = 4; }
595 if (longest == 1) { //leftleft
596 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
597 }else if (longest == 2) { //leftright
598 //get rid of leftleft
599 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
601 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
602 }else if (longest == 3) { //rightleft
604 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
605 //get rid of rightright
606 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
609 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
610 //get rid of rightleft
611 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
615 trim.setAligned(newAligned);
621 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
622 outputString += "\n";
624 //write to output file
625 int length = outputString.length();
626 char* buf = new char[length];
627 memcpy(buf, outputString.c_str(), length);
629 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
633 outputString += querySeq.getName() + "\tno\n";
635 //write to output file
636 int length = outputString.length();
637 char* buf = new char[length];
638 memcpy(buf, outputString.c_str(), length);
640 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
647 catch(exception& e) {
648 m->errorOut(e, "ChimeraSlayer", "print");
652 //***************************************************************************************************************
653 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
656 bool results = false;
657 string outAccString = "";
658 string outputString = "";
661 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
663 if (chimeraFlags == "yes") {
664 string chimeraFlag = "no";
665 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
667 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
670 if (chimeraFlag == "yes") {
671 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
672 cout << querySeq.getName() << "\tyes" << endl;
673 outAccString += querySeq.getName() + "\n";
676 if (templateFileName == "self") { chimericSeqs.insert(querySeq.getName()); }
678 //write to accnos file
679 int length = outAccString.length();
680 char* buf2 = new char[length];
681 memcpy(buf2, outAccString.c_str(), length);
683 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
687 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
688 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
690 string newAligned = trim.getAligned();
691 if (lengthLeft > lengthRight) { //trim right
692 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
694 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
696 trim.setAligned(newAligned);
701 outputString = getBlock(chimeraResults[0], chimeraFlag);
702 outputString += "\n";
704 //write to output file
705 int length = outputString.length();
706 char* buf = new char[length];
707 memcpy(buf, outputString.c_str(), length);
709 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
713 outputString += querySeq.getName() + "\tno\n";
715 //write to output file
716 int length = outputString.length();
717 char* buf = new char[length];
718 memcpy(buf, outputString.c_str(), length);
720 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
726 catch(exception& e) {
727 m->errorOut(e, "ChimeraSlayer", "print");
733 //***************************************************************************************************************
734 int ChimeraSlayer::getChimeras(Sequence* query) {
737 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
738 printResults.trimQuery = trimQuery;
741 printResults.flag = "no";
745 //you must create a template
746 vector<Sequence*> thisTemplate;
747 vector<Sequence*> thisFilteredTemplate;
748 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
749 else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
751 if (m->control_pressed) { return 0; }
753 if (thisTemplate.size() == 0) { return 0; } //not chimeric
755 //moved this out of maligner - 4/29/11
756 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
758 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
759 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
761 if (templateFileName == "self") {
762 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
763 else if (searchMethod == "blast") { delete databaseLeft; }
766 if (m->control_pressed) { return 0; }
768 string chimeraFlag = maligner.getResults(*query, decalc);
770 if (m->control_pressed) { return 0; }
772 vector<results> Results = maligner.getOutput();
774 //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
776 if (chimeraFlag == "yes") {
779 vector<string> parents;
780 for (int i = 0; i < Results.size(); i++) {
781 parents.push_back(Results[i].parentAligned);
784 ChimeraReAligner realigner;
785 realigner.reAlign(query, parents);
789 // cout << query->getAligned() << endl;
790 //get sequence that were given from maligner results
791 vector<SeqCompare> seqs;
792 map<string, float> removeDups;
793 map<string, float>::iterator itDup;
794 map<string, string> parentNameSeq;
795 map<string, string>::iterator itSeq;
796 for (int j = 0; j < Results.size(); j++) {
798 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
799 //only add if you are not a duplicate
800 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
803 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
805 itDup = removeDups.find(Results[j].parent);
806 if (itDup == removeDups.end()) { //this is not duplicate
807 removeDups[Results[j].parent] = dist;
808 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
809 }else if (dist > itDup->second) { //is this a stronger number for this parent
810 removeDups[Results[j].parent] = dist;
811 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
818 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
819 itSeq = parentNameSeq.find(itDup->first);
820 Sequence seq(itDup->first, itSeq->second);
824 member.dist = itDup->second;
825 seqs.push_back(member);
828 //limit number of parents to explore - default 3
829 if (Results.size() > parents) {
831 sort(seqs.begin(), seqs.end(), compareSeqCompare);
832 //prioritize larger more similiar sequence fragments
833 reverse(seqs.begin(), seqs.end());
835 //for (int k = seqs.size()-1; k > (parents-1); k--) {
836 // delete seqs[k].seq;
841 //put seqs into vector to send to slayer
843 // cout << query->getAligned() << endl;
844 vector<Sequence> seqsForSlayer;
845 for (int k = 0; k < seqs.size(); k++) {
846 // cout << seqs[k].seq->getAligned() << endl;
847 seqsForSlayer.push_back(seqs[k].seq);
848 // cout << seqs[k].seq->getName() << endl;
851 if (m->control_pressed) { return 0; }
854 chimeraFlags = slayer.getResults(*query, seqsForSlayer);
855 if (m->control_pressed) { return 0; }
856 chimeraResults = slayer.getOutput();
858 printResults.flag = chimeraFlags;
859 printResults.results = chimeraResults;
862 //for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
864 //cout << endl << endl;
867 catch(exception& e) {
868 m->errorOut(e, "ChimeraSlayer", "getChimeras");
872 //***************************************************************************************************************
873 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
875 out << querySeq.getName() << '\t';
876 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
878 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
879 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
881 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
884 catch(exception& e) {
885 m->errorOut(e, "ChimeraSlayer", "printBlock");
889 //***************************************************************************************************************
890 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
893 if ((leftChimeric) && (!rightChimeric)) { //print left
894 out << querySeq.getName() << '\t';
895 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
897 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
898 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
900 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
902 }else if ((!leftChimeric) && (rightChimeric)) { //print right
903 out << querySeq.getName() << '\t';
904 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
906 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
907 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
909 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
911 }else { //print both results
912 if (leftdata.flag == "yes") {
913 out << querySeq.getName() + "_LEFT" << '\t';
914 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
916 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
917 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
919 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
922 if (rightdata.flag == "yes") {
923 if (leftdata.flag == "yes") { out << endl; }
925 out << querySeq.getName() + "_RIGHT"<< '\t';
926 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
928 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
929 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
931 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
936 catch(exception& e) {
937 m->errorOut(e, "ChimeraSlayer", "printBlock");
941 //***************************************************************************************************************
942 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
947 if ((leftChimeric) && (!rightChimeric)) { //get left
948 out += querySeq.getName() + "\t";
949 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
951 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
952 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
954 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
956 }else if ((!leftChimeric) && (rightChimeric)) { //print right
957 out += querySeq.getName() + "\t";
958 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
960 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
961 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
963 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
965 }else { //print both results
967 if (leftdata.flag == "yes") {
968 out += querySeq.getName() + "_LEFT\t";
969 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
971 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
972 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
974 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
977 if (rightdata.flag == "yes") {
978 if (leftdata.flag == "yes") { out += "\n"; }
979 out += querySeq.getName() + "_RIGHT\t";
980 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
982 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
983 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
985 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
992 catch(exception& e) {
993 m->errorOut(e, "ChimeraSlayer", "getBlock");
997 //***************************************************************************************************************
998 string ChimeraSlayer::getBlock(data_struct data, string flag){
1001 string outputString = "";
1003 outputString += querySeq.getName() + "\t";
1004 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1006 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1007 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1009 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1011 return outputString;
1013 catch(exception& e) {
1014 m->errorOut(e, "ChimeraSlayer", "getBlock");
1018 //***************************************************************************************************************
1019 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1022 vector<Sequence> refSeqs;
1024 if (searchMethod == "distance") {
1025 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1026 Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1028 refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1030 }else if (searchMethod == "blast") {
1031 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1032 }else if (searchMethod == "kmer") {
1033 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1034 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1038 catch(exception& e) {
1039 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1043 //***************************************************************************************************************/
1044 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1047 vector<Sequence> refResults;
1049 //get parts of query
1050 string queryUnAligned = q.getUnaligned();
1051 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1052 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1053 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1054 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1055 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1057 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1058 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1061 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1062 // vector<int> smaller;
1063 // vector<int> larger;
1065 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1066 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1070 map<int, int>::iterator it;
1071 vector<int> mergedResults;
1074 // for (int i = 0; i < smaller.size(); i++) {
1075 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1077 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1079 //add left if you havent already
1080 it = seen.find(tempIndexesLeft[index]);
1081 if (it == seen.end()) {
1082 mergedResults.push_back(tempIndexesLeft[index]);
1083 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1086 //add right if you havent already
1087 it = seen.find(tempIndexesRight[index]);
1088 if (it == seen.end()) {
1089 mergedResults.push_back(tempIndexesRight[index]);
1090 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1096 for (int i = index; i < tempIndexesLeft.size(); i++) {
1097 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1099 //add right if you havent already
1100 it = seen.find(tempIndexesLeft[i]);
1101 if (it == seen.end()) {
1102 mergedResults.push_back(tempIndexesLeft[i]);
1103 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1107 for (int i = index; i < tempIndexesRight.size(); i++) {
1108 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1110 //add right if you havent already
1111 it = seen.find(tempIndexesRight[i]);
1112 if (it == seen.end()) {
1113 mergedResults.push_back(tempIndexesRight[i]);
1114 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1117 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1118 //cout << qname << endl;
1120 if (mergedResults.size() == 0) { numNoParents++; }
1122 for (int i = 0; i < mergedResults.size(); i++) {
1123 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1124 if (db[mergedResults[i]]->getName() != q.getName()) {
1125 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1126 refResults.push_back(temp);
1129 //cout << endl << endl;
1136 catch(exception& e) {
1137 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1141 //***************************************************************************************************************
1142 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1144 vector<Sequence> refResults;
1146 //get parts of query
1147 string queryUnAligned = q.getUnaligned();
1148 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1149 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1151 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1152 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1154 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1155 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1159 map<int, int>::iterator it;
1160 vector<int> mergedResults;
1163 // for (int i = 0; i < smaller.size(); i++) {
1164 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1166 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1168 //add left if you havent already
1169 it = seen.find(tempIndexesLeft[index]);
1170 if (it == seen.end()) {
1171 mergedResults.push_back(tempIndexesLeft[index]);
1172 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1175 //add right if you havent already
1176 it = seen.find(tempIndexesRight[index]);
1177 if (it == seen.end()) {
1178 mergedResults.push_back(tempIndexesRight[index]);
1179 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1185 for (int i = index; i < tempIndexesLeft.size(); i++) {
1186 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1188 //add right if you havent already
1189 it = seen.find(tempIndexesLeft[i]);
1190 if (it == seen.end()) {
1191 mergedResults.push_back(tempIndexesLeft[i]);
1192 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1196 for (int i = index; i < tempIndexesRight.size(); i++) {
1197 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1199 //add right if you havent already
1200 it = seen.find(tempIndexesRight[i]);
1201 if (it == seen.end()) {
1202 mergedResults.push_back(tempIndexesRight[i]);
1203 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1207 for (int i = 0; i < mergedResults.size(); i++) {
1208 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1209 if (db[mergedResults[i]]->getName() != q.getName()) {
1210 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1211 refResults.push_back(temp);
1222 catch(exception& e) {
1223 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1227 //***************************************************************************************************************