5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
71 decalc = new DeCalculator();
73 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75 if (searchMethod == "distance") {
76 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78 //run filter on template copying templateSeqs into filteredTemplateSeqs
79 for (int i = 0; i < templateSeqs.size(); i++) {
80 if (m->control_pressed) { break; }
82 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
84 filteredTemplateSeqs.push_back(newSeq);
89 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
93 //***************************************************************************************************************
94 int ChimeraSlayer::doPrep() {
96 if (searchMethod == "distance") {
97 //read in all query seqs
98 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
100 vector<Sequence*> temp = templateSeqs;
101 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
103 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
105 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
107 if (m->control_pressed) { return 0; }
109 //run filter on template copying templateSeqs into filteredTemplateSeqs
110 for (int i = 0; i < templateSeqs.size(); i++) {
111 if (m->control_pressed) { return 0; }
113 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
115 filteredTemplateSeqs.push_back(newSeq);
118 string kmerDBNameLeft;
119 string kmerDBNameRight;
121 //generate the kmerdb to pass to maligner
122 if (searchMethod == "kmer") {
123 string templatePath = m->hasPath(templateFileName);
124 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
125 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
127 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
128 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
130 for (int i = 0; i < templateSeqs.size(); i++) {
132 if (m->control_pressed) { return 0; }
134 string leftFrag = templateSeqs[i]->getUnaligned();
135 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
137 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
138 databaseLeft->addSequence(leftTemp);
140 databaseLeft->generateDB();
141 databaseLeft->setNumSeqs(templateSeqs.size());
143 for (int i = 0; i < templateSeqs.size(); i++) {
144 if (m->control_pressed) { return 0; }
146 string rightFrag = templateSeqs[i]->getUnaligned();
147 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
149 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
150 databaseRight->addSequence(rightTemp);
152 databaseRight->generateDB();
153 databaseRight->setNumSeqs(templateSeqs.size());
157 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
158 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
159 bool needToGenerateLeft = true;
161 if(kmerFileTestLeft){
162 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
163 if (GoodFile) { needToGenerateLeft = false; }
166 if(needToGenerateLeft){
168 for (int i = 0; i < templateSeqs.size(); i++) {
170 if (m->control_pressed) { return 0; }
172 string leftFrag = templateSeqs[i]->getUnaligned();
173 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
175 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
176 databaseLeft->addSequence(leftTemp);
178 databaseLeft->generateDB();
181 databaseLeft->readKmerDB(kmerFileTestLeft);
183 kmerFileTestLeft.close();
185 databaseLeft->setNumSeqs(templateSeqs.size());
188 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
189 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
190 bool needToGenerateRight = true;
192 if(kmerFileTestRight){
193 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
194 if (GoodFile) { needToGenerateRight = false; }
197 if(needToGenerateRight){
199 for (int i = 0; i < templateSeqs.size(); i++) {
200 if (m->control_pressed) { return 0; }
202 string rightFrag = templateSeqs[i]->getUnaligned();
203 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
205 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
206 databaseRight->addSequence(rightTemp);
208 databaseRight->generateDB();
211 databaseRight->readKmerDB(kmerFileTestRight);
213 kmerFileTestRight.close();
215 databaseRight->setNumSeqs(templateSeqs.size());
217 }else if (searchMethod == "blast") {
220 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
222 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
223 databaseLeft->generateDB();
224 databaseLeft->setNumSeqs(templateSeqs.size());
230 catch(exception& e) {
231 m->errorOut(e, "ChimeraSlayer", "doprep");
235 //***************************************************************************************************************
236 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
239 //when template=self, the query file is sorted from most abundance to least abundant
240 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
241 vector<Sequence*> userTemplate;
243 int myAbund = priority[q->getName()];
245 for (int i = 0; i < templateSeqs.size(); i++) {
247 if (m->control_pressed) { return userTemplate; }
249 //have I reached a sequence with the same abundance as myself?
250 if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
252 //if its am not chimeric add it
253 if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) {
254 userTemplate.push_back(templateSeqs[i]);
255 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
259 string kmerDBNameLeft;
260 string kmerDBNameRight;
262 //generate the kmerdb to pass to maligner
263 if (searchMethod == "kmer") {
264 string templatePath = m->hasPath(templateFileName);
265 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
266 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
268 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
269 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
271 for (int i = 0; i < userTemplate.size(); i++) {
273 if (m->control_pressed) { return userTemplate; }
275 string leftFrag = userTemplate[i]->getUnaligned();
276 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
278 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
279 databaseLeft->addSequence(leftTemp);
281 databaseLeft->generateDB();
282 databaseLeft->setNumSeqs(userTemplate.size());
284 for (int i = 0; i < userTemplate.size(); i++) {
285 if (m->control_pressed) { return userTemplate; }
287 string rightFrag = userTemplate[i]->getUnaligned();
288 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
290 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
291 databaseRight->addSequence(rightTemp);
293 databaseRight->generateDB();
294 databaseRight->setNumSeqs(userTemplate.size());
299 for (int i = 0; i < userTemplate.size(); i++) {
301 if (m->control_pressed) { return userTemplate; }
303 string leftFrag = userTemplate[i]->getUnaligned();
304 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
306 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
307 databaseLeft->addSequence(leftTemp);
309 databaseLeft->generateDB();
310 databaseLeft->setNumSeqs(userTemplate.size());
312 for (int i = 0; i < userTemplate.size(); i++) {
313 if (m->control_pressed) { return userTemplate; }
315 string rightFrag = userTemplate[i]->getUnaligned();
316 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
318 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
319 databaseRight->addSequence(rightTemp);
321 databaseRight->generateDB();
322 databaseRight->setNumSeqs(userTemplate.size());
324 }else if (searchMethod == "blast") {
327 databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
329 for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; } databaseLeft->addSequence(*userTemplate[i]); }
330 databaseLeft->generateDB();
331 databaseLeft->setNumSeqs(userTemplate.size());
337 catch(exception& e) {
338 m->errorOut(e, "ChimeraSlayer", "getTemplate");
343 //***************************************************************************************************************
344 ChimeraSlayer::~ChimeraSlayer() {
346 if (templateFileName != "self") {
347 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
348 else if (searchMethod == "blast") { delete databaseLeft; }
351 //***************************************************************************************************************
352 void ChimeraSlayer::printHeader(ostream& out) {
353 m->mothurOutEndLine();
354 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
355 m->mothurOutEndLine();
357 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
359 //***************************************************************************************************************
360 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
362 Sequence* trim = NULL;
363 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
365 if (chimeraFlags == "yes") {
366 string chimeraFlag = "no";
367 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
369 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
372 if (chimeraFlag == "yes") {
373 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
374 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
375 outAcc << querySeq->getName() << endl;
377 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
380 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
381 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
383 string newAligned = trim->getAligned();
385 if (lengthLeft > lengthRight) { //trim right
386 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
388 for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
390 trim->setAligned(newAligned);
395 printBlock(chimeraResults[0], chimeraFlag, out);
398 out << querySeq->getName() << "\tno" << endl;
404 catch(exception& e) {
405 m->errorOut(e, "ChimeraSlayer", "print");
409 //***************************************************************************************************************
410 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
412 Sequence* trim = NULL;
415 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
416 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
419 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
421 string chimeraFlag = "no";
422 if (leftPiece.flag == "yes") {
424 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
426 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
429 if (rightPiece.flag == "yes") {
430 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
432 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
435 bool rightChimeric = false;
436 bool leftChimeric = false;
438 if (chimeraFlag == "yes") {
439 //which peice is chimeric or are both
440 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
441 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
443 if (rightChimeric || leftChimeric) {
444 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
445 outAcc << querySeq->getName() << endl;
447 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
450 string newAligned = trim->getAligned();
452 //right side is fine so keep that
453 if ((leftChimeric) && (!rightChimeric)) {
454 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
455 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
456 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457 }else { //both sides are chimeric, keep longest piece
459 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
460 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
462 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
463 int length = lengthLeftLeft;
464 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
466 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
467 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
469 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
470 if (lengthRightRight > length) { longest = 4; }
472 if (longest == 1) { //leftleft
473 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
474 }else if (longest == 2) { //leftright
475 //get rid of leftleft
476 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
478 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
479 }else if (longest == 3) { //rightleft
481 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
482 //get rid of rightright
483 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
486 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
487 //get rid of rightleft
488 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
492 trim->setAligned(newAligned);
498 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
501 out << querySeq->getName() << "\tno" << endl;
507 catch(exception& e) {
508 m->errorOut(e, "ChimeraSlayer", "print");
514 //***************************************************************************************************************
515 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
518 bool results = false;
519 string outAccString = "";
520 string outputString = "";
522 Sequence* trim = NULL;
525 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
526 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
530 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
532 string chimeraFlag = "no";
533 if (leftPiece.flag == "yes") {
535 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
537 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
540 if (rightPiece.flag == "yes") {
541 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
543 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
546 bool rightChimeric = false;
547 bool leftChimeric = false;
551 if (chimeraFlag == "yes") {
552 //which peice is chimeric or are both
553 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
554 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
556 if (rightChimeric || leftChimeric) {
557 cout << querySeq->getName() << "\tyes" << endl;
558 outAccString += querySeq->getName() + "\n";
561 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
563 //write to accnos file
564 int length = outAccString.length();
565 char* buf2 = new char[length];
566 memcpy(buf2, outAccString.c_str(), length);
568 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
572 string newAligned = trim->getAligned();
574 //right side is fine so keep that
575 if ((leftChimeric) && (!rightChimeric)) {
576 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
577 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
578 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
579 }else { //both sides are chimeric, keep longest piece
581 int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
582 int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
584 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
585 int length = lengthLeftLeft;
586 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
588 int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
589 int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
591 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
592 if (lengthRightRight > length) { longest = 4; }
594 if (longest == 1) { //leftleft
595 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
596 }else if (longest == 2) { //leftright
597 //get rid of leftleft
598 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
600 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
601 }else if (longest == 3) { //rightleft
603 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
604 //get rid of rightright
605 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
608 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; }
609 //get rid of rightleft
610 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
614 trim->setAligned(newAligned);
620 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
621 outputString += "\n";
623 //write to output file
624 int length = outputString.length();
625 char* buf = new char[length];
626 memcpy(buf, outputString.c_str(), length);
628 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
632 outputString += querySeq->getName() + "\tno\n";
634 //write to output file
635 int length = outputString.length();
636 char* buf = new char[length];
637 memcpy(buf, outputString.c_str(), length);
639 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
646 catch(exception& e) {
647 m->errorOut(e, "ChimeraSlayer", "print");
651 //***************************************************************************************************************
652 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
655 bool results = false;
656 string outAccString = "";
657 string outputString = "";
659 Sequence* trim = NULL;
660 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
662 if (chimeraFlags == "yes") {
663 string chimeraFlag = "no";
664 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
666 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
669 if (chimeraFlag == "yes") {
670 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
671 cout << querySeq->getName() << "\tyes" << endl;
672 outAccString += querySeq->getName() + "\n";
675 if (templateFileName == "self") { chimericSeqs.insert(querySeq->getName()); }
677 //write to accnos file
678 int length = outAccString.length();
679 char* buf2 = new char[length];
680 memcpy(buf2, outAccString.c_str(), length);
682 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
686 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
687 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
689 string newAligned = trim->getAligned();
690 if (lengthLeft > lengthRight) { //trim right
691 for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
693 for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
695 trim->setAligned(newAligned);
700 outputString = getBlock(chimeraResults[0], chimeraFlag);
701 outputString += "\n";
703 //write to output file
704 int length = outputString.length();
705 char* buf = new char[length];
706 memcpy(buf, outputString.c_str(), length);
708 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
712 outputString += querySeq->getName() + "\tno\n";
714 //write to output file
715 int length = outputString.length();
716 char* buf = new char[length];
717 memcpy(buf, outputString.c_str(), length);
719 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
725 catch(exception& e) {
726 m->errorOut(e, "ChimeraSlayer", "print");
732 //***************************************************************************************************************
733 int ChimeraSlayer::getChimeras(Sequence* query) {
736 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
737 printResults.trimQuery = trimQuery;
740 printResults.flag = "no";
744 //you must create a template
745 vector<Sequence*> thisTemplate;
746 vector<Sequence*> thisFilteredTemplate;
747 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
748 else { thisTemplate = getTemplate(query, thisFilteredTemplate); } //fills this template and creates the databases
750 if (m->control_pressed) { return 0; }
752 if (thisTemplate.size() == 0) { return 0; } //not chimeric
754 //moved this out of maligner - 4/29/11
755 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
757 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov);
758 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
760 if (templateFileName == "self") {
761 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
762 else if (searchMethod == "blast") { delete databaseLeft; }
765 if (m->control_pressed) { return 0; }
767 string chimeraFlag = maligner.getResults(query, decalc);
769 if (m->control_pressed) { return 0; }
771 vector<results> Results = maligner.getOutput();
773 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
775 if (chimeraFlag == "yes") {
778 vector<string> parents;
779 for (int i = 0; i < Results.size(); i++) {
780 parents.push_back(Results[i].parentAligned);
783 ChimeraReAligner realigner;
784 realigner.reAlign(query, parents);
788 // cout << query->getAligned() << endl;
789 //get sequence that were given from maligner results
790 vector<SeqDist> seqs;
791 map<string, float> removeDups;
792 map<string, float>::iterator itDup;
793 map<string, string> parentNameSeq;
794 map<string, string>::iterator itSeq;
795 for (int j = 0; j < Results.size(); j++) {
797 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
798 //only add if you are not a duplicate
799 // cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
802 if(Results[j].queryToParentLocal >= 90){ //local match has to be over 90% similarity
804 itDup = removeDups.find(Results[j].parent);
805 if (itDup == removeDups.end()) { //this is not duplicate
806 removeDups[Results[j].parent] = dist;
807 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
808 }else if (dist > itDup->second) { //is this a stronger number for this parent
809 removeDups[Results[j].parent] = dist;
810 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
817 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
818 itSeq = parentNameSeq.find(itDup->first);
819 Sequence* seq = new Sequence(itDup->first, itSeq->second);
823 member.dist = itDup->second;
824 seqs.push_back(member);
827 //limit number of parents to explore - default 3
828 if (Results.size() > parents) {
830 sort(seqs.begin(), seqs.end(), compareSeqDist);
831 //prioritize larger more similiar sequence fragments
832 reverse(seqs.begin(), seqs.end());
834 for (int k = seqs.size()-1; k > (parents-1); k--) {
840 //put seqs into vector to send to slayer
842 // cout << query->getAligned() << endl;
843 vector<Sequence*> seqsForSlayer;
844 for (int k = 0; k < seqs.size(); k++) {
845 // cout << seqs[k].seq->getAligned() << endl;
846 seqsForSlayer.push_back(seqs[k].seq);
847 // cout << seqs[k].seq->getName() << endl;
850 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
853 chimeraFlags = slayer.getResults(query, seqsForSlayer);
854 if (m->control_pressed) { return 0; }
855 chimeraResults = slayer.getOutput();
857 printResults.flag = chimeraFlags;
858 printResults.results = chimeraResults;
861 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
863 //cout << endl << endl;
866 catch(exception& e) {
867 m->errorOut(e, "ChimeraSlayer", "getChimeras");
871 //***************************************************************************************************************
872 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
874 out << querySeq->getName() << '\t';
875 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
877 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
878 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
880 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
883 catch(exception& e) {
884 m->errorOut(e, "ChimeraSlayer", "printBlock");
888 //***************************************************************************************************************
889 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
892 if ((leftChimeric) && (!rightChimeric)) { //print left
893 out << querySeq->getName() << '\t';
894 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
896 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
897 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
899 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
901 }else if ((!leftChimeric) && (rightChimeric)) { //print right
902 out << querySeq->getName() << '\t';
903 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
905 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
906 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
908 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
910 }else { //print both results
911 if (leftdata.flag == "yes") {
912 out << querySeq->getName() + "_LEFT" << '\t';
913 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
915 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
916 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
918 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
921 if (rightdata.flag == "yes") {
922 if (leftdata.flag == "yes") { out << endl; }
924 out << querySeq->getName() + "_RIGHT"<< '\t';
925 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
927 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
928 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
930 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';
935 catch(exception& e) {
936 m->errorOut(e, "ChimeraSlayer", "printBlock");
940 //***************************************************************************************************************
941 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
946 if ((leftChimeric) && (!rightChimeric)) { //get left
947 out += querySeq->getName() + "\t";
948 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
950 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
951 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
953 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
955 }else if ((!leftChimeric) && (rightChimeric)) { //print right
956 out += querySeq->getName() + "\t";
957 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
959 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
960 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
962 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
964 }else { //print both results
966 if (leftdata.flag == "yes") {
967 out += querySeq->getName() + "_LEFT\t";
968 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
970 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
971 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
973 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
976 if (rightdata.flag == "yes") {
977 if (leftdata.flag == "yes") { out += "\n"; }
978 out += querySeq->getName() + "_RIGHT\t";
979 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
981 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
982 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
984 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";
991 catch(exception& e) {
992 m->errorOut(e, "ChimeraSlayer", "getBlock");
996 //***************************************************************************************************************
997 string ChimeraSlayer::getBlock(data_struct data, string flag){
1000 string outputString = "";
1002 outputString += querySeq->getName() + "\t";
1003 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1005 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1006 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1008 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1010 return outputString;
1012 catch(exception& e) {
1013 m->errorOut(e, "ChimeraSlayer", "getBlock");
1017 //***************************************************************************************************************
1018 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1021 vector<Sequence*> refSeqs;
1023 if (searchMethod == "distance") {
1024 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1025 Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1027 refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1029 }else if (searchMethod == "blast") {
1030 refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1031 }else if (searchMethod == "kmer") {
1032 refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1033 }else { m->mothurOut("not valid search."); exit(1); } //should never get here
1037 catch(exception& e) {
1038 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1042 //***************************************************************************************************************/
1043 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1046 vector<Sequence*> refResults;
1048 //get parts of query
1049 string queryUnAligned = q->getUnaligned();
1050 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1051 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1052 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;
1053 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1054 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1056 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1057 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1060 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1061 // vector<int> smaller;
1062 // vector<int> larger;
1064 // if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; }
1065 // else { smaller = tempIndexesLeft; larger = tempIndexesRight; }
1069 map<int, int>::iterator it;
1070 vector<int> mergedResults;
1073 // for (int i = 0; i < smaller.size(); i++) {
1074 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1076 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1078 //add left if you havent already
1079 it = seen.find(tempIndexesLeft[index]);
1080 if (it == seen.end()) {
1081 mergedResults.push_back(tempIndexesLeft[index]);
1082 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1085 //add right if you havent already
1086 it = seen.find(tempIndexesRight[index]);
1087 if (it == seen.end()) {
1088 mergedResults.push_back(tempIndexesRight[index]);
1089 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1095 for (int i = index; i < tempIndexesLeft.size(); i++) {
1096 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1098 //add right if you havent already
1099 it = seen.find(tempIndexesLeft[i]);
1100 if (it == seen.end()) {
1101 mergedResults.push_back(tempIndexesLeft[i]);
1102 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1106 for (int i = index; i < tempIndexesRight.size(); i++) {
1107 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1109 //add right if you havent already
1110 it = seen.find(tempIndexesRight[i]);
1111 if (it == seen.end()) {
1112 mergedResults.push_back(tempIndexesRight[i]);
1113 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1116 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));
1117 //cout << qname << endl;
1119 for (int i = 0; i < mergedResults.size(); i++) {
1120 //cout << q->getName() << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1121 if (db[mergedResults[i]]->getName() != q->getName()) {
1122 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1123 refResults.push_back(temp);
1127 //cout << endl << endl;
1132 if (refResults.size() == 0) { m->mothurOut("[WARNING]: mothur found 0 potential parents, so we are not able to check " + q->getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
1136 catch(exception& e) {
1137 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1141 //***************************************************************************************************************
1142 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1144 vector<Sequence*> refResults;
1146 //get parts of query
1147 string queryUnAligned = q->getUnaligned();
1148 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1149 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1151 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1152 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1154 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1155 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1159 map<int, int>::iterator it;
1160 vector<int> mergedResults;
1163 // for (int i = 0; i < smaller.size(); i++) {
1164 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1166 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1168 //add left if you havent already
1169 it = seen.find(tempIndexesLeft[index]);
1170 if (it == seen.end()) {
1171 mergedResults.push_back(tempIndexesLeft[index]);
1172 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1175 //add right if you havent already
1176 it = seen.find(tempIndexesRight[index]);
1177 if (it == seen.end()) {
1178 mergedResults.push_back(tempIndexesRight[index]);
1179 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1185 for (int i = index; i < tempIndexesLeft.size(); i++) {
1186 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1188 //add right if you havent already
1189 it = seen.find(tempIndexesLeft[i]);
1190 if (it == seen.end()) {
1191 mergedResults.push_back(tempIndexesLeft[i]);
1192 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1196 for (int i = index; i < tempIndexesRight.size(); i++) {
1197 if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1199 //add right if you havent already
1200 it = seen.find(tempIndexesRight[i]);
1201 if (it == seen.end()) {
1202 mergedResults.push_back(tempIndexesRight[i]);
1203 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1207 for (int i = 0; i < mergedResults.size(); i++) {
1208 //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl;
1209 if (db[mergedResults[i]]->getName() != q->getName()) {
1210 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1211 refResults.push_back(temp);
1222 catch(exception& e) {
1223 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1227 //***************************************************************************************************************