5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
68 includeAbunds = abunds;
71 //read name file and create nameMapRank
74 decalc = new DeCalculator();
76 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78 //run filter on template
79 for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]); }
83 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
87 //***************************************************************************************************************
88 int ChimeraSlayer::readNameFile(string name) {
91 m->openInputFile(name, in);
94 int minRank = 10000000;
98 if (m->control_pressed) { in.close(); return 0; }
100 string thisname, repnames;
102 in >> thisname; m->gobble(in); //read from first column
103 in >> repnames; //read from second column
105 map<string, vector<string> >::iterator it = nameMapRank.find(thisname);
106 if (it == nameMapRank.end()) {
108 vector<string> splitRepNames;
109 m->splitAtComma(repnames, splitRepNames);
111 nameMapRank[thisname] = splitRepNames;
113 if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); }
114 if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); }
116 }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); }
122 //sanity check to make sure files match
123 for (int i = 0; i < templateSeqs.size(); i++) {
124 map<string, vector<string> >::iterator it = nameMapRank.find(templateSeqs[i]->getName());
126 if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true; }
129 if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
134 catch(exception& e) {
135 m->errorOut(e, "ChimeraSlayer", "readNameFile");
140 //***************************************************************************************************************
141 int ChimeraSlayer::doPrep() {
144 //read in all query seqs
145 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
147 vector<Sequence*> temp = templateSeqs;
148 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
150 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
152 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
154 if (m->control_pressed) { return 0; }
156 //run filter on template
157 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
159 string kmerDBNameLeft;
160 string kmerDBNameRight;
162 //generate the kmerdb to pass to maligner
163 if (searchMethod == "kmer") {
164 string templatePath = m->hasPath(templateFileName);
165 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
166 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
168 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
169 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
171 for (int i = 0; i < templateSeqs.size(); i++) {
173 if (m->control_pressed) { return 0; }
175 string leftFrag = templateSeqs[i]->getUnaligned();
176 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
178 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
179 databaseLeft->addSequence(leftTemp);
181 databaseLeft->generateDB();
182 databaseLeft->setNumSeqs(templateSeqs.size());
184 for (int i = 0; i < templateSeqs.size(); i++) {
185 if (m->control_pressed) { return 0; }
187 string rightFrag = templateSeqs[i]->getUnaligned();
188 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
190 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
191 databaseRight->addSequence(rightTemp);
193 databaseRight->generateDB();
194 databaseRight->setNumSeqs(templateSeqs.size());
198 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
199 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
200 bool needToGenerateLeft = true;
202 if(kmerFileTestLeft){
203 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
204 if (GoodFile) { needToGenerateLeft = false; }
207 if(needToGenerateLeft){
209 for (int i = 0; i < templateSeqs.size(); i++) {
211 if (m->control_pressed) { return 0; }
213 string leftFrag = templateSeqs[i]->getUnaligned();
214 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
216 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
217 databaseLeft->addSequence(leftTemp);
219 databaseLeft->generateDB();
222 databaseLeft->readKmerDB(kmerFileTestLeft);
224 kmerFileTestLeft.close();
226 databaseLeft->setNumSeqs(templateSeqs.size());
229 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
230 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
231 bool needToGenerateRight = true;
233 if(kmerFileTestRight){
234 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
235 if (GoodFile) { needToGenerateRight = false; }
238 if(needToGenerateRight){
240 for (int i = 0; i < templateSeqs.size(); i++) {
241 if (m->control_pressed) { return 0; }
243 string rightFrag = templateSeqs[i]->getUnaligned();
244 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
246 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
247 databaseRight->addSequence(rightTemp);
249 databaseRight->generateDB();
252 databaseRight->readKmerDB(kmerFileTestRight);
254 kmerFileTestRight.close();
256 databaseRight->setNumSeqs(templateSeqs.size());
258 }else if (searchMethod == "blast") {
261 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
263 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
264 databaseLeft->generateDB();
265 databaseLeft->setNumSeqs(templateSeqs.size());
271 catch(exception& e) {
272 m->errorOut(e, "ChimeraSlayer", "doprep");
276 //***************************************************************************************************************
277 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
280 vector<Sequence*> thisTemplate;
283 string thisName = q->getName();
284 map<string, vector<string> >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked
285 thisRank = (itRank->second).size();
287 //create list of names we want to put into the template
288 set<string> namesToAdd;
289 for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
290 if (itRank->first != thisName) {
291 if (includeAbunds == "greaterequal") {
292 if ((itRank->second).size() >= thisRank) {
293 //you are more abundant than me or equal to my abundance
294 for (int i = 0; i < (itRank->second).size(); i++) {
295 namesToAdd.insert((itRank->second)[i]);
298 }else if (includeAbunds == "greater") {
299 if ((itRank->second).size() > thisRank) {
300 //you are more abundant than me
301 for (int i = 0; i < (itRank->second).size(); i++) {
302 namesToAdd.insert((itRank->second)[i]);
305 }else if (includeAbunds == "all") {
307 for (int i = 0; i < (itRank->second).size(); i++) {
308 namesToAdd.insert((itRank->second)[i]);
314 for (int i = 0; i < templateSeqs.size(); i++) {
315 if (namesToAdd.count(templateSeqs[i]->getName()) != 0) {
316 thisTemplate.push_back(templateSeqs[i]);
320 string kmerDBNameLeft;
321 string kmerDBNameRight;
323 //generate the kmerdb to pass to maligner
324 if (searchMethod == "kmer") {
325 string templatePath = m->hasPath(templateFileName);
326 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
327 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
329 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
330 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
332 for (int i = 0; i < thisTemplate.size(); i++) {
334 if (m->control_pressed) { return thisTemplate; }
336 string leftFrag = thisTemplate[i]->getUnaligned();
337 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
339 Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
340 databaseLeft->addSequence(leftTemp);
342 databaseLeft->generateDB();
343 databaseLeft->setNumSeqs(thisTemplate.size());
345 for (int i = 0; i < thisTemplate.size(); i++) {
346 if (m->control_pressed) { return thisTemplate; }
348 string rightFrag = thisTemplate[i]->getUnaligned();
349 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
351 Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
352 databaseRight->addSequence(rightTemp);
354 databaseRight->generateDB();
355 databaseRight->setNumSeqs(thisTemplate.size());
360 for (int i = 0; i < thisTemplate.size(); i++) {
362 if (m->control_pressed) { return thisTemplate; }
364 string leftFrag = thisTemplate[i]->getUnaligned();
365 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
367 Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
368 databaseLeft->addSequence(leftTemp);
370 databaseLeft->generateDB();
371 databaseLeft->setNumSeqs(thisTemplate.size());
373 for (int i = 0; i < thisTemplate.size(); i++) {
374 if (m->control_pressed) { return thisTemplate; }
376 string rightFrag = thisTemplate[i]->getUnaligned();
377 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
379 Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
380 databaseRight->addSequence(rightTemp);
382 databaseRight->generateDB();
383 databaseRight->setNumSeqs(thisTemplate.size());
385 }else if (searchMethod == "blast") {
388 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
390 for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; } databaseLeft->addSequence(*thisTemplate[i]); }
391 databaseLeft->generateDB();
392 databaseLeft->setNumSeqs(thisTemplate.size());
398 catch(exception& e) {
399 m->errorOut(e, "ChimeraSlayer", "getTemplate");
404 //***************************************************************************************************************
405 ChimeraSlayer::~ChimeraSlayer() {
407 if (templateFileName != "self") {
408 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
409 else if (searchMethod == "blast") { delete databaseLeft; }
412 //***************************************************************************************************************
413 void ChimeraSlayer::printHeader(ostream& out) {
414 m->mothurOutEndLine();
415 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
416 m->mothurOutEndLine();
418 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
420 //***************************************************************************************************************
421 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
423 Sequence* trim = NULL;
424 if (trimChimera) { trim = trimQuery; }
426 if (chimeraFlags == "yes") {
427 string chimeraFlag = "no";
428 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
430 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
433 if (chimeraFlag == "yes") {
434 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
435 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
436 outAcc << querySeq->getName() << endl;
439 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
440 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
442 string newAligned = trim->getAligned();
444 if (lengthLeft > lengthRight) { //trim right
445 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
447 for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
449 trim->setAligned(newAligned);
455 printBlock(chimeraResults[0], chimeraFlag, out);
457 }else { out << querySeq->getName() << "\tno" << endl; }
462 catch(exception& e) {
463 m->errorOut(e, "ChimeraSlayer", "print");
467 //***************************************************************************************************************
468 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
470 Sequence* trim = NULL;
473 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
474 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
477 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
479 string chimeraFlag = "no";
480 if (leftPiece.flag == "yes") {
482 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
484 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
487 if (rightPiece.flag == "yes") {
488 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
490 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
493 bool rightChimeric = false;
494 bool leftChimeric = false;
496 if (chimeraFlag == "yes") {
497 //which peice is chimeric or are both
498 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
499 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
501 if (rightChimeric || leftChimeric) {
502 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
503 outAcc << querySeq->getName() << endl;
506 string newAligned = trim->getAligned();
508 //right side is fine so keep that
509 if ((leftChimeric) && (!rightChimeric)) {
510 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
511 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
512 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
513 }else { //both sides are chimeric, keep longest piece
515 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
516 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
518 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
519 int length = lengthLeftLeft;
520 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
522 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
523 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
525 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
526 if (lengthRightRight > length) { longest = 4; }
528 if (longest == 1) { //leftleft
529 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
530 }else if (longest == 2) { //leftright
531 //get rid of leftleft
532 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
534 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
535 }else if (longest == 3) { //rightleft
537 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
538 //get rid of rightright
539 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
542 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
543 //get rid of rightleft
544 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
548 trim->setAligned(newAligned);
554 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
556 }else { out << querySeq->getName() << "\tno" << endl; }
561 catch(exception& e) {
562 m->errorOut(e, "ChimeraSlayer", "print");
568 //***************************************************************************************************************
569 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
572 bool results = false;
573 string outAccString = "";
574 string outputString = "";
576 Sequence* trim = NULL;
579 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
580 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
584 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
586 string chimeraFlag = "no";
587 if (leftPiece.flag == "yes") {
589 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
591 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
594 if (rightPiece.flag == "yes") {
595 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
597 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
600 bool rightChimeric = false;
601 bool leftChimeric = false;
603 if (chimeraFlag == "yes") {
604 //which peice is chimeric or are both
605 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
606 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
608 if (rightChimeric || leftChimeric) {
609 cout << querySeq->getName() << "\tyes" << endl;
610 outAccString += querySeq->getName() + "\n";
613 //write to accnos file
614 int length = outAccString.length();
615 char* buf2 = new char[length];
616 memcpy(buf2, outAccString.c_str(), length);
618 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
622 string newAligned = trim->getAligned();
624 //right side is fine so keep that
625 if ((leftChimeric) && (!rightChimeric)) {
626 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
627 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
628 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
629 }else { //both sides are chimeric, keep longest piece
631 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
632 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
634 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
635 int length = lengthLeftLeft;
636 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
638 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
639 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
641 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
642 if (lengthRightRight > length) { longest = 4; }
644 if (longest == 1) { //leftleft
645 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
646 }else if (longest == 2) { //leftright
647 //get rid of leftleft
648 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
650 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
651 }else if (longest == 3) { //rightleft
653 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
654 //get rid of rightright
655 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
658 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
659 //get rid of rightleft
660 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
664 trim->setAligned(newAligned);
670 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
671 outputString += "\n";
673 //write to output file
674 int length = outputString.length();
675 char* buf = new char[length];
676 memcpy(buf, outputString.c_str(), length);
678 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
682 outputString += querySeq->getName() + "\tno\n";
684 //write to output file
685 int length = outputString.length();
686 char* buf = new char[length];
687 memcpy(buf, outputString.c_str(), length);
689 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
696 catch(exception& e) {
697 m->errorOut(e, "ChimeraSlayer", "print");
701 //***************************************************************************************************************
702 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
705 bool results = false;
706 string outAccString = "";
707 string outputString = "";
709 Sequence* trim = NULL;
710 if (trimChimera) { trim = trimQuery; }
712 if (chimeraFlags == "yes") {
713 string chimeraFlag = "no";
714 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
716 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
719 if (chimeraFlag == "yes") {
720 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
721 cout << querySeq->getName() << "\tyes" << endl;
722 outAccString += querySeq->getName() + "\n";
725 //write to accnos file
726 int length = outAccString.length();
727 char* buf2 = new char[length];
728 memcpy(buf2, outAccString.c_str(), length);
730 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
734 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
735 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
737 string newAligned = trim->getAligned();
738 if (lengthLeft > lengthRight) { //trim right
739 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
741 for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
743 trim->setAligned(newAligned);
748 outputString = getBlock(chimeraResults[0], chimeraFlag);
749 outputString += "\n";
751 //write to output file
752 int length = outputString.length();
753 char* buf = new char[length];
754 memcpy(buf, outputString.c_str(), length);
756 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
760 outputString += querySeq->getName() + "\tno\n";
762 //write to output file
763 int length = outputString.length();
764 char* buf = new char[length];
765 memcpy(buf, outputString.c_str(), length);
767 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
773 catch(exception& e) {
774 m->errorOut(e, "ChimeraSlayer", "print");
780 //***************************************************************************************************************
781 int ChimeraSlayer::getChimeras(Sequence* query) {
783 if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned()); printResults.trimQuery = *trimQuery; }
786 printResults.flag = "no";
789 spotMap = runFilter(query);
790 printResults.spotMap = spotMap;
794 //you must create a template
795 vector<Sequence*> thisTemplate;
796 if (templateFileName != "self") { thisTemplate = templateSeqs; }
797 else { thisTemplate = getTemplate(query); } //fills thistemplate and creates the databases
799 if (m->control_pressed) { return 0; }
801 if (thisTemplate.size() == 0) { return 0; } //not chimeric
803 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
804 Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
805 Slayer slayer(window, increment, minSim, divR, iters, minSNP);
807 if (templateFileName == "self") {
808 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
809 else if (searchMethod == "blast") { delete databaseLeft; }
812 if (m->control_pressed) { return 0; }
814 string chimeraFlag = maligner.getResults(query, decalc);
815 if (m->control_pressed) { return 0; }
816 vector<results> Results = maligner.getOutput();
818 //found in testing realigning only made things worse
820 ChimeraReAligner realigner(thisTemplate, match, misMatch);
821 realigner.reAlign(query, Results);
824 if (chimeraFlag == "yes") {
826 //get sequence that were given from maligner results
827 vector<SeqDist> seqs;
828 map<string, float> removeDups;
829 map<string, float>::iterator itDup;
830 map<string, string> parentNameSeq;
831 map<string, string>::iterator itSeq;
832 for (int j = 0; j < Results.size(); j++) {
833 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
834 //only add if you are not a duplicate
835 itDup = removeDups.find(Results[j].parent);
836 if (itDup == removeDups.end()) { //this is not duplicate
837 removeDups[Results[j].parent] = dist;
838 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
839 }else if (dist > itDup->second) { //is this a stronger number for this parent
840 removeDups[Results[j].parent] = dist;
841 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
845 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
846 itSeq = parentNameSeq.find(itDup->first);
847 Sequence* seq = new Sequence(itDup->first, itSeq->second);
851 member.dist = itDup->second;
853 seqs.push_back(member);
856 //limit number of parents to explore - default 3
857 if (Results.size() > parents) {
859 sort(seqs.begin(), seqs.end(), compareSeqDist);
860 //prioritize larger more similiar sequence fragments
861 reverse(seqs.begin(), seqs.end());
863 for (int k = seqs.size()-1; k > (parents-1); k--) {
869 //put seqs into vector to send to slayer
870 vector<Sequence*> seqsForSlayer;
872 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
874 //mask then send to slayer...
876 decalc->setMask(seqMask);
879 decalc->runMask(query);
882 for (int k = 0; k < seqsForSlayer.size(); k++) {
883 decalc->runMask(seqsForSlayer[k]);
886 spotMap = decalc->getMaskMap();
889 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
892 chimeraFlags = slayer.getResults(query, seqsForSlayer);
893 if (m->control_pressed) { return 0; }
894 chimeraResults = slayer.getOutput();
897 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
899 printResults.spotMap = spotMap;
900 printResults.flag = chimeraFlags;
901 printResults.results = chimeraResults;
906 catch(exception& e) {
907 m->errorOut(e, "ChimeraSlayer", "getChimeras");
911 //***************************************************************************************************************
912 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
914 out << querySeq->getName() << '\t';
915 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
917 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
918 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
920 out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
923 catch(exception& e) {
924 m->errorOut(e, "ChimeraSlayer", "printBlock");
928 //***************************************************************************************************************
929 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
932 if ((leftChimeric) && (!rightChimeric)) { //print left
933 out << querySeq->getName() << '\t';
934 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
936 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
937 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
939 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
941 }else if ((!leftChimeric) && (rightChimeric)) { //print right
942 out << querySeq->getName() << '\t';
943 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
945 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
946 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
948 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
950 }else { //print both results
951 if (leftdata.flag == "yes") {
952 out << querySeq->getName() + "_LEFT" << '\t';
953 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
955 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
956 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
958 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
961 if (rightdata.flag == "yes") {
962 if (leftdata.flag == "yes") { out << endl; }
964 out << querySeq->getName() + "_RIGHT"<< '\t';
965 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
967 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
968 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
970 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
975 catch(exception& e) {
976 m->errorOut(e, "ChimeraSlayer", "printBlock");
980 //***************************************************************************************************************
981 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
986 if ((leftChimeric) && (!rightChimeric)) { //get left
987 out += querySeq->getName() + "\t";
988 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
990 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
991 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
993 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
995 }else if ((!leftChimeric) && (rightChimeric)) { //print right
996 out += querySeq->getName() + "\t";
997 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
999 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1000 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1002 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
1004 }else { //print both results
1006 if (leftdata.flag == "yes") {
1007 out += querySeq->getName() + "_LEFT\t";
1008 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1010 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1011 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1013 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
1016 if (rightdata.flag == "yes") {
1017 if (leftdata.flag == "yes") { out += "\n"; }
1018 out += querySeq->getName() + "_RIGHT\t";
1019 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1021 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1022 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1024 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
1031 catch(exception& e) {
1032 m->errorOut(e, "ChimeraSlayer", "getBlock");
1036 //***************************************************************************************************************
1037 string ChimeraSlayer::getBlock(data_struct data, string flag){
1040 string outputString = "";
1042 outputString += querySeq->getName() + "\t";
1043 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1045 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1046 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1048 outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
1050 return outputString;
1052 catch(exception& e) {
1053 m->errorOut(e, "ChimeraSlayer", "getBlock");
1057 //***************************************************************************************************************/