5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
68 includeAbunds = abunds;
71 //read name file and create nameMapRank
74 decalc = new DeCalculator();
76 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78 //run filter on template
79 for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]); }
83 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
87 //***************************************************************************************************************
88 int ChimeraSlayer::readNameFile(string name) {
91 m->openInputFile(name, in);
94 int minRank = 10000000;
98 if (m->control_pressed) { in.close(); return 0; }
100 string thisname, repnames;
102 in >> thisname; m->gobble(in); //read from first column
103 in >> repnames; //read from second column
105 map<string, vector<string> >::iterator it = nameMapRank.find(thisname);
106 if (it == nameMapRank.end()) {
108 vector<string> splitRepNames;
109 m->splitAtComma(repnames, splitRepNames);
111 nameMapRank[thisname] = splitRepNames;
113 if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); }
114 if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); }
116 }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); }
122 //sanity check to make sure files match
123 for (int i = 0; i < templateSeqs.size(); i++) {
124 map<string, vector<string> >::iterator it = nameMapRank.find(templateSeqs[i]->getName());
126 if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true; }
129 if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
134 catch(exception& e) {
135 m->errorOut(e, "ChimeraSlayer", "readNameFile");
140 //***************************************************************************************************************
141 int ChimeraSlayer::doPrep() {
144 //read in all query seqs
145 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
147 vector<Sequence*> temp = templateSeqs;
148 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
150 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
152 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
154 if (m->control_pressed) { return 0; }
156 //run filter on template
157 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
159 string kmerDBNameLeft;
160 string kmerDBNameRight;
162 //generate the kmerdb to pass to maligner
163 if (searchMethod == "kmer") {
164 string templatePath = m->hasPath(templateFileName);
165 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
166 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
168 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
169 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
171 for (int i = 0; i < templateSeqs.size(); i++) {
173 if (m->control_pressed) { return 0; }
175 string leftFrag = templateSeqs[i]->getUnaligned();
176 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
178 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
179 databaseLeft->addSequence(leftTemp);
181 databaseLeft->generateDB();
182 databaseLeft->setNumSeqs(templateSeqs.size());
184 for (int i = 0; i < templateSeqs.size(); i++) {
185 if (m->control_pressed) { return 0; }
187 string rightFrag = templateSeqs[i]->getUnaligned();
188 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
190 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
191 databaseRight->addSequence(rightTemp);
193 databaseRight->generateDB();
194 databaseRight->setNumSeqs(templateSeqs.size());
198 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
199 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
200 bool needToGenerateLeft = true;
202 if(kmerFileTestLeft){
203 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
204 if (GoodFile) { needToGenerateLeft = false; }
207 if(needToGenerateLeft){
209 for (int i = 0; i < templateSeqs.size(); i++) {
211 if (m->control_pressed) { return 0; }
213 string leftFrag = templateSeqs[i]->getUnaligned();
214 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
216 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
217 databaseLeft->addSequence(leftTemp);
219 databaseLeft->generateDB();
222 databaseLeft->readKmerDB(kmerFileTestLeft);
224 kmerFileTestLeft.close();
226 databaseLeft->setNumSeqs(templateSeqs.size());
229 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
230 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
231 bool needToGenerateRight = true;
233 if(kmerFileTestRight){
234 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
235 if (GoodFile) { needToGenerateRight = false; }
238 if(needToGenerateRight){
240 for (int i = 0; i < templateSeqs.size(); i++) {
241 if (m->control_pressed) { return 0; }
243 string rightFrag = templateSeqs[i]->getUnaligned();
244 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
246 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
247 databaseRight->addSequence(rightTemp);
249 databaseRight->generateDB();
252 databaseRight->readKmerDB(kmerFileTestRight);
254 kmerFileTestRight.close();
256 databaseRight->setNumSeqs(templateSeqs.size());
258 }else if (searchMethod == "blast") {
261 databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch);
262 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
263 databaseLeft->generateDB();
264 databaseLeft->setNumSeqs(templateSeqs.size());
270 catch(exception& e) {
271 m->errorOut(e, "ChimeraSlayer", "doprep");
275 //***************************************************************************************************************
276 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
279 vector<Sequence*> thisTemplate;
282 string thisName = q->getName();
283 map<string, vector<string> >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked
284 thisRank = (itRank->second).size();
286 //create list of names we want to put into the template
287 set<string> namesToAdd;
288 for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
289 if (itRank->first != thisName) {
290 if (includeAbunds == "greaterequal") {
291 if ((itRank->second).size() >= thisRank) {
292 //you are more abundant than me or equal to my abundance
293 for (int i = 0; i < (itRank->second).size(); i++) {
294 namesToAdd.insert((itRank->second)[i]);
297 }else if (includeAbunds == "greater") {
298 if ((itRank->second).size() > thisRank) {
299 //you are more abundant than me
300 for (int i = 0; i < (itRank->second).size(); i++) {
301 namesToAdd.insert((itRank->second)[i]);
304 }else if (includeAbunds == "all") {
306 for (int i = 0; i < (itRank->second).size(); i++) {
307 namesToAdd.insert((itRank->second)[i]);
313 for (int i = 0; i < templateSeqs.size(); i++) {
314 if (namesToAdd.count(templateSeqs[i]->getName()) != 0) {
315 thisTemplate.push_back(templateSeqs[i]);
319 string kmerDBNameLeft;
320 string kmerDBNameRight;
322 //generate the kmerdb to pass to maligner
323 if (searchMethod == "kmer") {
324 string templatePath = m->hasPath(templateFileName);
325 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
326 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
328 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
329 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
331 for (int i = 0; i < thisTemplate.size(); i++) {
333 if (m->control_pressed) { return thisTemplate; }
335 string leftFrag = thisTemplate[i]->getUnaligned();
336 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
338 Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
339 databaseLeft->addSequence(leftTemp);
341 databaseLeft->generateDB();
342 databaseLeft->setNumSeqs(thisTemplate.size());
344 for (int i = 0; i < thisTemplate.size(); i++) {
345 if (m->control_pressed) { return thisTemplate; }
347 string rightFrag = thisTemplate[i]->getUnaligned();
348 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
350 Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
351 databaseRight->addSequence(rightTemp);
353 databaseRight->generateDB();
354 databaseRight->setNumSeqs(thisTemplate.size());
359 for (int i = 0; i < thisTemplate.size(); i++) {
361 if (m->control_pressed) { return thisTemplate; }
363 string leftFrag = thisTemplate[i]->getUnaligned();
364 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
366 Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
367 databaseLeft->addSequence(leftTemp);
369 databaseLeft->generateDB();
370 databaseLeft->setNumSeqs(thisTemplate.size());
372 for (int i = 0; i < thisTemplate.size(); i++) {
373 if (m->control_pressed) { return thisTemplate; }
375 string rightFrag = thisTemplate[i]->getUnaligned();
376 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
378 Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
379 databaseRight->addSequence(rightTemp);
381 databaseRight->generateDB();
382 databaseRight->setNumSeqs(thisTemplate.size());
384 }else if (searchMethod == "blast") {
387 databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch);
388 for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; } databaseLeft->addSequence(*thisTemplate[i]); }
389 databaseLeft->generateDB();
390 databaseLeft->setNumSeqs(thisTemplate.size());
396 catch(exception& e) {
397 m->errorOut(e, "ChimeraSlayer", "getTemplate");
402 //***************************************************************************************************************
403 ChimeraSlayer::~ChimeraSlayer() {
405 if (templateFileName != "self") {
406 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
407 else if (searchMethod == "blast") { delete databaseLeft; }
410 //***************************************************************************************************************
411 void ChimeraSlayer::printHeader(ostream& out) {
412 m->mothurOutEndLine();
413 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
414 m->mothurOutEndLine();
416 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
418 //***************************************************************************************************************
419 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
421 Sequence* trim = NULL;
422 if (trimChimera) { trim = trimQuery; }
424 if (chimeraFlags == "yes") {
425 string chimeraFlag = "no";
426 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
428 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
431 if (chimeraFlag == "yes") {
432 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
433 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
434 outAcc << querySeq->getName() << endl;
437 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
438 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
440 string newAligned = trim->getAligned();
442 if (lengthLeft > lengthRight) { //trim right
443 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
445 for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
447 trim->setAligned(newAligned);
453 printBlock(chimeraResults[0], chimeraFlag, out);
455 }else { out << querySeq->getName() << "\tno" << endl; }
460 catch(exception& e) {
461 m->errorOut(e, "ChimeraSlayer", "print");
465 //***************************************************************************************************************
466 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
468 Sequence* trim = NULL;
471 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
472 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
475 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
477 string chimeraFlag = "no";
478 if (leftPiece.flag == "yes") {
480 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
482 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
485 if (rightPiece.flag == "yes") {
486 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
488 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
491 bool rightChimeric = false;
492 bool leftChimeric = false;
494 if (chimeraFlag == "yes") {
495 //which peice is chimeric or are both
496 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
497 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
499 if (rightChimeric || leftChimeric) {
500 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
501 outAcc << querySeq->getName() << endl;
504 string newAligned = trim->getAligned();
506 //right side is fine so keep that
507 if ((leftChimeric) && (!rightChimeric)) {
508 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
509 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
510 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
511 }else { //both sides are chimeric, keep longest piece
513 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
514 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
516 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
517 int length = lengthLeftLeft;
518 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
520 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
521 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
523 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
524 if (lengthRightRight > length) { longest = 4; }
526 if (longest == 1) { //leftleft
527 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
528 }else if (longest == 2) { //leftright
529 //get rid of leftleft
530 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
532 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
533 }else if (longest == 3) { //rightleft
535 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
536 //get rid of rightright
537 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
540 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
541 //get rid of rightleft
542 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
546 trim->setAligned(newAligned);
552 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
554 }else { out << querySeq->getName() << "\tno" << endl; }
559 catch(exception& e) {
560 m->errorOut(e, "ChimeraSlayer", "print");
566 //***************************************************************************************************************
567 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
570 bool results = false;
571 string outAccString = "";
572 string outputString = "";
574 Sequence* trim = NULL;
577 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
578 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
582 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
584 string chimeraFlag = "no";
585 if (leftPiece.flag == "yes") {
587 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
589 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
592 if (rightPiece.flag == "yes") {
593 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
595 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
598 bool rightChimeric = false;
599 bool leftChimeric = false;
601 if (chimeraFlag == "yes") {
602 //which peice is chimeric or are both
603 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
604 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
606 if (rightChimeric || leftChimeric) {
607 cout << querySeq->getName() << "\tyes" << endl;
608 outAccString += querySeq->getName() + "\n";
611 //write to accnos file
612 int length = outAccString.length();
613 char* buf2 = new char[length];
614 memcpy(buf2, outAccString.c_str(), length);
616 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
620 string newAligned = trim->getAligned();
622 //right side is fine so keep that
623 if ((leftChimeric) && (!rightChimeric)) {
624 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
625 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
626 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
627 }else { //both sides are chimeric, keep longest piece
629 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
630 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
632 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
633 int length = lengthLeftLeft;
634 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
636 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
637 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
639 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
640 if (lengthRightRight > length) { longest = 4; }
642 if (longest == 1) { //leftleft
643 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
644 }else if (longest == 2) { //leftright
645 //get rid of leftleft
646 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
648 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
649 }else if (longest == 3) { //rightleft
651 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
652 //get rid of rightright
653 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
656 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
657 //get rid of rightleft
658 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
662 trim->setAligned(newAligned);
668 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
669 outputString += "\n";
671 //write to output file
672 int length = outputString.length();
673 char* buf = new char[length];
674 memcpy(buf, outputString.c_str(), length);
676 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
680 outputString += querySeq->getName() + "\tno\n";
682 //write to output file
683 int length = outputString.length();
684 char* buf = new char[length];
685 memcpy(buf, outputString.c_str(), length);
687 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
694 catch(exception& e) {
695 m->errorOut(e, "ChimeraSlayer", "print");
699 //***************************************************************************************************************
700 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
703 bool results = false;
704 string outAccString = "";
705 string outputString = "";
707 Sequence* trim = NULL;
708 if (trimChimera) { trim = trimQuery; }
710 if (chimeraFlags == "yes") {
711 string chimeraFlag = "no";
712 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
714 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
717 if (chimeraFlag == "yes") {
718 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
719 cout << querySeq->getName() << "\tyes" << endl;
720 outAccString += querySeq->getName() + "\n";
723 //write to accnos file
724 int length = outAccString.length();
725 char* buf2 = new char[length];
726 memcpy(buf2, outAccString.c_str(), length);
728 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
732 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
733 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
735 string newAligned = trim->getAligned();
736 if (lengthLeft > lengthRight) { //trim right
737 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
739 for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
741 trim->setAligned(newAligned);
746 outputString = getBlock(chimeraResults[0], chimeraFlag);
747 outputString += "\n";
749 //write to output file
750 int length = outputString.length();
751 char* buf = new char[length];
752 memcpy(buf, outputString.c_str(), length);
754 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
758 outputString += querySeq->getName() + "\tno\n";
760 //write to output file
761 int length = outputString.length();
762 char* buf = new char[length];
763 memcpy(buf, outputString.c_str(), length);
765 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
771 catch(exception& e) {
772 m->errorOut(e, "ChimeraSlayer", "print");
778 //***************************************************************************************************************
779 int ChimeraSlayer::getChimeras(Sequence* query) {
781 if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned()); printResults.trimQuery = *trimQuery; }
784 printResults.flag = "no";
787 spotMap = runFilter(query);
788 printResults.spotMap = spotMap;
792 //you must create a template
793 vector<Sequence*> thisTemplate;
794 if (templateFileName != "self") { thisTemplate = templateSeqs; }
795 else { thisTemplate = getTemplate(query); } //fills thistemplate and creates the databases
797 if (m->control_pressed) { return 0; }
799 if (thisTemplate.size() == 0) { return 0; } //not chimeric
801 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
802 Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
803 Slayer slayer(window, increment, minSim, divR, iters, minSNP);
805 if (templateFileName == "self") {
806 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
807 else if (searchMethod == "blast") { delete databaseLeft; }
810 if (m->control_pressed) { return 0; }
812 string chimeraFlag = maligner.getResults(query, decalc);
813 if (m->control_pressed) { return 0; }
814 vector<results> Results = maligner.getOutput();
816 //found in testing realigning only made things worse
818 ChimeraReAligner realigner(thisTemplate, match, misMatch);
819 realigner.reAlign(query, Results);
822 if (chimeraFlag == "yes") {
824 //get sequence that were given from maligner results
825 vector<SeqDist> seqs;
826 map<string, float> removeDups;
827 map<string, float>::iterator itDup;
828 map<string, string> parentNameSeq;
829 map<string, string>::iterator itSeq;
830 for (int j = 0; j < Results.size(); j++) {
831 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
832 //only add if you are not a duplicate
833 itDup = removeDups.find(Results[j].parent);
834 if (itDup == removeDups.end()) { //this is not duplicate
835 removeDups[Results[j].parent] = dist;
836 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
837 }else if (dist > itDup->second) { //is this a stronger number for this parent
838 removeDups[Results[j].parent] = dist;
839 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
843 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
844 itSeq = parentNameSeq.find(itDup->first);
845 Sequence* seq = new Sequence(itDup->first, itSeq->second);
849 member.dist = itDup->second;
851 seqs.push_back(member);
854 //limit number of parents to explore - default 3
855 if (Results.size() > parents) {
857 sort(seqs.begin(), seqs.end(), compareSeqDist);
858 //prioritize larger more similiar sequence fragments
859 reverse(seqs.begin(), seqs.end());
861 for (int k = seqs.size()-1; k > (parents-1); k--) {
867 //put seqs into vector to send to slayer
868 vector<Sequence*> seqsForSlayer;
870 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
872 //mask then send to slayer...
874 decalc->setMask(seqMask);
877 decalc->runMask(query);
880 for (int k = 0; k < seqsForSlayer.size(); k++) {
881 decalc->runMask(seqsForSlayer[k]);
884 spotMap = decalc->getMaskMap();
887 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
890 chimeraFlags = slayer.getResults(query, seqsForSlayer);
891 if (m->control_pressed) { return 0; }
892 chimeraResults = slayer.getOutput();
895 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
897 printResults.spotMap = spotMap;
898 printResults.flag = chimeraFlags;
899 printResults.results = chimeraResults;
904 catch(exception& e) {
905 m->errorOut(e, "ChimeraSlayer", "getChimeras");
909 //***************************************************************************************************************
910 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
912 out << querySeq->getName() << '\t';
913 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
915 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
916 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
918 out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
921 catch(exception& e) {
922 m->errorOut(e, "ChimeraSlayer", "printBlock");
926 //***************************************************************************************************************
927 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
930 if ((leftChimeric) && (!rightChimeric)) { //print left
931 out << querySeq->getName() << '\t';
932 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
934 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
935 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
937 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
939 }else if ((!leftChimeric) && (rightChimeric)) { //print right
940 out << querySeq->getName() << '\t';
941 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
943 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
944 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
946 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
948 }else { //print both results
949 if (leftdata.flag == "yes") {
950 out << querySeq->getName() + "_LEFT" << '\t';
951 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
953 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
954 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
956 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
959 if (rightdata.flag == "yes") {
960 if (leftdata.flag == "yes") { out << endl; }
962 out << querySeq->getName() + "_RIGHT"<< '\t';
963 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
965 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
966 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
968 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
973 catch(exception& e) {
974 m->errorOut(e, "ChimeraSlayer", "printBlock");
978 //***************************************************************************************************************
979 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
984 if ((leftChimeric) && (!rightChimeric)) { //get left
985 out += querySeq->getName() + "\t";
986 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
988 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
989 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
991 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
993 }else if ((!leftChimeric) && (rightChimeric)) { //print right
994 out += querySeq->getName() + "\t";
995 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
997 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
998 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1000 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
1002 }else { //print both results
1004 if (leftdata.flag == "yes") {
1005 out += querySeq->getName() + "_LEFT\t";
1006 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1008 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1009 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1011 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
1014 if (rightdata.flag == "yes") {
1015 if (leftdata.flag == "yes") { out += "\n"; }
1016 out += querySeq->getName() + "_RIGHT\t";
1017 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1019 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1020 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1022 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
1029 catch(exception& e) {
1030 m->errorOut(e, "ChimeraSlayer", "getBlock");
1034 //***************************************************************************************************************
1035 string ChimeraSlayer::getBlock(data_struct data, string flag){
1038 string outputString = "";
1040 outputString += querySeq->getName() + "\t";
1041 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1043 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1044 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1046 outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
1048 return outputString;
1050 catch(exception& e) {
1051 m->errorOut(e, "ChimeraSlayer", "getBlock");
1055 //***************************************************************************************************************/