5 * Created by westcott on 9/25/09.
6 * Copyright 2009 Pschloss Lab. All rights reserved.
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
13 #include "blastdb.hpp"
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div,
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
20 templateFileName = temp; templateSeqs = readSeqs(temp);
38 decalc = new DeCalculator();
43 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div,
49 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
51 fastafile = file; templateSeqs = readSeqs(fastafile);
52 templateFileName = temp;
68 includeAbunds = abunds;
71 //read name file and create nameMapRank
74 decalc = new DeCalculator();
76 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78 //run filter on template
79 for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]); }
83 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
87 //***************************************************************************************************************
88 int ChimeraSlayer::readNameFile(string name) {
91 m->openInputFile(name, in);
94 int minRank = 10000000;
98 if (m->control_pressed) { in.close(); return 0; }
100 string thisname, repnames;
102 in >> thisname; m->gobble(in); //read from first column
103 in >> repnames; //read from second column
105 map<string, vector<string> >::iterator it = nameMapRank.find(thisname);
106 if (it == nameMapRank.end()) {
108 vector<string> splitRepNames;
109 m->splitAtComma(repnames, splitRepNames);
111 nameMapRank[thisname] = splitRepNames;
113 if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); }
114 if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); }
116 }else{ m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine(); }
122 //sanity check to make sure files match
123 for (int i = 0; i < templateSeqs.size(); i++) {
124 map<string, vector<string> >::iterator it = nameMapRank.find(templateSeqs[i]->getName());
126 if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true; }
129 if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
134 catch(exception& e) {
135 m->errorOut(e, "ChimeraSlayer", "readNameFile");
140 //***************************************************************************************************************
141 int ChimeraSlayer::doPrep() {
144 //read in all query seqs
145 vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
147 vector<Sequence*> temp = templateSeqs;
148 for (int i = 0; i < tempQuerySeqs.size(); i++) { temp.push_back(tempQuerySeqs[i]); }
150 createFilter(temp, 0.0); //just removed columns where all seqs have a gap
152 for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i]; }
154 if (m->control_pressed) { return 0; }
156 //run filter on template
157 for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } runFilter(templateSeqs[i]); }
159 string kmerDBNameLeft;
160 string kmerDBNameRight;
162 //generate the kmerdb to pass to maligner
163 if (searchMethod == "kmer") {
164 string templatePath = m->hasPath(templateFileName);
165 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
166 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
168 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
169 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
171 for (int i = 0; i < templateSeqs.size(); i++) {
173 if (m->control_pressed) { return 0; }
175 string leftFrag = templateSeqs[i]->getUnaligned();
176 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
178 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
179 databaseLeft->addSequence(leftTemp);
181 databaseLeft->generateDB();
182 databaseLeft->setNumSeqs(templateSeqs.size());
184 for (int i = 0; i < templateSeqs.size(); i++) {
185 if (m->control_pressed) { return 0; }
187 string rightFrag = templateSeqs[i]->getUnaligned();
188 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
190 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
191 databaseRight->addSequence(rightTemp);
193 databaseRight->generateDB();
194 databaseRight->setNumSeqs(templateSeqs.size());
198 kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
199 ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
200 bool needToGenerateLeft = true;
202 if(kmerFileTestLeft){
203 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
204 if (GoodFile) { needToGenerateLeft = false; }
207 if(needToGenerateLeft){
209 for (int i = 0; i < templateSeqs.size(); i++) {
211 if (m->control_pressed) { return 0; }
213 string leftFrag = templateSeqs[i]->getUnaligned();
214 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
216 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
217 databaseLeft->addSequence(leftTemp);
219 databaseLeft->generateDB();
222 databaseLeft->readKmerDB(kmerFileTestLeft);
224 kmerFileTestLeft.close();
226 databaseLeft->setNumSeqs(templateSeqs.size());
229 kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
230 ifstream kmerFileTestRight(kmerDBNameRight.c_str());
231 bool needToGenerateRight = true;
233 if(kmerFileTestRight){
234 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
235 if (GoodFile) { needToGenerateRight = false; }
238 if(needToGenerateRight){
240 for (int i = 0; i < templateSeqs.size(); i++) {
241 if (m->control_pressed) { return 0; }
243 string rightFrag = templateSeqs[i]->getUnaligned();
244 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
246 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
247 databaseRight->addSequence(rightTemp);
249 databaseRight->generateDB();
252 databaseRight->readKmerDB(kmerFileTestRight);
254 kmerFileTestRight.close();
256 databaseRight->setNumSeqs(templateSeqs.size());
258 }else if (searchMethod == "blast") {
261 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
263 for (int i = 0; i < templateSeqs.size(); i++) { databaseLeft->addSequence(*templateSeqs[i]); }
264 databaseLeft->generateDB();
265 databaseLeft->setNumSeqs(templateSeqs.size());
271 catch(exception& e) {
272 m->errorOut(e, "ChimeraSlayer", "doprep");
276 //***************************************************************************************************************
277 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
280 vector<Sequence*> thisTemplate;
283 string thisName = q->getName();
284 map<string, vector<string> >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked
285 thisRank = (itRank->second).size();
287 //create list of names we want to put into the template
288 set<string> namesToAdd;
289 for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
290 if (itRank->first != thisName) {
291 if (includeAbunds == "greaterequal") {
292 if ((itRank->second).size() >= thisRank) {
293 //you are more abundant than me or equal to my abundance
294 for (int i = 0; i < (itRank->second).size(); i++) {
295 namesToAdd.insert((itRank->second)[i]);
298 }else if (includeAbunds == "greater") {
299 if ((itRank->second).size() > thisRank) {
300 //you are more abundant than me
301 for (int i = 0; i < (itRank->second).size(); i++) {
302 namesToAdd.insert((itRank->second)[i]);
305 }else if (includeAbunds == "all") {
307 for (int i = 0; i < (itRank->second).size(); i++) {
308 namesToAdd.insert((itRank->second)[i]);
314 for (int i = 0; i < templateSeqs.size(); i++) {
315 if (namesToAdd.count(templateSeqs[i]->getName()) != 0) {
316 thisTemplate.push_back(templateSeqs[i]);
320 string kmerDBNameLeft;
321 string kmerDBNameRight;
323 //generate the kmerdb to pass to maligner
324 if (searchMethod == "kmer") {
325 string templatePath = m->hasPath(templateFileName);
326 string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
327 databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
329 string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
330 databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);
332 for (int i = 0; i < thisTemplate.size(); i++) {
334 if (m->control_pressed) { return thisTemplate; }
336 string leftFrag = thisTemplate[i]->getUnaligned();
337 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
339 Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
340 databaseLeft->addSequence(leftTemp);
342 databaseLeft->generateDB();
343 databaseLeft->setNumSeqs(thisTemplate.size());
345 for (int i = 0; i < thisTemplate.size(); i++) {
346 if (m->control_pressed) { return thisTemplate; }
348 string rightFrag = thisTemplate[i]->getUnaligned();
349 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
351 Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
352 databaseRight->addSequence(rightTemp);
354 databaseRight->generateDB();
355 databaseRight->setNumSeqs(thisTemplate.size());
360 for (int i = 0; i < thisTemplate.size(); i++) {
362 if (m->control_pressed) { return thisTemplate; }
364 string leftFrag = thisTemplate[i]->getUnaligned();
365 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
367 Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
368 databaseLeft->addSequence(leftTemp);
370 databaseLeft->generateDB();
371 databaseLeft->setNumSeqs(thisTemplate.size());
373 for (int i = 0; i < thisTemplate.size(); i++) {
374 if (m->control_pressed) { return thisTemplate; }
376 string rightFrag = thisTemplate[i]->getUnaligned();
377 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
379 Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
380 databaseRight->addSequence(rightTemp);
382 databaseRight->generateDB();
383 databaseRight->setNumSeqs(thisTemplate.size());
385 }else if (searchMethod == "blast") {
388 databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
390 for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; } databaseLeft->addSequence(*thisTemplate[i]); }
391 databaseLeft->generateDB();
392 databaseLeft->setNumSeqs(thisTemplate.size());
398 catch(exception& e) {
399 m->errorOut(e, "ChimeraSlayer", "getTemplate");
404 //***************************************************************************************************************
405 ChimeraSlayer::~ChimeraSlayer() {
407 if (templateFileName != "self") {
408 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
409 else if (searchMethod == "blast") { delete databaseLeft; }
412 //***************************************************************************************************************
413 void ChimeraSlayer::printHeader(ostream& out) {
414 m->mothurOutEndLine();
415 m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
416 m->mothurOutEndLine();
418 out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
420 //***************************************************************************************************************
421 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
423 Sequence* trim = NULL;
424 if (trimChimera) { trim = trimQuery; }
426 if (chimeraFlags == "yes") {
427 string chimeraFlag = "no";
428 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
430 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
433 if (chimeraFlag == "yes") {
434 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
435 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
436 outAcc << querySeq->getName() << endl;
439 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
440 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
442 string newAligned = trim->getAligned();
444 if (lengthLeft > lengthRight) { //trim right
445 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
447 for (int i = 0; i < spotMap[chimeraResults[0].winLEnd]; i++) { newAligned[i] = '.'; }
449 trim->setAligned(newAligned);
455 printBlock(chimeraResults[0], chimeraFlag, out);
457 }else { out << querySeq->getName() << "\tno" << endl; }
462 catch(exception& e) {
463 m->errorOut(e, "ChimeraSlayer", "print");
467 //***************************************************************************************************************
468 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
470 Sequence* trim = NULL;
473 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
474 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
477 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
479 string chimeraFlag = "no";
480 if (leftPiece.flag == "yes") {
482 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
484 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
487 if (rightPiece.flag == "yes") {
488 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
490 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
493 bool rightChimeric = false;
494 bool leftChimeric = false;
496 if (chimeraFlag == "yes") {
497 //which peice is chimeric or are both
498 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
499 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
501 if (rightChimeric || leftChimeric) {
502 m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
503 outAcc << querySeq->getName() << endl;
506 string newAligned = trim->getAligned();
508 //right side is fine so keep that
509 if ((leftChimeric) && (!rightChimeric)) {
510 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
511 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
512 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
513 }else { //both sides are chimeric, keep longest piece
515 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
516 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
518 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
519 int length = lengthLeftLeft;
520 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
522 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
523 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
525 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
526 if (lengthRightRight > length) { longest = 4; }
528 if (longest == 1) { //leftleft
529 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
530 }else if (longest == 2) { //leftright
531 //get rid of leftleft
532 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
534 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
535 }else if (longest == 3) { //rightleft
537 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
538 //get rid of rightright
539 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
542 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
543 //get rid of rightleft
544 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
548 trim->setAligned(newAligned);
554 printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
556 }else { out << querySeq->getName() << "\tno" << endl; }
561 catch(exception& e) {
562 m->errorOut(e, "ChimeraSlayer", "print");
568 //***************************************************************************************************************
569 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
572 bool results = false;
573 string outAccString = "";
574 string outputString = "";
576 Sequence* trim = NULL;
579 string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
580 trim = new Sequence(leftPiece.trimQuery.getName(), aligned);
584 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
586 string chimeraFlag = "no";
587 if (leftPiece.flag == "yes") {
589 if( (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
591 (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
594 if (rightPiece.flag == "yes") {
595 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
597 (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
600 bool rightChimeric = false;
601 bool leftChimeric = false;
603 if (chimeraFlag == "yes") {
604 //which peice is chimeric or are both
605 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
606 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } }
608 if (rightChimeric || leftChimeric) {
609 cout << querySeq->getName() << "\tyes" << endl;
610 outAccString += querySeq->getName() + "\n";
613 //write to accnos file
614 int length = outAccString.length();
615 char* buf2 = new char[length];
616 memcpy(buf2, outAccString.c_str(), length);
618 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
622 string newAligned = trim->getAligned();
624 //right side is fine so keep that
625 if ((leftChimeric) && (!rightChimeric)) {
626 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
627 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
628 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
629 }else { //both sides are chimeric, keep longest piece
631 int lengthLeftLeft = leftPiece.spotMap[leftPiece.results[0].winLEnd] - leftPiece.spotMap[leftPiece.results[0].winLStart];
632 int lengthLeftRight = leftPiece.spotMap[leftPiece.results[0].winREnd] - leftPiece.spotMap[leftPiece.results[0].winRStart];
634 int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
635 int length = lengthLeftLeft;
636 if (lengthLeftLeft < lengthLeftRight) { longest = 2; length = lengthLeftRight; }
638 int lengthRightLeft = rightPiece.spotMap[rightPiece.results[0].winLEnd] - rightPiece.spotMap[rightPiece.results[0].winLStart];
639 int lengthRightRight = rightPiece.spotMap[rightPiece.results[0].winREnd] - rightPiece.spotMap[rightPiece.results[0].winRStart];
641 if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft; }
642 if (lengthRightRight > length) { longest = 4; }
644 if (longest == 1) { //leftleft
645 for (int i = (leftPiece.spotMap[leftPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
646 }else if (longest == 2) { //leftright
647 //get rid of leftleft
648 for (int i = (leftPiece.spotMap[leftPiece.results[0].winLStart]-1); i < (leftPiece.spotMap[leftPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
650 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
651 }else if (longest == 3) { //rightleft
653 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
654 //get rid of rightright
655 for (int i = (rightPiece.spotMap[rightPiece.results[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
658 for (int i = 0; i < leftPiece.spotMap[leftPiece.results[0].winREnd]; i++) { newAligned[i] = '.'; }
659 //get rid of rightleft
660 for (int i = (rightPiece.spotMap[rightPiece.results[0].winLStart]-1); i < (rightPiece.spotMap[rightPiece.results[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
664 trim->setAligned(newAligned);
670 outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
671 outputString += "\n";
673 //write to output file
674 int length = outputString.length();
675 char* buf = new char[length];
676 memcpy(buf, outputString.c_str(), length);
678 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
682 outputString += querySeq->getName() + "\tno\n";
684 //write to output file
685 int length = outputString.length();
686 char* buf = new char[length];
687 memcpy(buf, outputString.c_str(), length);
689 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
696 catch(exception& e) {
697 m->errorOut(e, "ChimeraSlayer", "print");
701 //***************************************************************************************************************
702 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
705 bool results = false;
706 string outAccString = "";
707 string outputString = "";
709 Sequence* trim = NULL;
710 if (trimChimera) { trim = trimQuery; }
712 if (chimeraFlags == "yes") {
713 string chimeraFlag = "no";
714 if( (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
716 (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
719 if (chimeraFlag == "yes") {
720 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
721 cout << querySeq->getName() << "\tyes" << endl;
722 outAccString += querySeq->getName() + "\n";
725 //write to accnos file
726 int length = outAccString.length();
727 char* buf2 = new char[length];
728 memcpy(buf2, outAccString.c_str(), length);
730 MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
734 int lengthLeft = spotMap[chimeraResults[0].winLEnd] - spotMap[chimeraResults[0].winLStart];
735 int lengthRight = spotMap[chimeraResults[0].winREnd] - spotMap[chimeraResults[0].winRStart];
737 string newAligned = trim->getAligned();
738 if (lengthLeft > lengthRight) { //trim right
739 for (int i = (spotMap[chimeraResults[0].winRStart]-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
741 for (int i = 0; i < (spotMap[chimeraResults[0].winLEnd]-1); i++) { newAligned[i] = '.'; }
743 trim->setAligned(newAligned);
748 outputString = getBlock(chimeraResults[0], chimeraFlag);
749 outputString += "\n";
751 //write to output file
752 int length = outputString.length();
753 char* buf = new char[length];
754 memcpy(buf, outputString.c_str(), length);
756 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
760 outputString += querySeq->getName() + "\tno\n";
762 //write to output file
763 int length = outputString.length();
764 char* buf = new char[length];
765 memcpy(buf, outputString.c_str(), length);
767 MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
773 catch(exception& e) {
774 m->errorOut(e, "ChimeraSlayer", "print");
780 //***************************************************************************************************************
781 int ChimeraSlayer::getChimeras(Sequence* query) {
784 trimQuery = new Sequence(query->getName(), query->getAligned());
785 printResults.trimQuery = *trimQuery;
789 printResults.flag = "no";
792 spotMap = runFilter(query);
793 printResults.spotMap = spotMap;
797 //you must create a template
798 vector<Sequence*> thisTemplate;
799 if (templateFileName != "self") { thisTemplate = templateSeqs; }
800 else { thisTemplate = getTemplate(query); } //fills this template and creates the databases
802 if (m->control_pressed) { return 0; }
804 if (thisTemplate.size() == 0) { return 0; } //not chimeric
806 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
807 Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
808 Slayer slayer(window, increment, minSim, divR, iters, minSNP);
810 if (templateFileName == "self") {
811 if (searchMethod == "kmer") { delete databaseRight; delete databaseLeft; }
812 else if (searchMethod == "blast") { delete databaseLeft; }
815 if (m->control_pressed) { return 0; }
817 string chimeraFlag = maligner.getResults(query, decalc);
819 if (m->control_pressed) { return 0; }
821 vector<results> Results = maligner.getOutput();
824 ChimeraReAligner realigner(thisTemplate, match, misMatch);
825 realigner.reAlign(query, Results);
828 if (chimeraFlag == "yes") {
830 //get sequence that were given from maligner results
831 vector<SeqDist> seqs;
832 map<string, float> removeDups;
833 map<string, float>::iterator itDup;
834 map<string, string> parentNameSeq;
835 map<string, string>::iterator itSeq;
836 for (int j = 0; j < Results.size(); j++) {
837 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
838 //only add if you are not a duplicate
839 itDup = removeDups.find(Results[j].parent);
840 if (itDup == removeDups.end()) { //this is not duplicate
841 removeDups[Results[j].parent] = dist;
842 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
843 }else if (dist > itDup->second) { //is this a stronger number for this parent
844 removeDups[Results[j].parent] = dist;
845 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
849 for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
850 itSeq = parentNameSeq.find(itDup->first);
851 Sequence* seq = new Sequence(itDup->first, itSeq->second);
855 member.dist = itDup->second;
857 seqs.push_back(member);
860 //limit number of parents to explore - default 3
861 if (Results.size() > parents) {
863 sort(seqs.begin(), seqs.end(), compareSeqDist);
864 //prioritize larger more similiar sequence fragments
865 reverse(seqs.begin(), seqs.end());
867 for (int k = seqs.size()-1; k > (parents-1); k--) {
873 //put seqs into vector to send to slayer
874 vector<Sequence*> seqsForSlayer;
876 for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
878 //mask then send to slayer...
880 decalc->setMask(seqMask);
883 decalc->runMask(query);
886 for (int k = 0; k < seqsForSlayer.size(); k++) {
887 decalc->runMask(seqsForSlayer[k]);
890 spotMap = decalc->getMaskMap();
893 if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; }
896 chimeraFlags = slayer.getResults(query, seqsForSlayer);
897 if (m->control_pressed) { return 0; }
898 chimeraResults = slayer.getOutput();
901 for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
903 printResults.spotMap = spotMap;
904 printResults.flag = chimeraFlags;
905 printResults.results = chimeraResults;
910 catch(exception& e) {
911 m->errorOut(e, "ChimeraSlayer", "getChimeras");
915 //***************************************************************************************************************
916 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
918 out << querySeq->getName() << '\t';
919 out << data.parentA.getName() << "\t" << data.parentB.getName() << '\t';
921 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
922 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
924 out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
927 catch(exception& e) {
928 m->errorOut(e, "ChimeraSlayer", "printBlock");
932 //***************************************************************************************************************
933 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
936 if ((leftChimeric) && (!rightChimeric)) { //print left
937 out << querySeq->getName() << '\t';
938 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
940 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
941 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
943 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
945 }else if ((!leftChimeric) && (rightChimeric)) { //print right
946 out << querySeq->getName() << '\t';
947 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
949 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
950 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
952 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
954 }else { //print both results
955 if (leftdata.flag == "yes") {
956 out << querySeq->getName() + "_LEFT" << '\t';
957 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName() << '\t';
959 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
960 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
962 out << flag << '\t' << leftdata.spotMap[leftdata.results[0].winLStart] << "-" << leftdata.spotMap[leftdata.results[0].winLEnd] << '\t' << leftdata.spotMap[leftdata.results[0].winRStart] << "-" << leftdata.spotMap[leftdata.results[0].winREnd] << '\t';
965 if (rightdata.flag == "yes") {
966 if (leftdata.flag == "yes") { out << endl; }
968 out << querySeq->getName() + "_RIGHT"<< '\t';
969 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName() << '\t';
971 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
972 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
974 out << flag << '\t' << rightdata.spotMap[rightdata.results[0].winLStart] << "-" << rightdata.spotMap[rightdata.results[0].winLEnd] << '\t' << rightdata.spotMap[rightdata.results[0].winRStart] << "-" << rightdata.spotMap[rightdata.results[0].winREnd] << '\t';
979 catch(exception& e) {
980 m->errorOut(e, "ChimeraSlayer", "printBlock");
984 //***************************************************************************************************************
985 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
990 if ((leftChimeric) && (!rightChimeric)) { //get left
991 out += querySeq->getName() + "\t";
992 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
994 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
995 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
997 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
999 }else if ((!leftChimeric) && (rightChimeric)) { //print right
1000 out += querySeq->getName() + "\t";
1001 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1003 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1004 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1006 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
1008 }else { //print both results
1010 if (leftdata.flag == "yes") {
1011 out += querySeq->getName() + "_LEFT\t";
1012 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1014 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1015 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1017 out += flag + "\t" + toString(leftdata.spotMap[leftdata.results[0].winLStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winLEnd]) + "\t" + toString(leftdata.spotMap[leftdata.results[0].winRStart]) + "-" + toString(leftdata.spotMap[leftdata.results[0].winREnd]) + "\t";
1020 if (rightdata.flag == "yes") {
1021 if (leftdata.flag == "yes") { out += "\n"; }
1022 out += querySeq->getName() + "_RIGHT\t";
1023 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName() + "\t";
1025 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1026 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1028 out += flag + "\t" + toString(rightdata.spotMap[rightdata.results[0].winLStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winLEnd]) + "\t" + toString(rightdata.spotMap[rightdata.results[0].winRStart]) + "-" + toString(rightdata.spotMap[rightdata.results[0].winREnd]) + "\t";
1035 catch(exception& e) {
1036 m->errorOut(e, "ChimeraSlayer", "getBlock");
1040 //***************************************************************************************************************
1041 string ChimeraSlayer::getBlock(data_struct data, string flag){
1044 string outputString = "";
1046 outputString += querySeq->getName() + "\t";
1047 outputString += data.parentA.getName() + "\t" + data.parentB.getName() + "\t";
1049 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1050 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1052 outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
1054 return outputString;
1056 catch(exception& e) {
1057 m->errorOut(e, "ChimeraSlayer", "getBlock");
1061 //***************************************************************************************************************/