]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
removing chime source files from mother project.
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera()  {         
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37                 numNoParents = 0;
38                 blastlocation = blas;
39                 threadID = tid;
40         
41                 doPrep();
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45                 exit(1);
46         }
47 }
48 //***************************************************************************************************************
49 //template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD
50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
51                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera()  {       
52         try {
53                 byGroup = bg;
54                 fastafile = file; templateSeqs = readSeqs(fastafile);
55                 templateFileName = temp; 
56                 searchMethod = mode;
57                 kmerSize = k;
58                 match = ms;
59                 misMatch = mms;
60                 window = win;
61                 divR = div;
62                 minSim = minsim;
63                 minCov = mincov;
64                 minBS = minbs;
65                 minSNP = minsnp;
66                 parents = par;
67                 iters = it;
68                 increment = inc;
69                 numWanted = numw;
70                 realign = r; 
71                 trimChimera = trim;
72                 priority = prior;
73                 numNoParents = 0;
74                 blastlocation = blas;
75                 threadID = tid;
76                 
77                 
78                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
79                 
80                 if (searchMethod == "distance") { 
81                         //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
82                         
83                         //run filter on template copying templateSeqs into filteredTemplateSeqs
84                         for (int i = 0; i < templateSeqs.size(); i++) {  
85                                 if (m->control_pressed) {  break; }
86                                 
87                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
88                                 runFilter(newSeq);  
89                                 filteredTemplateSeqs.push_back(newSeq);
90                         }
91                 }
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
95                 exit(1);
96         }
97 }
98 //***************************************************************************************************************
99 //template=self
100 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
101                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera()  {        
102         try {
103                 fastafile = file; templateSeqs = readSeqs(fastafile);
104                 templateFileName = temp; 
105                 searchMethod = mode;
106                 kmerSize = k;
107                 match = ms;
108                 misMatch = mms;
109                 window = win;
110                 divR = div;
111                 minSim = minsim;
112                 minCov = mincov;
113                 minBS = minbs;
114                 minSNP = minsnp;
115                 parents = par;
116                 iters = it;
117                 increment = inc;
118                 numWanted = numw;
119                 realign = r; 
120                 trimChimera = trim;
121                 priority = prior;
122                 numNoParents = 0;
123                 blastlocation = blas;
124                 threadID = tid;
125                 
126                 
127                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
128                 
129                 if (searchMethod == "distance") { 
130                         //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
131                         
132                         //run filter on template copying templateSeqs into filteredTemplateSeqs
133                         for (int i = 0; i < templateSeqs.size(); i++) {  
134                                 if (m->control_pressed) {  break; }
135                                 
136                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
137                                 runFilter(newSeq);  
138                                 filteredTemplateSeqs.push_back(newSeq);
139                         }
140                 }
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
144                 exit(1);
145         }
146 }
147 //***************************************************************************************************************
148 int ChimeraSlayer::doPrep() {
149         try {
150                 if (searchMethod == "distance") { 
151                         //read in all query seqs
152                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
153                 
154                         vector<Sequence*> temp = templateSeqs;
155                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
156                 
157                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
158                 
159                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
160                 
161                         if (m->control_pressed) {  return 0; } 
162                 
163                         //run filter on template copying templateSeqs into filteredTemplateSeqs
164                         for (int i = 0; i < templateSeqs.size(); i++) {  
165                                 if (m->control_pressed) {  return 0; }
166                                 
167                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
168                                 runFilter(newSeq);  
169                                 filteredTemplateSeqs.push_back(newSeq);
170                         }
171                 }
172                 string  kmerDBNameLeft;
173                 string  kmerDBNameRight;
174         
175                 //generate the kmerdb to pass to maligner
176                 if (searchMethod == "kmer") { 
177                         string templatePath = m->hasPath(templateFileName);
178                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
179                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
180                                 
181                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
182                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
183                 #ifdef USE_MPI
184                         for (int i = 0; i < templateSeqs.size(); i++) {
185                                         
186                                 if (m->control_pressed) { return 0; } 
187                                         
188                                 string leftFrag = templateSeqs[i]->getUnaligned();
189                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
190                                         
191                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
192                                 databaseLeft->addSequence(leftTemp);    
193                         }
194                         databaseLeft->generateDB();
195                         databaseLeft->setNumSeqs(templateSeqs.size());
196                         
197                         for (int i = 0; i < templateSeqs.size(); i++) {
198                                 if (m->control_pressed) { return 0; } 
199                                         
200                                 string rightFrag = templateSeqs[i]->getUnaligned();
201                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
202                                         
203                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
204                                 databaseRight->addSequence(rightTemp);  
205                         }
206                         databaseRight->generateDB();
207                         databaseRight->setNumSeqs(templateSeqs.size());
208
209                 #else   
210                         //leftside
211                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
212                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
213                         bool needToGenerateLeft = true;
214                         
215                         if(kmerFileTestLeft){   
216                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
217                                 if (GoodFile) {  needToGenerateLeft = false;    }
218                         }
219                         
220                         if(needToGenerateLeft){ 
221                         
222                                 for (int i = 0; i < templateSeqs.size(); i++) {
223                                         
224                                         if (m->control_pressed) { return 0; } 
225                                         
226                                         string leftFrag = templateSeqs[i]->getUnaligned();
227                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
228                                         
229                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
230                                         databaseLeft->addSequence(leftTemp);    
231                                 }
232                                 databaseLeft->generateDB();
233                                 
234                         }else { 
235                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
236                         }
237                         kmerFileTestLeft.close();
238                         
239                         databaseLeft->setNumSeqs(templateSeqs.size());
240                         
241                         //rightside
242                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
243                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
244                         bool needToGenerateRight = true;
245                         
246                         if(kmerFileTestRight){  
247                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
248                                 if (GoodFile) {  needToGenerateRight = false;   }
249                         }
250                         
251                         if(needToGenerateRight){        
252                         
253                                 for (int i = 0; i < templateSeqs.size(); i++) {
254                                         if (m->control_pressed) { return 0; } 
255                                         
256                                         string rightFrag = templateSeqs[i]->getUnaligned();
257                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
258                                         
259                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
260                                         databaseRight->addSequence(rightTemp);  
261                                 }
262                                 databaseRight->generateDB();
263                                 
264                         }else { 
265                                 databaseRight->readKmerDB(kmerFileTestRight);   
266                         }
267                         kmerFileTestRight.close();
268                         
269                         databaseRight->setNumSeqs(templateSeqs.size());
270                 #endif  
271                 }else if (searchMethod == "blast") {
272                 
273                         //generate blastdb
274                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
275                         
276                         if (m->control_pressed) { return 0; }
277
278                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
279                         databaseLeft->generateDB();
280                         databaseLeft->setNumSeqs(templateSeqs.size());
281                 }
282                 
283                 return 0;
284
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "ChimeraSlayer", "doprep");
288                 exit(1);
289         }
290 }
291 //***************************************************************************************************************
292 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
293         try {
294                 
295                 //when template=self, the query file is sorted from most abundance to least abundant
296                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
297                 vector<Sequence*> userTemplate;
298                 
299                 int myAbund = priority[q.getName()];
300                 
301                 for (int i = 0; i < templateSeqs.size(); i++) {
302                         
303                         if (m->control_pressed) { return userTemplate; } 
304                         
305                         //have I reached a sequence with the same abundance as myself?
306                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
307                         
308                         //if its am not chimeric add it
309                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
310                                 userTemplate.push_back(templateSeqs[i]); 
311                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
312                         }
313                 }
314                 
315                 //avoids nuisance error from formatdb for making blank blast database
316                 if (userTemplate.size() == 0) {
317                         return userTemplate;
318                 }
319                 
320                 string  kmerDBNameLeft;
321                 string  kmerDBNameRight;
322                 
323                 //generate the kmerdb to pass to maligner
324                 if (searchMethod == "kmer") { 
325                         string templatePath = m->hasPath(templateFileName);
326                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
327                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
328                         
329                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
330                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
331 #ifdef USE_MPI
332                         for (int i = 0; i < userTemplate.size(); i++) {
333                                 
334                                 if (m->control_pressed) { return userTemplate; } 
335                                 
336                                 string leftFrag = userTemplate[i]->getUnaligned();
337                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
338                                 
339                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
340                                 databaseLeft->addSequence(leftTemp);    
341                         }
342                         databaseLeft->generateDB();
343                         databaseLeft->setNumSeqs(userTemplate.size());
344                         
345                         for (int i = 0; i < userTemplate.size(); i++) {
346                                 if (m->control_pressed) { return userTemplate; } 
347                                 
348                                 string rightFrag = userTemplate[i]->getUnaligned();
349                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
350                                 
351                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
352                                 databaseRight->addSequence(rightTemp);  
353                         }
354                         databaseRight->generateDB();
355                         databaseRight->setNumSeqs(userTemplate.size());
356                         
357 #else   
358                         
359                         
360                         for (int i = 0; i < userTemplate.size(); i++) {
361                                 
362                                 if (m->control_pressed) { return userTemplate; } 
363                                 
364                                 string leftFrag = userTemplate[i]->getUnaligned();
365                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
366                                 
367                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
368                                 databaseLeft->addSequence(leftTemp);    
369                         }
370                         databaseLeft->generateDB();
371                         databaseLeft->setNumSeqs(userTemplate.size());
372                                 
373                         for (int i = 0; i < userTemplate.size(); i++) {
374                                 if (m->control_pressed) { return userTemplate; }  
375                                         
376                                 string rightFrag = userTemplate[i]->getUnaligned();
377                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
378                                         
379                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
380                                 databaseRight->addSequence(rightTemp);  
381                         }
382                         databaseRight->generateDB();
383                         databaseRight->setNumSeqs(userTemplate.size());
384 #endif  
385                 }else if (searchMethod == "blast") {
386                         
387                         //generate blastdb
388                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
389                         
390                         if (m->control_pressed) { return userTemplate; }
391
392                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
393                         databaseLeft->generateDB();
394                         databaseLeft->setNumSeqs(userTemplate.size());
395                 }
396                 
397                 return userTemplate;
398                 
399         }
400         catch(exception& e) {
401                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
402                 exit(1);
403         }
404 }
405
406 //***************************************************************************************************************
407 ChimeraSlayer::~ChimeraSlayer() {       
408         if (templateFileName != "self") {
409                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
410                 else if (searchMethod == "blast") {  delete databaseLeft; }
411         }
412 }
413 //***************************************************************************************************************
414 void ChimeraSlayer::printHeader(ostream& out) {
415         m->mothurOutEndLine();
416         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
417         m->mothurOutEndLine();
418         
419         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
420 }
421 //***************************************************************************************************************
422 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
423         try {
424                 Sequence trim;
425                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
426                 
427                 if (chimeraFlags == "yes") {
428                         string chimeraFlag = "no";
429                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
430                            ||
431                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
432                         
433                         
434                         if (chimeraFlag == "yes") {     
435                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
436                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
437                                         outAcc << querySeq.getName() << endl;
438                                         
439                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
440                                         
441                                         if (trimChimera) {  
442                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
443                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
444                                                 
445                                                 string newAligned = trim.getAligned();
446
447                                                 if (lengthLeft > lengthRight) { //trim right
448                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449                                                 }else { //trim left
450                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
451                                                 }
452                                                 trim.setAligned(newAligned);
453                                         }
454                                 }
455                         }
456                         
457                         printBlock(chimeraResults[0], chimeraFlag, out);
458                         out << endl;
459                 }else {  
460                         out << querySeq.getName() << "\tno" << endl; 
461                 }
462                 
463                 return trim;
464                 
465         }
466         catch(exception& e) {
467                 m->errorOut(e, "ChimeraSlayer", "print");
468                 exit(1);
469         }
470 }
471 //***************************************************************************************************************
472 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
473         try {
474                 Sequence trim;
475                                 
476                 if (trimChimera) { 
477                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
478                         trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); 
479                 }
480                 
481                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
482                         
483                         string chimeraFlag = "no";
484                         if (leftPiece.flag == "yes") {
485                                 
486                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
487                                         ||
488                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
489                         }
490                         
491                         if (rightPiece.flag == "yes") {
492                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
493                                  ||
494                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
495                         }
496                         
497                         bool rightChimeric = false;
498                         bool leftChimeric = false;
499                         
500                         if (chimeraFlag == "yes") {     
501                                 //which peice is chimeric or are both
502                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
503                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
504                                 
505                                 if (rightChimeric || leftChimeric) {
506                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
507                                         outAcc << querySeq.getName() << endl;
508                                         
509                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
510                                         
511                                         if (trimChimera) {  
512                                                 string newAligned = trim.getAligned();
513                                                                                                 
514                                                 //right side is fine so keep that
515                                                 if ((leftChimeric) && (!rightChimeric)) {
516                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
517                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
518                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
519                                                 }else { //both sides are chimeric, keep longest piece
520                                                         
521                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
522                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
523                                                         
524                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
525                                                         int length = lengthLeftLeft;
526                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
527                                                         
528                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
529                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
530                                                         
531                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
532                                                         if (lengthRightRight > length) { longest = 4; }
533                                                         
534                                                         if (longest == 1) { //leftleft
535                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
536                                                         }else if (longest == 2) { //leftright
537                                                                 //get rid of leftleft
538                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
539                                                                 //get rid of right
540                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
541                                                         }else if (longest == 3) { //rightleft
542                                                                 //get rid of left
543                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
544                                                                 //get rid of rightright
545                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
546                                                         }else { //rightright
547                                                                 //get rid of left
548                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
549                                                                 //get rid of rightleft
550                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
551                                                         }
552                                                 }
553                                                         
554                                                 trim.setAligned(newAligned);
555                                         }
556                                         
557                                 }
558                         }
559                         
560                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
561                         out << endl;
562                 }else {  
563                         out << querySeq.getName() << "\tno" << endl;  
564                 }
565                 
566                 return trim;
567                 
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "ChimeraSlayer", "print");
571                 exit(1);
572         }
573 }
574
575 #ifdef USE_MPI
576 //***************************************************************************************************************
577 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
578         try {
579                 MPI_Status status;
580                 bool results = false;
581                 string outAccString = "";
582                 string outputString = "";
583                 
584                 Sequence trim;
585                 
586                 if (trimChimera) { 
587                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
588                         trim.setName(leftPiece.trimQuery.getName());  trim.setAligned(aligned);
589                 }
590                 
591                 
592                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
593                         
594                         string chimeraFlag = "no";
595                         if (leftPiece.flag == "yes") {
596                                 
597                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
598                                    ||
599                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
600                         }
601                         
602                         if (rightPiece.flag == "yes") {
603                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
604                                         ||
605                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
606                         }
607                         
608                         bool rightChimeric = false;
609                         bool leftChimeric = false;
610
611                         cout << endl;
612                         
613                         if (chimeraFlag == "yes") {     
614                                 //which peice is chimeric or are both
615                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
616                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
617                                 
618                                 if (rightChimeric || leftChimeric) {
619                                         cout << querySeq.getName() <<  "\tyes" << endl;
620                                         outAccString += querySeq.getName() + "\n";
621                                         results = true;
622                                         
623                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
624                                         
625                                         //write to accnos file
626                                         int length = outAccString.length();
627                                         char* buf2 = new char[length];
628                                         memcpy(buf2, outAccString.c_str(), length);
629                                 
630                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
631                                         delete buf2;
632                                         
633                                         if (trimChimera) {  
634                                                 string newAligned = trim.getAligned();
635                                                 
636                                                 //right side is fine so keep that
637                                                 if ((leftChimeric) && (!rightChimeric)) {
638                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
639                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
640                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
641                                                 }else { //both sides are chimeric, keep longest piece
642                                                         
643                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
644                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
645                                                         
646                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
647                                                         int length = lengthLeftLeft;
648                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
649                                                         
650                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
651                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
652                                                         
653                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
654                                                         if (lengthRightRight > length) { longest = 4; }
655                                                         
656                                                         if (longest == 1) { //leftleft
657                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
658                                                         }else if (longest == 2) { //leftright
659                                                                 //get rid of leftleft
660                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
661                                                                 //get rid of right
662                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
663                                                         }else if (longest == 3) { //rightleft
664                                                                 //get rid of left
665                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
666                                                                 //get rid of rightright
667                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
668                                                         }else { //rightright
669                                                                 //get rid of left
670                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
671                                                                 //get rid of rightleft
672                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
673                                                         }
674                                                 }
675                                                 
676                                                 trim.setAligned(newAligned);
677                                         }
678                                         
679                                 }
680                         }
681                         
682                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
683                         outputString += "\n";
684                 
685                         //write to output file
686                         int length = outputString.length();
687                         char* buf = new char[length];
688                         memcpy(buf, outputString.c_str(), length);
689                                 
690                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
691                         delete buf;
692
693                 }else {  
694                         outputString += querySeq.getName() + "\tno\n";  
695         
696                         //write to output file
697                         int length = outputString.length();
698                         char* buf = new char[length];
699                         memcpy(buf, outputString.c_str(), length);
700                                 
701                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
702                         delete buf;
703                 }
704                 
705                 
706                 return trim;
707         }
708         catch(exception& e) {
709                 m->errorOut(e, "ChimeraSlayer", "print");
710                 exit(1);
711         }
712 }
713 //***************************************************************************************************************
714 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
715         try {
716                 MPI_Status status;
717                 bool results = false;
718                 string outAccString = "";
719                 string outputString = "";
720                 
721                 Sequence trim;
722                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
723                 
724                 if (chimeraFlags == "yes") {
725                         string chimeraFlag = "no";
726                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
727                            ||
728                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
729                         
730                         
731                         if (chimeraFlag == "yes") {     
732                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
733                                         cout << querySeq.getName() <<  "\tyes" << endl;
734                                         outAccString += querySeq.getName() + "\n";
735                                         results = true;
736                                         
737                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
738                                         
739                                         //write to accnos file
740                                         int length = outAccString.length();
741                                         char* buf2 = new char[length];
742                                         memcpy(buf2, outAccString.c_str(), length);
743                                         
744                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
745                                         delete buf2;
746                                         
747                                         if (trimChimera) {  
748                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
749                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
750                                                 
751                                                 string newAligned = trim.getAligned();
752                                                 if (lengthLeft > lengthRight) { //trim right
753                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
754                                                 }else { //trim left
755                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
756                                                 }
757                                                 trim.setAligned(newAligned);    
758                                         }
759                                 }
760                         }
761                         
762                         outputString = getBlock(chimeraResults[0], chimeraFlag);
763                         outputString += "\n";
764                         
765                         //write to output file
766                         int length = outputString.length();
767                         char* buf = new char[length];
768                         memcpy(buf, outputString.c_str(), length);
769                         
770                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
771                         delete buf;
772                         
773                 }else {  
774                         outputString += querySeq.getName() + "\tno\n";  
775                         
776                         //write to output file
777                         int length = outputString.length();
778                         char* buf = new char[length];
779                         memcpy(buf, outputString.c_str(), length);
780                         
781                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
782                         delete buf;
783                 }
784                 
785                 return trim;
786         }
787         catch(exception& e) {
788                 m->errorOut(e, "ChimeraSlayer", "print");
789                 exit(1);
790         }
791 }
792 #endif
793
794 //***************************************************************************************************************
795 int ChimeraSlayer::getChimeras(Sequence* query) {
796         try {
797                 
798                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
799                 printResults.trimQuery = trimQuery; 
800                 
801                 chimeraFlags = "no";
802                 printResults.flag = "no";
803                 
804                 querySeq = *query;
805                 
806                 //you must create a template
807                 vector<Sequence*> thisTemplate;
808                 vector<Sequence*> thisFilteredTemplate;
809                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
810                 else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
811                 
812                 if (m->control_pressed) {  return 0;  }
813                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
814                 
815                 //moved this out of maligner - 4/29/11
816                 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
817                 
818                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
819                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
820                 
821                 if (templateFileName == "self") {
822                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
823                         else if (searchMethod == "blast") {  delete databaseLeft; }
824                 }
825         
826                 if (m->control_pressed) {  return 0;  }
827
828                 string chimeraFlag = maligner.getResults(*query, decalc);
829
830                 if (m->control_pressed) {  return 0;  }
831                 
832                 vector<results> Results = maligner.getOutput();
833                 
834                 //for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];        }
835                 
836                 if (chimeraFlag == "yes") {
837                         
838                         if (realign) {
839                                 vector<string> parents;
840                                 for (int i = 0; i < Results.size(); i++) {
841                                         parents.push_back(Results[i].parentAligned);
842                                 }
843                                 
844                                 ChimeraReAligner realigner;             
845                                 realigner.reAlign(query, parents);
846
847                         }
848                         
849 //                      cout << query->getAligned() << endl;
850                         //get sequence that were given from maligner results
851                         vector<SeqCompare> seqs;
852                         map<string, float> removeDups;
853                         map<string, float>::iterator itDup;
854                         map<string, string> parentNameSeq;
855                         map<string, string>::iterator itSeq;
856                         for (int j = 0; j < Results.size(); j++) {
857
858                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
859                                 //only add if you are not a duplicate
860 //                              cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
861                                 
862                                 
863                                 if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
864                                 
865                                         itDup = removeDups.find(Results[j].parent);
866                                         if (itDup == removeDups.end()) { //this is not duplicate
867                                                 removeDups[Results[j].parent] = dist;
868                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
869                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
870                                                 removeDups[Results[j].parent] = dist;
871                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
872                                         }
873                                 
874                                 }
875                                 
876                         }
877                         
878                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
879                                 itSeq = parentNameSeq.find(itDup->first);
880                                 Sequence seq(itDup->first, itSeq->second);
881                                 
882                                 SeqCompare member;
883                                 member.seq = seq;
884                                 member.dist = itDup->second;
885                                 seqs.push_back(member);
886                         }
887                         
888                         //limit number of parents to explore - default 3
889                         if (Results.size() > parents) {
890                                 //sort by distance
891                                 sort(seqs.begin(), seqs.end(), compareSeqCompare);
892                                 //prioritize larger more similiar sequence fragments
893                                 reverse(seqs.begin(), seqs.end());
894                                 
895                                 //for (int k = seqs.size()-1; k > (parents-1); k--)  {  
896                                 //      delete seqs[k].seq;
897                                         //seqs.pop_back();      
898                                 //}
899                         }
900                 
901                         //put seqs into vector to send to slayer
902                         
903 //                      cout << query->getAligned() << endl;
904                         vector<Sequence> seqsForSlayer;
905                         for (int k = 0; k < seqs.size(); k++) {  
906 //                              cout << seqs[k].seq->getAligned() << endl;
907                                 seqsForSlayer.push_back(seqs[k].seq);   
908 //                              cout << seqs[k].seq->getName() << endl;
909                         }
910                         
911                         if (m->control_pressed) {  return 0;  }
912
913                         //send to slayer
914                         chimeraFlags = slayer.getResults(*query, seqsForSlayer);
915                         if (m->control_pressed) {  return 0;  }
916                         chimeraResults = slayer.getOutput();
917                         
918                         printResults.flag = chimeraFlags;
919                         printResults.results = chimeraResults;
920                         
921                         //free memory
922                         //for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
923                 }
924                 //cout << endl << endl;
925                 return 0;
926         }
927         catch(exception& e) {
928                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
929                 exit(1);
930         }
931 }
932 //***************************************************************************************************************
933 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
934         try {
935                 out << querySeq.getName() << '\t';
936                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
937         
938                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
939                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
940                 
941                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
942                 
943         }
944         catch(exception& e) {
945                 m->errorOut(e, "ChimeraSlayer", "printBlock");
946                 exit(1);
947         }
948 }
949 //***************************************************************************************************************
950 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
951         try {
952                 
953                 if ((leftChimeric) && (!rightChimeric)) { //print left
954                         out << querySeq.getName() << '\t';
955                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
956                         
957                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
958                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
959                 
960                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
961                 
962                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
963                         out << querySeq.getName() << '\t';
964                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
965                         
966                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
967                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
968                         
969                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
970                         
971                 }else  { //print both results
972                         if (leftdata.flag == "yes") {
973                                 out << querySeq.getName() + "_LEFT" << '\t';
974                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
975                                 
976                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
977                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
978                                 
979                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
980                         }
981                         
982                         if (rightdata.flag == "yes") {
983                                 if (leftdata.flag == "yes") { out << endl; }
984                                 
985                                 out << querySeq.getName() + "_RIGHT"<< '\t';
986                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
987                                 
988                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
989                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
990                                 
991                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
992                 
993                         }
994                 }
995         }
996         catch(exception& e) {
997                 m->errorOut(e, "ChimeraSlayer", "printBlock");
998                 exit(1);
999         }
1000 }
1001 //***************************************************************************************************************
1002 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
1003         try {
1004                 
1005                 string out = "";
1006                 
1007                 if ((leftChimeric) && (!rightChimeric)) { //get left
1008                         out += querySeq.getName() + "\t";
1009                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1010                         
1011                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1012                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1013                         
1014                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1015                         
1016                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
1017                         out += querySeq.getName() + "\t";
1018                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
1019                         
1020                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1021                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1022                         
1023                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
1024                         
1025                 }else  { //print both results
1026                         
1027                         if (leftdata.flag == "yes") {
1028                                 out += querySeq.getName() + "_LEFT\t";
1029                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1030                                 
1031                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1032                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1033                                 
1034                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1035                         }
1036                         
1037                         if (rightdata.flag == "yes") {
1038                                 if (leftdata.flag == "yes") { out += "\n"; }
1039                                 out +=  querySeq.getName() + "_RIGHT\t";
1040                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
1041                                 
1042                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1043                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1044                                 
1045                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
1046                         }
1047                 }
1048                 
1049                 return out;
1050                 
1051         }
1052         catch(exception& e) {
1053                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1054                 exit(1);
1055         }
1056 }
1057 //***************************************************************************************************************
1058 string ChimeraSlayer::getBlock(data_struct data, string flag){
1059         try {
1060                 
1061                 string outputString = "";
1062                 
1063                 outputString += querySeq.getName() + "\t";
1064                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
1065                         
1066                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1067                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1068                 
1069                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1070                 
1071                 return outputString;
1072         }
1073         catch(exception& e) {
1074                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1075                 exit(1);
1076         }
1077 }
1078 //***************************************************************************************************************
1079 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1080         try {
1081                 
1082                 vector<Sequence> refSeqs;
1083                 
1084                 if (searchMethod == "distance") {
1085                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1086                         Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1087                         runFilter(newSeq);
1088                         refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1089                         delete newSeq;
1090                 }else if (searchMethod == "blast")  {
1091                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1092                 }else if (searchMethod == "kmer") {
1093                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1094                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1095                 
1096                 return refSeqs;
1097         }
1098         catch(exception& e) {
1099                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1100                 exit(1);
1101         }
1102 }
1103 //***************************************************************************************************************/
1104 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1105         try {   
1106                 
1107                 vector<Sequence> refResults;
1108                 
1109                 //get parts of query
1110                 string queryUnAligned = q.getUnaligned();
1111                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1112                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1113 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1114                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1115                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1116                 
1117                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1118                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1119                                 
1120                 
1121                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1122 //              vector<int> smaller;
1123 //              vector<int> larger;
1124 //              
1125 //              if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1126 //              else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1127                 
1128                 //merge results         
1129                 map<int, int> seen;
1130                 map<int, int>::iterator it;
1131                 vector<int> mergedResults;
1132                 
1133                 int index = 0;
1134 //              for (int i = 0; i < smaller.size(); i++) {
1135                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1136                         
1137                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1138         
1139                         //add left if you havent already
1140                         it = seen.find(tempIndexesLeft[index]);
1141                         if (it == seen.end()) {  
1142                                 mergedResults.push_back(tempIndexesLeft[index]);
1143                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1144                         }
1145                         
1146                         //add right if you havent already
1147                         it = seen.find(tempIndexesRight[index]);
1148                         if (it == seen.end()) {  
1149                                 mergedResults.push_back(tempIndexesRight[index]);
1150                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1151                         }
1152                         index++;
1153                 }
1154
1155                 
1156                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1157                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1158                         
1159                         //add right if you havent already
1160                         it = seen.find(tempIndexesLeft[i]);
1161                         if (it == seen.end()) {  
1162                                 mergedResults.push_back(tempIndexesLeft[i]);
1163                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1164                         }
1165                 }
1166
1167                 for (int i = index; i < tempIndexesRight.size(); i++) {
1168                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1169                         
1170                         //add right if you havent already
1171                         it = seen.find(tempIndexesRight[i]);
1172                         if (it == seen.end()) {  
1173                                 mergedResults.push_back(tempIndexesRight[i]);
1174                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1175                         }
1176                 }
1177                 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
1178                 //cout << qname << endl;        
1179                 
1180                 if (mergedResults.size() == 0) { numNoParents++; }
1181                 
1182                 for (int i = 0; i < mergedResults.size(); i++) {
1183                         //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1184                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1185                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1186                                 refResults.push_back(temp);
1187                         }
1188                 }
1189                 //cout << endl << endl;
1190
1191                 delete queryRight;
1192                 delete queryLeft;
1193                 
1194                 return refResults;
1195         }
1196         catch(exception& e) {
1197                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1198                 exit(1);
1199         }
1200 }
1201 //***************************************************************************************************************
1202 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1203         try {   
1204                 vector<Sequence> refResults;
1205                 
1206                 //get parts of query
1207                 string queryUnAligned = q.getUnaligned();
1208                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1209                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1210                 
1211                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1212                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1213                 
1214                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1215                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1216                 
1217                 //merge results         
1218                 map<int, int> seen;
1219                 map<int, int>::iterator it;
1220                 vector<int> mergedResults;
1221                 
1222                 int index = 0;
1223                 //              for (int i = 0; i < smaller.size(); i++) {
1224                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1225                         
1226                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1227                         
1228                         //add left if you havent already
1229                         it = seen.find(tempIndexesLeft[index]);
1230                         if (it == seen.end()) {  
1231                                 mergedResults.push_back(tempIndexesLeft[index]);
1232                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1233                         }
1234                         
1235                         //add right if you havent already
1236                         it = seen.find(tempIndexesRight[index]);
1237                         if (it == seen.end()) {  
1238                                 mergedResults.push_back(tempIndexesRight[index]);
1239                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1240                         }
1241                         index++;
1242                 }
1243                 
1244                 
1245                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1246                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1247                         
1248                         //add right if you havent already
1249                         it = seen.find(tempIndexesLeft[i]);
1250                         if (it == seen.end()) {  
1251                                 mergedResults.push_back(tempIndexesLeft[i]);
1252                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1253                         }
1254                 }
1255                 
1256                 for (int i = index; i < tempIndexesRight.size(); i++) {
1257                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1258                         
1259                         //add right if you havent already
1260                         it = seen.find(tempIndexesRight[i]);
1261                         if (it == seen.end()) {  
1262                                 mergedResults.push_back(tempIndexesRight[i]);
1263                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1264                         }
1265                 }
1266                 
1267                 for (int i = 0; i < mergedResults.size(); i++) {
1268                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1269                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1270                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1271                                 refResults.push_back(temp);
1272                                 
1273                         }
1274                 }
1275
1276                 //cout << endl;         
1277                 delete queryRight;
1278                 delete queryLeft;
1279                 
1280                 return refResults;
1281         }
1282         catch(exception& e) {
1283                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1284                 exit(1);
1285         }
1286 }
1287 //***************************************************************************************************************
1288
1289