]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
working on chimera.slayer
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37         
38                 decalc = new DeCalculator();    
39                 
40                 doPrep();
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
44                 exit(1);
45         }
46 }
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
49                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
50         try {
51                 fastafile = file; templateSeqs = readSeqs(fastafile);
52                 templateFileName = temp; 
53                 searchMethod = mode;
54                 kmerSize = k;
55                 match = ms;
56                 misMatch = mms;
57                 window = win;
58                 divR = div;
59                 minSim = minsim;
60                 minCov = mincov;
61                 minBS = minbs;
62                 minSNP = minsnp;
63                 parents = par;
64                 iters = it;
65                 increment = inc;
66                 numWanted = numw;
67                 realign = r; 
68                 trimChimera = trim;
69                 priority = prior;
70                 
71                 decalc = new DeCalculator();    
72                 
73                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74                 
75                 //run filter on template
76                 for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  break; }  runFilter(templateSeqs[i]);  }
77
78                 
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
82                 exit(1);
83         }
84 }
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
87         try {
88                 if (searchMethod == "distance") { 
89                         //read in all query seqs
90                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
91                 
92                         vector<Sequence*> temp = templateSeqs;
93                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
94                 
95                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
96                 
97                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
98                 
99                         if (m->control_pressed) {  return 0; } 
100                 
101                         //run filter on template copying templateSeqs into filteredTemplateSeqs
102                         for (int i = 0; i < templateSeqs.size(); i++) {  
103                                 if (m->control_pressed) {  return 0; }
104                                 
105                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
106                                 filteredTemplateSeqs.push_back(newSeq);
107                                 runFilter(newSeq);  
108                         }
109                 }
110                 string  kmerDBNameLeft;
111                 string  kmerDBNameRight;
112         
113                 //generate the kmerdb to pass to maligner
114                 if (searchMethod == "kmer") { 
115                         string templatePath = m->hasPath(templateFileName);
116                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
117                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
118                                 
119                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
120                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
121                 #ifdef USE_MPI
122                         for (int i = 0; i < templateSeqs.size(); i++) {
123                                         
124                                 if (m->control_pressed) { return 0; } 
125                                         
126                                 string leftFrag = templateSeqs[i]->getUnaligned();
127                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
128                                         
129                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
130                                 databaseLeft->addSequence(leftTemp);    
131                         }
132                         databaseLeft->generateDB();
133                         databaseLeft->setNumSeqs(templateSeqs.size());
134                         
135                         for (int i = 0; i < templateSeqs.size(); i++) {
136                                 if (m->control_pressed) { return 0; } 
137                                         
138                                 string rightFrag = templateSeqs[i]->getUnaligned();
139                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
140                                         
141                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
142                                 databaseRight->addSequence(rightTemp);  
143                         }
144                         databaseRight->generateDB();
145                         databaseRight->setNumSeqs(templateSeqs.size());
146
147                 #else   
148                         //leftside
149                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
150                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
151                         bool needToGenerateLeft = true;
152                         
153                         if(kmerFileTestLeft){   
154                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
155                                 if (GoodFile) {  needToGenerateLeft = false;    }
156                         }
157                         
158                         if(needToGenerateLeft){ 
159                         
160                                 for (int i = 0; i < templateSeqs.size(); i++) {
161                                         
162                                         if (m->control_pressed) { return 0; } 
163                                         
164                                         string leftFrag = templateSeqs[i]->getUnaligned();
165                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
166                                         
167                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
168                                         databaseLeft->addSequence(leftTemp);    
169                                 }
170                                 databaseLeft->generateDB();
171                                 
172                         }else { 
173                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
174                         }
175                         kmerFileTestLeft.close();
176                         
177                         databaseLeft->setNumSeqs(templateSeqs.size());
178                         
179                         //rightside
180                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
181                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
182                         bool needToGenerateRight = true;
183                         
184                         if(kmerFileTestRight){  
185                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
186                                 if (GoodFile) {  needToGenerateRight = false;   }
187                         }
188                         
189                         if(needToGenerateRight){        
190                         
191                                 for (int i = 0; i < templateSeqs.size(); i++) {
192                                         if (m->control_pressed) { return 0; } 
193                                         
194                                         string rightFrag = templateSeqs[i]->getUnaligned();
195                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
196                                         
197                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
198                                         databaseRight->addSequence(rightTemp);  
199                                 }
200                                 databaseRight->generateDB();
201                                 
202                         }else { 
203                                 databaseRight->readKmerDB(kmerFileTestRight);   
204                         }
205                         kmerFileTestRight.close();
206                         
207                         databaseRight->setNumSeqs(templateSeqs.size());
208                 #endif  
209                 }else if (searchMethod == "blast") {
210                 
211                         //generate blastdb
212                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
213
214                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
215                         databaseLeft->generateDB();
216                         databaseLeft->setNumSeqs(templateSeqs.size());
217                 }
218                 
219                 return 0;
220
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "ChimeraSlayer", "doprep");
224                 exit(1);
225         }
226 }
227 //***************************************************************************************************************
228 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
229         try {
230                 
231                 //when template=self, the query file is sorted from most abundance to least abundant
232                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
233                 vector<Sequence*> userTemplate;
234                 
235                 int myAbund = priority[q->getName()];
236                 
237                 for (int i = 0; i < templateSeqs.size(); i++) {
238                         
239                         if (m->control_pressed) { return userTemplate; } 
240                         
241                         //have I reached a sequence with the same abundance as myself?
242                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
243                         
244                         //if its am not chimeric add it
245                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
246                                 userTemplate.push_back(templateSeqs[i]); 
247                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
248                         }
249                 }
250                 
251                 string  kmerDBNameLeft;
252                 string  kmerDBNameRight;
253                 
254                 //generate the kmerdb to pass to maligner
255                 if (searchMethod == "kmer") { 
256                         string templatePath = m->hasPath(templateFileName);
257                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
258                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
259                         
260                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
261                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
262 #ifdef USE_MPI
263                         for (int i = 0; i < userTemplate.size(); i++) {
264                                 
265                                 if (m->control_pressed) { return userTemplate; } 
266                                 
267                                 string leftFrag = userTemplate[i]->getUnaligned();
268                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
269                                 
270                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
271                                 databaseLeft->addSequence(leftTemp);    
272                         }
273                         databaseLeft->generateDB();
274                         databaseLeft->setNumSeqs(userTemplate.size());
275                         
276                         for (int i = 0; i < userTemplate.size(); i++) {
277                                 if (m->control_pressed) { return userTemplate; } 
278                                 
279                                 string rightFrag = userTemplate[i]->getUnaligned();
280                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
281                                 
282                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
283                                 databaseRight->addSequence(rightTemp);  
284                         }
285                         databaseRight->generateDB();
286                         databaseRight->setNumSeqs(userTemplate.size());
287                         
288 #else   
289                         
290                         
291                         for (int i = 0; i < userTemplate.size(); i++) {
292                                 
293                                 if (m->control_pressed) { return userTemplate; } 
294                                 
295                                 string leftFrag = userTemplate[i]->getUnaligned();
296                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
297                                 
298                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
299                                 databaseLeft->addSequence(leftTemp);    
300                         }
301                         databaseLeft->generateDB();
302                         databaseLeft->setNumSeqs(userTemplate.size());
303                                 
304                         for (int i = 0; i < userTemplate.size(); i++) {
305                                 if (m->control_pressed) { return userTemplate; }  
306                                         
307                                 string rightFrag = userTemplate[i]->getUnaligned();
308                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
309                                         
310                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
311                                 databaseRight->addSequence(rightTemp);  
312                         }
313                         databaseRight->generateDB();
314                         databaseRight->setNumSeqs(userTemplate.size());
315 #endif  
316                 }else if (searchMethod == "blast") {
317                         
318                         //generate blastdb
319                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
320
321                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
322                         databaseLeft->generateDB();
323                         databaseLeft->setNumSeqs(userTemplate.size());
324                 }
325                 
326                 return userTemplate;
327                 
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
331                 exit(1);
332         }
333 }
334
335 //***************************************************************************************************************
336 ChimeraSlayer::~ChimeraSlayer() {       
337         delete decalc;  
338         if (templateFileName != "self") {
339                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
340                 else if (searchMethod == "blast") {  delete databaseLeft; }
341         }
342 }
343 //***************************************************************************************************************
344 void ChimeraSlayer::printHeader(ostream& out) {
345         m->mothurOutEndLine();
346         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
347         m->mothurOutEndLine();
348         
349         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
350 }
351 //***************************************************************************************************************
352 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
353         try {
354                 Sequence* trim = NULL;
355                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
356                 
357                 if (chimeraFlags == "yes") {
358                         string chimeraFlag = "no";
359                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
360                            ||
361                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
362                         
363                         
364                         if (chimeraFlag == "yes") {     
365                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
366                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
367                                         outAcc << querySeq->getName() << endl;
368                                         
369                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
370                                         
371                                         if (trimChimera) {  
372                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
373                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
374                                                 
375                                                 string newAligned = trim->getAligned();
376
377                                                 if (lengthLeft > lengthRight) { //trim right
378                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
379                                                 }else { //trim left
380                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
381                                                 }
382                                                 trim->setAligned(newAligned);
383                                         }
384                                 }
385                         }
386                         
387                         printBlock(chimeraResults[0], chimeraFlag, out);
388                         out << endl;
389                 }else {  
390                         out << querySeq->getName() << "\tno" << endl; 
391                 }
392                 
393                 return trim;
394                 
395         }
396         catch(exception& e) {
397                 m->errorOut(e, "ChimeraSlayer", "print");
398                 exit(1);
399         }
400 }
401 //***************************************************************************************************************
402 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
403         try {
404                 Sequence* trim = NULL;
405                                 
406                 if (trimChimera) { 
407                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
408                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
409                 }
410                 
411                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
412                         
413                         string chimeraFlag = "no";
414                         if (leftPiece.flag == "yes") {
415                                 
416                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
417                                         ||
418                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
419                         }
420                         
421                         if (rightPiece.flag == "yes") {
422                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
423                                  ||
424                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
425                         }
426                         
427                         bool rightChimeric = false;
428                         bool leftChimeric = false;
429                         
430                         if (chimeraFlag == "yes") {     
431                                 //which peice is chimeric or are both
432                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
433                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
434                                 
435                                 if (rightChimeric || leftChimeric) {
436                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
437                                         outAcc << querySeq->getName() << endl;
438                                         
439                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
440                                         
441                                         if (trimChimera) {  
442                                                 string newAligned = trim->getAligned();
443                                                                                                 
444                                                 //right side is fine so keep that
445                                                 if ((leftChimeric) && (!rightChimeric)) {
446                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
447                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
448                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449                                                 }else { //both sides are chimeric, keep longest piece
450                                                         
451                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
452                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
453                                                         
454                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
455                                                         int length = lengthLeftLeft;
456                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
457                                                         
458                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
459                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
460                                                         
461                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
462                                                         if (lengthRightRight > length) { longest = 4; }
463                                                         
464                                                         if (longest == 1) { //leftleft
465                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466                                                         }else if (longest == 2) { //leftright
467                                                                 //get rid of leftleft
468                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
469                                                                 //get rid of right
470                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
471                                                         }else if (longest == 3) { //rightleft
472                                                                 //get rid of left
473                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
474                                                                 //get rid of rightright
475                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
476                                                         }else { //rightright
477                                                                 //get rid of left
478                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
479                                                                 //get rid of rightleft
480                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
481                                                         }
482                                                 }
483                                                         
484                                                 trim->setAligned(newAligned);
485                                         }
486                                         
487                                 }
488                         }
489                         
490                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
491                         out << endl;
492                 }else {  
493                         out << querySeq->getName() << "\tno" << endl;  
494                 }
495                 
496                 return trim;
497                 
498         }
499         catch(exception& e) {
500                 m->errorOut(e, "ChimeraSlayer", "print");
501                 exit(1);
502         }
503 }
504
505 #ifdef USE_MPI
506 //***************************************************************************************************************
507 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
508         try {
509                 MPI_Status status;
510                 bool results = false;
511                 string outAccString = "";
512                 string outputString = "";
513                 
514                 Sequence* trim = NULL;
515                 
516                 if (trimChimera) { 
517                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
518                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
519                 }
520                 
521                 
522                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
523                         
524                         string chimeraFlag = "no";
525                         if (leftPiece.flag == "yes") {
526                                 
527                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
528                                    ||
529                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
530                         }
531                         
532                         if (rightPiece.flag == "yes") {
533                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
534                                         ||
535                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
536                         }
537                         
538                         bool rightChimeric = false;
539                         bool leftChimeric = false;
540                         
541                         if (chimeraFlag == "yes") {     
542                                 //which peice is chimeric or are both
543                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
544                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
545                                 
546                                 if (rightChimeric || leftChimeric) {
547                                         cout << querySeq->getName() <<  "\tyes" << endl;
548                                         outAccString += querySeq->getName() + "\n";
549                                         results = true;
550                                         
551                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
552                                         
553                                         //write to accnos file
554                                         int length = outAccString.length();
555                                         char* buf2 = new char[length];
556                                         memcpy(buf2, outAccString.c_str(), length);
557                                 
558                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
559                                         delete buf2;
560                                         
561                                         if (trimChimera) {  
562                                                 string newAligned = trim->getAligned();
563                                                 
564                                                 //right side is fine so keep that
565                                                 if ((leftChimeric) && (!rightChimeric)) {
566                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
567                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
568                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
569                                                 }else { //both sides are chimeric, keep longest piece
570                                                         
571                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
572                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
573                                                         
574                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
575                                                         int length = lengthLeftLeft;
576                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
577                                                         
578                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
579                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
580                                                         
581                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
582                                                         if (lengthRightRight > length) { longest = 4; }
583                                                         
584                                                         if (longest == 1) { //leftleft
585                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586                                                         }else if (longest == 2) { //leftright
587                                                                 //get rid of leftleft
588                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
589                                                                 //get rid of right
590                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
591                                                         }else if (longest == 3) { //rightleft
592                                                                 //get rid of left
593                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
594                                                                 //get rid of rightright
595                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
596                                                         }else { //rightright
597                                                                 //get rid of left
598                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
599                                                                 //get rid of rightleft
600                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
601                                                         }
602                                                 }
603                                                 
604                                                 trim->setAligned(newAligned);
605                                         }
606                                         
607                                 }
608                         }
609                         
610                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
611                         outputString += "\n";
612                 
613                         //write to output file
614                         int length = outputString.length();
615                         char* buf = new char[length];
616                         memcpy(buf, outputString.c_str(), length);
617                                 
618                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
619                         delete buf;
620
621                 }else {  
622                         outputString += querySeq->getName() + "\tno\n";  
623         
624                         //write to output file
625                         int length = outputString.length();
626                         char* buf = new char[length];
627                         memcpy(buf, outputString.c_str(), length);
628                                 
629                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
630                         delete buf;
631                 }
632                 
633                 
634                 return trim;
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "ChimeraSlayer", "print");
638                 exit(1);
639         }
640 }
641 //***************************************************************************************************************
642 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
643         try {
644                 MPI_Status status;
645                 bool results = false;
646                 string outAccString = "";
647                 string outputString = "";
648                 
649                 Sequence* trim = NULL;
650                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
651                 
652                 if (chimeraFlags == "yes") {
653                         string chimeraFlag = "no";
654                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
655                            ||
656                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
657                         
658                         
659                         if (chimeraFlag == "yes") {     
660                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
661                                         cout << querySeq->getName() <<  "\tyes" << endl;
662                                         outAccString += querySeq->getName() + "\n";
663                                         results = true;
664                                         
665                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
666                                         
667                                         //write to accnos file
668                                         int length = outAccString.length();
669                                         char* buf2 = new char[length];
670                                         memcpy(buf2, outAccString.c_str(), length);
671                                         
672                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
673                                         delete buf2;
674                                         
675                                         if (trimChimera) {  
676                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
677                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
678                                                 
679                                                 string newAligned = trim->getAligned();
680                                                 if (lengthLeft > lengthRight) { //trim right
681                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
682                                                 }else { //trim left
683                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
684                                                 }
685                                                 trim->setAligned(newAligned);   
686                                         }
687                                 }
688                         }
689                         
690                         outputString = getBlock(chimeraResults[0], chimeraFlag);
691                         outputString += "\n";
692                         
693                         //write to output file
694                         int length = outputString.length();
695                         char* buf = new char[length];
696                         memcpy(buf, outputString.c_str(), length);
697                         
698                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
699                         delete buf;
700                         
701                 }else {  
702                         outputString += querySeq->getName() + "\tno\n";  
703                         
704                         //write to output file
705                         int length = outputString.length();
706                         char* buf = new char[length];
707                         memcpy(buf, outputString.c_str(), length);
708                         
709                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
710                         delete buf;
711                 }
712                 
713                 return trim;
714         }
715         catch(exception& e) {
716                 m->errorOut(e, "ChimeraSlayer", "print");
717                 exit(1);
718         }
719 }
720 #endif
721
722 //***************************************************************************************************************
723 int ChimeraSlayer::getChimeras(Sequence* query) {
724         try {
725                 
726                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
727                 printResults.trimQuery = trimQuery; 
728                 
729                 chimeraFlags = "no";
730                 printResults.flag = "no";
731                 
732                 querySeq = query;
733                 
734                 //you must create a template
735                 vector<Sequence*> thisTemplate;
736                 vector<Sequence*> thisFilteredTemplate;
737                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
738                 else {  thisTemplate = getTemplate(query, thisFilteredTemplate);  } //fills this template and creates the databases
739                 
740                 if (m->control_pressed) {  return 0;  }
741                 
742                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
743                 
744                 //moved this out of maligner - 4/29/11
745                 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
746                         
747                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
748                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
749                 
750                 if (templateFileName == "self") {
751                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
752                         else if (searchMethod == "blast") {  delete databaseLeft; }
753                 }
754         
755                 if (m->control_pressed) {  return 0;  }
756
757                 string chimeraFlag = maligner.getResults(query, decalc);
758                 
759                 if (m->control_pressed) {  return 0;  }
760                 
761                 vector<results> Results = maligner.getOutput();
762                 
763                 for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
764                 
765                 if (chimeraFlag == "yes") {
766                         
767                         if (realign) {
768                                 ChimeraReAligner realigner(thisTemplate, match, misMatch);
769                                 realigner.reAlign(query, Results);
770                         }
771                         
772                         //get sequence that were given from maligner results
773                         vector<SeqDist> seqs;
774                         map<string, float> removeDups;
775                         map<string, float>::iterator itDup;
776                         map<string, string> parentNameSeq;
777                         map<string, string>::iterator itSeq;
778                         for (int j = 0; j < Results.size(); j++) {
779                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
780                                 //only add if you are not a duplicate
781                                 itDup = removeDups.find(Results[j].parent);
782                                 if (itDup == removeDups.end()) { //this is not duplicate
783                                         removeDups[Results[j].parent] = dist;
784                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
785                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
786                                         removeDups[Results[j].parent] = dist;
787                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
788                                 }
789                         }
790                         
791                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
792                                 itSeq = parentNameSeq.find(itDup->first);
793                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
794                                 
795                                 SeqDist member;
796                                 member.seq = seq;
797                                 member.dist = itDup->second;
798                                 
799                                 seqs.push_back(member);
800                         }
801                         
802                         //limit number of parents to explore - default 3
803                         if (Results.size() > parents) {
804                                 //sort by distance
805                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
806                                 //prioritize larger more similiar sequence fragments
807                                 reverse(seqs.begin(), seqs.end());
808                                 
809                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
810                                         delete seqs[k].seq;
811                                         seqs.pop_back();        
812                                 }
813                         }
814                         
815                         //put seqs into vector to send to slayer
816                         vector<Sequence*> seqsForSlayer;
817                         for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
818                         
819                         if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
820
821                         //send to slayer
822                         chimeraFlags = slayer.getResults(query, seqsForSlayer);
823                         if (m->control_pressed) {  return 0;  }
824                         chimeraResults = slayer.getOutput();
825                         
826                         printResults.flag = chimeraFlags;
827                         printResults.results = chimeraResults;
828                         
829                         //free memory
830                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
831                 }
832                 
833                 return 0;
834         }
835         catch(exception& e) {
836                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
837                 exit(1);
838         }
839 }
840 //***************************************************************************************************************
841 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
842         try {
843                 out << querySeq->getName() << '\t';
844                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
845         
846                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
847                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
848                 
849                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
850                 
851         }
852         catch(exception& e) {
853                 m->errorOut(e, "ChimeraSlayer", "printBlock");
854                 exit(1);
855         }
856 }
857 //***************************************************************************************************************
858 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
859         try {
860                 
861                 if ((leftChimeric) && (!rightChimeric)) { //print left
862                         out << querySeq->getName() << '\t';
863                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
864                         
865                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
866                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
867                 
868                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
869                 
870                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
871                         out << querySeq->getName() << '\t';
872                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
873                         
874                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
875                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
876                         
877                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
878                         
879                 }else  { //print both results
880                         if (leftdata.flag == "yes") {
881                                 out << querySeq->getName() + "_LEFT" << '\t';
882                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
883                                 
884                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
885                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
886                                 
887                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
888                         }
889                         
890                         if (rightdata.flag == "yes") {
891                                 if (leftdata.flag == "yes") { out << endl; }
892                                 
893                                 out << querySeq->getName() + "_RIGHT"<< '\t';
894                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
895                                 
896                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
897                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
898                                 
899                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
900                 
901                         }
902                 }
903         }
904         catch(exception& e) {
905                 m->errorOut(e, "ChimeraSlayer", "printBlock");
906                 exit(1);
907         }
908 }
909 //***************************************************************************************************************
910 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
911         try {
912                 
913                 string out = "";
914                 
915                 if ((leftChimeric) && (!rightChimeric)) { //get left
916                         out += querySeq->getName() + "\t";
917                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
918                         
919                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
920                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
921                         
922                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
923                         
924                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
925                         out += querySeq->getName() + "\t";
926                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
927                         
928                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
929                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
930                         
931                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
932                         
933                 }else  { //print both results
934                         
935                         if (leftdata.flag == "yes") {
936                                 out += querySeq->getName() + "_LEFT\t";
937                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
938                                 
939                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
940                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
941                                 
942                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
943                         }
944                         
945                         if (rightdata.flag == "yes") {
946                                 if (leftdata.flag == "yes") { out += "\n"; }
947                                 out +=  querySeq->getName() + "_RIGHT\t";
948                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
949                                 
950                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
951                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
952                                 
953                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
954                         }
955                 }
956                 
957                 return out;
958                 
959         }
960         catch(exception& e) {
961                 m->errorOut(e, "ChimeraSlayer", "getBlock");
962                 exit(1);
963         }
964 }
965 //***************************************************************************************************************
966 string ChimeraSlayer::getBlock(data_struct data, string flag){
967         try {
968                 
969                 string outputString = "";
970                 
971                 outputString += querySeq->getName() + "\t";
972                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
973                         
974                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
975                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
976                 
977                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
978                 
979                 return outputString;
980         }
981         catch(exception& e) {
982                 m->errorOut(e, "ChimeraSlayer", "getBlock");
983                 exit(1);
984         }
985 }
986 //***************************************************************************************************************
987 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
988         try {
989                 
990                 vector<Sequence*> refSeqs;
991                 
992                 if (searchMethod == "distance") {
993                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
994                         Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
995                         runFilter(newSeq);
996                         refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted);
997                 }else if (searchMethod == "blast")  {
998                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
999                 }else if (searchMethod == "kmer") {
1000                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1001                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1002                 
1003                 return refSeqs;
1004         }
1005         catch(exception& e) {
1006                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1007                 exit(1);
1008         }
1009 }
1010 //***************************************************************************************************************/
1011 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1012         try {   
1013                 
1014                 vector<Sequence*> refResults;
1015                 
1016                 //get parts of query
1017                 string queryUnAligned = q->getUnaligned();
1018                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1019                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1020                 
1021                 Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery);
1022                 Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
1023                 
1024                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
1025                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
1026                 
1027                 vector<int> smaller;
1028                 vector<int> larger;
1029                 
1030                 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1031                 else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1032                 
1033                 //merge results         
1034                 map<int, int> seen;
1035                 map<int, int>::iterator it;
1036                 vector<int> mergedResults;
1037                 for (int i = 0; i < smaller.size(); i++) {
1038                         //add left if you havent already
1039                         it = seen.find(smaller[i]);
1040                         if (it == seen.end()) {  
1041                                 mergedResults.push_back(smaller[i]);
1042                                 seen[smaller[i]] = smaller[i];
1043                         }
1044                         
1045                         //add right if you havent already
1046                         it = seen.find(larger[i]);
1047                         if (it == seen.end()) {  
1048                                 mergedResults.push_back(larger[i]);
1049                                 seen[larger[i]] = larger[i];
1050                         }
1051                 }
1052                 
1053                 for (int i = smaller.size(); i < larger.size(); i++) {
1054                         //add right if you havent already
1055                         it = seen.find(larger[i]);
1056                         if (it == seen.end()) {  
1057                                 mergedResults.push_back(larger[i]);
1058                                 seen[larger[i]] = larger[i];
1059                         }
1060                 }
1061                 //numWanted = mergedResults.size();
1062
1063                 //cout << q->getName() << " merged results size = " << mergedResults.size() << '\t' << "numwanted = " << numWanted <<  endl;            
1064                 for (int i = 0; i < mergedResults.size(); i++) {
1065                         //cout << db[mergedResults[i]]->getName()  << '\t' << mergedResults[i] << endl; 
1066                         
1067                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1068                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1069                                 refResults.push_back(temp);
1070                                 //cout << db[mergedResults[i]]->getName() << endl;
1071                         }
1072                         
1073                         //cout << mergedResults[i] << endl;
1074                 }
1075                 //cout << "done " << q->getName()  << endl;             
1076                 delete queryRight;
1077                 delete queryLeft;
1078                 
1079                 return refResults;
1080         }
1081         catch(exception& e) {
1082                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1083                 exit(1);
1084         }
1085 }
1086 //***************************************************************************************************************
1087 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1088         try {   
1089                 
1090                 //get parts of query
1091                 string queryUnAligned = q->getUnaligned();
1092                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1093                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1094                 
1095                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1096                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1097                 
1098                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, numWanted);
1099                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, numWanted);
1100                 
1101                 //merge results         
1102                 map<int, int> seen;
1103                 map<int, int>::iterator it;
1104                         vector<int> mergedResults;
1105                 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1106                         //add left if you havent already
1107                         it = seen.find(tempIndexesLeft[i]);
1108                         if (it == seen.end()) {  
1109                                 mergedResults.push_back(tempIndexesLeft[i]);
1110                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1111                         }
1112                         
1113                         //add right if you havent already
1114                         it = seen.find(tempIndexesRight[i]);
1115                         if (it == seen.end()) {  
1116                                 mergedResults.push_back(tempIndexesRight[i]);
1117                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1118                         }
1119                 }
1120                 
1121                 //numWanted = mergedResults.size();
1122                         
1123                 //cout << q->getName() << endl;         
1124                 vector<Sequence*> refResults;
1125                 for (int i = 0; i < mergedResults.size(); i++) {
1126                         //cout << db[mergedResults[i]]->getName() << endl;      
1127                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1128                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1129                                 refResults.push_back(temp);
1130                         }
1131                 }
1132                 //cout << endl;         
1133                 delete queryRight;
1134                 delete queryLeft;
1135                 
1136                 return refResults;
1137         }
1138         catch(exception& e) {
1139                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1140                 exit(1);
1141         }
1142 }
1143 //***************************************************************************************************************
1144
1145