]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
5b1075902d6a038a4e863913bee7969c0ae52154
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37         
38                 decalc = new DeCalculator();    
39                 
40                 doPrep();
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
44                 exit(1);
45         }
46 }
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
49                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
50         try {
51                 fastafile = file; templateSeqs = readSeqs(fastafile);
52                 templateFileName = temp; 
53                 searchMethod = mode;
54                 kmerSize = k;
55                 match = ms;
56                 misMatch = mms;
57                 window = win;
58                 divR = div;
59                 minSim = minsim;
60                 minCov = mincov;
61                 minBS = minbs;
62                 minSNP = minsnp;
63                 parents = par;
64                 iters = it;
65                 increment = inc;
66                 numWanted = numw;
67                 realign = r; 
68                 trimChimera = trim;
69                 priority = prior;
70                 
71                 decalc = new DeCalculator();    
72                 
73                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74                 
75                 //run filter on template
76                 for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  break; }  runFilter(templateSeqs[i]);  }
77
78                 
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
82                 exit(1);
83         }
84 }
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
87         try {
88                 if (searchMethod == "distance") { 
89                         //read in all query seqs
90                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
91                 
92                         vector<Sequence*> temp = templateSeqs;
93                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
94                 
95                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
96                 
97                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
98                 
99                         if (m->control_pressed) {  return 0; } 
100                 
101                         //run filter on template copying templateSeqs into filteredTemplateSeqs
102                         for (int i = 0; i < templateSeqs.size(); i++) {  
103                                 if (m->control_pressed) {  return 0; }
104                                 
105                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
106                                 runFilter(newSeq);  
107                                 filteredTemplateSeqs.push_back(newSeq);
108                         }
109                 }
110                 string  kmerDBNameLeft;
111                 string  kmerDBNameRight;
112         
113                 //generate the kmerdb to pass to maligner
114                 if (searchMethod == "kmer") { 
115                         string templatePath = m->hasPath(templateFileName);
116                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
117                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
118                                 
119                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
120                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
121                 #ifdef USE_MPI
122                         for (int i = 0; i < templateSeqs.size(); i++) {
123                                         
124                                 if (m->control_pressed) { return 0; } 
125                                         
126                                 string leftFrag = templateSeqs[i]->getUnaligned();
127                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
128                                         
129                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
130                                 databaseLeft->addSequence(leftTemp);    
131                         }
132                         databaseLeft->generateDB();
133                         databaseLeft->setNumSeqs(templateSeqs.size());
134                         
135                         for (int i = 0; i < templateSeqs.size(); i++) {
136                                 if (m->control_pressed) { return 0; } 
137                                         
138                                 string rightFrag = templateSeqs[i]->getUnaligned();
139                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
140                                         
141                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
142                                 databaseRight->addSequence(rightTemp);  
143                         }
144                         databaseRight->generateDB();
145                         databaseRight->setNumSeqs(templateSeqs.size());
146
147                 #else   
148                         //leftside
149                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
150                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
151                         bool needToGenerateLeft = true;
152                         
153                         if(kmerFileTestLeft){   
154                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
155                                 if (GoodFile) {  needToGenerateLeft = false;    }
156                         }
157                         
158                         if(needToGenerateLeft){ 
159                         
160                                 for (int i = 0; i < templateSeqs.size(); i++) {
161                                         
162                                         if (m->control_pressed) { return 0; } 
163                                         
164                                         string leftFrag = templateSeqs[i]->getUnaligned();
165                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
166                                         
167                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
168                                         databaseLeft->addSequence(leftTemp);    
169                                 }
170                                 databaseLeft->generateDB();
171                                 
172                         }else { 
173                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
174                         }
175                         kmerFileTestLeft.close();
176                         
177                         databaseLeft->setNumSeqs(templateSeqs.size());
178                         
179                         //rightside
180                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
181                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
182                         bool needToGenerateRight = true;
183                         
184                         if(kmerFileTestRight){  
185                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
186                                 if (GoodFile) {  needToGenerateRight = false;   }
187                         }
188                         
189                         if(needToGenerateRight){        
190                         
191                                 for (int i = 0; i < templateSeqs.size(); i++) {
192                                         if (m->control_pressed) { return 0; } 
193                                         
194                                         string rightFrag = templateSeqs[i]->getUnaligned();
195                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
196                                         
197                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
198                                         databaseRight->addSequence(rightTemp);  
199                                 }
200                                 databaseRight->generateDB();
201                                 
202                         }else { 
203                                 databaseRight->readKmerDB(kmerFileTestRight);   
204                         }
205                         kmerFileTestRight.close();
206                         
207                         databaseRight->setNumSeqs(templateSeqs.size());
208                 #endif  
209                 }else if (searchMethod == "blast") {
210                 
211                         //generate blastdb
212                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
213
214                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
215                         databaseLeft->generateDB();
216                         databaseLeft->setNumSeqs(templateSeqs.size());
217                 }
218                 
219                 return 0;
220
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "ChimeraSlayer", "doprep");
224                 exit(1);
225         }
226 }
227 //***************************************************************************************************************
228 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
229         try {
230                 
231                 //when template=self, the query file is sorted from most abundance to least abundant
232                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
233                 vector<Sequence*> userTemplate;
234                 
235                 int myAbund = priority[q->getName()];
236                 
237                 for (int i = 0; i < templateSeqs.size(); i++) {
238                         
239                         if (m->control_pressed) { return userTemplate; } 
240                         
241                         //have I reached a sequence with the same abundance as myself?
242                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
243                         
244                         //if its am not chimeric add it
245                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
246                                 userTemplate.push_back(templateSeqs[i]); 
247                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
248                         }
249                 }
250                 
251                 string  kmerDBNameLeft;
252                 string  kmerDBNameRight;
253                 
254                 //generate the kmerdb to pass to maligner
255                 if (searchMethod == "kmer") { 
256                         string templatePath = m->hasPath(templateFileName);
257                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
258                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
259                         
260                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
261                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
262 #ifdef USE_MPI
263                         for (int i = 0; i < userTemplate.size(); i++) {
264                                 
265                                 if (m->control_pressed) { return userTemplate; } 
266                                 
267                                 string leftFrag = userTemplate[i]->getUnaligned();
268                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
269                                 
270                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
271                                 databaseLeft->addSequence(leftTemp);    
272                         }
273                         databaseLeft->generateDB();
274                         databaseLeft->setNumSeqs(userTemplate.size());
275                         
276                         for (int i = 0; i < userTemplate.size(); i++) {
277                                 if (m->control_pressed) { return userTemplate; } 
278                                 
279                                 string rightFrag = userTemplate[i]->getUnaligned();
280                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
281                                 
282                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
283                                 databaseRight->addSequence(rightTemp);  
284                         }
285                         databaseRight->generateDB();
286                         databaseRight->setNumSeqs(userTemplate.size());
287                         
288 #else   
289                         
290                         
291                         for (int i = 0; i < userTemplate.size(); i++) {
292                                 
293                                 if (m->control_pressed) { return userTemplate; } 
294                                 
295                                 string leftFrag = userTemplate[i]->getUnaligned();
296                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
297                                 
298                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
299                                 databaseLeft->addSequence(leftTemp);    
300                         }
301                         databaseLeft->generateDB();
302                         databaseLeft->setNumSeqs(userTemplate.size());
303                                 
304                         for (int i = 0; i < userTemplate.size(); i++) {
305                                 if (m->control_pressed) { return userTemplate; }  
306                                         
307                                 string rightFrag = userTemplate[i]->getUnaligned();
308                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
309                                         
310                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
311                                 databaseRight->addSequence(rightTemp);  
312                         }
313                         databaseRight->generateDB();
314                         databaseRight->setNumSeqs(userTemplate.size());
315 #endif  
316                 }else if (searchMethod == "blast") {
317                         
318                         //generate blastdb
319                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
320
321                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
322                         databaseLeft->generateDB();
323                         databaseLeft->setNumSeqs(userTemplate.size());
324                 }
325                 
326                 return userTemplate;
327                 
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
331                 exit(1);
332         }
333 }
334
335 //***************************************************************************************************************
336 ChimeraSlayer::~ChimeraSlayer() {       
337         delete decalc;  
338         if (templateFileName != "self") {
339                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
340                 else if (searchMethod == "blast") {  delete databaseLeft; }
341         }
342 }
343 //***************************************************************************************************************
344 void ChimeraSlayer::printHeader(ostream& out) {
345         m->mothurOutEndLine();
346         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
347         m->mothurOutEndLine();
348         
349         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
350 }
351 //***************************************************************************************************************
352 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
353         try {
354                 Sequence* trim = NULL;
355                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
356                 
357                 if (chimeraFlags == "yes") {
358                         string chimeraFlag = "no";
359                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
360                            ||
361                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
362                         
363                         
364                         if (chimeraFlag == "yes") {     
365                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
366                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
367                                         outAcc << querySeq->getName() << endl;
368                                         
369                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
370                                         
371                                         if (trimChimera) {  
372                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
373                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
374                                                 
375                                                 string newAligned = trim->getAligned();
376
377                                                 if (lengthLeft > lengthRight) { //trim right
378                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
379                                                 }else { //trim left
380                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
381                                                 }
382                                                 trim->setAligned(newAligned);
383                                         }
384                                 }
385                         }
386                         
387                         printBlock(chimeraResults[0], chimeraFlag, out);
388                         out << endl;
389                 }else {  
390                         out << querySeq->getName() << "\tno" << endl; 
391                 }
392                 
393                 return trim;
394                 
395         }
396         catch(exception& e) {
397                 m->errorOut(e, "ChimeraSlayer", "print");
398                 exit(1);
399         }
400 }
401 //***************************************************************************************************************
402 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
403         try {
404                 Sequence* trim = NULL;
405                                 
406                 if (trimChimera) { 
407                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
408                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
409                 }
410                 
411                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
412                         
413                         string chimeraFlag = "no";
414                         if (leftPiece.flag == "yes") {
415                                 
416                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
417                                         ||
418                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
419                         }
420                         
421                         if (rightPiece.flag == "yes") {
422                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
423                                  ||
424                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
425                         }
426                         
427                         bool rightChimeric = false;
428                         bool leftChimeric = false;
429                         
430                         if (chimeraFlag == "yes") {     
431                                 //which peice is chimeric or are both
432                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
433                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
434                                 
435                                 if (rightChimeric || leftChimeric) {
436                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
437                                         outAcc << querySeq->getName() << endl;
438                                         
439                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
440                                         
441                                         if (trimChimera) {  
442                                                 string newAligned = trim->getAligned();
443                                                                                                 
444                                                 //right side is fine so keep that
445                                                 if ((leftChimeric) && (!rightChimeric)) {
446                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
447                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
448                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449                                                 }else { //both sides are chimeric, keep longest piece
450                                                         
451                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
452                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
453                                                         
454                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
455                                                         int length = lengthLeftLeft;
456                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
457                                                         
458                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
459                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
460                                                         
461                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
462                                                         if (lengthRightRight > length) { longest = 4; }
463                                                         
464                                                         if (longest == 1) { //leftleft
465                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466                                                         }else if (longest == 2) { //leftright
467                                                                 //get rid of leftleft
468                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
469                                                                 //get rid of right
470                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
471                                                         }else if (longest == 3) { //rightleft
472                                                                 //get rid of left
473                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
474                                                                 //get rid of rightright
475                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
476                                                         }else { //rightright
477                                                                 //get rid of left
478                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
479                                                                 //get rid of rightleft
480                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
481                                                         }
482                                                 }
483                                                         
484                                                 trim->setAligned(newAligned);
485                                         }
486                                         
487                                 }
488                         }
489                         
490                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
491                         out << endl;
492                 }else {  
493                         out << querySeq->getName() << "\tno" << endl;  
494                 }
495                 
496                 return trim;
497                 
498         }
499         catch(exception& e) {
500                 m->errorOut(e, "ChimeraSlayer", "print");
501                 exit(1);
502         }
503 }
504
505 #ifdef USE_MPI
506 //***************************************************************************************************************
507 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
508         try {
509                 MPI_Status status;
510                 bool results = false;
511                 string outAccString = "";
512                 string outputString = "";
513                 
514                 Sequence* trim = NULL;
515                 
516                 if (trimChimera) { 
517                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
518                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
519                 }
520                 
521                 
522                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
523                         
524                         string chimeraFlag = "no";
525                         if (leftPiece.flag == "yes") {
526                                 
527                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
528                                    ||
529                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
530                         }
531                         
532                         if (rightPiece.flag == "yes") {
533                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
534                                         ||
535                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
536                         }
537                         
538                         bool rightChimeric = false;
539                         bool leftChimeric = false;
540                         
541                         if (chimeraFlag == "yes") {     
542                                 //which peice is chimeric or are both
543                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
544                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
545                                 
546                                 if (rightChimeric || leftChimeric) {
547                                         cout << querySeq->getName() <<  "\tyes" << endl;
548                                         outAccString += querySeq->getName() + "\n";
549                                         results = true;
550                                         
551                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
552                                         
553                                         //write to accnos file
554                                         int length = outAccString.length();
555                                         char* buf2 = new char[length];
556                                         memcpy(buf2, outAccString.c_str(), length);
557                                 
558                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
559                                         delete buf2;
560                                         
561                                         if (trimChimera) {  
562                                                 string newAligned = trim->getAligned();
563                                                 
564                                                 //right side is fine so keep that
565                                                 if ((leftChimeric) && (!rightChimeric)) {
566                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
567                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
568                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
569                                                 }else { //both sides are chimeric, keep longest piece
570                                                         
571                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
572                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
573                                                         
574                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
575                                                         int length = lengthLeftLeft;
576                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
577                                                         
578                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
579                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
580                                                         
581                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
582                                                         if (lengthRightRight > length) { longest = 4; }
583                                                         
584                                                         if (longest == 1) { //leftleft
585                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586                                                         }else if (longest == 2) { //leftright
587                                                                 //get rid of leftleft
588                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
589                                                                 //get rid of right
590                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
591                                                         }else if (longest == 3) { //rightleft
592                                                                 //get rid of left
593                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
594                                                                 //get rid of rightright
595                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
596                                                         }else { //rightright
597                                                                 //get rid of left
598                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
599                                                                 //get rid of rightleft
600                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
601                                                         }
602                                                 }
603                                                 
604                                                 trim->setAligned(newAligned);
605                                         }
606                                         
607                                 }
608                         }
609                         
610                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
611                         outputString += "\n";
612                 
613                         //write to output file
614                         int length = outputString.length();
615                         char* buf = new char[length];
616                         memcpy(buf, outputString.c_str(), length);
617                                 
618                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
619                         delete buf;
620
621                 }else {  
622                         outputString += querySeq->getName() + "\tno\n";  
623         
624                         //write to output file
625                         int length = outputString.length();
626                         char* buf = new char[length];
627                         memcpy(buf, outputString.c_str(), length);
628                                 
629                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
630                         delete buf;
631                 }
632                 
633                 
634                 return trim;
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "ChimeraSlayer", "print");
638                 exit(1);
639         }
640 }
641 //***************************************************************************************************************
642 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
643         try {
644                 MPI_Status status;
645                 bool results = false;
646                 string outAccString = "";
647                 string outputString = "";
648                 
649                 Sequence* trim = NULL;
650                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
651                 
652                 if (chimeraFlags == "yes") {
653                         string chimeraFlag = "no";
654                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
655                            ||
656                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
657                         
658                         
659                         if (chimeraFlag == "yes") {     
660                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
661                                         cout << querySeq->getName() <<  "\tyes" << endl;
662                                         outAccString += querySeq->getName() + "\n";
663                                         results = true;
664                                         
665                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
666                                         
667                                         //write to accnos file
668                                         int length = outAccString.length();
669                                         char* buf2 = new char[length];
670                                         memcpy(buf2, outAccString.c_str(), length);
671                                         
672                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
673                                         delete buf2;
674                                         
675                                         if (trimChimera) {  
676                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
677                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
678                                                 
679                                                 string newAligned = trim->getAligned();
680                                                 if (lengthLeft > lengthRight) { //trim right
681                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
682                                                 }else { //trim left
683                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
684                                                 }
685                                                 trim->setAligned(newAligned);   
686                                         }
687                                 }
688                         }
689                         
690                         outputString = getBlock(chimeraResults[0], chimeraFlag);
691                         outputString += "\n";
692                         
693                         //write to output file
694                         int length = outputString.length();
695                         char* buf = new char[length];
696                         memcpy(buf, outputString.c_str(), length);
697                         
698                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
699                         delete buf;
700                         
701                 }else {  
702                         outputString += querySeq->getName() + "\tno\n";  
703                         
704                         //write to output file
705                         int length = outputString.length();
706                         char* buf = new char[length];
707                         memcpy(buf, outputString.c_str(), length);
708                         
709                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
710                         delete buf;
711                 }
712                 
713                 return trim;
714         }
715         catch(exception& e) {
716                 m->errorOut(e, "ChimeraSlayer", "print");
717                 exit(1);
718         }
719 }
720 #endif
721
722 //***************************************************************************************************************
723 int ChimeraSlayer::getChimeras(Sequence* query) {
724         try {
725                 
726                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
727                 printResults.trimQuery = trimQuery; 
728                 
729                 chimeraFlags = "no";
730                 printResults.flag = "no";
731                 
732                 querySeq = query;
733                 
734                 //you must create a template
735                 vector<Sequence*> thisTemplate;
736                 vector<Sequence*> thisFilteredTemplate;
737                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
738                 else {  thisTemplate = getTemplate(query, thisFilteredTemplate);  } //fills this template and creates the databases
739                 
740                 if (m->control_pressed) {  return 0;  }
741                 
742                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
743                 
744                 //moved this out of maligner - 4/29/11
745                 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
746                 
747                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
748                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
749                 
750                 if (templateFileName == "self") {
751                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
752                         else if (searchMethod == "blast") {  delete databaseLeft; }
753                 }
754         
755                 if (m->control_pressed) {  return 0;  }
756
757                 string chimeraFlag = maligner.getResults(query, decalc);
758
759                 if (m->control_pressed) {  return 0;  }
760                 
761                 vector<results> Results = maligner.getOutput();
762                 
763                 for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
764                 
765                 if (chimeraFlag == "yes") {
766
767                         if (realign) {
768                                 vector<string> parents;
769                                 for (int i = 0; i < Results.size(); i++) {
770                                         parents.push_back(Results[i].parentAligned);
771                                 }
772                                 
773                                 ChimeraReAligner realigner;             
774                                 realigner.reAlign(query, parents);
775
776                         }
777
778                         //get sequence that were given from maligner results
779                         vector<SeqDist> seqs;
780                         map<string, float> removeDups;
781                         map<string, float>::iterator itDup;
782                         map<string, string> parentNameSeq;
783                         map<string, string>::iterator itSeq;
784                         for (int j = 0; j < Results.size(); j++) {
785                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
786                                 //only add if you are not a duplicate
787                                 itDup = removeDups.find(Results[j].parent);
788                                 if (itDup == removeDups.end()) { //this is not duplicate
789                                         removeDups[Results[j].parent] = dist;
790                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
791                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
792                                         removeDups[Results[j].parent] = dist;
793                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
794                                 }
795                         }
796                         
797                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
798                                 itSeq = parentNameSeq.find(itDup->first);
799                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
800                                 
801                                 SeqDist member;
802                                 member.seq = seq;
803                                 member.dist = itDup->second;
804                                 
805                                 seqs.push_back(member);
806                         }
807                         
808                         //limit number of parents to explore - default 3
809                         if (Results.size() > parents) {
810                                 //sort by distance
811                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
812                                 //prioritize larger more similiar sequence fragments
813                                 reverse(seqs.begin(), seqs.end());
814                                 
815                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
816                                         delete seqs[k].seq;
817                                         seqs.pop_back();        
818                                 }
819                         }
820                 
821                         //put seqs into vector to send to slayer
822                         vector<Sequence*> seqsForSlayer;
823                         for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
824                         
825                         if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
826
827                         //send to slayer
828                         chimeraFlags = slayer.getResults(query, seqsForSlayer);
829                         if (m->control_pressed) {  return 0;  }
830                         chimeraResults = slayer.getOutput();
831                         
832                         printResults.flag = chimeraFlags;
833                         printResults.results = chimeraResults;
834                         
835                         //free memory
836                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
837                 }
838                 
839                 return 0;
840         }
841         catch(exception& e) {
842                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
843                 exit(1);
844         }
845 }
846 //***************************************************************************************************************
847 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
848         try {
849                 out << querySeq->getName() << '\t';
850                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
851         
852                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
853                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
854                 
855                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
856                 
857         }
858         catch(exception& e) {
859                 m->errorOut(e, "ChimeraSlayer", "printBlock");
860                 exit(1);
861         }
862 }
863 //***************************************************************************************************************
864 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
865         try {
866                 
867                 if ((leftChimeric) && (!rightChimeric)) { //print left
868                         out << querySeq->getName() << '\t';
869                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
870                         
871                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
872                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
873                 
874                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
875                 
876                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
877                         out << querySeq->getName() << '\t';
878                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
879                         
880                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
881                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
882                         
883                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
884                         
885                 }else  { //print both results
886                         if (leftdata.flag == "yes") {
887                                 out << querySeq->getName() + "_LEFT" << '\t';
888                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
889                                 
890                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
891                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
892                                 
893                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
894                         }
895                         
896                         if (rightdata.flag == "yes") {
897                                 if (leftdata.flag == "yes") { out << endl; }
898                                 
899                                 out << querySeq->getName() + "_RIGHT"<< '\t';
900                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
901                                 
902                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
903                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
904                                 
905                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
906                 
907                         }
908                 }
909         }
910         catch(exception& e) {
911                 m->errorOut(e, "ChimeraSlayer", "printBlock");
912                 exit(1);
913         }
914 }
915 //***************************************************************************************************************
916 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
917         try {
918                 
919                 string out = "";
920                 
921                 if ((leftChimeric) && (!rightChimeric)) { //get left
922                         out += querySeq->getName() + "\t";
923                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
924                         
925                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
926                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
927                         
928                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
929                         
930                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
931                         out += querySeq->getName() + "\t";
932                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
933                         
934                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
935                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
936                         
937                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
938                         
939                 }else  { //print both results
940                         
941                         if (leftdata.flag == "yes") {
942                                 out += querySeq->getName() + "_LEFT\t";
943                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
944                                 
945                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
946                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
947                                 
948                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
949                         }
950                         
951                         if (rightdata.flag == "yes") {
952                                 if (leftdata.flag == "yes") { out += "\n"; }
953                                 out +=  querySeq->getName() + "_RIGHT\t";
954                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
955                                 
956                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
957                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
958                                 
959                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
960                         }
961                 }
962                 
963                 return out;
964                 
965         }
966         catch(exception& e) {
967                 m->errorOut(e, "ChimeraSlayer", "getBlock");
968                 exit(1);
969         }
970 }
971 //***************************************************************************************************************
972 string ChimeraSlayer::getBlock(data_struct data, string flag){
973         try {
974                 
975                 string outputString = "";
976                 
977                 outputString += querySeq->getName() + "\t";
978                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
979                         
980                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
981                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
982                 
983                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
984                 
985                 return outputString;
986         }
987         catch(exception& e) {
988                 m->errorOut(e, "ChimeraSlayer", "getBlock");
989                 exit(1);
990         }
991 }
992 //***************************************************************************************************************
993 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
994         try {
995                 
996                 vector<Sequence*> refSeqs;
997                 
998                 if (searchMethod == "distance") {
999                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1000                         Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1001                         runFilter(newSeq);
1002                         refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1003                         delete newSeq;
1004                 }else if (searchMethod == "blast")  {
1005                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1006                 }else if (searchMethod == "kmer") {
1007                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1008                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1009                 
1010                 return refSeqs;
1011         }
1012         catch(exception& e) {
1013                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1014                 exit(1);
1015         }
1016 }
1017 //***************************************************************************************************************/
1018 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1019         try {   
1020                 
1021                 vector<Sequence*> refResults;
1022                 
1023                 //get parts of query
1024                 string queryUnAligned = q->getUnaligned();
1025                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1026                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1027                 
1028                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1029                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1030                 
1031                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1032                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1033                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1034                 vector<int> smaller;
1035                 vector<int> larger;
1036                 
1037                 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1038                 else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1039                 
1040                 //merge results         
1041                 map<int, int> seen;
1042                 map<int, int>::iterator it;
1043                 vector<int> mergedResults;
1044                 for (int i = 0; i < smaller.size(); i++) {
1045                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1046         
1047                         //add left if you havent already
1048                         it = seen.find(smaller[i]);
1049                         if (it == seen.end()) {  
1050                                 mergedResults.push_back(smaller[i]);
1051                                 seen[smaller[i]] = smaller[i];
1052                         }
1053                         
1054                         //add right if you havent already
1055                         it = seen.find(larger[i]);
1056                         if (it == seen.end()) {  
1057                                 mergedResults.push_back(larger[i]);
1058                                 seen[larger[i]] = larger[i];
1059                         }
1060                 }
1061                 
1062                 for (int i = smaller.size(); i < larger.size(); i++) {
1063                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1064                         
1065                         //add right if you havent already
1066                         it = seen.find(larger[i]);
1067                         if (it == seen.end()) {  
1068                                 mergedResults.push_back(larger[i]);
1069                                 seen[larger[i]] = larger[i];
1070                         }
1071                 }
1072
1073                 for (int i = 0; i < mergedResults.size(); i++) {
1074                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1075                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1076                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1077                                 refResults.push_back(temp);
1078                                 
1079                         }
1080                 }
1081                         
1082                 delete queryRight;
1083                 delete queryLeft;
1084                 
1085                 return refResults;
1086         }
1087         catch(exception& e) {
1088                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1089                 exit(1);
1090         }
1091 }
1092 //***************************************************************************************************************
1093 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1094         try {   
1095                 vector<Sequence*> refResults;
1096                 
1097                 //get parts of query
1098                 string queryUnAligned = q->getUnaligned();
1099                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1100                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1101                 
1102                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1103                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1104                 
1105                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1106                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1107                 
1108                 //merge results         
1109                 map<int, int> seen;
1110                 map<int, int>::iterator it;
1111                         vector<int> mergedResults;
1112                 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1113                         
1114                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1115                         
1116                         //add left if you havent already
1117                         it = seen.find(tempIndexesLeft[i]);
1118                         if (it == seen.end()) {  
1119                                 mergedResults.push_back(tempIndexesLeft[i]);
1120                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1121                         }
1122                         
1123                         //add right if you havent already
1124                         it = seen.find(tempIndexesRight[i]);
1125                         if (it == seen.end()) {  
1126                                 mergedResults.push_back(tempIndexesRight[i]);
1127                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1128                         }
1129                 }
1130                 
1131                 //numWanted = mergedResults.size();
1132                         
1133                 //cout << q->getName() << endl;         
1134                 
1135                 for (int i = 0; i < mergedResults.size(); i++) {
1136                         //cout << db[mergedResults[i]]->getName() << endl;      
1137                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1138                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1139                                 refResults.push_back(temp);
1140                         }
1141                 }
1142                 //cout << endl;         
1143                 delete queryRight;
1144                 delete queryLeft;
1145                 
1146                 return refResults;
1147         }
1148         catch(exception& e) {
1149                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1150                 exit(1);
1151         }
1152 }
1153 //***************************************************************************************************************
1154
1155