]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
chimera.slayer debugging
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37         
38                 decalc = new DeCalculator();    
39                 
40                 doPrep();
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
44                 exit(1);
45         }
46 }
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
49                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
50         try {
51                 fastafile = file; templateSeqs = readSeqs(fastafile);
52                 templateFileName = temp; 
53                 searchMethod = mode;
54                 kmerSize = k;
55                 match = ms;
56                 misMatch = mms;
57                 window = win;
58                 divR = div;
59                 minSim = minsim;
60                 minCov = mincov;
61                 minBS = minbs;
62                 minSNP = minsnp;
63                 parents = par;
64                 iters = it;
65                 increment = inc;
66                 numWanted = numw;
67                 realign = r; 
68                 trimChimera = trim;
69                 priority = prior;
70                 
71                 decalc = new DeCalculator();    
72                 
73                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74                 
75                 if (searchMethod == "distance") { 
76                         createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
77                         
78                         //run filter on template copying templateSeqs into filteredTemplateSeqs
79                         for (int i = 0; i < templateSeqs.size(); i++) {  
80                                 if (m->control_pressed) {  break; }
81                                 
82                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
83                                 runFilter(newSeq);  
84                                 filteredTemplateSeqs.push_back(newSeq);
85                         }
86                 }
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
90                 exit(1);
91         }
92 }
93 //***************************************************************************************************************
94 int ChimeraSlayer::doPrep() {
95         try {
96                 if (searchMethod == "distance") { 
97                         //read in all query seqs
98                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
99                 
100                         vector<Sequence*> temp = templateSeqs;
101                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
102                 
103                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
104                 
105                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
106                 
107                         if (m->control_pressed) {  return 0; } 
108                 
109                         //run filter on template copying templateSeqs into filteredTemplateSeqs
110                         for (int i = 0; i < templateSeqs.size(); i++) {  
111                                 if (m->control_pressed) {  return 0; }
112                                 
113                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
114                                 runFilter(newSeq);  
115                                 filteredTemplateSeqs.push_back(newSeq);
116                         }
117                 }
118                 string  kmerDBNameLeft;
119                 string  kmerDBNameRight;
120         
121                 //generate the kmerdb to pass to maligner
122                 if (searchMethod == "kmer") { 
123                         string templatePath = m->hasPath(templateFileName);
124                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
125                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
126                                 
127                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
128                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
129                 #ifdef USE_MPI
130                         for (int i = 0; i < templateSeqs.size(); i++) {
131                                         
132                                 if (m->control_pressed) { return 0; } 
133                                         
134                                 string leftFrag = templateSeqs[i]->getUnaligned();
135                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
136                                         
137                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
138                                 databaseLeft->addSequence(leftTemp);    
139                         }
140                         databaseLeft->generateDB();
141                         databaseLeft->setNumSeqs(templateSeqs.size());
142                         
143                         for (int i = 0; i < templateSeqs.size(); i++) {
144                                 if (m->control_pressed) { return 0; } 
145                                         
146                                 string rightFrag = templateSeqs[i]->getUnaligned();
147                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
148                                         
149                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
150                                 databaseRight->addSequence(rightTemp);  
151                         }
152                         databaseRight->generateDB();
153                         databaseRight->setNumSeqs(templateSeqs.size());
154
155                 #else   
156                         //leftside
157                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
158                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
159                         bool needToGenerateLeft = true;
160                         
161                         if(kmerFileTestLeft){   
162                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
163                                 if (GoodFile) {  needToGenerateLeft = false;    }
164                         }
165                         
166                         if(needToGenerateLeft){ 
167                         
168                                 for (int i = 0; i < templateSeqs.size(); i++) {
169                                         
170                                         if (m->control_pressed) { return 0; } 
171                                         
172                                         string leftFrag = templateSeqs[i]->getUnaligned();
173                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
174                                         
175                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
176                                         databaseLeft->addSequence(leftTemp);    
177                                 }
178                                 databaseLeft->generateDB();
179                                 
180                         }else { 
181                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
182                         }
183                         kmerFileTestLeft.close();
184                         
185                         databaseLeft->setNumSeqs(templateSeqs.size());
186                         
187                         //rightside
188                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
189                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
190                         bool needToGenerateRight = true;
191                         
192                         if(kmerFileTestRight){  
193                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
194                                 if (GoodFile) {  needToGenerateRight = false;   }
195                         }
196                         
197                         if(needToGenerateRight){        
198                         
199                                 for (int i = 0; i < templateSeqs.size(); i++) {
200                                         if (m->control_pressed) { return 0; } 
201                                         
202                                         string rightFrag = templateSeqs[i]->getUnaligned();
203                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
204                                         
205                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
206                                         databaseRight->addSequence(rightTemp);  
207                                 }
208                                 databaseRight->generateDB();
209                                 
210                         }else { 
211                                 databaseRight->readKmerDB(kmerFileTestRight);   
212                         }
213                         kmerFileTestRight.close();
214                         
215                         databaseRight->setNumSeqs(templateSeqs.size());
216                 #endif  
217                 }else if (searchMethod == "blast") {
218                 
219                         //generate blastdb
220                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
221
222                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
223                         databaseLeft->generateDB();
224                         databaseLeft->setNumSeqs(templateSeqs.size());
225                 }
226                 
227                 return 0;
228
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "ChimeraSlayer", "doprep");
232                 exit(1);
233         }
234 }
235 //***************************************************************************************************************
236 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
237         try {
238                 
239                 //when template=self, the query file is sorted from most abundance to least abundant
240                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
241                 vector<Sequence*> userTemplate;
242                 
243                 int myAbund = priority[q->getName()];
244                 
245                 for (int i = 0; i < templateSeqs.size(); i++) {
246                         
247                         if (m->control_pressed) { return userTemplate; } 
248                         
249                         //have I reached a sequence with the same abundance as myself?
250                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
251                         
252                         //if its am not chimeric add it
253                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
254                                 userTemplate.push_back(templateSeqs[i]); 
255                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
256                         }
257                 }
258                 
259                 string  kmerDBNameLeft;
260                 string  kmerDBNameRight;
261                 
262                 //generate the kmerdb to pass to maligner
263                 if (searchMethod == "kmer") { 
264                         string templatePath = m->hasPath(templateFileName);
265                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
266                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
267                         
268                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
269                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
270 #ifdef USE_MPI
271                         for (int i = 0; i < userTemplate.size(); i++) {
272                                 
273                                 if (m->control_pressed) { return userTemplate; } 
274                                 
275                                 string leftFrag = userTemplate[i]->getUnaligned();
276                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
277                                 
278                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
279                                 databaseLeft->addSequence(leftTemp);    
280                         }
281                         databaseLeft->generateDB();
282                         databaseLeft->setNumSeqs(userTemplate.size());
283                         
284                         for (int i = 0; i < userTemplate.size(); i++) {
285                                 if (m->control_pressed) { return userTemplate; } 
286                                 
287                                 string rightFrag = userTemplate[i]->getUnaligned();
288                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
289                                 
290                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
291                                 databaseRight->addSequence(rightTemp);  
292                         }
293                         databaseRight->generateDB();
294                         databaseRight->setNumSeqs(userTemplate.size());
295                         
296 #else   
297                         
298                         
299                         for (int i = 0; i < userTemplate.size(); i++) {
300                                 
301                                 if (m->control_pressed) { return userTemplate; } 
302                                 
303                                 string leftFrag = userTemplate[i]->getUnaligned();
304                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
305                                 
306                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
307                                 databaseLeft->addSequence(leftTemp);    
308                         }
309                         databaseLeft->generateDB();
310                         databaseLeft->setNumSeqs(userTemplate.size());
311                                 
312                         for (int i = 0; i < userTemplate.size(); i++) {
313                                 if (m->control_pressed) { return userTemplate; }  
314                                         
315                                 string rightFrag = userTemplate[i]->getUnaligned();
316                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
317                                         
318                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
319                                 databaseRight->addSequence(rightTemp);  
320                         }
321                         databaseRight->generateDB();
322                         databaseRight->setNumSeqs(userTemplate.size());
323 #endif  
324                 }else if (searchMethod == "blast") {
325                         
326                         //generate blastdb
327                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
328
329                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
330                         databaseLeft->generateDB();
331                         databaseLeft->setNumSeqs(userTemplate.size());
332                 }
333                 
334                 return userTemplate;
335                 
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
339                 exit(1);
340         }
341 }
342
343 //***************************************************************************************************************
344 ChimeraSlayer::~ChimeraSlayer() {       
345         delete decalc;  
346         if (templateFileName != "self") {
347                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
348                 else if (searchMethod == "blast") {  delete databaseLeft; }
349         }
350 }
351 //***************************************************************************************************************
352 void ChimeraSlayer::printHeader(ostream& out) {
353         m->mothurOutEndLine();
354         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
355         m->mothurOutEndLine();
356         
357         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
358 }
359 //***************************************************************************************************************
360 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
361         try {
362                 Sequence* trim = NULL;
363                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
364                 
365                 if (chimeraFlags == "yes") {
366                         string chimeraFlag = "no";
367                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
368                            ||
369                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
370                         
371                         
372                         if (chimeraFlag == "yes") {     
373                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
374                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
375                                         outAcc << querySeq->getName() << endl;
376                                         
377                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
378                                         
379                                         if (trimChimera) {  
380                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
381                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
382                                                 
383                                                 string newAligned = trim->getAligned();
384
385                                                 if (lengthLeft > lengthRight) { //trim right
386                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
387                                                 }else { //trim left
388                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
389                                                 }
390                                                 trim->setAligned(newAligned);
391                                         }
392                                 }
393                         }
394                         
395                         printBlock(chimeraResults[0], chimeraFlag, out);
396                         out << endl;
397                 }else {  
398                         out << querySeq->getName() << "\tno" << endl; 
399                 }
400                 
401                 return trim;
402                 
403         }
404         catch(exception& e) {
405                 m->errorOut(e, "ChimeraSlayer", "print");
406                 exit(1);
407         }
408 }
409 //***************************************************************************************************************
410 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
411         try {
412                 Sequence* trim = NULL;
413                                 
414                 if (trimChimera) { 
415                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
416                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
417                 }
418                 
419                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
420                         
421                         string chimeraFlag = "no";
422                         if (leftPiece.flag == "yes") {
423                                 
424                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
425                                         ||
426                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
427                         }
428                         
429                         if (rightPiece.flag == "yes") {
430                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
431                                  ||
432                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
433                         }
434                         
435                         bool rightChimeric = false;
436                         bool leftChimeric = false;
437                         
438                         if (chimeraFlag == "yes") {     
439                                 //which peice is chimeric or are both
440                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
441                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
442                                 
443                                 if (rightChimeric || leftChimeric) {
444                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
445                                         outAcc << querySeq->getName() << endl;
446                                         
447                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
448                                         
449                                         if (trimChimera) {  
450                                                 string newAligned = trim->getAligned();
451                                                                                                 
452                                                 //right side is fine so keep that
453                                                 if ((leftChimeric) && (!rightChimeric)) {
454                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
455                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
456                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457                                                 }else { //both sides are chimeric, keep longest piece
458                                                         
459                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
460                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
461                                                         
462                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
463                                                         int length = lengthLeftLeft;
464                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
465                                                         
466                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
467                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
468                                                         
469                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
470                                                         if (lengthRightRight > length) { longest = 4; }
471                                                         
472                                                         if (longest == 1) { //leftleft
473                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
474                                                         }else if (longest == 2) { //leftright
475                                                                 //get rid of leftleft
476                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
477                                                                 //get rid of right
478                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
479                                                         }else if (longest == 3) { //rightleft
480                                                                 //get rid of left
481                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
482                                                                 //get rid of rightright
483                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
484                                                         }else { //rightright
485                                                                 //get rid of left
486                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
487                                                                 //get rid of rightleft
488                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
489                                                         }
490                                                 }
491                                                         
492                                                 trim->setAligned(newAligned);
493                                         }
494                                         
495                                 }
496                         }
497                         
498                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
499                         out << endl;
500                 }else {  
501                         out << querySeq->getName() << "\tno" << endl;  
502                 }
503                 
504                 return trim;
505                 
506         }
507         catch(exception& e) {
508                 m->errorOut(e, "ChimeraSlayer", "print");
509                 exit(1);
510         }
511 }
512
513 #ifdef USE_MPI
514 //***************************************************************************************************************
515 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
516         try {
517                 MPI_Status status;
518                 bool results = false;
519                 string outAccString = "";
520                 string outputString = "";
521                 
522                 Sequence* trim = NULL;
523                 
524                 if (trimChimera) { 
525                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
526                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
527                 }
528                 
529                 
530                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
531                         
532                         string chimeraFlag = "no";
533                         if (leftPiece.flag == "yes") {
534                                 
535                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
536                                    ||
537                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
538                         }
539                         
540                         if (rightPiece.flag == "yes") {
541                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
542                                         ||
543                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
544                         }
545                         
546                         bool rightChimeric = false;
547                         bool leftChimeric = false;
548                         
549                         if (chimeraFlag == "yes") {     
550                                 //which peice is chimeric or are both
551                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
552                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
553                                 
554                                 if (rightChimeric || leftChimeric) {
555 //                                      cout << querySeq->getName() <<  "\tyes" << endl;
556                                         outAccString += querySeq->getName() + "\n";
557                                         results = true;
558                                         
559                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
560                                         
561                                         //write to accnos file
562                                         int length = outAccString.length();
563                                         char* buf2 = new char[length];
564                                         memcpy(buf2, outAccString.c_str(), length);
565                                 
566                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
567                                         delete buf2;
568                                         
569                                         if (trimChimera) {  
570                                                 string newAligned = trim->getAligned();
571                                                 
572                                                 //right side is fine so keep that
573                                                 if ((leftChimeric) && (!rightChimeric)) {
574                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
575                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
576                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
577                                                 }else { //both sides are chimeric, keep longest piece
578                                                         
579                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
580                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
581                                                         
582                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
583                                                         int length = lengthLeftLeft;
584                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
585                                                         
586                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
587                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
588                                                         
589                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
590                                                         if (lengthRightRight > length) { longest = 4; }
591                                                         
592                                                         if (longest == 1) { //leftleft
593                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
594                                                         }else if (longest == 2) { //leftright
595                                                                 //get rid of leftleft
596                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
597                                                                 //get rid of right
598                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
599                                                         }else if (longest == 3) { //rightleft
600                                                                 //get rid of left
601                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
602                                                                 //get rid of rightright
603                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
604                                                         }else { //rightright
605                                                                 //get rid of left
606                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
607                                                                 //get rid of rightleft
608                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
609                                                         }
610                                                 }
611                                                 
612                                                 trim->setAligned(newAligned);
613                                         }
614                                         
615                                 }
616                         }
617                         
618                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
619                         outputString += "\n";
620                 
621                         //write to output file
622                         int length = outputString.length();
623                         char* buf = new char[length];
624                         memcpy(buf, outputString.c_str(), length);
625                                 
626                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
627                         delete buf;
628
629                 }else {  
630                         outputString += querySeq->getName() + "\tno\n";  
631         
632                         //write to output file
633                         int length = outputString.length();
634                         char* buf = new char[length];
635                         memcpy(buf, outputString.c_str(), length);
636                                 
637                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
638                         delete buf;
639                 }
640                 
641                 
642                 return trim;
643         }
644         catch(exception& e) {
645                 m->errorOut(e, "ChimeraSlayer", "print");
646                 exit(1);
647         }
648 }
649 //***************************************************************************************************************
650 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
651         try {
652                 MPI_Status status;
653                 bool results = false;
654                 string outAccString = "";
655                 string outputString = "";
656                 
657                 Sequence* trim = NULL;
658                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
659                 
660                 if (chimeraFlags == "yes") {
661                         string chimeraFlag = "no";
662                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
663                            ||
664                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
665                         
666                         
667                         if (chimeraFlag == "yes") {     
668                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
669                                         cout << querySeq->getName() <<  "\tyes" << endl;
670                                         outAccString += querySeq->getName() + "\n";
671                                         results = true;
672                                         
673                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
674                                         
675                                         //write to accnos file
676                                         int length = outAccString.length();
677                                         char* buf2 = new char[length];
678                                         memcpy(buf2, outAccString.c_str(), length);
679                                         
680                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
681                                         delete buf2;
682                                         
683                                         if (trimChimera) {  
684                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
685                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
686                                                 
687                                                 string newAligned = trim->getAligned();
688                                                 if (lengthLeft > lengthRight) { //trim right
689                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
690                                                 }else { //trim left
691                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
692                                                 }
693                                                 trim->setAligned(newAligned);   
694                                         }
695                                 }
696                         }
697                         
698                         outputString = getBlock(chimeraResults[0], chimeraFlag);
699                         outputString += "\n";
700                         
701                         //write to output file
702                         int length = outputString.length();
703                         char* buf = new char[length];
704                         memcpy(buf, outputString.c_str(), length);
705                         
706                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
707                         delete buf;
708                         
709                 }else {  
710                         outputString += querySeq->getName() + "\tno\n";  
711                         
712                         //write to output file
713                         int length = outputString.length();
714                         char* buf = new char[length];
715                         memcpy(buf, outputString.c_str(), length);
716                         
717                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
718                         delete buf;
719                 }
720                 
721                 return trim;
722         }
723         catch(exception& e) {
724                 m->errorOut(e, "ChimeraSlayer", "print");
725                 exit(1);
726         }
727 }
728 #endif
729
730 //***************************************************************************************************************
731 int ChimeraSlayer::getChimeras(Sequence* query) {
732         try {
733                 
734                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
735                 printResults.trimQuery = trimQuery; 
736                 
737                 chimeraFlags = "no";
738                 printResults.flag = "no";
739                 
740                 querySeq = query;
741                 
742                 //you must create a template
743                 vector<Sequence*> thisTemplate;
744                 vector<Sequence*> thisFilteredTemplate;
745                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
746                 else {  thisTemplate = getTemplate(query, thisFilteredTemplate);  } //fills this template and creates the databases
747                 
748                 if (m->control_pressed) {  return 0;  }
749                 
750                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
751                 
752                 //moved this out of maligner - 4/29/11
753                 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
754                 
755                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
756                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
757                 
758                 if (templateFileName == "self") {
759                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
760                         else if (searchMethod == "blast") {  delete databaseLeft; }
761                 }
762         
763                 if (m->control_pressed) {  return 0;  }
764
765                 string chimeraFlag = maligner.getResults(query, decalc);
766
767                 if (m->control_pressed) {  return 0;  }
768                 
769                 vector<results> Results = maligner.getOutput();
770                 
771                 for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
772                 
773                 if (chimeraFlag == "yes") {
774
775                         if (realign) {
776                                 vector<string> parents;
777                                 for (int i = 0; i < Results.size(); i++) {
778                                         parents.push_back(Results[i].parentAligned);
779                                 }
780                                 
781                                 ChimeraReAligner realigner;             
782                                 realigner.reAlign(query, parents);
783
784                         }
785
786                         //get sequence that were given from maligner results
787                         vector<SeqDist> seqs;
788                         map<string, float> removeDups;
789                         map<string, float>::iterator itDup;
790                         map<string, string> parentNameSeq;
791                         map<string, string>::iterator itSeq;
792                         for (int j = 0; j < Results.size(); j++) {
793                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
794                                 //only add if you are not a duplicate
795                                 itDup = removeDups.find(Results[j].parent);
796                                 if (itDup == removeDups.end()) { //this is not duplicate
797                                         removeDups[Results[j].parent] = dist;
798                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
799                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
800                                         removeDups[Results[j].parent] = dist;
801                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
802                                 }
803                         }
804                         
805                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
806                                 itSeq = parentNameSeq.find(itDup->first);
807                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
808                                 
809                                 SeqDist member;
810                                 member.seq = seq;
811                                 member.dist = itDup->second;
812                                 
813                                 seqs.push_back(member);
814                         }
815                         
816                         //limit number of parents to explore - default 3
817                         if (Results.size() > parents) {
818                                 //sort by distance
819                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
820                                 //prioritize larger more similiar sequence fragments
821                                 reverse(seqs.begin(), seqs.end());
822                                 
823                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
824                                         delete seqs[k].seq;
825                                         seqs.pop_back();        
826                                 }
827                         }
828                 
829                         //put seqs into vector to send to slayer
830                         
831 //                      cout << query->getAligned() << endl;
832                         vector<Sequence*> seqsForSlayer;
833                         for (int k = 0; k < seqs.size(); k++) {  
834 //                              cout << seqs[k].seq->getAligned() << endl;
835                                 seqsForSlayer.push_back(seqs[k].seq);   
836                         
837                         }
838                         
839                         if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
840
841                         //send to slayer
842                         chimeraFlags = slayer.getResults(query, seqsForSlayer);
843                         if (m->control_pressed) {  return 0;  }
844                         chimeraResults = slayer.getOutput();
845                         
846                         printResults.flag = chimeraFlags;
847                         printResults.results = chimeraResults;
848                         
849                         //free memory
850                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
851                 }
852                 //cout << endl << endl;
853                 return 0;
854         }
855         catch(exception& e) {
856                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
857                 exit(1);
858         }
859 }
860 //***************************************************************************************************************
861 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
862         try {
863                 out << querySeq->getName() << '\t';
864                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
865         
866                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
867                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
868                 
869                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
870                 
871         }
872         catch(exception& e) {
873                 m->errorOut(e, "ChimeraSlayer", "printBlock");
874                 exit(1);
875         }
876 }
877 //***************************************************************************************************************
878 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
879         try {
880                 
881                 if ((leftChimeric) && (!rightChimeric)) { //print left
882                         out << querySeq->getName() << '\t';
883                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
884                         
885                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
886                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
887                 
888                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
889                 
890                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
891                         out << querySeq->getName() << '\t';
892                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
893                         
894                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
895                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
896                         
897                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
898                         
899                 }else  { //print both results
900                         if (leftdata.flag == "yes") {
901                                 out << querySeq->getName() + "_LEFT" << '\t';
902                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
903                                 
904                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
905                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
906                                 
907                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
908                         }
909                         
910                         if (rightdata.flag == "yes") {
911                                 if (leftdata.flag == "yes") { out << endl; }
912                                 
913                                 out << querySeq->getName() + "_RIGHT"<< '\t';
914                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
915                                 
916                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
917                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
918                                 
919                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
920                 
921                         }
922                 }
923         }
924         catch(exception& e) {
925                 m->errorOut(e, "ChimeraSlayer", "printBlock");
926                 exit(1);
927         }
928 }
929 //***************************************************************************************************************
930 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
931         try {
932                 
933                 string out = "";
934                 
935                 if ((leftChimeric) && (!rightChimeric)) { //get left
936                         out += querySeq->getName() + "\t";
937                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
938                         
939                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
940                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
941                         
942                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
943                         
944                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
945                         out += querySeq->getName() + "\t";
946                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
947                         
948                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
949                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
950                         
951                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
952                         
953                 }else  { //print both results
954                         
955                         if (leftdata.flag == "yes") {
956                                 out += querySeq->getName() + "_LEFT\t";
957                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
958                                 
959                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
960                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
961                                 
962                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
963                         }
964                         
965                         if (rightdata.flag == "yes") {
966                                 if (leftdata.flag == "yes") { out += "\n"; }
967                                 out +=  querySeq->getName() + "_RIGHT\t";
968                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
969                                 
970                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
971                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
972                                 
973                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
974                         }
975                 }
976                 
977                 return out;
978                 
979         }
980         catch(exception& e) {
981                 m->errorOut(e, "ChimeraSlayer", "getBlock");
982                 exit(1);
983         }
984 }
985 //***************************************************************************************************************
986 string ChimeraSlayer::getBlock(data_struct data, string flag){
987         try {
988                 
989                 string outputString = "";
990                 
991                 outputString += querySeq->getName() + "\t";
992                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
993                         
994                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
995                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
996                 
997                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
998                 
999                 return outputString;
1000         }
1001         catch(exception& e) {
1002                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1003                 exit(1);
1004         }
1005 }
1006 //***************************************************************************************************************
1007 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1008         try {
1009                 
1010                 vector<Sequence*> refSeqs;
1011                 
1012                 if (searchMethod == "distance") {
1013                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1014                         Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1015                         runFilter(newSeq);
1016                         refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1017                         delete newSeq;
1018                 }else if (searchMethod == "blast")  {
1019                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1020                 }else if (searchMethod == "kmer") {
1021                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1022                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1023                 
1024                 return refSeqs;
1025         }
1026         catch(exception& e) {
1027                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1028                 exit(1);
1029         }
1030 }
1031 //***************************************************************************************************************/
1032 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1033         try {   
1034                 
1035                 vector<Sequence*> refResults;
1036                 
1037                 //get parts of query
1038                 string queryUnAligned = q->getUnaligned();
1039                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1040                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1041 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1042                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1043                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1044                 
1045                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1046                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1047                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1048                 vector<int> smaller;
1049                 vector<int> larger;
1050                 
1051                 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1052                 else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1053                 
1054                 //merge results         
1055                 map<int, int> seen;
1056                 map<int, int>::iterator it;
1057                 vector<int> mergedResults;
1058                 for (int i = 0; i < smaller.size(); i++) {
1059                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1060         
1061                         //add left if you havent already
1062                         it = seen.find(smaller[i]);
1063                         if (it == seen.end()) {  
1064                                 mergedResults.push_back(smaller[i]);
1065                                 seen[smaller[i]] = smaller[i];
1066                         }
1067                         
1068                         //add right if you havent already
1069                         it = seen.find(larger[i]);
1070                         if (it == seen.end()) {  
1071                                 mergedResults.push_back(larger[i]);
1072                                 seen[larger[i]] = larger[i];
1073                         }
1074                 }
1075                 
1076                 for (int i = smaller.size(); i < larger.size(); i++) {
1077                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1078                         
1079                         //add right if you havent already
1080                         it = seen.find(larger[i]);
1081                         if (it == seen.end()) {  
1082                                 mergedResults.push_back(larger[i]);
1083                                 seen[larger[i]] = larger[i];
1084                         }
1085                 }
1086
1087                 for (int i = 0; i < mergedResults.size(); i++) {
1088                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1089                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1090                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1091                                 refResults.push_back(temp);
1092                                 
1093                         }
1094                 }
1095                 
1096                 
1097 //              for(int i=0;i<refResults.size();i++){
1098 //                      cout << refResults[i]->getName() << endl;
1099 //              }
1100                         
1101                 delete queryRight;
1102                 delete queryLeft;
1103                 
1104                 return refResults;
1105         }
1106         catch(exception& e) {
1107                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1108                 exit(1);
1109         }
1110 }
1111 //***************************************************************************************************************
1112 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1113         try {   
1114                 vector<Sequence*> refResults;
1115                 
1116                 //get parts of query
1117                 string queryUnAligned = q->getUnaligned();
1118                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1119                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1120                 
1121                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1122                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1123                 
1124                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1125                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1126                 
1127                 //merge results         
1128                 map<int, int> seen;
1129                 map<int, int>::iterator it;
1130                         vector<int> mergedResults;
1131                 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1132                         
1133                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1134                         
1135                         //add left if you havent already
1136                         it = seen.find(tempIndexesLeft[i]);
1137                         if (it == seen.end()) {  
1138                                 mergedResults.push_back(tempIndexesLeft[i]);
1139                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1140                         }
1141                         
1142                         //add right if you havent already
1143                         it = seen.find(tempIndexesRight[i]);
1144                         if (it == seen.end()) {  
1145                                 mergedResults.push_back(tempIndexesRight[i]);
1146                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1147                         }
1148                 }
1149                 
1150                 //numWanted = mergedResults.size();
1151                         
1152                 //cout << q->getName() << endl;         
1153                 
1154                 for (int i = 0; i < mergedResults.size(); i++) {
1155                         //cout << db[mergedResults[i]]->getName() << endl;      
1156                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1157                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1158                                 refResults.push_back(temp);
1159                         }
1160                 }
1161                 //cout << endl;         
1162                 delete queryRight;
1163                 delete queryLeft;
1164                 
1165                 return refResults;
1166         }
1167         catch(exception& e) {
1168                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1169                 exit(1);
1170         }
1171 }
1172 //***************************************************************************************************************
1173
1174