]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
efd29d64035e390c7b48aba87f3b93fdd8c1eb68
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37         
38                 decalc = new DeCalculator();    
39                 
40                 doPrep();
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
44                 exit(1);
45         }
46 }
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
49                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
50         try {
51                 fastafile = file; templateSeqs = readSeqs(fastafile);
52                 templateFileName = temp; 
53                 searchMethod = mode;
54                 kmerSize = k;
55                 match = ms;
56                 misMatch = mms;
57                 window = win;
58                 divR = div;
59                 minSim = minsim;
60                 minCov = mincov;
61                 minBS = minbs;
62                 minSNP = minsnp;
63                 parents = par;
64                 iters = it;
65                 increment = inc;
66                 numWanted = numw;
67                 realign = r; 
68                 trimChimera = trim;
69                 priority = prior;
70                 
71                 decalc = new DeCalculator();    
72                 
73                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74                 
75                 if (searchMethod == "distance") { 
76                         createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
77                         
78                         //run filter on template copying templateSeqs into filteredTemplateSeqs
79                         for (int i = 0; i < templateSeqs.size(); i++) {  
80                                 if (m->control_pressed) {  break; }
81                                 
82                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
83                                 runFilter(newSeq);  
84                                 filteredTemplateSeqs.push_back(newSeq);
85                         }
86                 }
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
90                 exit(1);
91         }
92 }
93 //***************************************************************************************************************
94 int ChimeraSlayer::doPrep() {
95         try {
96                 if (searchMethod == "distance") { 
97                         //read in all query seqs
98                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
99                 
100                         vector<Sequence*> temp = templateSeqs;
101                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
102                 
103                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
104                 
105                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
106                 
107                         if (m->control_pressed) {  return 0; } 
108                 
109                         //run filter on template copying templateSeqs into filteredTemplateSeqs
110                         for (int i = 0; i < templateSeqs.size(); i++) {  
111                                 if (m->control_pressed) {  return 0; }
112                                 
113                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
114                                 runFilter(newSeq);  
115                                 filteredTemplateSeqs.push_back(newSeq);
116                         }
117                 }
118                 string  kmerDBNameLeft;
119                 string  kmerDBNameRight;
120         
121                 //generate the kmerdb to pass to maligner
122                 if (searchMethod == "kmer") { 
123                         string templatePath = m->hasPath(templateFileName);
124                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
125                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
126                                 
127                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
128                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
129                 #ifdef USE_MPI
130                         for (int i = 0; i < templateSeqs.size(); i++) {
131                                         
132                                 if (m->control_pressed) { return 0; } 
133                                         
134                                 string leftFrag = templateSeqs[i]->getUnaligned();
135                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
136                                         
137                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
138                                 databaseLeft->addSequence(leftTemp);    
139                         }
140                         databaseLeft->generateDB();
141                         databaseLeft->setNumSeqs(templateSeqs.size());
142                         
143                         for (int i = 0; i < templateSeqs.size(); i++) {
144                                 if (m->control_pressed) { return 0; } 
145                                         
146                                 string rightFrag = templateSeqs[i]->getUnaligned();
147                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
148                                         
149                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
150                                 databaseRight->addSequence(rightTemp);  
151                         }
152                         databaseRight->generateDB();
153                         databaseRight->setNumSeqs(templateSeqs.size());
154
155                 #else   
156                         //leftside
157                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
158                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
159                         bool needToGenerateLeft = true;
160                         
161                         if(kmerFileTestLeft){   
162                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
163                                 if (GoodFile) {  needToGenerateLeft = false;    }
164                         }
165                         
166                         if(needToGenerateLeft){ 
167                         
168                                 for (int i = 0; i < templateSeqs.size(); i++) {
169                                         
170                                         if (m->control_pressed) { return 0; } 
171                                         
172                                         string leftFrag = templateSeqs[i]->getUnaligned();
173                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
174                                         
175                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
176                                         databaseLeft->addSequence(leftTemp);    
177                                 }
178                                 databaseLeft->generateDB();
179                                 
180                         }else { 
181                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
182                         }
183                         kmerFileTestLeft.close();
184                         
185                         databaseLeft->setNumSeqs(templateSeqs.size());
186                         
187                         //rightside
188                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
189                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
190                         bool needToGenerateRight = true;
191                         
192                         if(kmerFileTestRight){  
193                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
194                                 if (GoodFile) {  needToGenerateRight = false;   }
195                         }
196                         
197                         if(needToGenerateRight){        
198                         
199                                 for (int i = 0; i < templateSeqs.size(); i++) {
200                                         if (m->control_pressed) { return 0; } 
201                                         
202                                         string rightFrag = templateSeqs[i]->getUnaligned();
203                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
204                                         
205                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
206                                         databaseRight->addSequence(rightTemp);  
207                                 }
208                                 databaseRight->generateDB();
209                                 
210                         }else { 
211                                 databaseRight->readKmerDB(kmerFileTestRight);   
212                         }
213                         kmerFileTestRight.close();
214                         
215                         databaseRight->setNumSeqs(templateSeqs.size());
216                 #endif  
217                 }else if (searchMethod == "blast") {
218                 
219                         //generate blastdb
220                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
221
222                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
223                         databaseLeft->generateDB();
224                         databaseLeft->setNumSeqs(templateSeqs.size());
225                 }
226                 
227                 return 0;
228
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "ChimeraSlayer", "doprep");
232                 exit(1);
233         }
234 }
235 //***************************************************************************************************************
236 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
237         try {
238                 
239                 //when template=self, the query file is sorted from most abundance to least abundant
240                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
241                 vector<Sequence*> userTemplate;
242                 
243                 int myAbund = priority[q->getName()];
244                 
245                 for (int i = 0; i < templateSeqs.size(); i++) {
246                         
247                         if (m->control_pressed) { return userTemplate; } 
248                         
249                         //have I reached a sequence with the same abundance as myself?
250                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
251                         
252                         //if its am not chimeric add it
253                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
254                                 userTemplate.push_back(templateSeqs[i]); 
255                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
256                         }
257                 }
258                 
259                 string  kmerDBNameLeft;
260                 string  kmerDBNameRight;
261                 
262                 //generate the kmerdb to pass to maligner
263                 if (searchMethod == "kmer") { 
264                         string templatePath = m->hasPath(templateFileName);
265                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
266                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
267                         
268                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
269                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
270 #ifdef USE_MPI
271                         for (int i = 0; i < userTemplate.size(); i++) {
272                                 
273                                 if (m->control_pressed) { return userTemplate; } 
274                                 
275                                 string leftFrag = userTemplate[i]->getUnaligned();
276                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
277                                 
278                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
279                                 databaseLeft->addSequence(leftTemp);    
280                         }
281                         databaseLeft->generateDB();
282                         databaseLeft->setNumSeqs(userTemplate.size());
283                         
284                         for (int i = 0; i < userTemplate.size(); i++) {
285                                 if (m->control_pressed) { return userTemplate; } 
286                                 
287                                 string rightFrag = userTemplate[i]->getUnaligned();
288                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
289                                 
290                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
291                                 databaseRight->addSequence(rightTemp);  
292                         }
293                         databaseRight->generateDB();
294                         databaseRight->setNumSeqs(userTemplate.size());
295                         
296 #else   
297                         
298                         
299                         for (int i = 0; i < userTemplate.size(); i++) {
300                                 
301                                 if (m->control_pressed) { return userTemplate; } 
302                                 
303                                 string leftFrag = userTemplate[i]->getUnaligned();
304                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
305                                 
306                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
307                                 databaseLeft->addSequence(leftTemp);    
308                         }
309                         databaseLeft->generateDB();
310                         databaseLeft->setNumSeqs(userTemplate.size());
311                                 
312                         for (int i = 0; i < userTemplate.size(); i++) {
313                                 if (m->control_pressed) { return userTemplate; }  
314                                         
315                                 string rightFrag = userTemplate[i]->getUnaligned();
316                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
317                                         
318                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
319                                 databaseRight->addSequence(rightTemp);  
320                         }
321                         databaseRight->generateDB();
322                         databaseRight->setNumSeqs(userTemplate.size());
323 #endif  
324                 }else if (searchMethod == "blast") {
325                         
326                         //generate blastdb
327                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
328
329                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
330                         databaseLeft->generateDB();
331                         databaseLeft->setNumSeqs(userTemplate.size());
332                 }
333                 
334                 return userTemplate;
335                 
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
339                 exit(1);
340         }
341 }
342
343 //***************************************************************************************************************
344 ChimeraSlayer::~ChimeraSlayer() {       
345         delete decalc;  
346         if (templateFileName != "self") {
347                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
348                 else if (searchMethod == "blast") {  delete databaseLeft; }
349         }
350 }
351 //***************************************************************************************************************
352 void ChimeraSlayer::printHeader(ostream& out) {
353         m->mothurOutEndLine();
354         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
355         m->mothurOutEndLine();
356         
357         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
358 }
359 //***************************************************************************************************************
360 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
361         try {
362                 Sequence* trim = NULL;
363                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
364                 
365                 if (chimeraFlags == "yes") {
366                         string chimeraFlag = "no";
367                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
368                            ||
369                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
370                         
371                         
372                         if (chimeraFlag == "yes") {     
373                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
374                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
375                                         outAcc << querySeq->getName() << endl;
376                                         
377                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
378                                         
379                                         if (trimChimera) {  
380                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
381                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
382                                                 
383                                                 string newAligned = trim->getAligned();
384
385                                                 if (lengthLeft > lengthRight) { //trim right
386                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
387                                                 }else { //trim left
388                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
389                                                 }
390                                                 trim->setAligned(newAligned);
391                                         }
392                                 }
393                         }
394                         
395                         printBlock(chimeraResults[0], chimeraFlag, out);
396                         out << endl;
397                 }else {  
398                         out << querySeq->getName() << "\tno" << endl; 
399                 }
400                 
401                 return trim;
402                 
403         }
404         catch(exception& e) {
405                 m->errorOut(e, "ChimeraSlayer", "print");
406                 exit(1);
407         }
408 }
409 //***************************************************************************************************************
410 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
411         try {
412                 Sequence* trim = NULL;
413                                 
414                 if (trimChimera) { 
415                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
416                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
417                 }
418                 
419                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
420                         
421                         string chimeraFlag = "no";
422                         if (leftPiece.flag == "yes") {
423                                 
424                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
425                                         ||
426                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
427                         }
428                         
429                         if (rightPiece.flag == "yes") {
430                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
431                                  ||
432                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
433                         }
434                         
435                         bool rightChimeric = false;
436                         bool leftChimeric = false;
437                         
438                         if (chimeraFlag == "yes") {     
439                                 //which peice is chimeric or are both
440                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
441                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
442                                 
443                                 if (rightChimeric || leftChimeric) {
444                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
445                                         outAcc << querySeq->getName() << endl;
446                                         
447                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
448                                         
449                                         if (trimChimera) {  
450                                                 string newAligned = trim->getAligned();
451                                                                                                 
452                                                 //right side is fine so keep that
453                                                 if ((leftChimeric) && (!rightChimeric)) {
454                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
455                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
456                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457                                                 }else { //both sides are chimeric, keep longest piece
458                                                         
459                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
460                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
461                                                         
462                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
463                                                         int length = lengthLeftLeft;
464                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
465                                                         
466                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
467                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
468                                                         
469                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
470                                                         if (lengthRightRight > length) { longest = 4; }
471                                                         
472                                                         if (longest == 1) { //leftleft
473                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
474                                                         }else if (longest == 2) { //leftright
475                                                                 //get rid of leftleft
476                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
477                                                                 //get rid of right
478                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
479                                                         }else if (longest == 3) { //rightleft
480                                                                 //get rid of left
481                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
482                                                                 //get rid of rightright
483                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
484                                                         }else { //rightright
485                                                                 //get rid of left
486                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
487                                                                 //get rid of rightleft
488                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
489                                                         }
490                                                 }
491                                                         
492                                                 trim->setAligned(newAligned);
493                                         }
494                                         
495                                 }
496                         }
497                         
498                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
499                         out << endl;
500                 }else {  
501                         out << querySeq->getName() << "\tno" << endl;  
502                 }
503                 
504                 return trim;
505                 
506         }
507         catch(exception& e) {
508                 m->errorOut(e, "ChimeraSlayer", "print");
509                 exit(1);
510         }
511 }
512
513 #ifdef USE_MPI
514 //***************************************************************************************************************
515 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
516         try {
517                 MPI_Status status;
518                 bool results = false;
519                 string outAccString = "";
520                 string outputString = "";
521                 
522                 Sequence* trim = NULL;
523                 
524                 if (trimChimera) { 
525                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
526                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
527                 }
528                 
529                 
530                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
531                         
532                         string chimeraFlag = "no";
533                         if (leftPiece.flag == "yes") {
534                                 
535                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
536                                    ||
537                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
538                         }
539                         
540                         if (rightPiece.flag == "yes") {
541                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
542                                         ||
543                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
544                         }
545                         
546                         bool rightChimeric = false;
547                         bool leftChimeric = false;
548
549                         cout << endl;
550                         
551                         if (chimeraFlag == "yes") {     
552                                 //which peice is chimeric or are both
553                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
554                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
555                                 
556                                 if (rightChimeric || leftChimeric) {
557                                         cout << querySeq->getName() <<  "\tyes" << endl;
558                                         outAccString += querySeq->getName() + "\n";
559                                         results = true;
560                                         
561                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
562                                         
563                                         //write to accnos file
564                                         int length = outAccString.length();
565                                         char* buf2 = new char[length];
566                                         memcpy(buf2, outAccString.c_str(), length);
567                                 
568                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
569                                         delete buf2;
570                                         
571                                         if (trimChimera) {  
572                                                 string newAligned = trim->getAligned();
573                                                 
574                                                 //right side is fine so keep that
575                                                 if ((leftChimeric) && (!rightChimeric)) {
576                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
577                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
578                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
579                                                 }else { //both sides are chimeric, keep longest piece
580                                                         
581                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
582                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
583                                                         
584                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
585                                                         int length = lengthLeftLeft;
586                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
587                                                         
588                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
589                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
590                                                         
591                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
592                                                         if (lengthRightRight > length) { longest = 4; }
593                                                         
594                                                         if (longest == 1) { //leftleft
595                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
596                                                         }else if (longest == 2) { //leftright
597                                                                 //get rid of leftleft
598                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
599                                                                 //get rid of right
600                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
601                                                         }else if (longest == 3) { //rightleft
602                                                                 //get rid of left
603                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
604                                                                 //get rid of rightright
605                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
606                                                         }else { //rightright
607                                                                 //get rid of left
608                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
609                                                                 //get rid of rightleft
610                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
611                                                         }
612                                                 }
613                                                 
614                                                 trim->setAligned(newAligned);
615                                         }
616                                         
617                                 }
618                         }
619                         
620                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
621                         outputString += "\n";
622                 
623                         //write to output file
624                         int length = outputString.length();
625                         char* buf = new char[length];
626                         memcpy(buf, outputString.c_str(), length);
627                                 
628                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
629                         delete buf;
630
631                 }else {  
632                         outputString += querySeq->getName() + "\tno\n";  
633         
634                         //write to output file
635                         int length = outputString.length();
636                         char* buf = new char[length];
637                         memcpy(buf, outputString.c_str(), length);
638                                 
639                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
640                         delete buf;
641                 }
642                 
643                 
644                 return trim;
645         }
646         catch(exception& e) {
647                 m->errorOut(e, "ChimeraSlayer", "print");
648                 exit(1);
649         }
650 }
651 //***************************************************************************************************************
652 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
653         try {
654                 MPI_Status status;
655                 bool results = false;
656                 string outAccString = "";
657                 string outputString = "";
658                 
659                 Sequence* trim = NULL;
660                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
661                 
662                 if (chimeraFlags == "yes") {
663                         string chimeraFlag = "no";
664                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
665                            ||
666                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
667                         
668                         
669                         if (chimeraFlag == "yes") {     
670                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
671                                         cout << querySeq->getName() <<  "\tyes" << endl;
672                                         outAccString += querySeq->getName() + "\n";
673                                         results = true;
674                                         
675                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
676                                         
677                                         //write to accnos file
678                                         int length = outAccString.length();
679                                         char* buf2 = new char[length];
680                                         memcpy(buf2, outAccString.c_str(), length);
681                                         
682                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
683                                         delete buf2;
684                                         
685                                         if (trimChimera) {  
686                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
687                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
688                                                 
689                                                 string newAligned = trim->getAligned();
690                                                 if (lengthLeft > lengthRight) { //trim right
691                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
692                                                 }else { //trim left
693                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
694                                                 }
695                                                 trim->setAligned(newAligned);   
696                                         }
697                                 }
698                         }
699                         
700                         outputString = getBlock(chimeraResults[0], chimeraFlag);
701                         outputString += "\n";
702                         
703                         //write to output file
704                         int length = outputString.length();
705                         char* buf = new char[length];
706                         memcpy(buf, outputString.c_str(), length);
707                         
708                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
709                         delete buf;
710                         
711                 }else {  
712                         outputString += querySeq->getName() + "\tno\n";  
713                         
714                         //write to output file
715                         int length = outputString.length();
716                         char* buf = new char[length];
717                         memcpy(buf, outputString.c_str(), length);
718                         
719                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
720                         delete buf;
721                 }
722                 
723                 return trim;
724         }
725         catch(exception& e) {
726                 m->errorOut(e, "ChimeraSlayer", "print");
727                 exit(1);
728         }
729 }
730 #endif
731
732 //***************************************************************************************************************
733 int ChimeraSlayer::getChimeras(Sequence* query) {
734         try {
735                 
736                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
737                 printResults.trimQuery = trimQuery; 
738                 
739                 chimeraFlags = "no";
740                 printResults.flag = "no";
741                 
742                 querySeq = query;
743                 
744                 //you must create a template
745                 vector<Sequence*> thisTemplate;
746                 vector<Sequence*> thisFilteredTemplate;
747                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
748                 else {  thisTemplate = getTemplate(query, thisFilteredTemplate);  } //fills this template and creates the databases
749                 
750                 if (m->control_pressed) {  return 0;  }
751                 
752                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
753                 
754                 //moved this out of maligner - 4/29/11
755                 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
756                 
757                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
758                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
759                 
760                 if (templateFileName == "self") {
761                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
762                         else if (searchMethod == "blast") {  delete databaseLeft; }
763                 }
764         
765                 if (m->control_pressed) {  return 0;  }
766
767                 string chimeraFlag = maligner.getResults(query, decalc);
768
769                 if (m->control_pressed) {  return 0;  }
770                 
771                 vector<results> Results = maligner.getOutput();
772                 
773                 for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
774                 
775                 if (chimeraFlag == "yes") {
776
777                         if (realign) {
778                                 vector<string> parents;
779                                 for (int i = 0; i < Results.size(); i++) {
780                                         parents.push_back(Results[i].parentAligned);
781                                 }
782                                 
783                                 ChimeraReAligner realigner;             
784                                 realigner.reAlign(query, parents);
785
786                         }
787                         
788                         //get sequence that were given from maligner results
789                         vector<SeqDist> seqs;
790                         map<string, float> removeDups;
791                         map<string, float>::iterator itDup;
792                         map<string, string> parentNameSeq;
793                         map<string, string>::iterator itSeq;
794                         for (int j = 0; j < Results.size(); j++) {
795                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
796                                 //only add if you are not a duplicate
797
798                                 if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
799                                 
800                                         itDup = removeDups.find(Results[j].parent);
801                                         if (itDup == removeDups.end()) { //this is not duplicate
802                                                 removeDups[Results[j].parent] = dist;
803                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
804                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
805                                                 removeDups[Results[j].parent] = dist;
806                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
807                                         }
808                                 
809                                 }
810                                 
811                         }
812                         
813                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
814                                 itSeq = parentNameSeq.find(itDup->first);
815                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
816                                 
817                                 SeqDist member;
818                                 member.seq = seq;
819                                 member.dist = itDup->second;
820                                 seqs.push_back(member);
821                         }
822                         
823                         //limit number of parents to explore - default 3
824                         if (Results.size() > parents) {
825                                 //sort by distance
826                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
827                                 //prioritize larger more similiar sequence fragments
828                                 reverse(seqs.begin(), seqs.end());
829                                 
830                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
831                                         delete seqs[k].seq;
832                                         seqs.pop_back();        
833                                 }
834                         }
835                 
836                         //put seqs into vector to send to slayer
837                         
838 //                      cout << query->getAligned() << endl;
839                         vector<Sequence*> seqsForSlayer;
840                         for (int k = 0; k < seqs.size(); k++) {  
841 //                              cout << seqs[k].seq->getAligned() << endl;
842                                 seqsForSlayer.push_back(seqs[k].seq);   
843                         
844                         }
845                         
846                         if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
847
848                         //send to slayer
849                         chimeraFlags = slayer.getResults(query, seqsForSlayer);
850                         if (m->control_pressed) {  return 0;  }
851                         chimeraResults = slayer.getOutput();
852                         
853                         printResults.flag = chimeraFlags;
854                         printResults.results = chimeraResults;
855                         
856                         //free memory
857                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
858                 }
859                 //cout << endl << endl;
860                 return 0;
861         }
862         catch(exception& e) {
863                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
864                 exit(1);
865         }
866 }
867 //***************************************************************************************************************
868 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
869         try {
870                 out << querySeq->getName() << '\t';
871                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
872         
873                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
874                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
875                 
876                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
877                 
878         }
879         catch(exception& e) {
880                 m->errorOut(e, "ChimeraSlayer", "printBlock");
881                 exit(1);
882         }
883 }
884 //***************************************************************************************************************
885 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
886         try {
887                 
888                 if ((leftChimeric) && (!rightChimeric)) { //print left
889                         out << querySeq->getName() << '\t';
890                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
891                         
892                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
893                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
894                 
895                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
896                 
897                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
898                         out << querySeq->getName() << '\t';
899                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
900                         
901                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
902                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
903                         
904                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
905                         
906                 }else  { //print both results
907                         if (leftdata.flag == "yes") {
908                                 out << querySeq->getName() + "_LEFT" << '\t';
909                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
910                                 
911                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
912                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
913                                 
914                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
915                         }
916                         
917                         if (rightdata.flag == "yes") {
918                                 if (leftdata.flag == "yes") { out << endl; }
919                                 
920                                 out << querySeq->getName() + "_RIGHT"<< '\t';
921                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
922                                 
923                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
924                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
925                                 
926                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
927                 
928                         }
929                 }
930         }
931         catch(exception& e) {
932                 m->errorOut(e, "ChimeraSlayer", "printBlock");
933                 exit(1);
934         }
935 }
936 //***************************************************************************************************************
937 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
938         try {
939                 
940                 string out = "";
941                 
942                 if ((leftChimeric) && (!rightChimeric)) { //get left
943                         out += querySeq->getName() + "\t";
944                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
945                         
946                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
947                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
948                         
949                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
950                         
951                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
952                         out += querySeq->getName() + "\t";
953                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
954                         
955                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
956                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
957                         
958                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
959                         
960                 }else  { //print both results
961                         
962                         if (leftdata.flag == "yes") {
963                                 out += querySeq->getName() + "_LEFT\t";
964                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
965                                 
966                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
967                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
968                                 
969                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
970                         }
971                         
972                         if (rightdata.flag == "yes") {
973                                 if (leftdata.flag == "yes") { out += "\n"; }
974                                 out +=  querySeq->getName() + "_RIGHT\t";
975                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
976                                 
977                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
978                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
979                                 
980                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
981                         }
982                 }
983                 
984                 return out;
985                 
986         }
987         catch(exception& e) {
988                 m->errorOut(e, "ChimeraSlayer", "getBlock");
989                 exit(1);
990         }
991 }
992 //***************************************************************************************************************
993 string ChimeraSlayer::getBlock(data_struct data, string flag){
994         try {
995                 
996                 string outputString = "";
997                 
998                 outputString += querySeq->getName() + "\t";
999                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
1000                         
1001                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1002                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1003                 
1004                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1005                 
1006                 return outputString;
1007         }
1008         catch(exception& e) {
1009                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1010                 exit(1);
1011         }
1012 }
1013 //***************************************************************************************************************
1014 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1015         try {
1016                 
1017                 vector<Sequence*> refSeqs;
1018                 
1019                 if (searchMethod == "distance") {
1020                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1021                         Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1022                         runFilter(newSeq);
1023                         refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1024                         delete newSeq;
1025                 }else if (searchMethod == "blast")  {
1026                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1027                 }else if (searchMethod == "kmer") {
1028                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1029                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1030                 
1031                 return refSeqs;
1032         }
1033         catch(exception& e) {
1034                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1035                 exit(1);
1036         }
1037 }
1038 //***************************************************************************************************************/
1039 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1040         try {   
1041                 
1042                 vector<Sequence*> refResults;
1043                 
1044                 //get parts of query
1045                 string queryUnAligned = q->getUnaligned();
1046                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1047                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1048 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1049                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1050                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1051                 
1052                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1053                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1054                                 
1055                 
1056                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1057 //              vector<int> smaller;
1058 //              vector<int> larger;
1059 //              
1060 //              if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1061 //              else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1062                 
1063                 //merge results         
1064                 map<int, int> seen;
1065                 map<int, int>::iterator it;
1066                 vector<int> mergedResults;
1067                 
1068                 int index = 0;
1069 //              for (int i = 0; i < smaller.size(); i++) {
1070                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1071                         
1072                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1073         
1074                         //add left if you havent already
1075                         it = seen.find(tempIndexesLeft[index]);
1076                         if (it == seen.end()) {  
1077                                 mergedResults.push_back(tempIndexesLeft[index]);
1078                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1079                         }
1080                         
1081                         //add right if you havent already
1082                         it = seen.find(tempIndexesRight[index]);
1083                         if (it == seen.end()) {  
1084                                 mergedResults.push_back(tempIndexesRight[index]);
1085                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1086                         }
1087                         index++;
1088                 }
1089
1090                 
1091                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1092                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1093                         
1094                         //add right if you havent already
1095                         it = seen.find(tempIndexesLeft[i]);
1096                         if (it == seen.end()) {  
1097                                 mergedResults.push_back(tempIndexesLeft[i]);
1098                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1099                         }
1100                 }
1101
1102                 for (int i = index; i < tempIndexesRight.size(); i++) {
1103                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1104                         
1105                         //add right if you havent already
1106                         it = seen.find(tempIndexesRight[i]);
1107                         if (it == seen.end()) {  
1108                                 mergedResults.push_back(tempIndexesRight[i]);
1109                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1110                         }
1111                 }
1112                 
1113                 
1114                 for (int i = 0; i < mergedResults.size(); i++) {
1115                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1116                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1117                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1118                                 refResults.push_back(temp);
1119                                 
1120                         }
1121                 }
1122                 
1123
1124                 delete queryRight;
1125                 delete queryLeft;
1126                 
1127                 return refResults;
1128         }
1129         catch(exception& e) {
1130                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1131                 exit(1);
1132         }
1133 }
1134 //***************************************************************************************************************
1135 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1136         try {   
1137                 vector<Sequence*> refResults;
1138                 
1139                 //get parts of query
1140                 string queryUnAligned = q->getUnaligned();
1141                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1142                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1143                 
1144                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1145                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1146                 
1147                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1148                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1149                 
1150                 //merge results         
1151                 map<int, int> seen;
1152                 map<int, int>::iterator it;
1153                 vector<int> mergedResults;
1154                 
1155                 int index = 0;
1156                 //              for (int i = 0; i < smaller.size(); i++) {
1157                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1158                         
1159                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1160                         
1161                         //add left if you havent already
1162                         it = seen.find(tempIndexesLeft[index]);
1163                         if (it == seen.end()) {  
1164                                 mergedResults.push_back(tempIndexesLeft[index]);
1165                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1166                         }
1167                         
1168                         //add right if you havent already
1169                         it = seen.find(tempIndexesRight[index]);
1170                         if (it == seen.end()) {  
1171                                 mergedResults.push_back(tempIndexesRight[index]);
1172                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1173                         }
1174                         index++;
1175                 }
1176                 
1177                 
1178                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1179                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1180                         
1181                         //add right if you havent already
1182                         it = seen.find(tempIndexesLeft[i]);
1183                         if (it == seen.end()) {  
1184                                 mergedResults.push_back(tempIndexesLeft[i]);
1185                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1186                         }
1187                 }
1188                 
1189                 for (int i = index; i < tempIndexesRight.size(); i++) {
1190                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1191                         
1192                         //add right if you havent already
1193                         it = seen.find(tempIndexesRight[i]);
1194                         if (it == seen.end()) {  
1195                                 mergedResults.push_back(tempIndexesRight[i]);
1196                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1197                         }
1198                 }
1199                 
1200                 
1201                 for (int i = 0; i < mergedResults.size(); i++) {
1202                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1203                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1204                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1205                                 refResults.push_back(temp);
1206                                 
1207                         }
1208                 }
1209
1210                 //cout << endl;         
1211                 delete queryRight;
1212                 delete queryLeft;
1213                 
1214                 return refResults;
1215         }
1216         catch(exception& e) {
1217                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1218                 exit(1);
1219         }
1220 }
1221 //***************************************************************************************************************
1222
1223