]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
working on removing pointers from chimera.slayer to eliminate pesky memory leaks
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37         
38                 doPrep();
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
42                 exit(1);
43         }
44 }
45 //***************************************************************************************************************
46 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
47                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
48         try {
49                 fastafile = file; templateSeqs = readSeqs(fastafile);
50                 templateFileName = temp; 
51                 searchMethod = mode;
52                 kmerSize = k;
53                 match = ms;
54                 misMatch = mms;
55                 window = win;
56                 divR = div;
57                 minSim = minsim;
58                 minCov = mincov;
59                 minBS = minbs;
60                 minSNP = minsnp;
61                 parents = par;
62                 iters = it;
63                 increment = inc;
64                 numWanted = numw;
65                 realign = r; 
66                 trimChimera = trim;
67                 priority = prior;
68                 
69                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
70                 
71                 if (searchMethod == "distance") { 
72                         createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
73                         
74                         //run filter on template copying templateSeqs into filteredTemplateSeqs
75                         for (int i = 0; i < templateSeqs.size(); i++) {  
76                                 if (m->control_pressed) {  break; }
77                                 
78                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
79                                 runFilter(newSeq);  
80                                 filteredTemplateSeqs.push_back(newSeq);
81                         }
82                 }
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
86                 exit(1);
87         }
88 }
89 //***************************************************************************************************************
90 int ChimeraSlayer::doPrep() {
91         try {
92                 if (searchMethod == "distance") { 
93                         //read in all query seqs
94                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
95                 
96                         vector<Sequence*> temp = templateSeqs;
97                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
98                 
99                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
100                 
101                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
102                 
103                         if (m->control_pressed) {  return 0; } 
104                 
105                         //run filter on template copying templateSeqs into filteredTemplateSeqs
106                         for (int i = 0; i < templateSeqs.size(); i++) {  
107                                 if (m->control_pressed) {  return 0; }
108                                 
109                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
110                                 runFilter(newSeq);  
111                                 filteredTemplateSeqs.push_back(newSeq);
112                         }
113                 }
114                 string  kmerDBNameLeft;
115                 string  kmerDBNameRight;
116         
117                 //generate the kmerdb to pass to maligner
118                 if (searchMethod == "kmer") { 
119                         string templatePath = m->hasPath(templateFileName);
120                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
121                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
122                                 
123                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
124                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
125                 #ifdef USE_MPI
126                         for (int i = 0; i < templateSeqs.size(); i++) {
127                                         
128                                 if (m->control_pressed) { return 0; } 
129                                         
130                                 string leftFrag = templateSeqs[i]->getUnaligned();
131                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
132                                         
133                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
134                                 databaseLeft->addSequence(leftTemp);    
135                         }
136                         databaseLeft->generateDB();
137                         databaseLeft->setNumSeqs(templateSeqs.size());
138                         
139                         for (int i = 0; i < templateSeqs.size(); i++) {
140                                 if (m->control_pressed) { return 0; } 
141                                         
142                                 string rightFrag = templateSeqs[i]->getUnaligned();
143                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
144                                         
145                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
146                                 databaseRight->addSequence(rightTemp);  
147                         }
148                         databaseRight->generateDB();
149                         databaseRight->setNumSeqs(templateSeqs.size());
150
151                 #else   
152                         //leftside
153                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
154                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
155                         bool needToGenerateLeft = true;
156                         
157                         if(kmerFileTestLeft){   
158                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
159                                 if (GoodFile) {  needToGenerateLeft = false;    }
160                         }
161                         
162                         if(needToGenerateLeft){ 
163                         
164                                 for (int i = 0; i < templateSeqs.size(); i++) {
165                                         
166                                         if (m->control_pressed) { return 0; } 
167                                         
168                                         string leftFrag = templateSeqs[i]->getUnaligned();
169                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
170                                         
171                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
172                                         databaseLeft->addSequence(leftTemp);    
173                                 }
174                                 databaseLeft->generateDB();
175                                 
176                         }else { 
177                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
178                         }
179                         kmerFileTestLeft.close();
180                         
181                         databaseLeft->setNumSeqs(templateSeqs.size());
182                         
183                         //rightside
184                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
185                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
186                         bool needToGenerateRight = true;
187                         
188                         if(kmerFileTestRight){  
189                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
190                                 if (GoodFile) {  needToGenerateRight = false;   }
191                         }
192                         
193                         if(needToGenerateRight){        
194                         
195                                 for (int i = 0; i < templateSeqs.size(); i++) {
196                                         if (m->control_pressed) { return 0; } 
197                                         
198                                         string rightFrag = templateSeqs[i]->getUnaligned();
199                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
200                                         
201                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
202                                         databaseRight->addSequence(rightTemp);  
203                                 }
204                                 databaseRight->generateDB();
205                                 
206                         }else { 
207                                 databaseRight->readKmerDB(kmerFileTestRight);   
208                         }
209                         kmerFileTestRight.close();
210                         
211                         databaseRight->setNumSeqs(templateSeqs.size());
212                 #endif  
213                 }else if (searchMethod == "blast") {
214                 
215                         //generate blastdb
216                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
217                         
218                         if (m->control_pressed) { return 0; }
219
220                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
221                         databaseLeft->generateDB();
222                         databaseLeft->setNumSeqs(templateSeqs.size());
223                 }
224                 
225                 return 0;
226
227         }
228         catch(exception& e) {
229                 m->errorOut(e, "ChimeraSlayer", "doprep");
230                 exit(1);
231         }
232 }
233 //***************************************************************************************************************
234 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
235         try {
236                 
237                 //when template=self, the query file is sorted from most abundance to least abundant
238                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
239                 vector<Sequence*> userTemplate;
240                 
241                 int myAbund = priority[q.getName()];
242                 
243                 for (int i = 0; i < templateSeqs.size(); i++) {
244                         
245                         if (m->control_pressed) { return userTemplate; } 
246                         
247                         //have I reached a sequence with the same abundance as myself?
248                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
249                         
250                         //if its am not chimeric add it
251                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
252                                 userTemplate.push_back(templateSeqs[i]); 
253                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
254                         }
255                 }
256                 
257                 string  kmerDBNameLeft;
258                 string  kmerDBNameRight;
259                 
260                 //generate the kmerdb to pass to maligner
261                 if (searchMethod == "kmer") { 
262                         string templatePath = m->hasPath(templateFileName);
263                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
264                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
265                         
266                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
267                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
268 #ifdef USE_MPI
269                         for (int i = 0; i < userTemplate.size(); i++) {
270                                 
271                                 if (m->control_pressed) { return userTemplate; } 
272                                 
273                                 string leftFrag = userTemplate[i]->getUnaligned();
274                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
275                                 
276                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
277                                 databaseLeft->addSequence(leftTemp);    
278                         }
279                         databaseLeft->generateDB();
280                         databaseLeft->setNumSeqs(userTemplate.size());
281                         
282                         for (int i = 0; i < userTemplate.size(); i++) {
283                                 if (m->control_pressed) { return userTemplate; } 
284                                 
285                                 string rightFrag = userTemplate[i]->getUnaligned();
286                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
287                                 
288                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
289                                 databaseRight->addSequence(rightTemp);  
290                         }
291                         databaseRight->generateDB();
292                         databaseRight->setNumSeqs(userTemplate.size());
293                         
294 #else   
295                         
296                         
297                         for (int i = 0; i < userTemplate.size(); i++) {
298                                 
299                                 if (m->control_pressed) { return userTemplate; } 
300                                 
301                                 string leftFrag = userTemplate[i]->getUnaligned();
302                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
303                                 
304                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
305                                 databaseLeft->addSequence(leftTemp);    
306                         }
307                         databaseLeft->generateDB();
308                         databaseLeft->setNumSeqs(userTemplate.size());
309                                 
310                         for (int i = 0; i < userTemplate.size(); i++) {
311                                 if (m->control_pressed) { return userTemplate; }  
312                                         
313                                 string rightFrag = userTemplate[i]->getUnaligned();
314                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
315                                         
316                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
317                                 databaseRight->addSequence(rightTemp);  
318                         }
319                         databaseRight->generateDB();
320                         databaseRight->setNumSeqs(userTemplate.size());
321 #endif  
322                 }else if (searchMethod == "blast") {
323                         
324                         //generate blastdb
325                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
326                         
327                         if (m->control_pressed) { return userTemplate; }
328
329                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
330                         databaseLeft->generateDB();
331                         databaseLeft->setNumSeqs(userTemplate.size());
332                 }
333                 
334                 return userTemplate;
335                 
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
339                 exit(1);
340         }
341 }
342
343 //***************************************************************************************************************
344 ChimeraSlayer::~ChimeraSlayer() {       
345         if (templateFileName != "self") {
346                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
347                 else if (searchMethod == "blast") {  delete databaseLeft; }
348         }
349 }
350 //***************************************************************************************************************
351 void ChimeraSlayer::printHeader(ostream& out) {
352         m->mothurOutEndLine();
353         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
354         m->mothurOutEndLine();
355         
356         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
357 }
358 //***************************************************************************************************************
359 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
360         try {
361                 Sequence trim;
362                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
363                 
364                 if (chimeraFlags == "yes") {
365                         string chimeraFlag = "no";
366                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
367                            ||
368                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
369                         
370                         
371                         if (chimeraFlag == "yes") {     
372                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
373                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
374                                         outAcc << querySeq.getName() << endl;
375                                         
376                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
377                                         
378                                         if (trimChimera) {  
379                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
380                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
381                                                 
382                                                 string newAligned = trim.getAligned();
383
384                                                 if (lengthLeft > lengthRight) { //trim right
385                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
386                                                 }else { //trim left
387                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
388                                                 }
389                                                 trim.setAligned(newAligned);
390                                         }
391                                 }
392                         }
393                         
394                         printBlock(chimeraResults[0], chimeraFlag, out);
395                         out << endl;
396                 }else {  
397                         out << querySeq.getName() << "\tno" << endl; 
398                 }
399                 
400                 return trim;
401                 
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "ChimeraSlayer", "print");
405                 exit(1);
406         }
407 }
408 //***************************************************************************************************************
409 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
410         try {
411                 Sequence trim;
412                                 
413                 if (trimChimera) { 
414                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
415                         trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); 
416                 }
417                 
418                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
419                         
420                         string chimeraFlag = "no";
421                         if (leftPiece.flag == "yes") {
422                                 
423                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
424                                         ||
425                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
426                         }
427                         
428                         if (rightPiece.flag == "yes") {
429                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
430                                  ||
431                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
432                         }
433                         
434                         bool rightChimeric = false;
435                         bool leftChimeric = false;
436                         
437                         if (chimeraFlag == "yes") {     
438                                 //which peice is chimeric or are both
439                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
440                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
441                                 
442                                 if (rightChimeric || leftChimeric) {
443                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
444                                         outAcc << querySeq.getName() << endl;
445                                         
446                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
447                                         
448                                         if (trimChimera) {  
449                                                 string newAligned = trim.getAligned();
450                                                                                                 
451                                                 //right side is fine so keep that
452                                                 if ((leftChimeric) && (!rightChimeric)) {
453                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
454                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
455                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
456                                                 }else { //both sides are chimeric, keep longest piece
457                                                         
458                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
459                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
460                                                         
461                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
462                                                         int length = lengthLeftLeft;
463                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
464                                                         
465                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
466                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
467                                                         
468                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
469                                                         if (lengthRightRight > length) { longest = 4; }
470                                                         
471                                                         if (longest == 1) { //leftleft
472                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
473                                                         }else if (longest == 2) { //leftright
474                                                                 //get rid of leftleft
475                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
476                                                                 //get rid of right
477                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
478                                                         }else if (longest == 3) { //rightleft
479                                                                 //get rid of left
480                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
481                                                                 //get rid of rightright
482                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
483                                                         }else { //rightright
484                                                                 //get rid of left
485                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
486                                                                 //get rid of rightleft
487                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
488                                                         }
489                                                 }
490                                                         
491                                                 trim.setAligned(newAligned);
492                                         }
493                                         
494                                 }
495                         }
496                         
497                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
498                         out << endl;
499                 }else {  
500                         out << querySeq.getName() << "\tno" << endl;  
501                 }
502                 
503                 return trim;
504                 
505         }
506         catch(exception& e) {
507                 m->errorOut(e, "ChimeraSlayer", "print");
508                 exit(1);
509         }
510 }
511
512 #ifdef USE_MPI
513 //***************************************************************************************************************
514 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
515         try {
516                 MPI_Status status;
517                 bool results = false;
518                 string outAccString = "";
519                 string outputString = "";
520                 
521                 Sequence trim;
522                 
523                 if (trimChimera) { 
524                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
525                         trim.setName(leftPiece.trimQuery.getName());  trim.setAligned(aligned);
526                 }
527                 
528                 
529                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
530                         
531                         string chimeraFlag = "no";
532                         if (leftPiece.flag == "yes") {
533                                 
534                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
535                                    ||
536                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
537                         }
538                         
539                         if (rightPiece.flag == "yes") {
540                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
541                                         ||
542                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
543                         }
544                         
545                         bool rightChimeric = false;
546                         bool leftChimeric = false;
547
548                         cout << endl;
549                         
550                         if (chimeraFlag == "yes") {     
551                                 //which peice is chimeric or are both
552                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
553                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
554                                 
555                                 if (rightChimeric || leftChimeric) {
556                                         cout << querySeq.getName() <<  "\tyes" << endl;
557                                         outAccString += querySeq.getName() + "\n";
558                                         results = true;
559                                         
560                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
561                                         
562                                         //write to accnos file
563                                         int length = outAccString.length();
564                                         char* buf2 = new char[length];
565                                         memcpy(buf2, outAccString.c_str(), length);
566                                 
567                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
568                                         delete buf2;
569                                         
570                                         if (trimChimera) {  
571                                                 string newAligned = trim.getAligned();
572                                                 
573                                                 //right side is fine so keep that
574                                                 if ((leftChimeric) && (!rightChimeric)) {
575                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
576                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
577                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
578                                                 }else { //both sides are chimeric, keep longest piece
579                                                         
580                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
581                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
582                                                         
583                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
584                                                         int length = lengthLeftLeft;
585                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
586                                                         
587                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
588                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
589                                                         
590                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
591                                                         if (lengthRightRight > length) { longest = 4; }
592                                                         
593                                                         if (longest == 1) { //leftleft
594                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
595                                                         }else if (longest == 2) { //leftright
596                                                                 //get rid of leftleft
597                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
598                                                                 //get rid of right
599                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
600                                                         }else if (longest == 3) { //rightleft
601                                                                 //get rid of left
602                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
603                                                                 //get rid of rightright
604                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
605                                                         }else { //rightright
606                                                                 //get rid of left
607                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
608                                                                 //get rid of rightleft
609                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
610                                                         }
611                                                 }
612                                                 
613                                                 trim.setAligned(newAligned);
614                                         }
615                                         
616                                 }
617                         }
618                         
619                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
620                         outputString += "\n";
621                 
622                         //write to output file
623                         int length = outputString.length();
624                         char* buf = new char[length];
625                         memcpy(buf, outputString.c_str(), length);
626                                 
627                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
628                         delete buf;
629
630                 }else {  
631                         outputString += querySeq.getName() + "\tno\n";  
632         
633                         //write to output file
634                         int length = outputString.length();
635                         char* buf = new char[length];
636                         memcpy(buf, outputString.c_str(), length);
637                                 
638                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
639                         delete buf;
640                 }
641                 
642                 
643                 return trim;
644         }
645         catch(exception& e) {
646                 m->errorOut(e, "ChimeraSlayer", "print");
647                 exit(1);
648         }
649 }
650 //***************************************************************************************************************
651 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
652         try {
653                 MPI_Status status;
654                 bool results = false;
655                 string outAccString = "";
656                 string outputString = "";
657                 
658                 Sequence trim;
659                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
660                 
661                 if (chimeraFlags == "yes") {
662                         string chimeraFlag = "no";
663                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
664                            ||
665                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
666                         
667                         
668                         if (chimeraFlag == "yes") {     
669                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
670                                         cout << querySeq.getName() <<  "\tyes" << endl;
671                                         outAccString += querySeq.getName() + "\n";
672                                         results = true;
673                                         
674                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
675                                         
676                                         //write to accnos file
677                                         int length = outAccString.length();
678                                         char* buf2 = new char[length];
679                                         memcpy(buf2, outAccString.c_str(), length);
680                                         
681                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
682                                         delete buf2;
683                                         
684                                         if (trimChimera) {  
685                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
686                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
687                                                 
688                                                 string newAligned = trim.getAligned();
689                                                 if (lengthLeft > lengthRight) { //trim right
690                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
691                                                 }else { //trim left
692                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
693                                                 }
694                                                 trim.setAligned(newAligned);    
695                                         }
696                                 }
697                         }
698                         
699                         outputString = getBlock(chimeraResults[0], chimeraFlag);
700                         outputString += "\n";
701                         
702                         //write to output file
703                         int length = outputString.length();
704                         char* buf = new char[length];
705                         memcpy(buf, outputString.c_str(), length);
706                         
707                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
708                         delete buf;
709                         
710                 }else {  
711                         outputString += querySeq.getName() + "\tno\n";  
712                         
713                         //write to output file
714                         int length = outputString.length();
715                         char* buf = new char[length];
716                         memcpy(buf, outputString.c_str(), length);
717                         
718                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
719                         delete buf;
720                 }
721                 
722                 return trim;
723         }
724         catch(exception& e) {
725                 m->errorOut(e, "ChimeraSlayer", "print");
726                 exit(1);
727         }
728 }
729 #endif
730
731 //***************************************************************************************************************
732 int ChimeraSlayer::getChimeras(Sequence* query) {
733         try {
734                 
735                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
736                 printResults.trimQuery = trimQuery; 
737                 
738                 chimeraFlags = "no";
739                 printResults.flag = "no";
740                 
741                 querySeq = *query;
742                 
743                 //you must create a template
744                 vector<Sequence*> thisTemplate;
745                 vector<Sequence*> thisFilteredTemplate;
746                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
747                 else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
748                 
749                 if (m->control_pressed) {  return 0;  }
750                 
751                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
752                 
753                 //moved this out of maligner - 4/29/11
754                 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
755                 
756                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
757                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
758                 
759                 if (templateFileName == "self") {
760                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
761                         else if (searchMethod == "blast") {  delete databaseLeft; }
762                 }
763         
764                 if (m->control_pressed) {  return 0;  }
765
766                 string chimeraFlag = maligner.getResults(*query, decalc);
767
768                 if (m->control_pressed) {  return 0;  }
769                 
770                 vector<results> Results = maligner.getOutput();
771                 
772                 //for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];        }
773                 
774                 if (chimeraFlag == "yes") {
775                         
776                         if (realign) {
777                                 vector<string> parents;
778                                 for (int i = 0; i < Results.size(); i++) {
779                                         parents.push_back(Results[i].parentAligned);
780                                 }
781                                 
782                                 ChimeraReAligner realigner;             
783                                 realigner.reAlign(query, parents);
784
785                         }
786                         
787 //                      cout << query->getAligned() << endl;
788                         //get sequence that were given from maligner results
789                         vector<SeqCompare> seqs;
790                         map<string, float> removeDups;
791                         map<string, float>::iterator itDup;
792                         map<string, string> parentNameSeq;
793                         map<string, string>::iterator itSeq;
794                         for (int j = 0; j < Results.size(); j++) {
795
796                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
797                                 //only add if you are not a duplicate
798 //                              cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
799                                 
800                                 
801                                 if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
802                                 
803                                         itDup = removeDups.find(Results[j].parent);
804                                         if (itDup == removeDups.end()) { //this is not duplicate
805                                                 removeDups[Results[j].parent] = dist;
806                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
807                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
808                                                 removeDups[Results[j].parent] = dist;
809                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
810                                         }
811                                 
812                                 }
813                                 
814                         }
815                         
816                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
817                                 itSeq = parentNameSeq.find(itDup->first);
818                                 Sequence seq(itDup->first, itSeq->second);
819                                 
820                                 SeqCompare member;
821                                 member.seq = seq;
822                                 member.dist = itDup->second;
823                                 seqs.push_back(member);
824                         }
825                         
826                         //limit number of parents to explore - default 3
827                         if (Results.size() > parents) {
828                                 //sort by distance
829                                 sort(seqs.begin(), seqs.end(), compareSeqCompare);
830                                 //prioritize larger more similiar sequence fragments
831                                 reverse(seqs.begin(), seqs.end());
832                                 
833                                 //for (int k = seqs.size()-1; k > (parents-1); k--)  {  
834                                 //      delete seqs[k].seq;
835                                         //seqs.pop_back();      
836                                 //}
837                         }
838                 
839                         //put seqs into vector to send to slayer
840                         
841 //                      cout << query->getAligned() << endl;
842                         vector<Sequence> seqsForSlayer;
843                         for (int k = 0; k < seqs.size(); k++) {  
844 //                              cout << seqs[k].seq->getAligned() << endl;
845                                 seqsForSlayer.push_back(seqs[k].seq);   
846 //                              cout << seqs[k].seq->getName() << endl;
847                         }
848                         
849                         if (m->control_pressed) {  return 0;  }
850
851                         //send to slayer
852                         chimeraFlags = slayer.getResults(*query, seqsForSlayer);
853                         if (m->control_pressed) {  return 0;  }
854                         chimeraResults = slayer.getOutput();
855                         
856                         printResults.flag = chimeraFlags;
857                         printResults.results = chimeraResults;
858                         
859                         //free memory
860                         //for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
861                 }
862                 //cout << endl << endl;
863                 return 0;
864         }
865         catch(exception& e) {
866                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
867                 exit(1);
868         }
869 }
870 //***************************************************************************************************************
871 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
872         try {
873                 out << querySeq.getName() << '\t';
874                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
875         
876                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
877                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
878                 
879                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
880                 
881         }
882         catch(exception& e) {
883                 m->errorOut(e, "ChimeraSlayer", "printBlock");
884                 exit(1);
885         }
886 }
887 //***************************************************************************************************************
888 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
889         try {
890                 
891                 if ((leftChimeric) && (!rightChimeric)) { //print left
892                         out << querySeq.getName() << '\t';
893                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
894                         
895                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
896                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
897                 
898                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
899                 
900                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
901                         out << querySeq.getName() << '\t';
902                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
903                         
904                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
905                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
906                         
907                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
908                         
909                 }else  { //print both results
910                         if (leftdata.flag == "yes") {
911                                 out << querySeq.getName() + "_LEFT" << '\t';
912                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
913                                 
914                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
915                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
916                                 
917                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
918                         }
919                         
920                         if (rightdata.flag == "yes") {
921                                 if (leftdata.flag == "yes") { out << endl; }
922                                 
923                                 out << querySeq.getName() + "_RIGHT"<< '\t';
924                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
925                                 
926                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
927                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
928                                 
929                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
930                 
931                         }
932                 }
933         }
934         catch(exception& e) {
935                 m->errorOut(e, "ChimeraSlayer", "printBlock");
936                 exit(1);
937         }
938 }
939 //***************************************************************************************************************
940 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
941         try {
942                 
943                 string out = "";
944                 
945                 if ((leftChimeric) && (!rightChimeric)) { //get left
946                         out += querySeq.getName() + "\t";
947                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
948                         
949                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
950                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
951                         
952                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
953                         
954                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
955                         out += querySeq.getName() + "\t";
956                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
957                         
958                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
959                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
960                         
961                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
962                         
963                 }else  { //print both results
964                         
965                         if (leftdata.flag == "yes") {
966                                 out += querySeq.getName() + "_LEFT\t";
967                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
968                                 
969                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
970                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
971                                 
972                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
973                         }
974                         
975                         if (rightdata.flag == "yes") {
976                                 if (leftdata.flag == "yes") { out += "\n"; }
977                                 out +=  querySeq.getName() + "_RIGHT\t";
978                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
979                                 
980                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
981                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
982                                 
983                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
984                         }
985                 }
986                 
987                 return out;
988                 
989         }
990         catch(exception& e) {
991                 m->errorOut(e, "ChimeraSlayer", "getBlock");
992                 exit(1);
993         }
994 }
995 //***************************************************************************************************************
996 string ChimeraSlayer::getBlock(data_struct data, string flag){
997         try {
998                 
999                 string outputString = "";
1000                 
1001                 outputString += querySeq.getName() + "\t";
1002                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
1003                         
1004                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1005                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1006                 
1007                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1008                 
1009                 return outputString;
1010         }
1011         catch(exception& e) {
1012                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1013                 exit(1);
1014         }
1015 }
1016 //***************************************************************************************************************
1017 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1018         try {
1019                 
1020                 vector<Sequence> refSeqs;
1021                 
1022                 if (searchMethod == "distance") {
1023                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1024                         Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1025                         runFilter(newSeq);
1026                         refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1027                         delete newSeq;
1028                 }else if (searchMethod == "blast")  {
1029                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1030                 }else if (searchMethod == "kmer") {
1031                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1032                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1033                 
1034                 return refSeqs;
1035         }
1036         catch(exception& e) {
1037                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1038                 exit(1);
1039         }
1040 }
1041 //***************************************************************************************************************/
1042 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1043         try {   
1044                 
1045                 vector<Sequence> refResults;
1046                 
1047                 //get parts of query
1048                 string queryUnAligned = q.getUnaligned();
1049                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1050                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1051 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1052                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1053                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1054                 
1055                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1056                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1057                                 
1058                 
1059                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1060 //              vector<int> smaller;
1061 //              vector<int> larger;
1062 //              
1063 //              if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1064 //              else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1065                 
1066                 //merge results         
1067                 map<int, int> seen;
1068                 map<int, int>::iterator it;
1069                 vector<int> mergedResults;
1070                 
1071                 int index = 0;
1072 //              for (int i = 0; i < smaller.size(); i++) {
1073                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1074                         
1075                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1076         
1077                         //add left if you havent already
1078                         it = seen.find(tempIndexesLeft[index]);
1079                         if (it == seen.end()) {  
1080                                 mergedResults.push_back(tempIndexesLeft[index]);
1081                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1082                         }
1083                         
1084                         //add right if you havent already
1085                         it = seen.find(tempIndexesRight[index]);
1086                         if (it == seen.end()) {  
1087                                 mergedResults.push_back(tempIndexesRight[index]);
1088                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1089                         }
1090                         index++;
1091                 }
1092
1093                 
1094                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1095                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1096                         
1097                         //add right if you havent already
1098                         it = seen.find(tempIndexesLeft[i]);
1099                         if (it == seen.end()) {  
1100                                 mergedResults.push_back(tempIndexesLeft[i]);
1101                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1102                         }
1103                 }
1104
1105                 for (int i = index; i < tempIndexesRight.size(); i++) {
1106                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1107                         
1108                         //add right if you havent already
1109                         it = seen.find(tempIndexesRight[i]);
1110                         if (it == seen.end()) {  
1111                                 mergedResults.push_back(tempIndexesRight[i]);
1112                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1113                         }
1114                 }
1115                 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
1116                 //cout << qname << endl;        
1117                 
1118                 for (int i = 0; i < mergedResults.size(); i++) {
1119                         //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1120                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1121                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1122                                 refResults.push_back(temp);
1123                         }
1124                 }
1125                 //cout << endl << endl;
1126
1127                 delete queryRight;
1128                 delete queryLeft;
1129                 
1130                 if (refResults.size() == 0) { m->mothurOut("[WARNING]: megablast found 0 potential parents, so we are not able to check " + q.getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
1131                 
1132                 return refResults;
1133         }
1134         catch(exception& e) {
1135                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1136                 exit(1);
1137         }
1138 }
1139 //***************************************************************************************************************
1140 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1141         try {   
1142                 vector<Sequence> refResults;
1143                 
1144                 //get parts of query
1145                 string queryUnAligned = q.getUnaligned();
1146                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1147                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1148                 
1149                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1150                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1151                 
1152                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1153                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1154                 
1155                 //merge results         
1156                 map<int, int> seen;
1157                 map<int, int>::iterator it;
1158                 vector<int> mergedResults;
1159                 
1160                 int index = 0;
1161                 //              for (int i = 0; i < smaller.size(); i++) {
1162                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1163                         
1164                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1165                         
1166                         //add left if you havent already
1167                         it = seen.find(tempIndexesLeft[index]);
1168                         if (it == seen.end()) {  
1169                                 mergedResults.push_back(tempIndexesLeft[index]);
1170                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1171                         }
1172                         
1173                         //add right if you havent already
1174                         it = seen.find(tempIndexesRight[index]);
1175                         if (it == seen.end()) {  
1176                                 mergedResults.push_back(tempIndexesRight[index]);
1177                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1178                         }
1179                         index++;
1180                 }
1181                 
1182                 
1183                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1184                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1185                         
1186                         //add right if you havent already
1187                         it = seen.find(tempIndexesLeft[i]);
1188                         if (it == seen.end()) {  
1189                                 mergedResults.push_back(tempIndexesLeft[i]);
1190                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1191                         }
1192                 }
1193                 
1194                 for (int i = index; i < tempIndexesRight.size(); i++) {
1195                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1196                         
1197                         //add right if you havent already
1198                         it = seen.find(tempIndexesRight[i]);
1199                         if (it == seen.end()) {  
1200                                 mergedResults.push_back(tempIndexesRight[i]);
1201                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1202                         }
1203                 }
1204                 
1205                 for (int i = 0; i < mergedResults.size(); i++) {
1206                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1207                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1208                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1209                                 refResults.push_back(temp);
1210                                 
1211                         }
1212                 }
1213
1214                 //cout << endl;         
1215                 delete queryRight;
1216                 delete queryLeft;
1217                 
1218                 return refResults;
1219         }
1220         catch(exception& e) {
1221                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1222                 exit(1);
1223         }
1224 }
1225 //***************************************************************************************************************
1226
1227