]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
57528b4026e538f4b9ad85b706182e162bca2567
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37                 numNoParents = 0;
38         
39                 doPrep();
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
43                 exit(1);
44         }
45 }
46 //***************************************************************************************************************
47 //template=self
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
49                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
50         try {
51                 fastafile = file; templateSeqs = readSeqs(fastafile);
52                 templateFileName = temp; 
53                 searchMethod = mode;
54                 kmerSize = k;
55                 match = ms;
56                 misMatch = mms;
57                 window = win;
58                 divR = div;
59                 minSim = minsim;
60                 minCov = mincov;
61                 minBS = minbs;
62                 minSNP = minsnp;
63                 parents = par;
64                 iters = it;
65                 increment = inc;
66                 numWanted = numw;
67                 realign = r; 
68                 trimChimera = trim;
69                 priority = prior;
70                 numNoParents = 0;
71                 
72                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
73                 
74                 if (searchMethod == "distance") { 
75                         createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
76                         
77                         //run filter on template copying templateSeqs into filteredTemplateSeqs
78                         for (int i = 0; i < templateSeqs.size(); i++) {  
79                                 if (m->control_pressed) {  break; }
80                                 
81                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
82                                 runFilter(newSeq);  
83                                 filteredTemplateSeqs.push_back(newSeq);
84                         }
85                 }
86         }
87         catch(exception& e) {
88                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
89                 exit(1);
90         }
91 }
92 //***************************************************************************************************************
93 int ChimeraSlayer::doPrep() {
94         try {
95                 if (searchMethod == "distance") { 
96                         //read in all query seqs
97                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
98                 
99                         vector<Sequence*> temp = templateSeqs;
100                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
101                 
102                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
103                 
104                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
105                 
106                         if (m->control_pressed) {  return 0; } 
107                 
108                         //run filter on template copying templateSeqs into filteredTemplateSeqs
109                         for (int i = 0; i < templateSeqs.size(); i++) {  
110                                 if (m->control_pressed) {  return 0; }
111                                 
112                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
113                                 runFilter(newSeq);  
114                                 filteredTemplateSeqs.push_back(newSeq);
115                         }
116                 }
117                 string  kmerDBNameLeft;
118                 string  kmerDBNameRight;
119         
120                 //generate the kmerdb to pass to maligner
121                 if (searchMethod == "kmer") { 
122                         string templatePath = m->hasPath(templateFileName);
123                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
124                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
125                                 
126                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
127                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
128                 #ifdef USE_MPI
129                         for (int i = 0; i < templateSeqs.size(); i++) {
130                                         
131                                 if (m->control_pressed) { return 0; } 
132                                         
133                                 string leftFrag = templateSeqs[i]->getUnaligned();
134                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
135                                         
136                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
137                                 databaseLeft->addSequence(leftTemp);    
138                         }
139                         databaseLeft->generateDB();
140                         databaseLeft->setNumSeqs(templateSeqs.size());
141                         
142                         for (int i = 0; i < templateSeqs.size(); i++) {
143                                 if (m->control_pressed) { return 0; } 
144                                         
145                                 string rightFrag = templateSeqs[i]->getUnaligned();
146                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
147                                         
148                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
149                                 databaseRight->addSequence(rightTemp);  
150                         }
151                         databaseRight->generateDB();
152                         databaseRight->setNumSeqs(templateSeqs.size());
153
154                 #else   
155                         //leftside
156                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
157                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
158                         bool needToGenerateLeft = true;
159                         
160                         if(kmerFileTestLeft){   
161                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
162                                 if (GoodFile) {  needToGenerateLeft = false;    }
163                         }
164                         
165                         if(needToGenerateLeft){ 
166                         
167                                 for (int i = 0; i < templateSeqs.size(); i++) {
168                                         
169                                         if (m->control_pressed) { return 0; } 
170                                         
171                                         string leftFrag = templateSeqs[i]->getUnaligned();
172                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
173                                         
174                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
175                                         databaseLeft->addSequence(leftTemp);    
176                                 }
177                                 databaseLeft->generateDB();
178                                 
179                         }else { 
180                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
181                         }
182                         kmerFileTestLeft.close();
183                         
184                         databaseLeft->setNumSeqs(templateSeqs.size());
185                         
186                         //rightside
187                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
188                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
189                         bool needToGenerateRight = true;
190                         
191                         if(kmerFileTestRight){  
192                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
193                                 if (GoodFile) {  needToGenerateRight = false;   }
194                         }
195                         
196                         if(needToGenerateRight){        
197                         
198                                 for (int i = 0; i < templateSeqs.size(); i++) {
199                                         if (m->control_pressed) { return 0; } 
200                                         
201                                         string rightFrag = templateSeqs[i]->getUnaligned();
202                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
203                                         
204                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
205                                         databaseRight->addSequence(rightTemp);  
206                                 }
207                                 databaseRight->generateDB();
208                                 
209                         }else { 
210                                 databaseRight->readKmerDB(kmerFileTestRight);   
211                         }
212                         kmerFileTestRight.close();
213                         
214                         databaseRight->setNumSeqs(templateSeqs.size());
215                 #endif  
216                 }else if (searchMethod == "blast") {
217                 
218                         //generate blastdb
219                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
220                         
221                         if (m->control_pressed) { return 0; }
222
223                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
224                         databaseLeft->generateDB();
225                         databaseLeft->setNumSeqs(templateSeqs.size());
226                 }
227                 
228                 return 0;
229
230         }
231         catch(exception& e) {
232                 m->errorOut(e, "ChimeraSlayer", "doprep");
233                 exit(1);
234         }
235 }
236 //***************************************************************************************************************
237 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
238         try {
239                 
240                 //when template=self, the query file is sorted from most abundance to least abundant
241                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
242                 vector<Sequence*> userTemplate;
243                 
244                 int myAbund = priority[q.getName()];
245                 
246                 for (int i = 0; i < templateSeqs.size(); i++) {
247                         
248                         if (m->control_pressed) { return userTemplate; } 
249                         
250                         //have I reached a sequence with the same abundance as myself?
251                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
252                         
253                         //if its am not chimeric add it
254                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
255                                 userTemplate.push_back(templateSeqs[i]); 
256                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
257                         }
258                 }
259                 
260                 //avoids nuisance error from formatdb for making blank blast database
261                 if (userTemplate.size() == 0) {
262                         return userTemplate;
263                 }
264                 
265                 string  kmerDBNameLeft;
266                 string  kmerDBNameRight;
267                 
268                 //generate the kmerdb to pass to maligner
269                 if (searchMethod == "kmer") { 
270                         string templatePath = m->hasPath(templateFileName);
271                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
272                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
273                         
274                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
275                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
276 #ifdef USE_MPI
277                         for (int i = 0; i < userTemplate.size(); i++) {
278                                 
279                                 if (m->control_pressed) { return userTemplate; } 
280                                 
281                                 string leftFrag = userTemplate[i]->getUnaligned();
282                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
283                                 
284                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
285                                 databaseLeft->addSequence(leftTemp);    
286                         }
287                         databaseLeft->generateDB();
288                         databaseLeft->setNumSeqs(userTemplate.size());
289                         
290                         for (int i = 0; i < userTemplate.size(); i++) {
291                                 if (m->control_pressed) { return userTemplate; } 
292                                 
293                                 string rightFrag = userTemplate[i]->getUnaligned();
294                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
295                                 
296                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
297                                 databaseRight->addSequence(rightTemp);  
298                         }
299                         databaseRight->generateDB();
300                         databaseRight->setNumSeqs(userTemplate.size());
301                         
302 #else   
303                         
304                         
305                         for (int i = 0; i < userTemplate.size(); i++) {
306                                 
307                                 if (m->control_pressed) { return userTemplate; } 
308                                 
309                                 string leftFrag = userTemplate[i]->getUnaligned();
310                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
311                                 
312                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
313                                 databaseLeft->addSequence(leftTemp);    
314                         }
315                         databaseLeft->generateDB();
316                         databaseLeft->setNumSeqs(userTemplate.size());
317                                 
318                         for (int i = 0; i < userTemplate.size(); i++) {
319                                 if (m->control_pressed) { return userTemplate; }  
320                                         
321                                 string rightFrag = userTemplate[i]->getUnaligned();
322                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
323                                         
324                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
325                                 databaseRight->addSequence(rightTemp);  
326                         }
327                         databaseRight->generateDB();
328                         databaseRight->setNumSeqs(userTemplate.size());
329 #endif  
330                 }else if (searchMethod == "blast") {
331                         
332                         //generate blastdb
333                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
334                         
335                         if (m->control_pressed) { return userTemplate; }
336
337                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
338                         databaseLeft->generateDB();
339                         databaseLeft->setNumSeqs(userTemplate.size());
340                 }
341                 
342                 return userTemplate;
343                 
344         }
345         catch(exception& e) {
346                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
347                 exit(1);
348         }
349 }
350
351 //***************************************************************************************************************
352 ChimeraSlayer::~ChimeraSlayer() {       
353         if (templateFileName != "self") {
354                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
355                 else if (searchMethod == "blast") {  delete databaseLeft; }
356         }
357 }
358 //***************************************************************************************************************
359 void ChimeraSlayer::printHeader(ostream& out) {
360         m->mothurOutEndLine();
361         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
362         m->mothurOutEndLine();
363         
364         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
365 }
366 //***************************************************************************************************************
367 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
368         try {
369                 Sequence trim;
370                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
371                 
372                 if (chimeraFlags == "yes") {
373                         string chimeraFlag = "no";
374                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
375                            ||
376                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
377                         
378                         
379                         if (chimeraFlag == "yes") {     
380                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
381                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
382                                         outAcc << querySeq.getName() << endl;
383                                         
384                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
385                                         
386                                         if (trimChimera) {  
387                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
388                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
389                                                 
390                                                 string newAligned = trim.getAligned();
391
392                                                 if (lengthLeft > lengthRight) { //trim right
393                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
394                                                 }else { //trim left
395                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
396                                                 }
397                                                 trim.setAligned(newAligned);
398                                         }
399                                 }
400                         }
401                         
402                         printBlock(chimeraResults[0], chimeraFlag, out);
403                         out << endl;
404                 }else {  
405                         out << querySeq.getName() << "\tno" << endl; 
406                 }
407                 
408                 return trim;
409                 
410         }
411         catch(exception& e) {
412                 m->errorOut(e, "ChimeraSlayer", "print");
413                 exit(1);
414         }
415 }
416 //***************************************************************************************************************
417 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
418         try {
419                 Sequence trim;
420                                 
421                 if (trimChimera) { 
422                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
423                         trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); 
424                 }
425                 
426                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
427                         
428                         string chimeraFlag = "no";
429                         if (leftPiece.flag == "yes") {
430                                 
431                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
432                                         ||
433                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
434                         }
435                         
436                         if (rightPiece.flag == "yes") {
437                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
438                                  ||
439                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
440                         }
441                         
442                         bool rightChimeric = false;
443                         bool leftChimeric = false;
444                         
445                         if (chimeraFlag == "yes") {     
446                                 //which peice is chimeric or are both
447                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
448                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
449                                 
450                                 if (rightChimeric || leftChimeric) {
451                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
452                                         outAcc << querySeq.getName() << endl;
453                                         
454                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
455                                         
456                                         if (trimChimera) {  
457                                                 string newAligned = trim.getAligned();
458                                                                                                 
459                                                 //right side is fine so keep that
460                                                 if ((leftChimeric) && (!rightChimeric)) {
461                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
462                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
463                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
464                                                 }else { //both sides are chimeric, keep longest piece
465                                                         
466                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
467                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
468                                                         
469                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
470                                                         int length = lengthLeftLeft;
471                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
472                                                         
473                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
474                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
475                                                         
476                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
477                                                         if (lengthRightRight > length) { longest = 4; }
478                                                         
479                                                         if (longest == 1) { //leftleft
480                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
481                                                         }else if (longest == 2) { //leftright
482                                                                 //get rid of leftleft
483                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
484                                                                 //get rid of right
485                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
486                                                         }else if (longest == 3) { //rightleft
487                                                                 //get rid of left
488                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
489                                                                 //get rid of rightright
490                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
491                                                         }else { //rightright
492                                                                 //get rid of left
493                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
494                                                                 //get rid of rightleft
495                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
496                                                         }
497                                                 }
498                                                         
499                                                 trim.setAligned(newAligned);
500                                         }
501                                         
502                                 }
503                         }
504                         
505                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
506                         out << endl;
507                 }else {  
508                         out << querySeq.getName() << "\tno" << endl;  
509                 }
510                 
511                 return trim;
512                 
513         }
514         catch(exception& e) {
515                 m->errorOut(e, "ChimeraSlayer", "print");
516                 exit(1);
517         }
518 }
519
520 #ifdef USE_MPI
521 //***************************************************************************************************************
522 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
523         try {
524                 MPI_Status status;
525                 bool results = false;
526                 string outAccString = "";
527                 string outputString = "";
528                 
529                 Sequence trim;
530                 
531                 if (trimChimera) { 
532                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
533                         trim.setName(leftPiece.trimQuery.getName());  trim.setAligned(aligned);
534                 }
535                 
536                 
537                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
538                         
539                         string chimeraFlag = "no";
540                         if (leftPiece.flag == "yes") {
541                                 
542                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
543                                    ||
544                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
545                         }
546                         
547                         if (rightPiece.flag == "yes") {
548                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
549                                         ||
550                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
551                         }
552                         
553                         bool rightChimeric = false;
554                         bool leftChimeric = false;
555
556                         cout << endl;
557                         
558                         if (chimeraFlag == "yes") {     
559                                 //which peice is chimeric or are both
560                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
561                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
562                                 
563                                 if (rightChimeric || leftChimeric) {
564                                         cout << querySeq.getName() <<  "\tyes" << endl;
565                                         outAccString += querySeq.getName() + "\n";
566                                         results = true;
567                                         
568                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
569                                         
570                                         //write to accnos file
571                                         int length = outAccString.length();
572                                         char* buf2 = new char[length];
573                                         memcpy(buf2, outAccString.c_str(), length);
574                                 
575                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
576                                         delete buf2;
577                                         
578                                         if (trimChimera) {  
579                                                 string newAligned = trim.getAligned();
580                                                 
581                                                 //right side is fine so keep that
582                                                 if ((leftChimeric) && (!rightChimeric)) {
583                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
584                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
585                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586                                                 }else { //both sides are chimeric, keep longest piece
587                                                         
588                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
589                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
590                                                         
591                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
592                                                         int length = lengthLeftLeft;
593                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
594                                                         
595                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
596                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
597                                                         
598                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
599                                                         if (lengthRightRight > length) { longest = 4; }
600                                                         
601                                                         if (longest == 1) { //leftleft
602                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
603                                                         }else if (longest == 2) { //leftright
604                                                                 //get rid of leftleft
605                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
606                                                                 //get rid of right
607                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
608                                                         }else if (longest == 3) { //rightleft
609                                                                 //get rid of left
610                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
611                                                                 //get rid of rightright
612                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
613                                                         }else { //rightright
614                                                                 //get rid of left
615                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
616                                                                 //get rid of rightleft
617                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
618                                                         }
619                                                 }
620                                                 
621                                                 trim.setAligned(newAligned);
622                                         }
623                                         
624                                 }
625                         }
626                         
627                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
628                         outputString += "\n";
629                 
630                         //write to output file
631                         int length = outputString.length();
632                         char* buf = new char[length];
633                         memcpy(buf, outputString.c_str(), length);
634                                 
635                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
636                         delete buf;
637
638                 }else {  
639                         outputString += querySeq.getName() + "\tno\n";  
640         
641                         //write to output file
642                         int length = outputString.length();
643                         char* buf = new char[length];
644                         memcpy(buf, outputString.c_str(), length);
645                                 
646                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
647                         delete buf;
648                 }
649                 
650                 
651                 return trim;
652         }
653         catch(exception& e) {
654                 m->errorOut(e, "ChimeraSlayer", "print");
655                 exit(1);
656         }
657 }
658 //***************************************************************************************************************
659 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
660         try {
661                 MPI_Status status;
662                 bool results = false;
663                 string outAccString = "";
664                 string outputString = "";
665                 
666                 Sequence trim;
667                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
668                 
669                 if (chimeraFlags == "yes") {
670                         string chimeraFlag = "no";
671                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
672                            ||
673                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
674                         
675                         
676                         if (chimeraFlag == "yes") {     
677                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
678                                         cout << querySeq.getName() <<  "\tyes" << endl;
679                                         outAccString += querySeq.getName() + "\n";
680                                         results = true;
681                                         
682                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
683                                         
684                                         //write to accnos file
685                                         int length = outAccString.length();
686                                         char* buf2 = new char[length];
687                                         memcpy(buf2, outAccString.c_str(), length);
688                                         
689                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
690                                         delete buf2;
691                                         
692                                         if (trimChimera) {  
693                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
694                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
695                                                 
696                                                 string newAligned = trim.getAligned();
697                                                 if (lengthLeft > lengthRight) { //trim right
698                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
699                                                 }else { //trim left
700                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
701                                                 }
702                                                 trim.setAligned(newAligned);    
703                                         }
704                                 }
705                         }
706                         
707                         outputString = getBlock(chimeraResults[0], chimeraFlag);
708                         outputString += "\n";
709                         
710                         //write to output file
711                         int length = outputString.length();
712                         char* buf = new char[length];
713                         memcpy(buf, outputString.c_str(), length);
714                         
715                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
716                         delete buf;
717                         
718                 }else {  
719                         outputString += querySeq.getName() + "\tno\n";  
720                         
721                         //write to output file
722                         int length = outputString.length();
723                         char* buf = new char[length];
724                         memcpy(buf, outputString.c_str(), length);
725                         
726                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
727                         delete buf;
728                 }
729                 
730                 return trim;
731         }
732         catch(exception& e) {
733                 m->errorOut(e, "ChimeraSlayer", "print");
734                 exit(1);
735         }
736 }
737 #endif
738
739 //***************************************************************************************************************
740 int ChimeraSlayer::getChimeras(Sequence* query) {
741         try {
742                 
743                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
744                 printResults.trimQuery = trimQuery; 
745                 
746                 chimeraFlags = "no";
747                 printResults.flag = "no";
748                 
749                 querySeq = *query;
750                 
751                 //you must create a template
752                 vector<Sequence*> thisTemplate;
753                 vector<Sequence*> thisFilteredTemplate;
754                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
755                 else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
756                 
757                 if (m->control_pressed) {  return 0;  }
758                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
759                 
760                 //moved this out of maligner - 4/29/11
761                 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
762                 
763                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
764                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
765                 
766                 if (templateFileName == "self") {
767                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
768                         else if (searchMethod == "blast") {  delete databaseLeft; }
769                 }
770         
771                 if (m->control_pressed) {  return 0;  }
772
773                 string chimeraFlag = maligner.getResults(*query, decalc);
774
775                 if (m->control_pressed) {  return 0;  }
776                 
777                 vector<results> Results = maligner.getOutput();
778                 
779                 //for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];        }
780                 
781                 if (chimeraFlag == "yes") {
782                         
783                         if (realign) {
784                                 vector<string> parents;
785                                 for (int i = 0; i < Results.size(); i++) {
786                                         parents.push_back(Results[i].parentAligned);
787                                 }
788                                 
789                                 ChimeraReAligner realigner;             
790                                 realigner.reAlign(query, parents);
791
792                         }
793                         
794 //                      cout << query->getAligned() << endl;
795                         //get sequence that were given from maligner results
796                         vector<SeqCompare> seqs;
797                         map<string, float> removeDups;
798                         map<string, float>::iterator itDup;
799                         map<string, string> parentNameSeq;
800                         map<string, string>::iterator itSeq;
801                         for (int j = 0; j < Results.size(); j++) {
802
803                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
804                                 //only add if you are not a duplicate
805 //                              cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
806                                 
807                                 
808                                 if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
809                                 
810                                         itDup = removeDups.find(Results[j].parent);
811                                         if (itDup == removeDups.end()) { //this is not duplicate
812                                                 removeDups[Results[j].parent] = dist;
813                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
814                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
815                                                 removeDups[Results[j].parent] = dist;
816                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
817                                         }
818                                 
819                                 }
820                                 
821                         }
822                         
823                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
824                                 itSeq = parentNameSeq.find(itDup->first);
825                                 Sequence seq(itDup->first, itSeq->second);
826                                 
827                                 SeqCompare member;
828                                 member.seq = seq;
829                                 member.dist = itDup->second;
830                                 seqs.push_back(member);
831                         }
832                         
833                         //limit number of parents to explore - default 3
834                         if (Results.size() > parents) {
835                                 //sort by distance
836                                 sort(seqs.begin(), seqs.end(), compareSeqCompare);
837                                 //prioritize larger more similiar sequence fragments
838                                 reverse(seqs.begin(), seqs.end());
839                                 
840                                 //for (int k = seqs.size()-1; k > (parents-1); k--)  {  
841                                 //      delete seqs[k].seq;
842                                         //seqs.pop_back();      
843                                 //}
844                         }
845                 
846                         //put seqs into vector to send to slayer
847                         
848 //                      cout << query->getAligned() << endl;
849                         vector<Sequence> seqsForSlayer;
850                         for (int k = 0; k < seqs.size(); k++) {  
851 //                              cout << seqs[k].seq->getAligned() << endl;
852                                 seqsForSlayer.push_back(seqs[k].seq);   
853 //                              cout << seqs[k].seq->getName() << endl;
854                         }
855                         
856                         if (m->control_pressed) {  return 0;  }
857
858                         //send to slayer
859                         chimeraFlags = slayer.getResults(*query, seqsForSlayer);
860                         if (m->control_pressed) {  return 0;  }
861                         chimeraResults = slayer.getOutput();
862                         
863                         printResults.flag = chimeraFlags;
864                         printResults.results = chimeraResults;
865                         
866                         //free memory
867                         //for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
868                 }
869                 //cout << endl << endl;
870                 return 0;
871         }
872         catch(exception& e) {
873                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
874                 exit(1);
875         }
876 }
877 //***************************************************************************************************************
878 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
879         try {
880                 out << querySeq.getName() << '\t';
881                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
882         
883                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
884                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
885                 
886                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
887                 
888         }
889         catch(exception& e) {
890                 m->errorOut(e, "ChimeraSlayer", "printBlock");
891                 exit(1);
892         }
893 }
894 //***************************************************************************************************************
895 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
896         try {
897                 
898                 if ((leftChimeric) && (!rightChimeric)) { //print left
899                         out << querySeq.getName() << '\t';
900                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
901                         
902                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
903                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
904                 
905                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
906                 
907                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
908                         out << querySeq.getName() << '\t';
909                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
910                         
911                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
912                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
913                         
914                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
915                         
916                 }else  { //print both results
917                         if (leftdata.flag == "yes") {
918                                 out << querySeq.getName() + "_LEFT" << '\t';
919                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
920                                 
921                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
922                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
923                                 
924                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
925                         }
926                         
927                         if (rightdata.flag == "yes") {
928                                 if (leftdata.flag == "yes") { out << endl; }
929                                 
930                                 out << querySeq.getName() + "_RIGHT"<< '\t';
931                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
932                                 
933                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
934                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
935                                 
936                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
937                 
938                         }
939                 }
940         }
941         catch(exception& e) {
942                 m->errorOut(e, "ChimeraSlayer", "printBlock");
943                 exit(1);
944         }
945 }
946 //***************************************************************************************************************
947 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
948         try {
949                 
950                 string out = "";
951                 
952                 if ((leftChimeric) && (!rightChimeric)) { //get left
953                         out += querySeq.getName() + "\t";
954                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
955                         
956                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
957                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
958                         
959                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
960                         
961                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
962                         out += querySeq.getName() + "\t";
963                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
964                         
965                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
966                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
967                         
968                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
969                         
970                 }else  { //print both results
971                         
972                         if (leftdata.flag == "yes") {
973                                 out += querySeq.getName() + "_LEFT\t";
974                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
975                                 
976                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
977                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
978                                 
979                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
980                         }
981                         
982                         if (rightdata.flag == "yes") {
983                                 if (leftdata.flag == "yes") { out += "\n"; }
984                                 out +=  querySeq.getName() + "_RIGHT\t";
985                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
986                                 
987                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
988                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
989                                 
990                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
991                         }
992                 }
993                 
994                 return out;
995                 
996         }
997         catch(exception& e) {
998                 m->errorOut(e, "ChimeraSlayer", "getBlock");
999                 exit(1);
1000         }
1001 }
1002 //***************************************************************************************************************
1003 string ChimeraSlayer::getBlock(data_struct data, string flag){
1004         try {
1005                 
1006                 string outputString = "";
1007                 
1008                 outputString += querySeq.getName() + "\t";
1009                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
1010                         
1011                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1012                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1013                 
1014                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1015                 
1016                 return outputString;
1017         }
1018         catch(exception& e) {
1019                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1020                 exit(1);
1021         }
1022 }
1023 //***************************************************************************************************************
1024 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1025         try {
1026                 
1027                 vector<Sequence> refSeqs;
1028                 
1029                 if (searchMethod == "distance") {
1030                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1031                         Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1032                         runFilter(newSeq);
1033                         refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1034                         delete newSeq;
1035                 }else if (searchMethod == "blast")  {
1036                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1037                 }else if (searchMethod == "kmer") {
1038                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1039                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1040                 
1041                 return refSeqs;
1042         }
1043         catch(exception& e) {
1044                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1045                 exit(1);
1046         }
1047 }
1048 //***************************************************************************************************************/
1049 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1050         try {   
1051                 
1052                 vector<Sequence> refResults;
1053                 
1054                 //get parts of query
1055                 string queryUnAligned = q.getUnaligned();
1056                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1057                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1058 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1059                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1060                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1061                 
1062                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1063                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1064                                 
1065                 
1066                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1067 //              vector<int> smaller;
1068 //              vector<int> larger;
1069 //              
1070 //              if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1071 //              else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1072                 
1073                 //merge results         
1074                 map<int, int> seen;
1075                 map<int, int>::iterator it;
1076                 vector<int> mergedResults;
1077                 
1078                 int index = 0;
1079 //              for (int i = 0; i < smaller.size(); i++) {
1080                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1081                         
1082                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1083         
1084                         //add left if you havent already
1085                         it = seen.find(tempIndexesLeft[index]);
1086                         if (it == seen.end()) {  
1087                                 mergedResults.push_back(tempIndexesLeft[index]);
1088                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1089                         }
1090                         
1091                         //add right if you havent already
1092                         it = seen.find(tempIndexesRight[index]);
1093                         if (it == seen.end()) {  
1094                                 mergedResults.push_back(tempIndexesRight[index]);
1095                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1096                         }
1097                         index++;
1098                 }
1099
1100                 
1101                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1102                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1103                         
1104                         //add right if you havent already
1105                         it = seen.find(tempIndexesLeft[i]);
1106                         if (it == seen.end()) {  
1107                                 mergedResults.push_back(tempIndexesLeft[i]);
1108                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1109                         }
1110                 }
1111
1112                 for (int i = index; i < tempIndexesRight.size(); i++) {
1113                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1114                         
1115                         //add right if you havent already
1116                         it = seen.find(tempIndexesRight[i]);
1117                         if (it == seen.end()) {  
1118                                 mergedResults.push_back(tempIndexesRight[i]);
1119                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1120                         }
1121                 }
1122                 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
1123                 //cout << qname << endl;        
1124                 
1125                 if (mergedResults.size() == 0) { numNoParents++; }
1126                 
1127                 for (int i = 0; i < mergedResults.size(); i++) {
1128                         //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1129                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1130                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1131                                 refResults.push_back(temp);
1132                         }
1133                 }
1134                 //cout << endl << endl;
1135
1136                 delete queryRight;
1137                 delete queryLeft;
1138                 
1139                 return refResults;
1140         }
1141         catch(exception& e) {
1142                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1143                 exit(1);
1144         }
1145 }
1146 //***************************************************************************************************************
1147 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1148         try {   
1149                 vector<Sequence> refResults;
1150                 
1151                 //get parts of query
1152                 string queryUnAligned = q.getUnaligned();
1153                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1154                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1155                 
1156                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1157                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1158                 
1159                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1160                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1161                 
1162                 //merge results         
1163                 map<int, int> seen;
1164                 map<int, int>::iterator it;
1165                 vector<int> mergedResults;
1166                 
1167                 int index = 0;
1168                 //              for (int i = 0; i < smaller.size(); i++) {
1169                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1170                         
1171                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1172                         
1173                         //add left if you havent already
1174                         it = seen.find(tempIndexesLeft[index]);
1175                         if (it == seen.end()) {  
1176                                 mergedResults.push_back(tempIndexesLeft[index]);
1177                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1178                         }
1179                         
1180                         //add right if you havent already
1181                         it = seen.find(tempIndexesRight[index]);
1182                         if (it == seen.end()) {  
1183                                 mergedResults.push_back(tempIndexesRight[index]);
1184                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1185                         }
1186                         index++;
1187                 }
1188                 
1189                 
1190                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1191                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1192                         
1193                         //add right if you havent already
1194                         it = seen.find(tempIndexesLeft[i]);
1195                         if (it == seen.end()) {  
1196                                 mergedResults.push_back(tempIndexesLeft[i]);
1197                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1198                         }
1199                 }
1200                 
1201                 for (int i = index; i < tempIndexesRight.size(); i++) {
1202                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1203                         
1204                         //add right if you havent already
1205                         it = seen.find(tempIndexesRight[i]);
1206                         if (it == seen.end()) {  
1207                                 mergedResults.push_back(tempIndexesRight[i]);
1208                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1209                         }
1210                 }
1211                 
1212                 for (int i = 0; i < mergedResults.size(); i++) {
1213                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1214                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1215                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1216                                 refResults.push_back(temp);
1217                                 
1218                         }
1219                 }
1220
1221                 //cout << endl;         
1222                 delete queryRight;
1223                 delete queryLeft;
1224                 
1225                 return refResults;
1226         }
1227         catch(exception& e) {
1228                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1229                 exit(1);
1230         }
1231 }
1232 //***************************************************************************************************************
1233
1234