]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
b637000fe688920ac89097358cc50e9ac5ca2fc8
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera()  {         
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37                 numNoParents = 0;
38                 blastlocation = blas;
39                 threadID = tid;
40         
41                 doPrep();
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45                 exit(1);
46         }
47 }
48 //***************************************************************************************************************
49 //template=self
50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
51                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera()  {        
52         try {
53                 fastafile = file; templateSeqs = readSeqs(fastafile);
54                 templateFileName = temp; 
55                 searchMethod = mode;
56                 kmerSize = k;
57                 match = ms;
58                 misMatch = mms;
59                 window = win;
60                 divR = div;
61                 minSim = minsim;
62                 minCov = mincov;
63                 minBS = minbs;
64                 minSNP = minsnp;
65                 parents = par;
66                 iters = it;
67                 increment = inc;
68                 numWanted = numw;
69                 realign = r; 
70                 trimChimera = trim;
71                 priority = prior;
72                 numNoParents = 0;
73                 blastlocation = blas;
74                 threadID = tid;
75                 
76                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
77                 
78                 if (searchMethod == "distance") { 
79                         createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
80                         
81                         //run filter on template copying templateSeqs into filteredTemplateSeqs
82                         for (int i = 0; i < templateSeqs.size(); i++) {  
83                                 if (m->control_pressed) {  break; }
84                                 
85                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
86                                 runFilter(newSeq);  
87                                 filteredTemplateSeqs.push_back(newSeq);
88                         }
89                 }
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
93                 exit(1);
94         }
95 }
96 //***************************************************************************************************************
97 int ChimeraSlayer::doPrep() {
98         try {
99                 if (searchMethod == "distance") { 
100                         //read in all query seqs
101                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
102                 
103                         vector<Sequence*> temp = templateSeqs;
104                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
105                 
106                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
107                 
108                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
109                 
110                         if (m->control_pressed) {  return 0; } 
111                 
112                         //run filter on template copying templateSeqs into filteredTemplateSeqs
113                         for (int i = 0; i < templateSeqs.size(); i++) {  
114                                 if (m->control_pressed) {  return 0; }
115                                 
116                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
117                                 runFilter(newSeq);  
118                                 filteredTemplateSeqs.push_back(newSeq);
119                         }
120                 }
121                 string  kmerDBNameLeft;
122                 string  kmerDBNameRight;
123         
124                 //generate the kmerdb to pass to maligner
125                 if (searchMethod == "kmer") { 
126                         string templatePath = m->hasPath(templateFileName);
127                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
128                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
129                                 
130                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
131                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
132                 #ifdef USE_MPI
133                         for (int i = 0; i < templateSeqs.size(); i++) {
134                                         
135                                 if (m->control_pressed) { return 0; } 
136                                         
137                                 string leftFrag = templateSeqs[i]->getUnaligned();
138                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
139                                         
140                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
141                                 databaseLeft->addSequence(leftTemp);    
142                         }
143                         databaseLeft->generateDB();
144                         databaseLeft->setNumSeqs(templateSeqs.size());
145                         
146                         for (int i = 0; i < templateSeqs.size(); i++) {
147                                 if (m->control_pressed) { return 0; } 
148                                         
149                                 string rightFrag = templateSeqs[i]->getUnaligned();
150                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
151                                         
152                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
153                                 databaseRight->addSequence(rightTemp);  
154                         }
155                         databaseRight->generateDB();
156                         databaseRight->setNumSeqs(templateSeqs.size());
157
158                 #else   
159                         //leftside
160                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
161                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
162                         bool needToGenerateLeft = true;
163                         
164                         if(kmerFileTestLeft){   
165                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
166                                 if (GoodFile) {  needToGenerateLeft = false;    }
167                         }
168                         
169                         if(needToGenerateLeft){ 
170                         
171                                 for (int i = 0; i < templateSeqs.size(); i++) {
172                                         
173                                         if (m->control_pressed) { return 0; } 
174                                         
175                                         string leftFrag = templateSeqs[i]->getUnaligned();
176                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
177                                         
178                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
179                                         databaseLeft->addSequence(leftTemp);    
180                                 }
181                                 databaseLeft->generateDB();
182                                 
183                         }else { 
184                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
185                         }
186                         kmerFileTestLeft.close();
187                         
188                         databaseLeft->setNumSeqs(templateSeqs.size());
189                         
190                         //rightside
191                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
192                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
193                         bool needToGenerateRight = true;
194                         
195                         if(kmerFileTestRight){  
196                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
197                                 if (GoodFile) {  needToGenerateRight = false;   }
198                         }
199                         
200                         if(needToGenerateRight){        
201                         
202                                 for (int i = 0; i < templateSeqs.size(); i++) {
203                                         if (m->control_pressed) { return 0; } 
204                                         
205                                         string rightFrag = templateSeqs[i]->getUnaligned();
206                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
207                                         
208                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
209                                         databaseRight->addSequence(rightTemp);  
210                                 }
211                                 databaseRight->generateDB();
212                                 
213                         }else { 
214                                 databaseRight->readKmerDB(kmerFileTestRight);   
215                         }
216                         kmerFileTestRight.close();
217                         
218                         databaseRight->setNumSeqs(templateSeqs.size());
219                 #endif  
220                 }else if (searchMethod == "blast") {
221                 
222                         //generate blastdb
223                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
224                         
225                         if (m->control_pressed) { return 0; }
226
227                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
228                         databaseLeft->generateDB();
229                         databaseLeft->setNumSeqs(templateSeqs.size());
230                 }
231                 
232                 return 0;
233
234         }
235         catch(exception& e) {
236                 m->errorOut(e, "ChimeraSlayer", "doprep");
237                 exit(1);
238         }
239 }
240 //***************************************************************************************************************
241 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
242         try {
243                 
244                 //when template=self, the query file is sorted from most abundance to least abundant
245                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
246                 vector<Sequence*> userTemplate;
247                 
248                 int myAbund = priority[q.getName()];
249                 
250                 for (int i = 0; i < templateSeqs.size(); i++) {
251                         
252                         if (m->control_pressed) { return userTemplate; } 
253                         
254                         //have I reached a sequence with the same abundance as myself?
255                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
256                         
257                         //if its am not chimeric add it
258                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
259                                 userTemplate.push_back(templateSeqs[i]); 
260                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
261                         }
262                 }
263                 
264                 //avoids nuisance error from formatdb for making blank blast database
265                 if (userTemplate.size() == 0) {
266                         return userTemplate;
267                 }
268                 
269                 string  kmerDBNameLeft;
270                 string  kmerDBNameRight;
271                 
272                 //generate the kmerdb to pass to maligner
273                 if (searchMethod == "kmer") { 
274                         string templatePath = m->hasPath(templateFileName);
275                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
276                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
277                         
278                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
279                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
280 #ifdef USE_MPI
281                         for (int i = 0; i < userTemplate.size(); i++) {
282                                 
283                                 if (m->control_pressed) { return userTemplate; } 
284                                 
285                                 string leftFrag = userTemplate[i]->getUnaligned();
286                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
287                                 
288                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
289                                 databaseLeft->addSequence(leftTemp);    
290                         }
291                         databaseLeft->generateDB();
292                         databaseLeft->setNumSeqs(userTemplate.size());
293                         
294                         for (int i = 0; i < userTemplate.size(); i++) {
295                                 if (m->control_pressed) { return userTemplate; } 
296                                 
297                                 string rightFrag = userTemplate[i]->getUnaligned();
298                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
299                                 
300                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
301                                 databaseRight->addSequence(rightTemp);  
302                         }
303                         databaseRight->generateDB();
304                         databaseRight->setNumSeqs(userTemplate.size());
305                         
306 #else   
307                         
308                         
309                         for (int i = 0; i < userTemplate.size(); i++) {
310                                 
311                                 if (m->control_pressed) { return userTemplate; } 
312                                 
313                                 string leftFrag = userTemplate[i]->getUnaligned();
314                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
315                                 
316                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
317                                 databaseLeft->addSequence(leftTemp);    
318                         }
319                         databaseLeft->generateDB();
320                         databaseLeft->setNumSeqs(userTemplate.size());
321                                 
322                         for (int i = 0; i < userTemplate.size(); i++) {
323                                 if (m->control_pressed) { return userTemplate; }  
324                                         
325                                 string rightFrag = userTemplate[i]->getUnaligned();
326                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
327                                         
328                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
329                                 databaseRight->addSequence(rightTemp);  
330                         }
331                         databaseRight->generateDB();
332                         databaseRight->setNumSeqs(userTemplate.size());
333 #endif  
334                 }else if (searchMethod == "blast") {
335                         
336                         //generate blastdb
337                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
338                         
339                         if (m->control_pressed) { return userTemplate; }
340
341                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
342                         databaseLeft->generateDB();
343                         databaseLeft->setNumSeqs(userTemplate.size());
344                 }
345                 
346                 return userTemplate;
347                 
348         }
349         catch(exception& e) {
350                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
351                 exit(1);
352         }
353 }
354
355 //***************************************************************************************************************
356 ChimeraSlayer::~ChimeraSlayer() {       
357         if (templateFileName != "self") {
358                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
359                 else if (searchMethod == "blast") {  delete databaseLeft; }
360         }
361 }
362 //***************************************************************************************************************
363 void ChimeraSlayer::printHeader(ostream& out) {
364         m->mothurOutEndLine();
365         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
366         m->mothurOutEndLine();
367         
368         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
369 }
370 //***************************************************************************************************************
371 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
372         try {
373                 Sequence trim;
374                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
375                 
376                 if (chimeraFlags == "yes") {
377                         string chimeraFlag = "no";
378                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
379                            ||
380                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
381                         
382                         
383                         if (chimeraFlag == "yes") {     
384                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
385                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
386                                         outAcc << querySeq.getName() << endl;
387                                         
388                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
389                                         
390                                         if (trimChimera) {  
391                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
392                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
393                                                 
394                                                 string newAligned = trim.getAligned();
395
396                                                 if (lengthLeft > lengthRight) { //trim right
397                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
398                                                 }else { //trim left
399                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
400                                                 }
401                                                 trim.setAligned(newAligned);
402                                         }
403                                 }
404                         }
405                         
406                         printBlock(chimeraResults[0], chimeraFlag, out);
407                         out << endl;
408                 }else {  
409                         out << querySeq.getName() << "\tno" << endl; 
410                 }
411                 
412                 return trim;
413                 
414         }
415         catch(exception& e) {
416                 m->errorOut(e, "ChimeraSlayer", "print");
417                 exit(1);
418         }
419 }
420 //***************************************************************************************************************
421 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
422         try {
423                 Sequence trim;
424                                 
425                 if (trimChimera) { 
426                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
427                         trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); 
428                 }
429                 
430                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
431                         
432                         string chimeraFlag = "no";
433                         if (leftPiece.flag == "yes") {
434                                 
435                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
436                                         ||
437                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
438                         }
439                         
440                         if (rightPiece.flag == "yes") {
441                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
442                                  ||
443                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
444                         }
445                         
446                         bool rightChimeric = false;
447                         bool leftChimeric = false;
448                         
449                         if (chimeraFlag == "yes") {     
450                                 //which peice is chimeric or are both
451                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
452                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
453                                 
454                                 if (rightChimeric || leftChimeric) {
455                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
456                                         outAcc << querySeq.getName() << endl;
457                                         
458                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
459                                         
460                                         if (trimChimera) {  
461                                                 string newAligned = trim.getAligned();
462                                                                                                 
463                                                 //right side is fine so keep that
464                                                 if ((leftChimeric) && (!rightChimeric)) {
465                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
466                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
467                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
468                                                 }else { //both sides are chimeric, keep longest piece
469                                                         
470                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
471                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
472                                                         
473                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
474                                                         int length = lengthLeftLeft;
475                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
476                                                         
477                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
478                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
479                                                         
480                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
481                                                         if (lengthRightRight > length) { longest = 4; }
482                                                         
483                                                         if (longest == 1) { //leftleft
484                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
485                                                         }else if (longest == 2) { //leftright
486                                                                 //get rid of leftleft
487                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
488                                                                 //get rid of right
489                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
490                                                         }else if (longest == 3) { //rightleft
491                                                                 //get rid of left
492                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
493                                                                 //get rid of rightright
494                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
495                                                         }else { //rightright
496                                                                 //get rid of left
497                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
498                                                                 //get rid of rightleft
499                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
500                                                         }
501                                                 }
502                                                         
503                                                 trim.setAligned(newAligned);
504                                         }
505                                         
506                                 }
507                         }
508                         
509                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
510                         out << endl;
511                 }else {  
512                         out << querySeq.getName() << "\tno" << endl;  
513                 }
514                 
515                 return trim;
516                 
517         }
518         catch(exception& e) {
519                 m->errorOut(e, "ChimeraSlayer", "print");
520                 exit(1);
521         }
522 }
523
524 #ifdef USE_MPI
525 //***************************************************************************************************************
526 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
527         try {
528                 MPI_Status status;
529                 bool results = false;
530                 string outAccString = "";
531                 string outputString = "";
532                 
533                 Sequence trim;
534                 
535                 if (trimChimera) { 
536                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
537                         trim.setName(leftPiece.trimQuery.getName());  trim.setAligned(aligned);
538                 }
539                 
540                 
541                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
542                         
543                         string chimeraFlag = "no";
544                         if (leftPiece.flag == "yes") {
545                                 
546                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
547                                    ||
548                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
549                         }
550                         
551                         if (rightPiece.flag == "yes") {
552                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
553                                         ||
554                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
555                         }
556                         
557                         bool rightChimeric = false;
558                         bool leftChimeric = false;
559
560                         cout << endl;
561                         
562                         if (chimeraFlag == "yes") {     
563                                 //which peice is chimeric or are both
564                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
565                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
566                                 
567                                 if (rightChimeric || leftChimeric) {
568                                         cout << querySeq.getName() <<  "\tyes" << endl;
569                                         outAccString += querySeq.getName() + "\n";
570                                         results = true;
571                                         
572                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
573                                         
574                                         //write to accnos file
575                                         int length = outAccString.length();
576                                         char* buf2 = new char[length];
577                                         memcpy(buf2, outAccString.c_str(), length);
578                                 
579                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
580                                         delete buf2;
581                                         
582                                         if (trimChimera) {  
583                                                 string newAligned = trim.getAligned();
584                                                 
585                                                 //right side is fine so keep that
586                                                 if ((leftChimeric) && (!rightChimeric)) {
587                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
588                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
589                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
590                                                 }else { //both sides are chimeric, keep longest piece
591                                                         
592                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
593                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
594                                                         
595                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
596                                                         int length = lengthLeftLeft;
597                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
598                                                         
599                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
600                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
601                                                         
602                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
603                                                         if (lengthRightRight > length) { longest = 4; }
604                                                         
605                                                         if (longest == 1) { //leftleft
606                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
607                                                         }else if (longest == 2) { //leftright
608                                                                 //get rid of leftleft
609                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
610                                                                 //get rid of right
611                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
612                                                         }else if (longest == 3) { //rightleft
613                                                                 //get rid of left
614                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
615                                                                 //get rid of rightright
616                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
617                                                         }else { //rightright
618                                                                 //get rid of left
619                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
620                                                                 //get rid of rightleft
621                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
622                                                         }
623                                                 }
624                                                 
625                                                 trim.setAligned(newAligned);
626                                         }
627                                         
628                                 }
629                         }
630                         
631                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
632                         outputString += "\n";
633                 
634                         //write to output file
635                         int length = outputString.length();
636                         char* buf = new char[length];
637                         memcpy(buf, outputString.c_str(), length);
638                                 
639                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
640                         delete buf;
641
642                 }else {  
643                         outputString += querySeq.getName() + "\tno\n";  
644         
645                         //write to output file
646                         int length = outputString.length();
647                         char* buf = new char[length];
648                         memcpy(buf, outputString.c_str(), length);
649                                 
650                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
651                         delete buf;
652                 }
653                 
654                 
655                 return trim;
656         }
657         catch(exception& e) {
658                 m->errorOut(e, "ChimeraSlayer", "print");
659                 exit(1);
660         }
661 }
662 //***************************************************************************************************************
663 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
664         try {
665                 MPI_Status status;
666                 bool results = false;
667                 string outAccString = "";
668                 string outputString = "";
669                 
670                 Sequence trim;
671                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
672                 
673                 if (chimeraFlags == "yes") {
674                         string chimeraFlag = "no";
675                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
676                            ||
677                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
678                         
679                         
680                         if (chimeraFlag == "yes") {     
681                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
682                                         cout << querySeq.getName() <<  "\tyes" << endl;
683                                         outAccString += querySeq.getName() + "\n";
684                                         results = true;
685                                         
686                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
687                                         
688                                         //write to accnos file
689                                         int length = outAccString.length();
690                                         char* buf2 = new char[length];
691                                         memcpy(buf2, outAccString.c_str(), length);
692                                         
693                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
694                                         delete buf2;
695                                         
696                                         if (trimChimera) {  
697                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
698                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
699                                                 
700                                                 string newAligned = trim.getAligned();
701                                                 if (lengthLeft > lengthRight) { //trim right
702                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
703                                                 }else { //trim left
704                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
705                                                 }
706                                                 trim.setAligned(newAligned);    
707                                         }
708                                 }
709                         }
710                         
711                         outputString = getBlock(chimeraResults[0], chimeraFlag);
712                         outputString += "\n";
713                         
714                         //write to output file
715                         int length = outputString.length();
716                         char* buf = new char[length];
717                         memcpy(buf, outputString.c_str(), length);
718                         
719                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
720                         delete buf;
721                         
722                 }else {  
723                         outputString += querySeq.getName() + "\tno\n";  
724                         
725                         //write to output file
726                         int length = outputString.length();
727                         char* buf = new char[length];
728                         memcpy(buf, outputString.c_str(), length);
729                         
730                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
731                         delete buf;
732                 }
733                 
734                 return trim;
735         }
736         catch(exception& e) {
737                 m->errorOut(e, "ChimeraSlayer", "print");
738                 exit(1);
739         }
740 }
741 #endif
742
743 //***************************************************************************************************************
744 int ChimeraSlayer::getChimeras(Sequence* query) {
745         try {
746                 
747                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
748                 printResults.trimQuery = trimQuery; 
749                 
750                 chimeraFlags = "no";
751                 printResults.flag = "no";
752                 
753                 querySeq = *query;
754                 
755                 //you must create a template
756                 vector<Sequence*> thisTemplate;
757                 vector<Sequence*> thisFilteredTemplate;
758                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
759                 else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
760                 
761                 if (m->control_pressed) {  return 0;  }
762                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
763                 
764                 //moved this out of maligner - 4/29/11
765                 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
766                 
767                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
768                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
769                 
770                 if (templateFileName == "self") {
771                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
772                         else if (searchMethod == "blast") {  delete databaseLeft; }
773                 }
774         
775                 if (m->control_pressed) {  return 0;  }
776
777                 string chimeraFlag = maligner.getResults(*query, decalc);
778
779                 if (m->control_pressed) {  return 0;  }
780                 
781                 vector<results> Results = maligner.getOutput();
782                 
783                 //for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];        }
784                 
785                 if (chimeraFlag == "yes") {
786                         
787                         if (realign) {
788                                 vector<string> parents;
789                                 for (int i = 0; i < Results.size(); i++) {
790                                         parents.push_back(Results[i].parentAligned);
791                                 }
792                                 
793                                 ChimeraReAligner realigner;             
794                                 realigner.reAlign(query, parents);
795
796                         }
797                         
798 //                      cout << query->getAligned() << endl;
799                         //get sequence that were given from maligner results
800                         vector<SeqCompare> seqs;
801                         map<string, float> removeDups;
802                         map<string, float>::iterator itDup;
803                         map<string, string> parentNameSeq;
804                         map<string, string>::iterator itSeq;
805                         for (int j = 0; j < Results.size(); j++) {
806
807                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
808                                 //only add if you are not a duplicate
809 //                              cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
810                                 
811                                 
812                                 if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
813                                 
814                                         itDup = removeDups.find(Results[j].parent);
815                                         if (itDup == removeDups.end()) { //this is not duplicate
816                                                 removeDups[Results[j].parent] = dist;
817                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
818                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
819                                                 removeDups[Results[j].parent] = dist;
820                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
821                                         }
822                                 
823                                 }
824                                 
825                         }
826                         
827                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
828                                 itSeq = parentNameSeq.find(itDup->first);
829                                 Sequence seq(itDup->first, itSeq->second);
830                                 
831                                 SeqCompare member;
832                                 member.seq = seq;
833                                 member.dist = itDup->second;
834                                 seqs.push_back(member);
835                         }
836                         
837                         //limit number of parents to explore - default 3
838                         if (Results.size() > parents) {
839                                 //sort by distance
840                                 sort(seqs.begin(), seqs.end(), compareSeqCompare);
841                                 //prioritize larger more similiar sequence fragments
842                                 reverse(seqs.begin(), seqs.end());
843                                 
844                                 //for (int k = seqs.size()-1; k > (parents-1); k--)  {  
845                                 //      delete seqs[k].seq;
846                                         //seqs.pop_back();      
847                                 //}
848                         }
849                 
850                         //put seqs into vector to send to slayer
851                         
852 //                      cout << query->getAligned() << endl;
853                         vector<Sequence> seqsForSlayer;
854                         for (int k = 0; k < seqs.size(); k++) {  
855 //                              cout << seqs[k].seq->getAligned() << endl;
856                                 seqsForSlayer.push_back(seqs[k].seq);   
857 //                              cout << seqs[k].seq->getName() << endl;
858                         }
859                         
860                         if (m->control_pressed) {  return 0;  }
861
862                         //send to slayer
863                         chimeraFlags = slayer.getResults(*query, seqsForSlayer);
864                         if (m->control_pressed) {  return 0;  }
865                         chimeraResults = slayer.getOutput();
866                         
867                         printResults.flag = chimeraFlags;
868                         printResults.results = chimeraResults;
869                         
870                         //free memory
871                         //for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
872                 }
873                 //cout << endl << endl;
874                 return 0;
875         }
876         catch(exception& e) {
877                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
878                 exit(1);
879         }
880 }
881 //***************************************************************************************************************
882 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
883         try {
884                 out << querySeq.getName() << '\t';
885                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
886         
887                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
888                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
889                 
890                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
891                 
892         }
893         catch(exception& e) {
894                 m->errorOut(e, "ChimeraSlayer", "printBlock");
895                 exit(1);
896         }
897 }
898 //***************************************************************************************************************
899 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
900         try {
901                 
902                 if ((leftChimeric) && (!rightChimeric)) { //print left
903                         out << querySeq.getName() << '\t';
904                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
905                         
906                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
907                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
908                 
909                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
910                 
911                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
912                         out << querySeq.getName() << '\t';
913                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
914                         
915                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
916                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
917                         
918                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
919                         
920                 }else  { //print both results
921                         if (leftdata.flag == "yes") {
922                                 out << querySeq.getName() + "_LEFT" << '\t';
923                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
924                                 
925                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
926                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
927                                 
928                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
929                         }
930                         
931                         if (rightdata.flag == "yes") {
932                                 if (leftdata.flag == "yes") { out << endl; }
933                                 
934                                 out << querySeq.getName() + "_RIGHT"<< '\t';
935                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
936                                 
937                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
938                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
939                                 
940                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
941                 
942                         }
943                 }
944         }
945         catch(exception& e) {
946                 m->errorOut(e, "ChimeraSlayer", "printBlock");
947                 exit(1);
948         }
949 }
950 //***************************************************************************************************************
951 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
952         try {
953                 
954                 string out = "";
955                 
956                 if ((leftChimeric) && (!rightChimeric)) { //get left
957                         out += querySeq.getName() + "\t";
958                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
959                         
960                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
961                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
962                         
963                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
964                         
965                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
966                         out += querySeq.getName() + "\t";
967                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
968                         
969                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
970                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
971                         
972                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
973                         
974                 }else  { //print both results
975                         
976                         if (leftdata.flag == "yes") {
977                                 out += querySeq.getName() + "_LEFT\t";
978                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
979                                 
980                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
981                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
982                                 
983                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
984                         }
985                         
986                         if (rightdata.flag == "yes") {
987                                 if (leftdata.flag == "yes") { out += "\n"; }
988                                 out +=  querySeq.getName() + "_RIGHT\t";
989                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
990                                 
991                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
992                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
993                                 
994                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
995                         }
996                 }
997                 
998                 return out;
999                 
1000         }
1001         catch(exception& e) {
1002                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1003                 exit(1);
1004         }
1005 }
1006 //***************************************************************************************************************
1007 string ChimeraSlayer::getBlock(data_struct data, string flag){
1008         try {
1009                 
1010                 string outputString = "";
1011                 
1012                 outputString += querySeq.getName() + "\t";
1013                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
1014                         
1015                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1016                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1017                 
1018                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1019                 
1020                 return outputString;
1021         }
1022         catch(exception& e) {
1023                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1024                 exit(1);
1025         }
1026 }
1027 //***************************************************************************************************************
1028 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1029         try {
1030                 
1031                 vector<Sequence> refSeqs;
1032                 
1033                 if (searchMethod == "distance") {
1034                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1035                         Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1036                         runFilter(newSeq);
1037                         refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1038                         delete newSeq;
1039                 }else if (searchMethod == "blast")  {
1040                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1041                 }else if (searchMethod == "kmer") {
1042                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1043                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1044                 
1045                 return refSeqs;
1046         }
1047         catch(exception& e) {
1048                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1049                 exit(1);
1050         }
1051 }
1052 //***************************************************************************************************************/
1053 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1054         try {   
1055                 
1056                 vector<Sequence> refResults;
1057                 
1058                 //get parts of query
1059                 string queryUnAligned = q.getUnaligned();
1060                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1061                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1062 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1063                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1064                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1065                 
1066                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1067                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1068                                 
1069                 
1070                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1071 //              vector<int> smaller;
1072 //              vector<int> larger;
1073 //              
1074 //              if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1075 //              else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1076                 
1077                 //merge results         
1078                 map<int, int> seen;
1079                 map<int, int>::iterator it;
1080                 vector<int> mergedResults;
1081                 
1082                 int index = 0;
1083 //              for (int i = 0; i < smaller.size(); i++) {
1084                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1085                         
1086                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1087         
1088                         //add left if you havent already
1089                         it = seen.find(tempIndexesLeft[index]);
1090                         if (it == seen.end()) {  
1091                                 mergedResults.push_back(tempIndexesLeft[index]);
1092                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1093                         }
1094                         
1095                         //add right if you havent already
1096                         it = seen.find(tempIndexesRight[index]);
1097                         if (it == seen.end()) {  
1098                                 mergedResults.push_back(tempIndexesRight[index]);
1099                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1100                         }
1101                         index++;
1102                 }
1103
1104                 
1105                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1106                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1107                         
1108                         //add right if you havent already
1109                         it = seen.find(tempIndexesLeft[i]);
1110                         if (it == seen.end()) {  
1111                                 mergedResults.push_back(tempIndexesLeft[i]);
1112                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1113                         }
1114                 }
1115
1116                 for (int i = index; i < tempIndexesRight.size(); i++) {
1117                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1118                         
1119                         //add right if you havent already
1120                         it = seen.find(tempIndexesRight[i]);
1121                         if (it == seen.end()) {  
1122                                 mergedResults.push_back(tempIndexesRight[i]);
1123                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1124                         }
1125                 }
1126                 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
1127                 //cout << qname << endl;        
1128                 
1129                 if (mergedResults.size() == 0) { numNoParents++; }
1130                 
1131                 for (int i = 0; i < mergedResults.size(); i++) {
1132                         //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1133                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1134                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1135                                 refResults.push_back(temp);
1136                         }
1137                 }
1138                 //cout << endl << endl;
1139
1140                 delete queryRight;
1141                 delete queryLeft;
1142                 
1143                 return refResults;
1144         }
1145         catch(exception& e) {
1146                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1147                 exit(1);
1148         }
1149 }
1150 //***************************************************************************************************************
1151 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1152         try {   
1153                 vector<Sequence> refResults;
1154                 
1155                 //get parts of query
1156                 string queryUnAligned = q.getUnaligned();
1157                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1158                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1159                 
1160                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1161                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1162                 
1163                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1164                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1165                 
1166                 //merge results         
1167                 map<int, int> seen;
1168                 map<int, int>::iterator it;
1169                 vector<int> mergedResults;
1170                 
1171                 int index = 0;
1172                 //              for (int i = 0; i < smaller.size(); i++) {
1173                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1174                         
1175                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1176                         
1177                         //add left if you havent already
1178                         it = seen.find(tempIndexesLeft[index]);
1179                         if (it == seen.end()) {  
1180                                 mergedResults.push_back(tempIndexesLeft[index]);
1181                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1182                         }
1183                         
1184                         //add right if you havent already
1185                         it = seen.find(tempIndexesRight[index]);
1186                         if (it == seen.end()) {  
1187                                 mergedResults.push_back(tempIndexesRight[index]);
1188                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1189                         }
1190                         index++;
1191                 }
1192                 
1193                 
1194                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1195                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1196                         
1197                         //add right if you havent already
1198                         it = seen.find(tempIndexesLeft[i]);
1199                         if (it == seen.end()) {  
1200                                 mergedResults.push_back(tempIndexesLeft[i]);
1201                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1202                         }
1203                 }
1204                 
1205                 for (int i = index; i < tempIndexesRight.size(); i++) {
1206                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1207                         
1208                         //add right if you havent already
1209                         it = seen.find(tempIndexesRight[i]);
1210                         if (it == seen.end()) {  
1211                                 mergedResults.push_back(tempIndexesRight[i]);
1212                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1213                         }
1214                 }
1215                 
1216                 for (int i = 0; i < mergedResults.size(); i++) {
1217                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1218                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1219                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1220                                 refResults.push_back(temp);
1221                                 
1222                         }
1223                 }
1224
1225                 //cout << endl;         
1226                 delete queryRight;
1227                 delete queryLeft;
1228                 
1229                 return refResults;
1230         }
1231         catch(exception& e) {
1232                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1233                 exit(1);
1234         }
1235 }
1236 //***************************************************************************************************************
1237
1238