]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
1f7160aa0d91e8f049fbca4c8b5a85c0bed349d9
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37         
38                 decalc = new DeCalculator();    
39                 
40                 doPrep();
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
44                 exit(1);
45         }
46 }
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
49                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
50         try {
51                 fastafile = file; templateSeqs = readSeqs(fastafile);
52                 templateFileName = temp; 
53                 searchMethod = mode;
54                 kmerSize = k;
55                 match = ms;
56                 misMatch = mms;
57                 window = win;
58                 divR = div;
59                 minSim = minsim;
60                 minCov = mincov;
61                 minBS = minbs;
62                 minSNP = minsnp;
63                 parents = par;
64                 iters = it;
65                 increment = inc;
66                 numWanted = numw;
67                 realign = r; 
68                 trimChimera = trim;
69                 priority = prior;
70                 
71                 decalc = new DeCalculator();    
72                 
73                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74                 
75                 //run filter on template
76                 for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  break; }  runFilter(templateSeqs[i]);  }
77
78                 
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
82                 exit(1);
83         }
84 }
85 //***************************************************************************************************************
86 int ChimeraSlayer::doPrep() {
87         try {
88                 if (searchMethod == "distance") { 
89                         //read in all query seqs
90                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
91                 
92                         vector<Sequence*> temp = templateSeqs;
93                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
94                 
95                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
96                 
97                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
98                 
99                         if (m->control_pressed) {  return 0; } 
100                 
101                         //run filter on template copying templateSeqs into filteredTemplateSeqs
102                         for (int i = 0; i < templateSeqs.size(); i++) {  
103                                 if (m->control_pressed) {  return 0; }
104                                 
105                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
106                                 runFilter(newSeq);  
107                                 filteredTemplateSeqs.push_back(newSeq);
108                         }
109                 }
110                 string  kmerDBNameLeft;
111                 string  kmerDBNameRight;
112         
113                 //generate the kmerdb to pass to maligner
114                 if (searchMethod == "kmer") { 
115                         string templatePath = m->hasPath(templateFileName);
116                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
117                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
118                                 
119                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
120                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
121                 #ifdef USE_MPI
122                         for (int i = 0; i < templateSeqs.size(); i++) {
123                                         
124                                 if (m->control_pressed) { return 0; } 
125                                         
126                                 string leftFrag = templateSeqs[i]->getUnaligned();
127                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
128                                         
129                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
130                                 databaseLeft->addSequence(leftTemp);    
131                         }
132                         databaseLeft->generateDB();
133                         databaseLeft->setNumSeqs(templateSeqs.size());
134                         
135                         for (int i = 0; i < templateSeqs.size(); i++) {
136                                 if (m->control_pressed) { return 0; } 
137                                         
138                                 string rightFrag = templateSeqs[i]->getUnaligned();
139                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
140                                         
141                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
142                                 databaseRight->addSequence(rightTemp);  
143                         }
144                         databaseRight->generateDB();
145                         databaseRight->setNumSeqs(templateSeqs.size());
146
147                 #else   
148                         //leftside
149                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
150                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
151                         bool needToGenerateLeft = true;
152                         
153                         if(kmerFileTestLeft){   
154                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
155                                 if (GoodFile) {  needToGenerateLeft = false;    }
156                         }
157                         
158                         if(needToGenerateLeft){ 
159                         
160                                 for (int i = 0; i < templateSeqs.size(); i++) {
161                                         
162                                         if (m->control_pressed) { return 0; } 
163                                         
164                                         string leftFrag = templateSeqs[i]->getUnaligned();
165                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
166                                         
167                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
168                                         databaseLeft->addSequence(leftTemp);    
169                                 }
170                                 databaseLeft->generateDB();
171                                 
172                         }else { 
173                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
174                         }
175                         kmerFileTestLeft.close();
176                         
177                         databaseLeft->setNumSeqs(templateSeqs.size());
178                         
179                         //rightside
180                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
181                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
182                         bool needToGenerateRight = true;
183                         
184                         if(kmerFileTestRight){  
185                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
186                                 if (GoodFile) {  needToGenerateRight = false;   }
187                         }
188                         
189                         if(needToGenerateRight){        
190                         
191                                 for (int i = 0; i < templateSeqs.size(); i++) {
192                                         if (m->control_pressed) { return 0; } 
193                                         
194                                         string rightFrag = templateSeqs[i]->getUnaligned();
195                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
196                                         
197                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
198                                         databaseRight->addSequence(rightTemp);  
199                                 }
200                                 databaseRight->generateDB();
201                                 
202                         }else { 
203                                 databaseRight->readKmerDB(kmerFileTestRight);   
204                         }
205                         kmerFileTestRight.close();
206                         
207                         databaseRight->setNumSeqs(templateSeqs.size());
208                 #endif  
209                 }else if (searchMethod == "blast") {
210                 
211                         //generate blastdb
212                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
213
214                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
215                         databaseLeft->generateDB();
216                         databaseLeft->setNumSeqs(templateSeqs.size());
217                 }
218                 
219                 return 0;
220
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "ChimeraSlayer", "doprep");
224                 exit(1);
225         }
226 }
227 //***************************************************************************************************************
228 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
229         try {
230                 
231                 //when template=self, the query file is sorted from most abundance to least abundant
232                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
233                 vector<Sequence*> userTemplate;
234                 
235                 int myAbund = priority[q->getName()];
236                 
237                 for (int i = 0; i < templateSeqs.size(); i++) {
238                         
239                         if (m->control_pressed) { return userTemplate; } 
240                         
241                         //have I reached a sequence with the same abundance as myself?
242                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
243                         
244                         //if its am not chimeric add it
245                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
246                                 userTemplate.push_back(templateSeqs[i]); 
247                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
248                         }
249                 }
250                 
251                 string  kmerDBNameLeft;
252                 string  kmerDBNameRight;
253                 
254                 //generate the kmerdb to pass to maligner
255                 if (searchMethod == "kmer") { 
256                         string templatePath = m->hasPath(templateFileName);
257                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
258                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
259                         
260                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
261                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
262 #ifdef USE_MPI
263                         for (int i = 0; i < userTemplate.size(); i++) {
264                                 
265                                 if (m->control_pressed) { return userTemplate; } 
266                                 
267                                 string leftFrag = userTemplate[i]->getUnaligned();
268                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
269                                 
270                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
271                                 databaseLeft->addSequence(leftTemp);    
272                         }
273                         databaseLeft->generateDB();
274                         databaseLeft->setNumSeqs(userTemplate.size());
275                         
276                         for (int i = 0; i < userTemplate.size(); i++) {
277                                 if (m->control_pressed) { return userTemplate; } 
278                                 
279                                 string rightFrag = userTemplate[i]->getUnaligned();
280                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
281                                 
282                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
283                                 databaseRight->addSequence(rightTemp);  
284                         }
285                         databaseRight->generateDB();
286                         databaseRight->setNumSeqs(userTemplate.size());
287                         
288 #else   
289                         
290                         
291                         for (int i = 0; i < userTemplate.size(); i++) {
292                                 
293                                 if (m->control_pressed) { return userTemplate; } 
294                                 
295                                 string leftFrag = userTemplate[i]->getUnaligned();
296                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
297                                 
298                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
299                                 databaseLeft->addSequence(leftTemp);    
300                         }
301                         databaseLeft->generateDB();
302                         databaseLeft->setNumSeqs(userTemplate.size());
303                                 
304                         for (int i = 0; i < userTemplate.size(); i++) {
305                                 if (m->control_pressed) { return userTemplate; }  
306                                         
307                                 string rightFrag = userTemplate[i]->getUnaligned();
308                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
309                                         
310                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
311                                 databaseRight->addSequence(rightTemp);  
312                         }
313                         databaseRight->generateDB();
314                         databaseRight->setNumSeqs(userTemplate.size());
315 #endif  
316                 }else if (searchMethod == "blast") {
317                         
318                         //generate blastdb
319                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
320
321                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
322                         databaseLeft->generateDB();
323                         databaseLeft->setNumSeqs(userTemplate.size());
324                 }
325                 
326                 return userTemplate;
327                 
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
331                 exit(1);
332         }
333 }
334
335 //***************************************************************************************************************
336 ChimeraSlayer::~ChimeraSlayer() {       
337         delete decalc;  
338         if (templateFileName != "self") {
339                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
340                 else if (searchMethod == "blast") {  delete databaseLeft; }
341         }
342 }
343 //***************************************************************************************************************
344 void ChimeraSlayer::printHeader(ostream& out) {
345         m->mothurOutEndLine();
346         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
347         m->mothurOutEndLine();
348         
349         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
350 }
351 //***************************************************************************************************************
352 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
353         try {
354                 Sequence* trim = NULL;
355                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
356                 
357                 if (chimeraFlags == "yes") {
358                         string chimeraFlag = "no";
359                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
360                            ||
361                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
362                         
363                         
364                         if (chimeraFlag == "yes") {     
365                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
366                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
367                                         outAcc << querySeq->getName() << endl;
368                                         
369                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
370                                         
371                                         if (trimChimera) {  
372                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
373                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
374                                                 
375                                                 string newAligned = trim->getAligned();
376
377                                                 if (lengthLeft > lengthRight) { //trim right
378                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
379                                                 }else { //trim left
380                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
381                                                 }
382                                                 trim->setAligned(newAligned);
383                                         }
384                                 }
385                         }
386                         
387                         printBlock(chimeraResults[0], chimeraFlag, out);
388                         out << endl;
389                 }else {  
390                         out << querySeq->getName() << "\tno" << endl; 
391                 }
392                 
393                 return trim;
394                 
395         }
396         catch(exception& e) {
397                 m->errorOut(e, "ChimeraSlayer", "print");
398                 exit(1);
399         }
400 }
401 //***************************************************************************************************************
402 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
403         try {
404                 Sequence* trim = NULL;
405                                 
406                 if (trimChimera) { 
407                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
408                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
409                 }
410                 
411                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
412                         
413                         string chimeraFlag = "no";
414                         if (leftPiece.flag == "yes") {
415                                 
416                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
417                                         ||
418                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
419                         }
420                         
421                         if (rightPiece.flag == "yes") {
422                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
423                                  ||
424                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
425                         }
426                         
427                         bool rightChimeric = false;
428                         bool leftChimeric = false;
429                         
430                         if (chimeraFlag == "yes") {     
431                                 //which peice is chimeric or are both
432                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
433                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
434                                 
435                                 if (rightChimeric || leftChimeric) {
436                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
437                                         outAcc << querySeq->getName() << endl;
438                                         
439                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
440                                         
441                                         if (trimChimera) {  
442                                                 string newAligned = trim->getAligned();
443                                                                                                 
444                                                 //right side is fine so keep that
445                                                 if ((leftChimeric) && (!rightChimeric)) {
446                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
447                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
448                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449                                                 }else { //both sides are chimeric, keep longest piece
450                                                         
451                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
452                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
453                                                         
454                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
455                                                         int length = lengthLeftLeft;
456                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
457                                                         
458                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
459                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
460                                                         
461                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
462                                                         if (lengthRightRight > length) { longest = 4; }
463                                                         
464                                                         if (longest == 1) { //leftleft
465                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466                                                         }else if (longest == 2) { //leftright
467                                                                 //get rid of leftleft
468                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
469                                                                 //get rid of right
470                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
471                                                         }else if (longest == 3) { //rightleft
472                                                                 //get rid of left
473                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
474                                                                 //get rid of rightright
475                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
476                                                         }else { //rightright
477                                                                 //get rid of left
478                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
479                                                                 //get rid of rightleft
480                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
481                                                         }
482                                                 }
483                                                         
484                                                 trim->setAligned(newAligned);
485                                         }
486                                         
487                                 }
488                         }
489                         
490                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
491                         out << endl;
492                 }else {  
493                         out << querySeq->getName() << "\tno" << endl;  
494                 }
495                 
496                 return trim;
497                 
498         }
499         catch(exception& e) {
500                 m->errorOut(e, "ChimeraSlayer", "print");
501                 exit(1);
502         }
503 }
504
505 #ifdef USE_MPI
506 //***************************************************************************************************************
507 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
508         try {
509                 MPI_Status status;
510                 bool results = false;
511                 string outAccString = "";
512                 string outputString = "";
513                 
514                 Sequence* trim = NULL;
515                 
516                 if (trimChimera) { 
517                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
518                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
519                 }
520                 
521                 
522                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
523                         
524                         string chimeraFlag = "no";
525                         if (leftPiece.flag == "yes") {
526                                 
527                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
528                                    ||
529                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
530                         }
531                         
532                         if (rightPiece.flag == "yes") {
533                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
534                                         ||
535                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
536                         }
537                         
538                         bool rightChimeric = false;
539                         bool leftChimeric = false;
540                         
541                         if (chimeraFlag == "yes") {     
542                                 //which peice is chimeric or are both
543                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
544                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
545                                 
546                                 if (rightChimeric || leftChimeric) {
547                                         cout << querySeq->getName() <<  "\tyes" << endl;
548                                         outAccString += querySeq->getName() + "\n";
549                                         results = true;
550                                         
551                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
552                                         
553                                         //write to accnos file
554                                         int length = outAccString.length();
555                                         char* buf2 = new char[length];
556                                         memcpy(buf2, outAccString.c_str(), length);
557                                 
558                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
559                                         delete buf2;
560                                         
561                                         if (trimChimera) {  
562                                                 string newAligned = trim->getAligned();
563                                                 
564                                                 //right side is fine so keep that
565                                                 if ((leftChimeric) && (!rightChimeric)) {
566                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
567                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
568                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
569                                                 }else { //both sides are chimeric, keep longest piece
570                                                         
571                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
572                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
573                                                         
574                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
575                                                         int length = lengthLeftLeft;
576                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
577                                                         
578                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
579                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
580                                                         
581                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
582                                                         if (lengthRightRight > length) { longest = 4; }
583                                                         
584                                                         if (longest == 1) { //leftleft
585                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
586                                                         }else if (longest == 2) { //leftright
587                                                                 //get rid of leftleft
588                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
589                                                                 //get rid of right
590                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
591                                                         }else if (longest == 3) { //rightleft
592                                                                 //get rid of left
593                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
594                                                                 //get rid of rightright
595                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
596                                                         }else { //rightright
597                                                                 //get rid of left
598                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
599                                                                 //get rid of rightleft
600                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
601                                                         }
602                                                 }
603                                                 
604                                                 trim->setAligned(newAligned);
605                                         }
606                                         
607                                 }
608                         }
609                         
610                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
611                         outputString += "\n";
612                 
613                         //write to output file
614                         int length = outputString.length();
615                         char* buf = new char[length];
616                         memcpy(buf, outputString.c_str(), length);
617                                 
618                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
619                         delete buf;
620
621                 }else {  
622                         outputString += querySeq->getName() + "\tno\n";  
623         
624                         //write to output file
625                         int length = outputString.length();
626                         char* buf = new char[length];
627                         memcpy(buf, outputString.c_str(), length);
628                                 
629                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
630                         delete buf;
631                 }
632                 
633                 
634                 return trim;
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "ChimeraSlayer", "print");
638                 exit(1);
639         }
640 }
641 //***************************************************************************************************************
642 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
643         try {
644                 MPI_Status status;
645                 bool results = false;
646                 string outAccString = "";
647                 string outputString = "";
648                 
649                 Sequence* trim = NULL;
650                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
651                 
652                 if (chimeraFlags == "yes") {
653                         string chimeraFlag = "no";
654                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
655                            ||
656                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
657                         
658                         
659                         if (chimeraFlag == "yes") {     
660                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
661                                         cout << querySeq->getName() <<  "\tyes" << endl;
662                                         outAccString += querySeq->getName() + "\n";
663                                         results = true;
664                                         
665                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
666                                         
667                                         //write to accnos file
668                                         int length = outAccString.length();
669                                         char* buf2 = new char[length];
670                                         memcpy(buf2, outAccString.c_str(), length);
671                                         
672                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
673                                         delete buf2;
674                                         
675                                         if (trimChimera) {  
676                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
677                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
678                                                 
679                                                 string newAligned = trim->getAligned();
680                                                 if (lengthLeft > lengthRight) { //trim right
681                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
682                                                 }else { //trim left
683                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
684                                                 }
685                                                 trim->setAligned(newAligned);   
686                                         }
687                                 }
688                         }
689                         
690                         outputString = getBlock(chimeraResults[0], chimeraFlag);
691                         outputString += "\n";
692                         
693                         //write to output file
694                         int length = outputString.length();
695                         char* buf = new char[length];
696                         memcpy(buf, outputString.c_str(), length);
697                         
698                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
699                         delete buf;
700                         
701                 }else {  
702                         outputString += querySeq->getName() + "\tno\n";  
703                         
704                         //write to output file
705                         int length = outputString.length();
706                         char* buf = new char[length];
707                         memcpy(buf, outputString.c_str(), length);
708                         
709                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
710                         delete buf;
711                 }
712                 
713                 return trim;
714         }
715         catch(exception& e) {
716                 m->errorOut(e, "ChimeraSlayer", "print");
717                 exit(1);
718         }
719 }
720 #endif
721
722 //***************************************************************************************************************
723 int ChimeraSlayer::getChimeras(Sequence* query) {
724         try {
725                 
726                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
727                 printResults.trimQuery = trimQuery; 
728                 
729                 chimeraFlags = "no";
730                 printResults.flag = "no";
731                 
732                 querySeq = query;
733                 
734                 //you must create a template
735                 vector<Sequence*> thisTemplate;
736                 vector<Sequence*> thisFilteredTemplate;
737                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
738                 else {  thisTemplate = getTemplate(query, thisFilteredTemplate);  } //fills this template and creates the databases
739                 
740                 if (m->control_pressed) {  return 0;  }
741                 
742                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
743                 
744                 //moved this out of maligner - 4/29/11
745                 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
746                 
747                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
748                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
749                 
750                 if (templateFileName == "self") {
751                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
752                         else if (searchMethod == "blast") {  delete databaseLeft; }
753                 }
754         
755                 if (m->control_pressed) {  return 0;  }
756
757                 string chimeraFlag = maligner.getResults(query, decalc);
758
759                 if (m->control_pressed) {  return 0;  }
760                 
761                 vector<results> Results = maligner.getOutput();
762                 
763                 for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
764                 
765                 if (chimeraFlag == "yes") {
766                 
767                         if (realign) {
768                                 vector<Sequence*> parents;
769                                 for (int i = 0; i < Results.size(); i++) {
770 cout << Results[i].parent  << '\t' << Results[i].nastRegionStart << '\t' << Results[i].nastRegionEnd  << endl;
771                                         Sequence* parent = new Sequence(Results[i].parent, Results[i].parentAligned);
772                                         
773                                         parents.push_back(parent);
774                                 }
775                                 
776                                 ChimeraReAligner realigner;
777                                 //realigner.reAlign(query, parents);
778                                 
779                                 for (int i = 0; i < parents.size(); i++) { delete parents[i]; }
780                         }
781         //query->printSequence(cout);
782                         //get sequence that were given from maligner results
783                         vector<SeqDist> seqs;
784                         map<string, float> removeDups;
785                         map<string, float>::iterator itDup;
786                         map<string, string> parentNameSeq;
787                         map<string, string>::iterator itSeq;
788                         for (int j = 0; j < Results.size(); j++) {
789                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
790                                 //only add if you are not a duplicate
791                                 itDup = removeDups.find(Results[j].parent);
792                                 if (itDup == removeDups.end()) { //this is not duplicate
793                                         removeDups[Results[j].parent] = dist;
794                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
795                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
796                                         removeDups[Results[j].parent] = dist;
797                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
798                                 }
799                         }
800                         
801                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
802                                 itSeq = parentNameSeq.find(itDup->first);
803                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
804                                 
805                                 SeqDist member;
806                                 member.seq = seq;
807                                 member.dist = itDup->second;
808                                 
809                                 seqs.push_back(member);
810                         }
811                         
812                         //limit number of parents to explore - default 3
813                         if (Results.size() > parents) {
814                                 //sort by distance
815                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
816                                 //prioritize larger more similiar sequence fragments
817                                 reverse(seqs.begin(), seqs.end());
818                                 
819                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
820                                         delete seqs[k].seq;
821                                         seqs.pop_back();        
822                                 }
823                         }
824                 
825                         //put seqs into vector to send to slayer
826                         vector<Sequence*> seqsForSlayer;
827                         for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
828                         
829                         if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
830
831                         //send to slayer
832                         chimeraFlags = slayer.getResults(query, seqsForSlayer);
833                         if (m->control_pressed) {  return 0;  }
834                         chimeraResults = slayer.getOutput();
835                         
836                         printResults.flag = chimeraFlags;
837                         printResults.results = chimeraResults;
838                         
839                         //free memory
840                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
841                 }
842                 
843                 return 0;
844         }
845         catch(exception& e) {
846                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
847                 exit(1);
848         }
849 }
850 //***************************************************************************************************************
851 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
852         try {
853                 out << querySeq->getName() << '\t';
854                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
855         
856                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
857                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
858                 
859                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
860                 
861         }
862         catch(exception& e) {
863                 m->errorOut(e, "ChimeraSlayer", "printBlock");
864                 exit(1);
865         }
866 }
867 //***************************************************************************************************************
868 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
869         try {
870                 
871                 if ((leftChimeric) && (!rightChimeric)) { //print left
872                         out << querySeq->getName() << '\t';
873                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
874                         
875                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
876                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
877                 
878                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
879                 
880                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
881                         out << querySeq->getName() << '\t';
882                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
883                         
884                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
885                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
886                         
887                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
888                         
889                 }else  { //print both results
890                         if (leftdata.flag == "yes") {
891                                 out << querySeq->getName() + "_LEFT" << '\t';
892                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
893                                 
894                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
895                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
896                                 
897                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
898                         }
899                         
900                         if (rightdata.flag == "yes") {
901                                 if (leftdata.flag == "yes") { out << endl; }
902                                 
903                                 out << querySeq->getName() + "_RIGHT"<< '\t';
904                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
905                                 
906                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
907                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
908                                 
909                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
910                 
911                         }
912                 }
913         }
914         catch(exception& e) {
915                 m->errorOut(e, "ChimeraSlayer", "printBlock");
916                 exit(1);
917         }
918 }
919 //***************************************************************************************************************
920 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
921         try {
922                 
923                 string out = "";
924                 
925                 if ((leftChimeric) && (!rightChimeric)) { //get left
926                         out += querySeq->getName() + "\t";
927                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
928                         
929                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
930                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
931                         
932                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
933                         
934                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
935                         out += querySeq->getName() + "\t";
936                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
937                         
938                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
939                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
940                         
941                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
942                         
943                 }else  { //print both results
944                         
945                         if (leftdata.flag == "yes") {
946                                 out += querySeq->getName() + "_LEFT\t";
947                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
948                                 
949                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
950                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
951                                 
952                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
953                         }
954                         
955                         if (rightdata.flag == "yes") {
956                                 if (leftdata.flag == "yes") { out += "\n"; }
957                                 out +=  querySeq->getName() + "_RIGHT\t";
958                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
959                                 
960                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
961                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
962                                 
963                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
964                         }
965                 }
966                 
967                 return out;
968                 
969         }
970         catch(exception& e) {
971                 m->errorOut(e, "ChimeraSlayer", "getBlock");
972                 exit(1);
973         }
974 }
975 //***************************************************************************************************************
976 string ChimeraSlayer::getBlock(data_struct data, string flag){
977         try {
978                 
979                 string outputString = "";
980                 
981                 outputString += querySeq->getName() + "\t";
982                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
983                         
984                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
985                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
986                 
987                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
988                 
989                 return outputString;
990         }
991         catch(exception& e) {
992                 m->errorOut(e, "ChimeraSlayer", "getBlock");
993                 exit(1);
994         }
995 }
996 //***************************************************************************************************************
997 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
998         try {
999                 
1000                 vector<Sequence*> refSeqs;
1001                 
1002                 if (searchMethod == "distance") {
1003                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1004                         Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1005                         runFilter(newSeq);
1006                         refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted);
1007                         delete newSeq;
1008                 }else if (searchMethod == "blast")  {
1009                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1010                 }else if (searchMethod == "kmer") {
1011                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1012                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1013                 
1014                 return refSeqs;
1015         }
1016         catch(exception& e) {
1017                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1018                 exit(1);
1019         }
1020 }
1021 //***************************************************************************************************************/
1022 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1023         try {   
1024                 
1025                 vector<Sequence*> refResults;
1026                 
1027                 //get parts of query
1028                 string queryUnAligned = q->getUnaligned();
1029                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1030                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1031                 
1032                 Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery);
1033                 Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
1034                 
1035                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
1036                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
1037                 
1038                 vector<int> smaller;
1039                 vector<int> larger;
1040                 
1041                 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1042                 else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1043                 
1044                 //merge results         
1045                 map<int, int> seen;
1046                 map<int, int>::iterator it;
1047                 vector<int> mergedResults;
1048                 for (int i = 0; i < smaller.size(); i++) {
1049                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1050         
1051                         //add left if you havent already
1052                         it = seen.find(smaller[i]);
1053                         if (it == seen.end()) {  
1054                                 mergedResults.push_back(smaller[i]);
1055                                 seen[smaller[i]] = smaller[i];
1056                         }
1057                         
1058                         //add right if you havent already
1059                         it = seen.find(larger[i]);
1060                         if (it == seen.end()) {  
1061                                 mergedResults.push_back(larger[i]);
1062                                 seen[larger[i]] = larger[i];
1063                         }
1064                 }
1065                 
1066                 for (int i = smaller.size(); i < larger.size(); i++) {
1067                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1068                         
1069                         //add right if you havent already
1070                         it = seen.find(larger[i]);
1071                         if (it == seen.end()) {  
1072                                 mergedResults.push_back(larger[i]);
1073                                 seen[larger[i]] = larger[i];
1074                         }
1075                 }
1076
1077                 for (int i = 0; i < mergedResults.size(); i++) {
1078                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1079                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1080                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1081                                 refResults.push_back(temp);
1082                                 
1083                         }
1084                 }
1085                         
1086                 delete queryRight;
1087                 delete queryLeft;
1088                 
1089                 return refResults;
1090         }
1091         catch(exception& e) {
1092                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1093                 exit(1);
1094         }
1095 }
1096 //***************************************************************************************************************
1097 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1098         try {   
1099                 vector<Sequence*> refResults;
1100                 
1101                 //get parts of query
1102                 string queryUnAligned = q->getUnaligned();
1103                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1104                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1105                 
1106                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1107                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1108                 
1109                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1110                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1111                 
1112                 //merge results         
1113                 map<int, int> seen;
1114                 map<int, int>::iterator it;
1115                         vector<int> mergedResults;
1116                 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1117                         
1118                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1119                         
1120                         //add left if you havent already
1121                         it = seen.find(tempIndexesLeft[i]);
1122                         if (it == seen.end()) {  
1123                                 mergedResults.push_back(tempIndexesLeft[i]);
1124                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1125                         }
1126                         
1127                         //add right if you havent already
1128                         it = seen.find(tempIndexesRight[i]);
1129                         if (it == seen.end()) {  
1130                                 mergedResults.push_back(tempIndexesRight[i]);
1131                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1132                         }
1133                 }
1134                 
1135                 //numWanted = mergedResults.size();
1136                         
1137                 //cout << q->getName() << endl;         
1138                 
1139                 for (int i = 0; i < mergedResults.size(); i++) {
1140                         //cout << db[mergedResults[i]]->getName() << endl;      
1141                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1142                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1143                                 refResults.push_back(temp);
1144                         }
1145                 }
1146                 //cout << endl;         
1147                 delete queryRight;
1148                 delete queryLeft;
1149                 
1150                 return refResults;
1151         }
1152         catch(exception& e) {
1153                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1154                 exit(1);
1155         }
1156 }
1157 //***************************************************************************************************************
1158
1159