]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
chimera.slayer
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {       
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37         
38                 decalc = new DeCalculator();    
39                 
40                 doPrep();
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
44                 exit(1);
45         }
46 }
47 //***************************************************************************************************************
48 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
49                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
50         try {
51                 fastafile = file; templateSeqs = readSeqs(fastafile);
52                 templateFileName = temp; 
53                 searchMethod = mode;
54                 kmerSize = k;
55                 match = ms;
56                 misMatch = mms;
57                 window = win;
58                 divR = div;
59                 minSim = minsim;
60                 minCov = mincov;
61                 minBS = minbs;
62                 minSNP = minsnp;
63                 parents = par;
64                 iters = it;
65                 increment = inc;
66                 numWanted = numw;
67                 realign = r; 
68                 trimChimera = trim;
69                 priority = prior;
70                 
71                 decalc = new DeCalculator();    
72                 
73                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
74                 
75                 if (searchMethod == "distance") { 
76                         createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
77                         
78                         //run filter on template copying templateSeqs into filteredTemplateSeqs
79                         for (int i = 0; i < templateSeqs.size(); i++) {  
80                                 if (m->control_pressed) {  break; }
81                                 
82                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
83                                 runFilter(newSeq);  
84                                 filteredTemplateSeqs.push_back(newSeq);
85                         }
86                 }
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
90                 exit(1);
91         }
92 }
93 //***************************************************************************************************************
94 int ChimeraSlayer::doPrep() {
95         try {
96                 if (searchMethod == "distance") { 
97                         //read in all query seqs
98                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
99                 
100                         vector<Sequence*> temp = templateSeqs;
101                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
102                 
103                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
104                 
105                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
106                 
107                         if (m->control_pressed) {  return 0; } 
108                 
109                         //run filter on template copying templateSeqs into filteredTemplateSeqs
110                         for (int i = 0; i < templateSeqs.size(); i++) {  
111                                 if (m->control_pressed) {  return 0; }
112                                 
113                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
114                                 runFilter(newSeq);  
115                                 filteredTemplateSeqs.push_back(newSeq);
116                         }
117                 }
118                 string  kmerDBNameLeft;
119                 string  kmerDBNameRight;
120         
121                 //generate the kmerdb to pass to maligner
122                 if (searchMethod == "kmer") { 
123                         string templatePath = m->hasPath(templateFileName);
124                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
125                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
126                                 
127                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
128                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
129                 #ifdef USE_MPI
130                         for (int i = 0; i < templateSeqs.size(); i++) {
131                                         
132                                 if (m->control_pressed) { return 0; } 
133                                         
134                                 string leftFrag = templateSeqs[i]->getUnaligned();
135                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
136                                         
137                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
138                                 databaseLeft->addSequence(leftTemp);    
139                         }
140                         databaseLeft->generateDB();
141                         databaseLeft->setNumSeqs(templateSeqs.size());
142                         
143                         for (int i = 0; i < templateSeqs.size(); i++) {
144                                 if (m->control_pressed) { return 0; } 
145                                         
146                                 string rightFrag = templateSeqs[i]->getUnaligned();
147                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
148                                         
149                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
150                                 databaseRight->addSequence(rightTemp);  
151                         }
152                         databaseRight->generateDB();
153                         databaseRight->setNumSeqs(templateSeqs.size());
154
155                 #else   
156                         //leftside
157                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
158                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
159                         bool needToGenerateLeft = true;
160                         
161                         if(kmerFileTestLeft){   
162                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
163                                 if (GoodFile) {  needToGenerateLeft = false;    }
164                         }
165                         
166                         if(needToGenerateLeft){ 
167                         
168                                 for (int i = 0; i < templateSeqs.size(); i++) {
169                                         
170                                         if (m->control_pressed) { return 0; } 
171                                         
172                                         string leftFrag = templateSeqs[i]->getUnaligned();
173                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
174                                         
175                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
176                                         databaseLeft->addSequence(leftTemp);    
177                                 }
178                                 databaseLeft->generateDB();
179                                 
180                         }else { 
181                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
182                         }
183                         kmerFileTestLeft.close();
184                         
185                         databaseLeft->setNumSeqs(templateSeqs.size());
186                         
187                         //rightside
188                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
189                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
190                         bool needToGenerateRight = true;
191                         
192                         if(kmerFileTestRight){  
193                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
194                                 if (GoodFile) {  needToGenerateRight = false;   }
195                         }
196                         
197                         if(needToGenerateRight){        
198                         
199                                 for (int i = 0; i < templateSeqs.size(); i++) {
200                                         if (m->control_pressed) { return 0; } 
201                                         
202                                         string rightFrag = templateSeqs[i]->getUnaligned();
203                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
204                                         
205                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
206                                         databaseRight->addSequence(rightTemp);  
207                                 }
208                                 databaseRight->generateDB();
209                                 
210                         }else { 
211                                 databaseRight->readKmerDB(kmerFileTestRight);   
212                         }
213                         kmerFileTestRight.close();
214                         
215                         databaseRight->setNumSeqs(templateSeqs.size());
216                 #endif  
217                 }else if (searchMethod == "blast") {
218                 
219                         //generate blastdb
220                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
221
222                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
223                         databaseLeft->generateDB();
224                         databaseLeft->setNumSeqs(templateSeqs.size());
225                 }
226                 
227                 return 0;
228
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "ChimeraSlayer", "doprep");
232                 exit(1);
233         }
234 }
235 //***************************************************************************************************************
236 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& userTemplateFiltered) {
237         try {
238                 
239                 //when template=self, the query file is sorted from most abundance to least abundant
240                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
241                 vector<Sequence*> userTemplate;
242                 
243                 int myAbund = priority[q->getName()];
244                 
245                 for (int i = 0; i < templateSeqs.size(); i++) {
246                         
247                         if (m->control_pressed) { return userTemplate; } 
248                         
249                         //have I reached a sequence with the same abundance as myself?
250                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
251                         
252                         //if its am not chimeric add it
253                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
254                                 userTemplate.push_back(templateSeqs[i]); 
255                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
256                         }
257                 }
258                 
259                 string  kmerDBNameLeft;
260                 string  kmerDBNameRight;
261                 
262                 //generate the kmerdb to pass to maligner
263                 if (searchMethod == "kmer") { 
264                         string templatePath = m->hasPath(templateFileName);
265                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
266                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
267                         
268                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
269                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
270 #ifdef USE_MPI
271                         for (int i = 0; i < userTemplate.size(); i++) {
272                                 
273                                 if (m->control_pressed) { return userTemplate; } 
274                                 
275                                 string leftFrag = userTemplate[i]->getUnaligned();
276                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
277                                 
278                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
279                                 databaseLeft->addSequence(leftTemp);    
280                         }
281                         databaseLeft->generateDB();
282                         databaseLeft->setNumSeqs(userTemplate.size());
283                         
284                         for (int i = 0; i < userTemplate.size(); i++) {
285                                 if (m->control_pressed) { return userTemplate; } 
286                                 
287                                 string rightFrag = userTemplate[i]->getUnaligned();
288                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
289                                 
290                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
291                                 databaseRight->addSequence(rightTemp);  
292                         }
293                         databaseRight->generateDB();
294                         databaseRight->setNumSeqs(userTemplate.size());
295                         
296 #else   
297                         
298                         
299                         for (int i = 0; i < userTemplate.size(); i++) {
300                                 
301                                 if (m->control_pressed) { return userTemplate; } 
302                                 
303                                 string leftFrag = userTemplate[i]->getUnaligned();
304                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
305                                 
306                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
307                                 databaseLeft->addSequence(leftTemp);    
308                         }
309                         databaseLeft->generateDB();
310                         databaseLeft->setNumSeqs(userTemplate.size());
311                                 
312                         for (int i = 0; i < userTemplate.size(); i++) {
313                                 if (m->control_pressed) { return userTemplate; }  
314                                         
315                                 string rightFrag = userTemplate[i]->getUnaligned();
316                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
317                                         
318                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
319                                 databaseRight->addSequence(rightTemp);  
320                         }
321                         databaseRight->generateDB();
322                         databaseRight->setNumSeqs(userTemplate.size());
323 #endif  
324                 }else if (searchMethod == "blast") {
325                         
326                         //generate blastdb
327                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
328
329                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
330                         databaseLeft->generateDB();
331                         databaseLeft->setNumSeqs(userTemplate.size());
332                 }
333                 
334                 return userTemplate;
335                 
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
339                 exit(1);
340         }
341 }
342
343 //***************************************************************************************************************
344 ChimeraSlayer::~ChimeraSlayer() {       
345         delete decalc;  
346         if (templateFileName != "self") {
347                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
348                 else if (searchMethod == "blast") {  delete databaseLeft; }
349         }
350 }
351 //***************************************************************************************************************
352 void ChimeraSlayer::printHeader(ostream& out) {
353         m->mothurOutEndLine();
354         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
355         m->mothurOutEndLine();
356         
357         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
358 }
359 //***************************************************************************************************************
360 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
361         try {
362                 Sequence* trim = NULL;
363                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
364                 
365                 if (chimeraFlags == "yes") {
366                         string chimeraFlag = "no";
367                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
368                            ||
369                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
370                         
371                         
372                         if (chimeraFlag == "yes") {     
373                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
374                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
375                                         outAcc << querySeq->getName() << endl;
376                                         
377                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
378                                         
379                                         if (trimChimera) {  
380                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
381                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
382                                                 
383                                                 string newAligned = trim->getAligned();
384
385                                                 if (lengthLeft > lengthRight) { //trim right
386                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
387                                                 }else { //trim left
388                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
389                                                 }
390                                                 trim->setAligned(newAligned);
391                                         }
392                                 }
393                         }
394                         
395                         printBlock(chimeraResults[0], chimeraFlag, out);
396                         out << endl;
397                 }else {  
398                         out << querySeq->getName() << "\tno" << endl; 
399                 }
400                 
401                 return trim;
402                 
403         }
404         catch(exception& e) {
405                 m->errorOut(e, "ChimeraSlayer", "print");
406                 exit(1);
407         }
408 }
409 //***************************************************************************************************************
410 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
411         try {
412                 Sequence* trim = NULL;
413                                 
414                 if (trimChimera) { 
415                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
416                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
417                 }
418                 
419                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
420                         
421                         string chimeraFlag = "no";
422                         if (leftPiece.flag == "yes") {
423                                 
424                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
425                                         ||
426                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
427                         }
428                         
429                         if (rightPiece.flag == "yes") {
430                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
431                                  ||
432                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
433                         }
434                         
435                         bool rightChimeric = false;
436                         bool leftChimeric = false;
437                         
438                         if (chimeraFlag == "yes") {     
439                                 //which peice is chimeric or are both
440                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
441                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
442                                 
443                                 if (rightChimeric || leftChimeric) {
444                                         m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
445                                         outAcc << querySeq->getName() << endl;
446                                         
447                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
448                                         
449                                         if (trimChimera) {  
450                                                 string newAligned = trim->getAligned();
451                                                                                                 
452                                                 //right side is fine so keep that
453                                                 if ((leftChimeric) && (!rightChimeric)) {
454                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
455                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
456                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
457                                                 }else { //both sides are chimeric, keep longest piece
458                                                         
459                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
460                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
461                                                         
462                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
463                                                         int length = lengthLeftLeft;
464                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
465                                                         
466                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
467                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
468                                                         
469                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
470                                                         if (lengthRightRight > length) { longest = 4; }
471                                                         
472                                                         if (longest == 1) { //leftleft
473                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
474                                                         }else if (longest == 2) { //leftright
475                                                                 //get rid of leftleft
476                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
477                                                                 //get rid of right
478                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
479                                                         }else if (longest == 3) { //rightleft
480                                                                 //get rid of left
481                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
482                                                                 //get rid of rightright
483                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
484                                                         }else { //rightright
485                                                                 //get rid of left
486                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
487                                                                 //get rid of rightleft
488                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
489                                                         }
490                                                 }
491                                                         
492                                                 trim->setAligned(newAligned);
493                                         }
494                                         
495                                 }
496                         }
497                         
498                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
499                         out << endl;
500                 }else {  
501                         out << querySeq->getName() << "\tno" << endl;  
502                 }
503                 
504                 return trim;
505                 
506         }
507         catch(exception& e) {
508                 m->errorOut(e, "ChimeraSlayer", "print");
509                 exit(1);
510         }
511 }
512
513 #ifdef USE_MPI
514 //***************************************************************************************************************
515 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
516         try {
517                 MPI_Status status;
518                 bool results = false;
519                 string outAccString = "";
520                 string outputString = "";
521                 
522                 Sequence* trim = NULL;
523                 
524                 if (trimChimera) { 
525                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
526                         trim = new Sequence(leftPiece.trimQuery.getName(), aligned); 
527                 }
528                 
529                 
530                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
531                         
532                         string chimeraFlag = "no";
533                         if (leftPiece.flag == "yes") {
534                                 
535                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
536                                    ||
537                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
538                         }
539                         
540                         if (rightPiece.flag == "yes") {
541                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
542                                         ||
543                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
544                         }
545                         
546                         bool rightChimeric = false;
547                         bool leftChimeric = false;
548                         
549                         if (chimeraFlag == "yes") {     
550                                 //which peice is chimeric or are both
551                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
552                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
553                                 
554                                 if (rightChimeric || leftChimeric) {
555                                         cout << querySeq->getName() <<  "\tyes" << endl;
556                                         outAccString += querySeq->getName() + "\n";
557                                         results = true;
558                                         
559                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
560                                         
561                                         //write to accnos file
562                                         int length = outAccString.length();
563                                         char* buf2 = new char[length];
564                                         memcpy(buf2, outAccString.c_str(), length);
565                                 
566                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
567                                         delete buf2;
568                                         
569                                         if (trimChimera) {  
570                                                 string newAligned = trim->getAligned();
571                                                 
572                                                 //right side is fine so keep that
573                                                 if ((leftChimeric) && (!rightChimeric)) {
574                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
575                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
576                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
577                                                 }else { //both sides are chimeric, keep longest piece
578                                                         
579                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
580                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
581                                                         
582                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
583                                                         int length = lengthLeftLeft;
584                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
585                                                         
586                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
587                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
588                                                         
589                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
590                                                         if (lengthRightRight > length) { longest = 4; }
591                                                         
592                                                         if (longest == 1) { //leftleft
593                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
594                                                         }else if (longest == 2) { //leftright
595                                                                 //get rid of leftleft
596                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
597                                                                 //get rid of right
598                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
599                                                         }else if (longest == 3) { //rightleft
600                                                                 //get rid of left
601                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
602                                                                 //get rid of rightright
603                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
604                                                         }else { //rightright
605                                                                 //get rid of left
606                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
607                                                                 //get rid of rightleft
608                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
609                                                         }
610                                                 }
611                                                 
612                                                 trim->setAligned(newAligned);
613                                         }
614                                         
615                                 }
616                         }
617                         
618                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
619                         outputString += "\n";
620                 
621                         //write to output file
622                         int length = outputString.length();
623                         char* buf = new char[length];
624                         memcpy(buf, outputString.c_str(), length);
625                                 
626                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
627                         delete buf;
628
629                 }else {  
630                         outputString += querySeq->getName() + "\tno\n";  
631         
632                         //write to output file
633                         int length = outputString.length();
634                         char* buf = new char[length];
635                         memcpy(buf, outputString.c_str(), length);
636                                 
637                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
638                         delete buf;
639                 }
640                 
641                 
642                 return trim;
643         }
644         catch(exception& e) {
645                 m->errorOut(e, "ChimeraSlayer", "print");
646                 exit(1);
647         }
648 }
649 //***************************************************************************************************************
650 Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
651         try {
652                 MPI_Status status;
653                 bool results = false;
654                 string outAccString = "";
655                 string outputString = "";
656                 
657                 Sequence* trim = NULL;
658                 if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
659                 
660                 if (chimeraFlags == "yes") {
661                         string chimeraFlag = "no";
662                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
663                            ||
664                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
665                         
666                         
667                         if (chimeraFlag == "yes") {     
668                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
669                                         cout << querySeq->getName() <<  "\tyes" << endl;
670                                         outAccString += querySeq->getName() + "\n";
671                                         results = true;
672                                         
673                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq->getName()); }
674                                         
675                                         //write to accnos file
676                                         int length = outAccString.length();
677                                         char* buf2 = new char[length];
678                                         memcpy(buf2, outAccString.c_str(), length);
679                                         
680                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
681                                         delete buf2;
682                                         
683                                         if (trimChimera) {  
684                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
685                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
686                                                 
687                                                 string newAligned = trim->getAligned();
688                                                 if (lengthLeft > lengthRight) { //trim right
689                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
690                                                 }else { //trim left
691                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
692                                                 }
693                                                 trim->setAligned(newAligned);   
694                                         }
695                                 }
696                         }
697                         
698                         outputString = getBlock(chimeraResults[0], chimeraFlag);
699                         outputString += "\n";
700                         
701                         //write to output file
702                         int length = outputString.length();
703                         char* buf = new char[length];
704                         memcpy(buf, outputString.c_str(), length);
705                         
706                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
707                         delete buf;
708                         
709                 }else {  
710                         outputString += querySeq->getName() + "\tno\n";  
711                         
712                         //write to output file
713                         int length = outputString.length();
714                         char* buf = new char[length];
715                         memcpy(buf, outputString.c_str(), length);
716                         
717                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
718                         delete buf;
719                 }
720                 
721                 return trim;
722         }
723         catch(exception& e) {
724                 m->errorOut(e, "ChimeraSlayer", "print");
725                 exit(1);
726         }
727 }
728 #endif
729
730 //***************************************************************************************************************
731 int ChimeraSlayer::getChimeras(Sequence* query) {
732         try {
733                 
734                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
735                 printResults.trimQuery = trimQuery; 
736                 
737                 chimeraFlags = "no";
738                 printResults.flag = "no";
739                 
740                 querySeq = query;
741                 
742                 //you must create a template
743                 vector<Sequence*> thisTemplate;
744                 vector<Sequence*> thisFilteredTemplate;
745                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
746                 else {  thisTemplate = getTemplate(query, thisFilteredTemplate);  } //fills this template and creates the databases
747                 
748                 if (m->control_pressed) {  return 0;  }
749                 
750                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
751                 
752                 //moved this out of maligner - 4/29/11
753                 vector<Sequence*> refSeqs = getRefSeqs(query, thisTemplate, thisFilteredTemplate);
754                 
755                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
756                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
757                 
758                 if (templateFileName == "self") {
759                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
760                         else if (searchMethod == "blast") {  delete databaseLeft; }
761                 }
762         
763                 if (m->control_pressed) {  return 0;  }
764
765                 string chimeraFlag = maligner.getResults(query, decalc);
766
767                 if (m->control_pressed) {  return 0;  }
768                 
769                 vector<results> Results = maligner.getOutput();
770                 
771                 for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
772                 
773                 if (chimeraFlag == "yes") {
774
775                         if (realign) {
776                                 vector<string> parents;
777                                 for (int i = 0; i < Results.size(); i++) {
778                                         parents.push_back(Results[i].parentAligned);
779                                 }
780                                 
781                                 ChimeraReAligner realigner;             
782                                 realigner.reAlign(query, parents);
783
784                         }
785
786                         //get sequence that were given from maligner results
787                         vector<SeqDist> seqs;
788                         map<string, float> removeDups;
789                         map<string, float>::iterator itDup;
790                         map<string, string> parentNameSeq;
791                         map<string, string>::iterator itSeq;
792                         for (int j = 0; j < Results.size(); j++) {
793                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
794                                 //only add if you are not a duplicate
795                                 itDup = removeDups.find(Results[j].parent);
796                                 if (itDup == removeDups.end()) { //this is not duplicate
797                                         removeDups[Results[j].parent] = dist;
798                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
799                                 }else if (dist > itDup->second) { //is this a stronger number for this parent
800                                         removeDups[Results[j].parent] = dist;
801                                         parentNameSeq[Results[j].parent] = Results[j].parentAligned;
802                                 }
803                         }
804                         
805                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
806                                 itSeq = parentNameSeq.find(itDup->first);
807                                 Sequence* seq = new Sequence(itDup->first, itSeq->second);
808                                 
809                                 SeqDist member;
810                                 member.seq = seq;
811                                 member.dist = itDup->second;
812                                 
813                                 seqs.push_back(member);
814                         }
815                         
816                         //limit number of parents to explore - default 3
817                         if (Results.size() > parents) {
818                                 //sort by distance
819                                 sort(seqs.begin(), seqs.end(), compareSeqDist);
820                                 //prioritize larger more similiar sequence fragments
821                                 reverse(seqs.begin(), seqs.end());
822                                 
823                                 for (int k = seqs.size()-1; k > (parents-1); k--)  {  
824                                         delete seqs[k].seq;
825                                         seqs.pop_back();        
826                                 }
827                         }
828                 
829                         //put seqs into vector to send to slayer
830                         vector<Sequence*> seqsForSlayer;
831                         for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
832                         
833                         if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
834
835                         //send to slayer
836                         chimeraFlags = slayer.getResults(query, seqsForSlayer);
837                         if (m->control_pressed) {  return 0;  }
838                         chimeraResults = slayer.getOutput();
839                         
840                         printResults.flag = chimeraFlags;
841                         printResults.results = chimeraResults;
842                         
843                         //free memory
844                         for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
845                 }
846                 //cout << endl << endl;
847                 return 0;
848         }
849         catch(exception& e) {
850                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
851                 exit(1);
852         }
853 }
854 //***************************************************************************************************************
855 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
856         try {
857                 out << querySeq->getName() << '\t';
858                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
859         
860                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
861                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
862                 
863                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
864                 
865         }
866         catch(exception& e) {
867                 m->errorOut(e, "ChimeraSlayer", "printBlock");
868                 exit(1);
869         }
870 }
871 //***************************************************************************************************************
872 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
873         try {
874                 
875                 if ((leftChimeric) && (!rightChimeric)) { //print left
876                         out << querySeq->getName() << '\t';
877                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
878                         
879                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
880                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
881                 
882                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
883                 
884                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
885                         out << querySeq->getName() << '\t';
886                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
887                         
888                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
889                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
890                         
891                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
892                         
893                 }else  { //print both results
894                         if (leftdata.flag == "yes") {
895                                 out << querySeq->getName() + "_LEFT" << '\t';
896                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
897                                 
898                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
899                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
900                                 
901                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
902                         }
903                         
904                         if (rightdata.flag == "yes") {
905                                 if (leftdata.flag == "yes") { out << endl; }
906                                 
907                                 out << querySeq->getName() + "_RIGHT"<< '\t';
908                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
909                                 
910                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
911                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
912                                 
913                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
914                 
915                         }
916                 }
917         }
918         catch(exception& e) {
919                 m->errorOut(e, "ChimeraSlayer", "printBlock");
920                 exit(1);
921         }
922 }
923 //***************************************************************************************************************
924 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
925         try {
926                 
927                 string out = "";
928                 
929                 if ((leftChimeric) && (!rightChimeric)) { //get left
930                         out += querySeq->getName() + "\t";
931                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
932                         
933                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
934                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
935                         
936                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
937                         
938                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
939                         out += querySeq->getName() + "\t";
940                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
941                         
942                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
943                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
944                         
945                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
946                         
947                 }else  { //print both results
948                         
949                         if (leftdata.flag == "yes") {
950                                 out += querySeq->getName() + "_LEFT\t";
951                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
952                                 
953                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
954                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
955                                 
956                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
957                         }
958                         
959                         if (rightdata.flag == "yes") {
960                                 if (leftdata.flag == "yes") { out += "\n"; }
961                                 out +=  querySeq->getName() + "_RIGHT\t";
962                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
963                                 
964                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
965                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
966                                 
967                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
968                         }
969                 }
970                 
971                 return out;
972                 
973         }
974         catch(exception& e) {
975                 m->errorOut(e, "ChimeraSlayer", "getBlock");
976                 exit(1);
977         }
978 }
979 //***************************************************************************************************************
980 string ChimeraSlayer::getBlock(data_struct data, string flag){
981         try {
982                 
983                 string outputString = "";
984                 
985                 outputString += querySeq->getName() + "\t";
986                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
987                         
988                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
989                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
990                 
991                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
992                 
993                 return outputString;
994         }
995         catch(exception& e) {
996                 m->errorOut(e, "ChimeraSlayer", "getBlock");
997                 exit(1);
998         }
999 }
1000 //***************************************************************************************************************
1001 vector<Sequence*> ChimeraSlayer::getRefSeqs(Sequence* q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1002         try {
1003                 
1004                 vector<Sequence*> refSeqs;
1005                 
1006                 if (searchMethod == "distance") {
1007                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1008                         Sequence* newSeq = new Sequence(q->getName(), q->getAligned());
1009                         runFilter(newSeq);
1010                         refSeqs = decalc->findClosest(newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1011                         delete newSeq;
1012                 }else if (searchMethod == "blast")  {
1013                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1014                 }else if (searchMethod == "kmer") {
1015                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1016                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1017                 
1018                 return refSeqs;
1019         }
1020         catch(exception& e) {
1021                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1022                 exit(1);
1023         }
1024 }
1025 //***************************************************************************************************************/
1026 vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1027         try {   
1028                 
1029                 vector<Sequence*> refResults;
1030                 
1031                 //get parts of query
1032                 string queryUnAligned = q->getUnaligned();
1033                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1034                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1035 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1036                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1037                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1038                 
1039                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1040                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1041                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1042                 vector<int> smaller;
1043                 vector<int> larger;
1044                 
1045                 if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1046                 else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1047                 
1048                 //merge results         
1049                 map<int, int> seen;
1050                 map<int, int>::iterator it;
1051                 vector<int> mergedResults;
1052                 for (int i = 0; i < smaller.size(); i++) {
1053                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1054         
1055                         //add left if you havent already
1056                         it = seen.find(smaller[i]);
1057                         if (it == seen.end()) {  
1058                                 mergedResults.push_back(smaller[i]);
1059                                 seen[smaller[i]] = smaller[i];
1060                         }
1061                         
1062                         //add right if you havent already
1063                         it = seen.find(larger[i]);
1064                         if (it == seen.end()) {  
1065                                 mergedResults.push_back(larger[i]);
1066                                 seen[larger[i]] = larger[i];
1067                         }
1068                 }
1069                 
1070                 for (int i = smaller.size(); i < larger.size(); i++) {
1071                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1072                         
1073                         //add right if you havent already
1074                         it = seen.find(larger[i]);
1075                         if (it == seen.end()) {  
1076                                 mergedResults.push_back(larger[i]);
1077                                 seen[larger[i]] = larger[i];
1078                         }
1079                 }
1080
1081                 for (int i = 0; i < mergedResults.size(); i++) {
1082                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1083                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1084                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1085                                 refResults.push_back(temp);
1086                                 
1087                         }
1088                 }
1089                         
1090                 delete queryRight;
1091                 delete queryLeft;
1092                 
1093                 return refResults;
1094         }
1095         catch(exception& e) {
1096                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1097                 exit(1);
1098         }
1099 }
1100 //***************************************************************************************************************
1101 vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db, int num) {
1102         try {   
1103                 vector<Sequence*> refResults;
1104                 
1105                 //get parts of query
1106                 string queryUnAligned = q->getUnaligned();
1107                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1108                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1109                 
1110                 Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
1111                 Sequence* queryRight = new Sequence(q->getName(), rightQuery);
1112                 
1113                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1114                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1115                 
1116                 //merge results         
1117                 map<int, int> seen;
1118                 map<int, int>::iterator it;
1119                         vector<int> mergedResults;
1120                 for (int i = 0; i < tempIndexesLeft.size(); i++) {
1121                         
1122                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1123                         
1124                         //add left if you havent already
1125                         it = seen.find(tempIndexesLeft[i]);
1126                         if (it == seen.end()) {  
1127                                 mergedResults.push_back(tempIndexesLeft[i]);
1128                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1129                         }
1130                         
1131                         //add right if you havent already
1132                         it = seen.find(tempIndexesRight[i]);
1133                         if (it == seen.end()) {  
1134                                 mergedResults.push_back(tempIndexesRight[i]);
1135                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1136                         }
1137                 }
1138                 
1139                 //numWanted = mergedResults.size();
1140                         
1141                 //cout << q->getName() << endl;         
1142                 
1143                 for (int i = 0; i < mergedResults.size(); i++) {
1144                         //cout << db[mergedResults[i]]->getName() << endl;      
1145                         if (db[mergedResults[i]]->getName() != q->getName()) { 
1146                                 Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1147                                 refResults.push_back(temp);
1148                         }
1149                 }
1150                 //cout << endl;         
1151                 delete queryRight;
1152                 delete queryLeft;
1153                 
1154                 return refResults;
1155         }
1156         catch(exception& e) {
1157                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1158                 exit(1);
1159         }
1160 }
1161 //***************************************************************************************************************
1162
1163