]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
added blastlocation to chimera.slayer and fixed issue with clearcut getting current...
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas) : Chimera()  {          
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37                 numNoParents = 0;
38                 blastlocation = blas;
39         
40                 doPrep();
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
44                 exit(1);
45         }
46 }
47 //***************************************************************************************************************
48 //template=self
49 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
50                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas) : Chimera()  {         
51         try {
52                 fastafile = file; templateSeqs = readSeqs(fastafile);
53                 templateFileName = temp; 
54                 searchMethod = mode;
55                 kmerSize = k;
56                 match = ms;
57                 misMatch = mms;
58                 window = win;
59                 divR = div;
60                 minSim = minsim;
61                 minCov = mincov;
62                 minBS = minbs;
63                 minSNP = minsnp;
64                 parents = par;
65                 iters = it;
66                 increment = inc;
67                 numWanted = numw;
68                 realign = r; 
69                 trimChimera = trim;
70                 priority = prior;
71                 numNoParents = 0;
72                 blastlocation = blas;
73                 
74                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
75                 
76                 if (searchMethod == "distance") { 
77                         createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
78                         
79                         //run filter on template copying templateSeqs into filteredTemplateSeqs
80                         for (int i = 0; i < templateSeqs.size(); i++) {  
81                                 if (m->control_pressed) {  break; }
82                                 
83                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
84                                 runFilter(newSeq);  
85                                 filteredTemplateSeqs.push_back(newSeq);
86                         }
87                 }
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
91                 exit(1);
92         }
93 }
94 //***************************************************************************************************************
95 int ChimeraSlayer::doPrep() {
96         try {
97                 if (searchMethod == "distance") { 
98                         //read in all query seqs
99                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
100                 
101                         vector<Sequence*> temp = templateSeqs;
102                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
103                 
104                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
105                 
106                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
107                 
108                         if (m->control_pressed) {  return 0; } 
109                 
110                         //run filter on template copying templateSeqs into filteredTemplateSeqs
111                         for (int i = 0; i < templateSeqs.size(); i++) {  
112                                 if (m->control_pressed) {  return 0; }
113                                 
114                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
115                                 runFilter(newSeq);  
116                                 filteredTemplateSeqs.push_back(newSeq);
117                         }
118                 }
119                 string  kmerDBNameLeft;
120                 string  kmerDBNameRight;
121         
122                 //generate the kmerdb to pass to maligner
123                 if (searchMethod == "kmer") { 
124                         string templatePath = m->hasPath(templateFileName);
125                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
126                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
127                                 
128                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
129                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
130                 #ifdef USE_MPI
131                         for (int i = 0; i < templateSeqs.size(); i++) {
132                                         
133                                 if (m->control_pressed) { return 0; } 
134                                         
135                                 string leftFrag = templateSeqs[i]->getUnaligned();
136                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
137                                         
138                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
139                                 databaseLeft->addSequence(leftTemp);    
140                         }
141                         databaseLeft->generateDB();
142                         databaseLeft->setNumSeqs(templateSeqs.size());
143                         
144                         for (int i = 0; i < templateSeqs.size(); i++) {
145                                 if (m->control_pressed) { return 0; } 
146                                         
147                                 string rightFrag = templateSeqs[i]->getUnaligned();
148                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
149                                         
150                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
151                                 databaseRight->addSequence(rightTemp);  
152                         }
153                         databaseRight->generateDB();
154                         databaseRight->setNumSeqs(templateSeqs.size());
155
156                 #else   
157                         //leftside
158                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
159                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
160                         bool needToGenerateLeft = true;
161                         
162                         if(kmerFileTestLeft){   
163                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
164                                 if (GoodFile) {  needToGenerateLeft = false;    }
165                         }
166                         
167                         if(needToGenerateLeft){ 
168                         
169                                 for (int i = 0; i < templateSeqs.size(); i++) {
170                                         
171                                         if (m->control_pressed) { return 0; } 
172                                         
173                                         string leftFrag = templateSeqs[i]->getUnaligned();
174                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
175                                         
176                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
177                                         databaseLeft->addSequence(leftTemp);    
178                                 }
179                                 databaseLeft->generateDB();
180                                 
181                         }else { 
182                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
183                         }
184                         kmerFileTestLeft.close();
185                         
186                         databaseLeft->setNumSeqs(templateSeqs.size());
187                         
188                         //rightside
189                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
190                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
191                         bool needToGenerateRight = true;
192                         
193                         if(kmerFileTestRight){  
194                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
195                                 if (GoodFile) {  needToGenerateRight = false;   }
196                         }
197                         
198                         if(needToGenerateRight){        
199                         
200                                 for (int i = 0; i < templateSeqs.size(); i++) {
201                                         if (m->control_pressed) { return 0; } 
202                                         
203                                         string rightFrag = templateSeqs[i]->getUnaligned();
204                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
205                                         
206                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
207                                         databaseRight->addSequence(rightTemp);  
208                                 }
209                                 databaseRight->generateDB();
210                                 
211                         }else { 
212                                 databaseRight->readKmerDB(kmerFileTestRight);   
213                         }
214                         kmerFileTestRight.close();
215                         
216                         databaseRight->setNumSeqs(templateSeqs.size());
217                 #endif  
218                 }else if (searchMethod == "blast") {
219                 
220                         //generate blastdb
221                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation);
222                         
223                         if (m->control_pressed) { return 0; }
224
225                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
226                         databaseLeft->generateDB();
227                         databaseLeft->setNumSeqs(templateSeqs.size());
228                 }
229                 
230                 return 0;
231
232         }
233         catch(exception& e) {
234                 m->errorOut(e, "ChimeraSlayer", "doprep");
235                 exit(1);
236         }
237 }
238 //***************************************************************************************************************
239 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
240         try {
241                 
242                 //when template=self, the query file is sorted from most abundance to least abundant
243                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
244                 vector<Sequence*> userTemplate;
245                 
246                 int myAbund = priority[q.getName()];
247                 
248                 for (int i = 0; i < templateSeqs.size(); i++) {
249                         
250                         if (m->control_pressed) { return userTemplate; } 
251                         
252                         //have I reached a sequence with the same abundance as myself?
253                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
254                         
255                         //if its am not chimeric add it
256                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
257                                 userTemplate.push_back(templateSeqs[i]); 
258                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
259                         }
260                 }
261                 
262                 //avoids nuisance error from formatdb for making blank blast database
263                 if (userTemplate.size() == 0) {
264                         return userTemplate;
265                 }
266                 
267                 string  kmerDBNameLeft;
268                 string  kmerDBNameRight;
269                 
270                 //generate the kmerdb to pass to maligner
271                 if (searchMethod == "kmer") { 
272                         string templatePath = m->hasPath(templateFileName);
273                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
274                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
275                         
276                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
277                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
278 #ifdef USE_MPI
279                         for (int i = 0; i < userTemplate.size(); i++) {
280                                 
281                                 if (m->control_pressed) { return userTemplate; } 
282                                 
283                                 string leftFrag = userTemplate[i]->getUnaligned();
284                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
285                                 
286                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
287                                 databaseLeft->addSequence(leftTemp);    
288                         }
289                         databaseLeft->generateDB();
290                         databaseLeft->setNumSeqs(userTemplate.size());
291                         
292                         for (int i = 0; i < userTemplate.size(); i++) {
293                                 if (m->control_pressed) { return userTemplate; } 
294                                 
295                                 string rightFrag = userTemplate[i]->getUnaligned();
296                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
297                                 
298                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
299                                 databaseRight->addSequence(rightTemp);  
300                         }
301                         databaseRight->generateDB();
302                         databaseRight->setNumSeqs(userTemplate.size());
303                         
304 #else   
305                         
306                         
307                         for (int i = 0; i < userTemplate.size(); i++) {
308                                 
309                                 if (m->control_pressed) { return userTemplate; } 
310                                 
311                                 string leftFrag = userTemplate[i]->getUnaligned();
312                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
313                                 
314                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
315                                 databaseLeft->addSequence(leftTemp);    
316                         }
317                         databaseLeft->generateDB();
318                         databaseLeft->setNumSeqs(userTemplate.size());
319                                 
320                         for (int i = 0; i < userTemplate.size(); i++) {
321                                 if (m->control_pressed) { return userTemplate; }  
322                                         
323                                 string rightFrag = userTemplate[i]->getUnaligned();
324                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
325                                         
326                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
327                                 databaseRight->addSequence(rightTemp);  
328                         }
329                         databaseRight->generateDB();
330                         databaseRight->setNumSeqs(userTemplate.size());
331 #endif  
332                 }else if (searchMethod == "blast") {
333                         
334                         //generate blastdb
335                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation);
336                         
337                         if (m->control_pressed) { return userTemplate; }
338
339                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
340                         databaseLeft->generateDB();
341                         databaseLeft->setNumSeqs(userTemplate.size());
342                 }
343                 
344                 return userTemplate;
345                 
346         }
347         catch(exception& e) {
348                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
349                 exit(1);
350         }
351 }
352
353 //***************************************************************************************************************
354 ChimeraSlayer::~ChimeraSlayer() {       
355         if (templateFileName != "self") {
356                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
357                 else if (searchMethod == "blast") {  delete databaseLeft; }
358         }
359 }
360 //***************************************************************************************************************
361 void ChimeraSlayer::printHeader(ostream& out) {
362         m->mothurOutEndLine();
363         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
364         m->mothurOutEndLine();
365         
366         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
367 }
368 //***************************************************************************************************************
369 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
370         try {
371                 Sequence trim;
372                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
373                 
374                 if (chimeraFlags == "yes") {
375                         string chimeraFlag = "no";
376                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
377                            ||
378                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
379                         
380                         
381                         if (chimeraFlag == "yes") {     
382                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
383                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
384                                         outAcc << querySeq.getName() << endl;
385                                         
386                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
387                                         
388                                         if (trimChimera) {  
389                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
390                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
391                                                 
392                                                 string newAligned = trim.getAligned();
393
394                                                 if (lengthLeft > lengthRight) { //trim right
395                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
396                                                 }else { //trim left
397                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
398                                                 }
399                                                 trim.setAligned(newAligned);
400                                         }
401                                 }
402                         }
403                         
404                         printBlock(chimeraResults[0], chimeraFlag, out);
405                         out << endl;
406                 }else {  
407                         out << querySeq.getName() << "\tno" << endl; 
408                 }
409                 
410                 return trim;
411                 
412         }
413         catch(exception& e) {
414                 m->errorOut(e, "ChimeraSlayer", "print");
415                 exit(1);
416         }
417 }
418 //***************************************************************************************************************
419 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
420         try {
421                 Sequence trim;
422                                 
423                 if (trimChimera) { 
424                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
425                         trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); 
426                 }
427                 
428                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
429                         
430                         string chimeraFlag = "no";
431                         if (leftPiece.flag == "yes") {
432                                 
433                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
434                                         ||
435                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
436                         }
437                         
438                         if (rightPiece.flag == "yes") {
439                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
440                                  ||
441                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
442                         }
443                         
444                         bool rightChimeric = false;
445                         bool leftChimeric = false;
446                         
447                         if (chimeraFlag == "yes") {     
448                                 //which peice is chimeric or are both
449                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
450                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
451                                 
452                                 if (rightChimeric || leftChimeric) {
453                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
454                                         outAcc << querySeq.getName() << endl;
455                                         
456                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
457                                         
458                                         if (trimChimera) {  
459                                                 string newAligned = trim.getAligned();
460                                                                                                 
461                                                 //right side is fine so keep that
462                                                 if ((leftChimeric) && (!rightChimeric)) {
463                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
464                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
465                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
466                                                 }else { //both sides are chimeric, keep longest piece
467                                                         
468                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
469                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
470                                                         
471                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
472                                                         int length = lengthLeftLeft;
473                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
474                                                         
475                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
476                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
477                                                         
478                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
479                                                         if (lengthRightRight > length) { longest = 4; }
480                                                         
481                                                         if (longest == 1) { //leftleft
482                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
483                                                         }else if (longest == 2) { //leftright
484                                                                 //get rid of leftleft
485                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
486                                                                 //get rid of right
487                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
488                                                         }else if (longest == 3) { //rightleft
489                                                                 //get rid of left
490                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
491                                                                 //get rid of rightright
492                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
493                                                         }else { //rightright
494                                                                 //get rid of left
495                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
496                                                                 //get rid of rightleft
497                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
498                                                         }
499                                                 }
500                                                         
501                                                 trim.setAligned(newAligned);
502                                         }
503                                         
504                                 }
505                         }
506                         
507                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
508                         out << endl;
509                 }else {  
510                         out << querySeq.getName() << "\tno" << endl;  
511                 }
512                 
513                 return trim;
514                 
515         }
516         catch(exception& e) {
517                 m->errorOut(e, "ChimeraSlayer", "print");
518                 exit(1);
519         }
520 }
521
522 #ifdef USE_MPI
523 //***************************************************************************************************************
524 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece) {
525         try {
526                 MPI_Status status;
527                 bool results = false;
528                 string outAccString = "";
529                 string outputString = "";
530                 
531                 Sequence trim;
532                 
533                 if (trimChimera) { 
534                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
535                         trim.setName(leftPiece.trimQuery.getName());  trim.setAligned(aligned);
536                 }
537                 
538                 
539                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
540                         
541                         string chimeraFlag = "no";
542                         if (leftPiece.flag == "yes") {
543                                 
544                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
545                                    ||
546                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
547                         }
548                         
549                         if (rightPiece.flag == "yes") {
550                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
551                                         ||
552                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
553                         }
554                         
555                         bool rightChimeric = false;
556                         bool leftChimeric = false;
557
558                         cout << endl;
559                         
560                         if (chimeraFlag == "yes") {     
561                                 //which peice is chimeric or are both
562                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
563                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
564                                 
565                                 if (rightChimeric || leftChimeric) {
566                                         cout << querySeq.getName() <<  "\tyes" << endl;
567                                         outAccString += querySeq.getName() + "\n";
568                                         results = true;
569                                         
570                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
571                                         
572                                         //write to accnos file
573                                         int length = outAccString.length();
574                                         char* buf2 = new char[length];
575                                         memcpy(buf2, outAccString.c_str(), length);
576                                 
577                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
578                                         delete buf2;
579                                         
580                                         if (trimChimera) {  
581                                                 string newAligned = trim.getAligned();
582                                                 
583                                                 //right side is fine so keep that
584                                                 if ((leftChimeric) && (!rightChimeric)) {
585                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
586                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
587                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
588                                                 }else { //both sides are chimeric, keep longest piece
589                                                         
590                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
591                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
592                                                         
593                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
594                                                         int length = lengthLeftLeft;
595                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
596                                                         
597                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
598                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
599                                                         
600                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
601                                                         if (lengthRightRight > length) { longest = 4; }
602                                                         
603                                                         if (longest == 1) { //leftleft
604                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
605                                                         }else if (longest == 2) { //leftright
606                                                                 //get rid of leftleft
607                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
608                                                                 //get rid of right
609                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
610                                                         }else if (longest == 3) { //rightleft
611                                                                 //get rid of left
612                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
613                                                                 //get rid of rightright
614                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
615                                                         }else { //rightright
616                                                                 //get rid of left
617                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
618                                                                 //get rid of rightleft
619                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
620                                                         }
621                                                 }
622                                                 
623                                                 trim.setAligned(newAligned);
624                                         }
625                                         
626                                 }
627                         }
628                         
629                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
630                         outputString += "\n";
631                 
632                         //write to output file
633                         int length = outputString.length();
634                         char* buf = new char[length];
635                         memcpy(buf, outputString.c_str(), length);
636                                 
637                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
638                         delete buf;
639
640                 }else {  
641                         outputString += querySeq.getName() + "\tno\n";  
642         
643                         //write to output file
644                         int length = outputString.length();
645                         char* buf = new char[length];
646                         memcpy(buf, outputString.c_str(), length);
647                                 
648                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
649                         delete buf;
650                 }
651                 
652                 
653                 return trim;
654         }
655         catch(exception& e) {
656                 m->errorOut(e, "ChimeraSlayer", "print");
657                 exit(1);
658         }
659 }
660 //***************************************************************************************************************
661 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
662         try {
663                 MPI_Status status;
664                 bool results = false;
665                 string outAccString = "";
666                 string outputString = "";
667                 
668                 Sequence trim;
669                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
670                 
671                 if (chimeraFlags == "yes") {
672                         string chimeraFlag = "no";
673                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
674                            ||
675                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
676                         
677                         
678                         if (chimeraFlag == "yes") {     
679                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
680                                         cout << querySeq.getName() <<  "\tyes" << endl;
681                                         outAccString += querySeq.getName() + "\n";
682                                         results = true;
683                                         
684                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
685                                         
686                                         //write to accnos file
687                                         int length = outAccString.length();
688                                         char* buf2 = new char[length];
689                                         memcpy(buf2, outAccString.c_str(), length);
690                                         
691                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
692                                         delete buf2;
693                                         
694                                         if (trimChimera) {  
695                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
696                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
697                                                 
698                                                 string newAligned = trim.getAligned();
699                                                 if (lengthLeft > lengthRight) { //trim right
700                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
701                                                 }else { //trim left
702                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
703                                                 }
704                                                 trim.setAligned(newAligned);    
705                                         }
706                                 }
707                         }
708                         
709                         outputString = getBlock(chimeraResults[0], chimeraFlag);
710                         outputString += "\n";
711                         
712                         //write to output file
713                         int length = outputString.length();
714                         char* buf = new char[length];
715                         memcpy(buf, outputString.c_str(), length);
716                         
717                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
718                         delete buf;
719                         
720                 }else {  
721                         outputString += querySeq.getName() + "\tno\n";  
722                         
723                         //write to output file
724                         int length = outputString.length();
725                         char* buf = new char[length];
726                         memcpy(buf, outputString.c_str(), length);
727                         
728                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
729                         delete buf;
730                 }
731                 
732                 return trim;
733         }
734         catch(exception& e) {
735                 m->errorOut(e, "ChimeraSlayer", "print");
736                 exit(1);
737         }
738 }
739 #endif
740
741 //***************************************************************************************************************
742 int ChimeraSlayer::getChimeras(Sequence* query) {
743         try {
744                 
745                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
746                 printResults.trimQuery = trimQuery; 
747                 
748                 chimeraFlags = "no";
749                 printResults.flag = "no";
750                 
751                 querySeq = *query;
752                 
753                 //you must create a template
754                 vector<Sequence*> thisTemplate;
755                 vector<Sequence*> thisFilteredTemplate;
756                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
757                 else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
758                 
759                 if (m->control_pressed) {  return 0;  }
760                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
761                 
762                 //moved this out of maligner - 4/29/11
763                 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
764                 
765                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
766                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
767                 
768                 if (templateFileName == "self") {
769                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
770                         else if (searchMethod == "blast") {  delete databaseLeft; }
771                 }
772         
773                 if (m->control_pressed) {  return 0;  }
774
775                 string chimeraFlag = maligner.getResults(*query, decalc);
776
777                 if (m->control_pressed) {  return 0;  }
778                 
779                 vector<results> Results = maligner.getOutput();
780                 
781                 //for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];        }
782                 
783                 if (chimeraFlag == "yes") {
784                         
785                         if (realign) {
786                                 vector<string> parents;
787                                 for (int i = 0; i < Results.size(); i++) {
788                                         parents.push_back(Results[i].parentAligned);
789                                 }
790                                 
791                                 ChimeraReAligner realigner;             
792                                 realigner.reAlign(query, parents);
793
794                         }
795                         
796 //                      cout << query->getAligned() << endl;
797                         //get sequence that were given from maligner results
798                         vector<SeqCompare> seqs;
799                         map<string, float> removeDups;
800                         map<string, float>::iterator itDup;
801                         map<string, string> parentNameSeq;
802                         map<string, string>::iterator itSeq;
803                         for (int j = 0; j < Results.size(); j++) {
804
805                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
806                                 //only add if you are not a duplicate
807 //                              cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
808                                 
809                                 
810                                 if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
811                                 
812                                         itDup = removeDups.find(Results[j].parent);
813                                         if (itDup == removeDups.end()) { //this is not duplicate
814                                                 removeDups[Results[j].parent] = dist;
815                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
816                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
817                                                 removeDups[Results[j].parent] = dist;
818                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
819                                         }
820                                 
821                                 }
822                                 
823                         }
824                         
825                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
826                                 itSeq = parentNameSeq.find(itDup->first);
827                                 Sequence seq(itDup->first, itSeq->second);
828                                 
829                                 SeqCompare member;
830                                 member.seq = seq;
831                                 member.dist = itDup->second;
832                                 seqs.push_back(member);
833                         }
834                         
835                         //limit number of parents to explore - default 3
836                         if (Results.size() > parents) {
837                                 //sort by distance
838                                 sort(seqs.begin(), seqs.end(), compareSeqCompare);
839                                 //prioritize larger more similiar sequence fragments
840                                 reverse(seqs.begin(), seqs.end());
841                                 
842                                 //for (int k = seqs.size()-1; k > (parents-1); k--)  {  
843                                 //      delete seqs[k].seq;
844                                         //seqs.pop_back();      
845                                 //}
846                         }
847                 
848                         //put seqs into vector to send to slayer
849                         
850 //                      cout << query->getAligned() << endl;
851                         vector<Sequence> seqsForSlayer;
852                         for (int k = 0; k < seqs.size(); k++) {  
853 //                              cout << seqs[k].seq->getAligned() << endl;
854                                 seqsForSlayer.push_back(seqs[k].seq);   
855 //                              cout << seqs[k].seq->getName() << endl;
856                         }
857                         
858                         if (m->control_pressed) {  return 0;  }
859
860                         //send to slayer
861                         chimeraFlags = slayer.getResults(*query, seqsForSlayer);
862                         if (m->control_pressed) {  return 0;  }
863                         chimeraResults = slayer.getOutput();
864                         
865                         printResults.flag = chimeraFlags;
866                         printResults.results = chimeraResults;
867                         
868                         //free memory
869                         //for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
870                 }
871                 //cout << endl << endl;
872                 return 0;
873         }
874         catch(exception& e) {
875                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
876                 exit(1);
877         }
878 }
879 //***************************************************************************************************************
880 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
881         try {
882                 out << querySeq.getName() << '\t';
883                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
884         
885                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
886                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
887                 
888                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
889                 
890         }
891         catch(exception& e) {
892                 m->errorOut(e, "ChimeraSlayer", "printBlock");
893                 exit(1);
894         }
895 }
896 //***************************************************************************************************************
897 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
898         try {
899                 
900                 if ((leftChimeric) && (!rightChimeric)) { //print left
901                         out << querySeq.getName() << '\t';
902                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
903                         
904                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
905                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
906                 
907                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
908                 
909                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
910                         out << querySeq.getName() << '\t';
911                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
912                         
913                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
914                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
915                         
916                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
917                         
918                 }else  { //print both results
919                         if (leftdata.flag == "yes") {
920                                 out << querySeq.getName() + "_LEFT" << '\t';
921                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
922                                 
923                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
924                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
925                                 
926                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
927                         }
928                         
929                         if (rightdata.flag == "yes") {
930                                 if (leftdata.flag == "yes") { out << endl; }
931                                 
932                                 out << querySeq.getName() + "_RIGHT"<< '\t';
933                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
934                                 
935                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
936                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
937                                 
938                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
939                 
940                         }
941                 }
942         }
943         catch(exception& e) {
944                 m->errorOut(e, "ChimeraSlayer", "printBlock");
945                 exit(1);
946         }
947 }
948 //***************************************************************************************************************
949 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
950         try {
951                 
952                 string out = "";
953                 
954                 if ((leftChimeric) && (!rightChimeric)) { //get left
955                         out += querySeq.getName() + "\t";
956                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
957                         
958                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
959                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
960                         
961                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
962                         
963                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
964                         out += querySeq.getName() + "\t";
965                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
966                         
967                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
968                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
969                         
970                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
971                         
972                 }else  { //print both results
973                         
974                         if (leftdata.flag == "yes") {
975                                 out += querySeq.getName() + "_LEFT\t";
976                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
977                                 
978                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
979                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
980                                 
981                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
982                         }
983                         
984                         if (rightdata.flag == "yes") {
985                                 if (leftdata.flag == "yes") { out += "\n"; }
986                                 out +=  querySeq.getName() + "_RIGHT\t";
987                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
988                                 
989                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
990                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
991                                 
992                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
993                         }
994                 }
995                 
996                 return out;
997                 
998         }
999         catch(exception& e) {
1000                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1001                 exit(1);
1002         }
1003 }
1004 //***************************************************************************************************************
1005 string ChimeraSlayer::getBlock(data_struct data, string flag){
1006         try {
1007                 
1008                 string outputString = "";
1009                 
1010                 outputString += querySeq.getName() + "\t";
1011                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
1012                         
1013                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1014                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1015                 
1016                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1017                 
1018                 return outputString;
1019         }
1020         catch(exception& e) {
1021                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1022                 exit(1);
1023         }
1024 }
1025 //***************************************************************************************************************
1026 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1027         try {
1028                 
1029                 vector<Sequence> refSeqs;
1030                 
1031                 if (searchMethod == "distance") {
1032                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1033                         Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1034                         runFilter(newSeq);
1035                         refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1036                         delete newSeq;
1037                 }else if (searchMethod == "blast")  {
1038                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1039                 }else if (searchMethod == "kmer") {
1040                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1041                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1042                 
1043                 return refSeqs;
1044         }
1045         catch(exception& e) {
1046                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1047                 exit(1);
1048         }
1049 }
1050 //***************************************************************************************************************/
1051 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1052         try {   
1053                 
1054                 vector<Sequence> refResults;
1055                 
1056                 //get parts of query
1057                 string queryUnAligned = q.getUnaligned();
1058                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1059                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1060 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1061                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1062                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1063                 
1064                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1065                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1066                                 
1067                 
1068                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1069 //              vector<int> smaller;
1070 //              vector<int> larger;
1071 //              
1072 //              if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1073 //              else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1074                 
1075                 //merge results         
1076                 map<int, int> seen;
1077                 map<int, int>::iterator it;
1078                 vector<int> mergedResults;
1079                 
1080                 int index = 0;
1081 //              for (int i = 0; i < smaller.size(); i++) {
1082                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1083                         
1084                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1085         
1086                         //add left if you havent already
1087                         it = seen.find(tempIndexesLeft[index]);
1088                         if (it == seen.end()) {  
1089                                 mergedResults.push_back(tempIndexesLeft[index]);
1090                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1091                         }
1092                         
1093                         //add right if you havent already
1094                         it = seen.find(tempIndexesRight[index]);
1095                         if (it == seen.end()) {  
1096                                 mergedResults.push_back(tempIndexesRight[index]);
1097                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1098                         }
1099                         index++;
1100                 }
1101
1102                 
1103                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1104                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1105                         
1106                         //add right if you havent already
1107                         it = seen.find(tempIndexesLeft[i]);
1108                         if (it == seen.end()) {  
1109                                 mergedResults.push_back(tempIndexesLeft[i]);
1110                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1111                         }
1112                 }
1113
1114                 for (int i = index; i < tempIndexesRight.size(); i++) {
1115                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1116                         
1117                         //add right if you havent already
1118                         it = seen.find(tempIndexesRight[i]);
1119                         if (it == seen.end()) {  
1120                                 mergedResults.push_back(tempIndexesRight[i]);
1121                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1122                         }
1123                 }
1124                 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
1125                 //cout << qname << endl;        
1126                 
1127                 if (mergedResults.size() == 0) { numNoParents++; }
1128                 
1129                 for (int i = 0; i < mergedResults.size(); i++) {
1130                         //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1131                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1132                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1133                                 refResults.push_back(temp);
1134                         }
1135                 }
1136                 //cout << endl << endl;
1137
1138                 delete queryRight;
1139                 delete queryLeft;
1140                 
1141                 return refResults;
1142         }
1143         catch(exception& e) {
1144                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1145                 exit(1);
1146         }
1147 }
1148 //***************************************************************************************************************
1149 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1150         try {   
1151                 vector<Sequence> refResults;
1152                 
1153                 //get parts of query
1154                 string queryUnAligned = q.getUnaligned();
1155                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1156                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1157                 
1158                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1159                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1160                 
1161                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1162                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1163                 
1164                 //merge results         
1165                 map<int, int> seen;
1166                 map<int, int>::iterator it;
1167                 vector<int> mergedResults;
1168                 
1169                 int index = 0;
1170                 //              for (int i = 0; i < smaller.size(); i++) {
1171                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1172                         
1173                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1174                         
1175                         //add left if you havent already
1176                         it = seen.find(tempIndexesLeft[index]);
1177                         if (it == seen.end()) {  
1178                                 mergedResults.push_back(tempIndexesLeft[index]);
1179                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1180                         }
1181                         
1182                         //add right if you havent already
1183                         it = seen.find(tempIndexesRight[index]);
1184                         if (it == seen.end()) {  
1185                                 mergedResults.push_back(tempIndexesRight[index]);
1186                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1187                         }
1188                         index++;
1189                 }
1190                 
1191                 
1192                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1193                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1194                         
1195                         //add right if you havent already
1196                         it = seen.find(tempIndexesLeft[i]);
1197                         if (it == seen.end()) {  
1198                                 mergedResults.push_back(tempIndexesLeft[i]);
1199                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1200                         }
1201                 }
1202                 
1203                 for (int i = index; i < tempIndexesRight.size(); i++) {
1204                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1205                         
1206                         //add right if you havent already
1207                         it = seen.find(tempIndexesRight[i]);
1208                         if (it == seen.end()) {  
1209                                 mergedResults.push_back(tempIndexesRight[i]);
1210                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1211                         }
1212                 }
1213                 
1214                 for (int i = 0; i < mergedResults.size(); i++) {
1215                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1216                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1217                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1218                                 refResults.push_back(temp);
1219                                 
1220                         }
1221                 }
1222
1223                 //cout << endl;         
1224                 delete queryRight;
1225                 delete queryLeft;
1226                 
1227                 return refResults;
1228         }
1229         catch(exception& e) {
1230                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1231                 exit(1);
1232         }
1233 }
1234 //***************************************************************************************************************
1235
1236