]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayer.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / chimeraslayer.cpp
1 /*
2  *  chimeraslayer.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/25/09.
6  *  Copyright 2009 Pschloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraslayer.h"
11 #include "chimerarealigner.h"
12 #include "kmerdb.hpp"
13 #include "blastdb.hpp"
14
15 //***************************************************************************************************************
16 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, 
17 int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera()  {         
18         try {
19                 fastafile = file;
20                 templateFileName = temp; templateSeqs = readSeqs(temp);
21                 searchMethod = mode;
22                 kmerSize = k;
23                 match = ms;
24                 misMatch = mms;
25                 window = win;
26                 divR = div;
27                 minSim = minsim;
28                 minCov = mincov;
29                 minBS = minbs;
30                 minSNP = minsnp;
31                 parents = par;
32                 iters = it;
33                 increment = inc;
34                 numWanted = numw;
35                 realign = r; 
36                 trimChimera = trim;
37                 numNoParents = 0;
38                 blastlocation = blas;
39                 threadID = tid;
40         
41                 doPrep();
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
45                 exit(1);
46         }
47 }
48 //***************************************************************************************************************
49 //template=self, byGroup parameter used for mpienabled version to read the template as MPI_COMM_SELF instead of MPI_COMM_WORLD
50 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
51                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid, bool bg) : Chimera()  {       
52         try {
53                 byGroup = bg;
54                 fastafile = file; templateSeqs = readSeqs(fastafile);
55                 templateFileName = temp; 
56                 searchMethod = mode;
57                 kmerSize = k;
58                 match = ms;
59                 misMatch = mms;
60                 window = win;
61                 divR = div;
62                 minSim = minsim;
63                 minCov = mincov;
64                 minBS = minbs;
65                 minSNP = minsnp;
66                 parents = par;
67                 iters = it;
68                 increment = inc;
69                 numWanted = numw;
70                 realign = r; 
71                 trimChimera = trim;
72                 priority = prior;
73                 numNoParents = 0;
74                 blastlocation = blas;
75                 threadID = tid;
76                 
77                 
78                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
79                 
80                 if (searchMethod == "distance") { 
81                         //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
82                         
83                         //run filter on template copying templateSeqs into filteredTemplateSeqs
84                         for (int i = 0; i < templateSeqs.size(); i++) {  
85                                 if (m->control_pressed) {  break; }
86                                 
87                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
88                                 runFilter(newSeq);  
89                                 filteredTemplateSeqs.push_back(newSeq);
90                         }
91                 }
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
95                 exit(1);
96         }
97 }
98 //***************************************************************************************************************
99 //template=self
100 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
101                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera()  {        
102         try {
103                 fastafile = file; templateSeqs = readSeqs(fastafile);
104                 templateFileName = temp; 
105                 searchMethod = mode;
106                 kmerSize = k;
107                 match = ms;
108                 misMatch = mms;
109                 window = win;
110                 divR = div;
111                 minSim = minsim;
112                 minCov = mincov;
113                 minBS = minbs;
114                 minSNP = minsnp;
115                 parents = par;
116                 iters = it;
117                 increment = inc;
118                 numWanted = numw;
119                 realign = r; 
120                 trimChimera = trim;
121                 priority = prior;
122                 numNoParents = 0;
123                 blastlocation = blas;
124                 threadID = tid;
125                 
126                 
127                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
128                 
129                 if (searchMethod == "distance") { 
130                         //createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
131                         
132                         //run filter on template copying templateSeqs into filteredTemplateSeqs
133                         for (int i = 0; i < templateSeqs.size(); i++) {  
134                                 if (m->control_pressed) {  break; }
135                                 
136                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
137                                 runFilter(newSeq);  
138                                 filteredTemplateSeqs.push_back(newSeq);
139                         }
140                 }
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
144                 exit(1);
145         }
146 }
147 //***************************************************************************************************************
148 int ChimeraSlayer::doPrep() {
149         try {
150                 if (searchMethod == "distance") { 
151                         //read in all query seqs
152                         vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
153                 
154                         vector<Sequence*> temp = templateSeqs;
155                         for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
156                 
157                         createFilter(temp, 0.0); //just removed columns where all seqs have a gap
158                 
159                         for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
160                 
161                         if (m->control_pressed) {  return 0; } 
162                 
163                         //run filter on template copying templateSeqs into filteredTemplateSeqs
164                         for (int i = 0; i < templateSeqs.size(); i++) {  
165                                 if (m->control_pressed) {  return 0; }
166                                 
167                                 Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
168                                 runFilter(newSeq);  
169                                 filteredTemplateSeqs.push_back(newSeq);
170                         }
171                 }
172                 string  kmerDBNameLeft;
173                 string  kmerDBNameRight;
174         
175                 //generate the kmerdb to pass to maligner
176                 if (searchMethod == "kmer") { 
177                         string templatePath = m->hasPath(templateFileName);
178                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
179                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
180                                 
181                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
182                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
183                 #ifdef USE_MPI
184                         for (int i = 0; i < templateSeqs.size(); i++) {
185                                         
186                                 if (m->control_pressed) { return 0; } 
187                                         
188                                 string leftFrag = templateSeqs[i]->getUnaligned();
189                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
190                                         
191                                 Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
192                                 databaseLeft->addSequence(leftTemp);    
193                         }
194                         databaseLeft->generateDB();
195                         databaseLeft->setNumSeqs(templateSeqs.size());
196                         
197                         for (int i = 0; i < templateSeqs.size(); i++) {
198                                 if (m->control_pressed) { return 0; } 
199                                         
200                                 string rightFrag = templateSeqs[i]->getUnaligned();
201                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
202                                         
203                                 Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
204                                 databaseRight->addSequence(rightTemp);  
205                         }
206                         databaseRight->generateDB();
207                         databaseRight->setNumSeqs(templateSeqs.size());
208
209                 #else   
210                         //leftside
211                         kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
212                         ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
213                         bool needToGenerateLeft = true;
214                         
215                         if(kmerFileTestLeft){   
216                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
217                                 if (GoodFile) {  needToGenerateLeft = false;    }
218                         }
219                         
220                         if(needToGenerateLeft){ 
221                         
222                                 for (int i = 0; i < templateSeqs.size(); i++) {
223                                         
224                                         if (m->control_pressed) { return 0; } 
225                                         
226                                         string leftFrag = templateSeqs[i]->getUnaligned();
227                                         leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
228                                         
229                                         Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
230                                         databaseLeft->addSequence(leftTemp);    
231                                 }
232                                 databaseLeft->generateDB();
233                                 
234                         }else { 
235                                 databaseLeft->readKmerDB(kmerFileTestLeft);     
236                         }
237                         kmerFileTestLeft.close();
238                         
239                         databaseLeft->setNumSeqs(templateSeqs.size());
240                         
241                         //rightside
242                         kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
243                         ifstream kmerFileTestRight(kmerDBNameRight.c_str());
244                         bool needToGenerateRight = true;
245                         
246                         if(kmerFileTestRight){  
247                                 bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
248                                 if (GoodFile) {  needToGenerateRight = false;   }
249                         }
250                         
251                         if(needToGenerateRight){        
252                         
253                                 for (int i = 0; i < templateSeqs.size(); i++) {
254                                         if (m->control_pressed) { return 0; } 
255                                         
256                                         string rightFrag = templateSeqs[i]->getUnaligned();
257                                         rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
258                                         
259                                         Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
260                                         databaseRight->addSequence(rightTemp);  
261                                 }
262                                 databaseRight->generateDB();
263                                 
264                         }else { 
265                                 databaseRight->readKmerDB(kmerFileTestRight);   
266                         }
267                         kmerFileTestRight.close();
268                         
269                         databaseRight->setNumSeqs(templateSeqs.size());
270                 #endif  
271                 }else if (searchMethod == "blast") {
272                 
273                         //generate blastdb
274                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID);
275                         
276                         if (m->control_pressed) { return 0; }
277
278                         for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
279                         databaseLeft->generateDB();
280                         databaseLeft->setNumSeqs(templateSeqs.size());
281                 }
282                 
283                 return 0;
284
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "ChimeraSlayer", "doprep");
288                 exit(1);
289         }
290 }
291 //***************************************************************************************************************
292 vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& userTemplateFiltered) {
293         try {
294                 
295                 //when template=self, the query file is sorted from most abundance to least abundant
296                 //userTemplate grows as the query file is processed by adding sequences that are not chimeric and more abundant
297                 vector<Sequence*> userTemplate;
298                 
299                 int myAbund = priority[q.getName()];
300                 
301                 for (int i = 0; i < templateSeqs.size(); i++) {
302                         
303                         if (m->control_pressed) { return userTemplate; } 
304                         
305                         //have I reached a sequence with the same abundance as myself?
306                         if (!(priority[templateSeqs[i]->getName()] > myAbund)) { break; }
307                         
308                         //if its am not chimeric add it
309                         if (chimericSeqs.count(templateSeqs[i]->getName()) == 0) { 
310                                 userTemplate.push_back(templateSeqs[i]); 
311                                 if (searchMethod == "distance") { userTemplateFiltered.push_back(filteredTemplateSeqs[i]); }
312                         }
313                 }
314                 
315                 //avoids nuisance error from formatdb for making blank blast database
316                 if (userTemplate.size() == 0) {
317                         return userTemplate;
318                 }
319                 
320                 string  kmerDBNameLeft;
321                 string  kmerDBNameRight;
322                 
323                 //generate the kmerdb to pass to maligner
324                 if (searchMethod == "kmer") { 
325                         string templatePath = m->hasPath(templateFileName);
326                         string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
327                         databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
328                         
329                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
330                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
331 #ifdef USE_MPI
332                         for (int i = 0; i < userTemplate.size(); i++) {
333                                 
334                                 if (m->control_pressed) { return userTemplate; } 
335                                 
336                                 string leftFrag = userTemplate[i]->getUnaligned();
337                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
338                                 
339                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
340                                 databaseLeft->addSequence(leftTemp);    
341                         }
342                         databaseLeft->generateDB();
343                         databaseLeft->setNumSeqs(userTemplate.size());
344                         
345                         for (int i = 0; i < userTemplate.size(); i++) {
346                                 if (m->control_pressed) { return userTemplate; } 
347                                 
348                                 string rightFrag = userTemplate[i]->getUnaligned();
349                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
350                                 
351                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
352                                 databaseRight->addSequence(rightTemp);  
353                         }
354                         databaseRight->generateDB();
355                         databaseRight->setNumSeqs(userTemplate.size());
356                         
357 #else   
358                         
359                         
360                         for (int i = 0; i < userTemplate.size(); i++) {
361                                 
362                                 if (m->control_pressed) { return userTemplate; } 
363                                 
364                                 string leftFrag = userTemplate[i]->getUnaligned();
365                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
366                                 
367                                 Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
368                                 databaseLeft->addSequence(leftTemp);    
369                         }
370                         databaseLeft->generateDB();
371                         databaseLeft->setNumSeqs(userTemplate.size());
372                                 
373                         for (int i = 0; i < userTemplate.size(); i++) {
374                                 if (m->control_pressed) { return userTemplate; }  
375                                         
376                                 string rightFrag = userTemplate[i]->getUnaligned();
377                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
378                                         
379                                 Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
380                                 databaseRight->addSequence(rightTemp);  
381                         }
382                         databaseRight->generateDB();
383                         databaseRight->setNumSeqs(userTemplate.size());
384 #endif  
385                 }else if (searchMethod == "blast") {
386                         
387                         //generate blastdb
388                         databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID);
389                         
390                         if (m->control_pressed) { return userTemplate; }
391
392                         for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
393                         databaseLeft->generateDB();
394                         databaseLeft->setNumSeqs(userTemplate.size());
395                 }
396                 
397                 return userTemplate;
398                 
399         }
400         catch(exception& e) {
401                 m->errorOut(e, "ChimeraSlayer", "getTemplate");
402                 exit(1);
403         }
404 }
405
406 //***************************************************************************************************************
407 ChimeraSlayer::~ChimeraSlayer() {       
408         if (templateFileName != "self") {
409                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
410                 else if (searchMethod == "blast") {  delete databaseLeft; }
411         }
412 }
413 //***************************************************************************************************************
414 void ChimeraSlayer::printHeader(ostream& out) {
415         m->mothurOutEndLine();
416         m->mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
417         m->mothurOutEndLine();
418         
419         out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
420 }
421 //***************************************************************************************************************
422 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc) {
423         try {
424                 Sequence trim;
425                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
426                 
427                 if (chimeraFlags == "yes") {
428                         string chimeraFlag = "no";
429                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
430                            ||
431                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
432                         
433                         
434                         if (chimeraFlag == "yes") {     
435                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
436                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
437                                         outAcc << querySeq.getName() << endl;
438                                         
439                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
440                                         
441                                         if (trimChimera) {  
442                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
443                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
444                                                 
445                                                 string newAligned = trim.getAligned();
446
447                                                 if (lengthLeft > lengthRight) { //trim right
448                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
449                                                 }else { //trim left
450                                                         for (int i = 0; i < chimeraResults[0].winLEnd; i++) { newAligned[i] = '.'; }
451                                                 }
452                                                 trim.setAligned(newAligned);
453                                         }
454                                 }
455                         }
456                         
457                         printBlock(chimeraResults[0], chimeraFlag, out);
458                         out << endl;
459                 }else {  
460                         out << querySeq.getName() << "\tno" << endl; 
461                 }
462                 
463                 return trim;
464                 
465         }
466         catch(exception& e) {
467                 m->errorOut(e, "ChimeraSlayer", "print");
468                 exit(1);
469         }
470 }
471 //***************************************************************************************************************
472 Sequence ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftPiece, data_results rightPiece) {
473         try {
474                 Sequence trim;
475                                 
476                 if (trimChimera) { 
477                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
478                         trim.setName(leftPiece.trimQuery.getName()); trim.setAligned(aligned); 
479                 }
480                 
481                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
482                         
483                         string chimeraFlag = "no";
484                         if (leftPiece.flag == "yes") {
485                                 
486                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
487                                         ||
488                                         (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
489                         }
490                         
491                         if (rightPiece.flag == "yes") {
492                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
493                                  ||
494                                  (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
495                         }
496                         
497                         bool rightChimeric = false;
498                         bool leftChimeric = false;
499                         
500                         if (chimeraFlag == "yes") {     
501                                 //which peice is chimeric or are both
502                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
503                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
504                                 
505                                 if (rightChimeric || leftChimeric) {
506                                         m->mothurOut(querySeq.getName() + "\tyes"); m->mothurOutEndLine();
507                                         outAcc << querySeq.getName() << endl;
508                                         
509                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
510                                         
511                                         if (trimChimera) {  
512                                                 string newAligned = trim.getAligned();
513                                                                                                 
514                                                 //right side is fine so keep that
515                                                 if ((leftChimeric) && (!rightChimeric)) {
516                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
517                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
518                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
519                                                 }else { //both sides are chimeric, keep longest piece
520                                                         
521                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
522                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
523                                                         
524                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
525                                                         int length = lengthLeftLeft;
526                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
527                                                         
528                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
529                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
530                                                         
531                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
532                                                         if (lengthRightRight > length) { longest = 4; }
533                                                         
534                                                         if (longest == 1) { //leftleft
535                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
536                                                         }else if (longest == 2) { //leftright
537                                                                 //get rid of leftleft
538                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
539                                                                 //get rid of right
540                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
541                                                         }else if (longest == 3) { //rightleft
542                                                                 //get rid of left
543                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
544                                                                 //get rid of rightright
545                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
546                                                         }else { //rightright
547                                                                 //get rid of left
548                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
549                                                                 //get rid of rightleft
550                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
551                                                         }
552                                                 }
553                                                         
554                                                 trim.setAligned(newAligned);
555                                         }
556                                         
557                                 }
558                         }
559                         
560                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
561                         out << endl;
562                 }else {  
563                         out << querySeq.getName() << "\tno" << endl;  
564                 }
565                 
566                 return trim;
567                 
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "ChimeraSlayer", "print");
571                 exit(1);
572         }
573 }
574
575 #ifdef USE_MPI
576 //***************************************************************************************************************
577 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results leftPiece, data_results rightPiece, bool& chimFlag) {
578         try {
579                 MPI_Status status;
580                 bool results = false;
581                 string outAccString = "";
582                 string outputString = "";
583         chimFlag = false;
584                 
585                 Sequence trim;
586                 
587                 if (trimChimera) { 
588                         string aligned = leftPiece.trimQuery.getAligned() + rightPiece.trimQuery.getAligned();
589                         trim.setName(leftPiece.trimQuery.getName());  trim.setAligned(aligned);
590                 }
591                 
592                 
593                 if ((leftPiece.flag == "yes") || (rightPiece.flag == "yes")) {
594                         
595                         string chimeraFlag = "no";
596                         if (leftPiece.flag == "yes") {
597                                 
598                                 if(  (leftPiece.results[0].bsa >= minBS && leftPiece.results[0].divr_qla_qrb >= divR)
599                                    ||
600                                    (leftPiece.results[0].bsb >= minBS && leftPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
601                         }
602                         
603                         if (rightPiece.flag == "yes") {
604                                 if ( (rightPiece.results[0].bsa >= minBS && rightPiece.results[0].divr_qla_qrb >= divR)
605                                         ||
606                                         (rightPiece.results[0].bsb >= minBS && rightPiece.results[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
607                         }
608                         
609                         bool rightChimeric = false;
610                         bool leftChimeric = false;
611
612                         cout << endl;
613                         
614                         if (chimeraFlag == "yes") {     
615                                 //which peice is chimeric or are both
616                                 if (rightPiece.flag == "yes") { if ((rightPiece.results[0].bsa >= minBS) || (rightPiece.results[0].bsb >= minBS)) { rightChimeric = true; } }
617                                 if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
618                                 
619                                 if (rightChimeric || leftChimeric) {
620                                         cout << querySeq.getName() <<  "\tyes" << endl;
621                                         outAccString += querySeq.getName() + "\n";
622                                         results = true;
623                                         
624                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
625                                         
626                                         //write to accnos file
627                                         int length = outAccString.length();
628                                         char* buf2 = new char[length];
629                                         memcpy(buf2, outAccString.c_str(), length);
630                                 
631                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
632                     chimFlag = true;
633                                         delete buf2;
634                                         
635                                         if (trimChimera) {  
636                                                 string newAligned = trim.getAligned();
637                                                 
638                                                 //right side is fine so keep that
639                                                 if ((leftChimeric) && (!rightChimeric)) {
640                                                         for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
641                                                 }else if ((!leftChimeric) && (rightChimeric)) { //leftside is fine so keep that
642                                                         for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
643                                                 }else { //both sides are chimeric, keep longest piece
644                                                         
645                                                         int lengthLeftLeft = leftPiece.results[0].winLEnd - leftPiece.results[0].winLStart;
646                                                         int lengthLeftRight = leftPiece.results[0].winREnd - leftPiece.results[0].winRStart;
647                                                         
648                                                         int longest = 1; // leftleft = 1, leftright = 2, rightleft = 3 rightright = 4
649                                                         int length = lengthLeftLeft;
650                                                         if (lengthLeftLeft < lengthLeftRight) { longest = 2;  length = lengthLeftRight; }
651                                                         
652                                                         int lengthRightLeft = rightPiece.results[0].winLEnd - rightPiece.results[0].winLStart;
653                                                         int lengthRightRight = rightPiece.results[0].winREnd - rightPiece.results[0].winRStart;
654                                                         
655                                                         if (lengthRightLeft > length) { longest = 3; length = lengthRightLeft;  }
656                                                         if (lengthRightRight > length) { longest = 4; }
657                                                         
658                                                         if (longest == 1) { //leftleft
659                                                                 for (int i = (leftPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
660                                                         }else if (longest == 2) { //leftright
661                                                                 //get rid of leftleft
662                                                                 for (int i = (leftPiece.results[0].winLStart-1); i < (leftPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
663                                                                 //get rid of right
664                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
665                                                         }else if (longest == 3) { //rightleft
666                                                                 //get rid of left
667                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
668                                                                 //get rid of rightright
669                                                                 for (int i = (rightPiece.results[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
670                                                         }else { //rightright
671                                                                 //get rid of left
672                                                                 for (int i = 0; i < leftPiece.results[0].winREnd; i++) { newAligned[i] = '.'; } 
673                                                                 //get rid of rightleft
674                                                                 for (int i = (rightPiece.results[0].winLStart-1); i < (rightPiece.results[0].winLEnd-1); i++) { newAligned[i] = '.'; }
675                                                         }
676                                                 }
677                                                 
678                                                 trim.setAligned(newAligned);
679                                         }
680                                         
681                                 }
682                         }
683                         
684                         outputString = getBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag);
685                         outputString += "\n";
686                 
687                         //write to output file
688                         int length = outputString.length();
689                         char* buf = new char[length];
690                         memcpy(buf, outputString.c_str(), length);
691                                 
692                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
693                         delete buf;
694
695                 }else {  
696                         outputString += querySeq.getName() + "\tno\n";  
697         
698                         //write to output file
699                         int length = outputString.length();
700                         char* buf = new char[length];
701                         memcpy(buf, outputString.c_str(), length);
702                                 
703                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
704                         delete buf;
705                 }
706                 
707                 
708                 return trim;
709         }
710         catch(exception& e) {
711                 m->errorOut(e, "ChimeraSlayer", "print");
712                 exit(1);
713         }
714 }
715 //***************************************************************************************************************
716 Sequence ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
717         try {
718                 MPI_Status status;
719                 bool results = false;
720                 string outAccString = "";
721                 string outputString = "";
722                 
723                 Sequence trim;
724                 if (trimChimera) { trim.setName(trimQuery.getName()); trim.setAligned(trimQuery.getAligned()); }
725                 
726                 if (chimeraFlags == "yes") {
727                         string chimeraFlag = "no";
728                         if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
729                            ||
730                            (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
731                         
732                         
733                         if (chimeraFlag == "yes") {     
734                                 if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
735                                         cout << querySeq.getName() <<  "\tyes" << endl;
736                                         outAccString += querySeq.getName() + "\n";
737                                         results = true;
738                                         
739                                         if (templateFileName == "self") {  chimericSeqs.insert(querySeq.getName()); }
740                                         
741                                         //write to accnos file
742                                         int length = outAccString.length();
743                                         char* buf2 = new char[length];
744                                         memcpy(buf2, outAccString.c_str(), length);
745                                         
746                                         MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
747                                         delete buf2;
748                                         
749                                         if (trimChimera) {  
750                                                 int lengthLeft = chimeraResults[0].winLEnd - chimeraResults[0].winLStart;
751                                                 int lengthRight = chimeraResults[0].winREnd - chimeraResults[0].winRStart;
752                                                 
753                                                 string newAligned = trim.getAligned();
754                                                 if (lengthLeft > lengthRight) { //trim right
755                                                         for (int i = (chimeraResults[0].winRStart-1); i < newAligned.length(); i++) { newAligned[i] = '.'; }
756                                                 }else { //trim left
757                                                         for (int i = 0; i < (chimeraResults[0].winLEnd-1); i++) { newAligned[i] = '.'; }
758                                                 }
759                                                 trim.setAligned(newAligned);    
760                                         }
761                                 }
762                         }
763                         
764                         outputString = getBlock(chimeraResults[0], chimeraFlag);
765                         outputString += "\n";
766                         
767                         //write to output file
768                         int length = outputString.length();
769                         char* buf = new char[length];
770                         memcpy(buf, outputString.c_str(), length);
771                         
772                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
773                         delete buf;
774                         
775                 }else {  
776                         outputString += querySeq.getName() + "\tno\n";  
777                         
778                         //write to output file
779                         int length = outputString.length();
780                         char* buf = new char[length];
781                         memcpy(buf, outputString.c_str(), length);
782                         
783                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
784                         delete buf;
785                 }
786                 
787                 return trim;
788         }
789         catch(exception& e) {
790                 m->errorOut(e, "ChimeraSlayer", "print");
791                 exit(1);
792         }
793 }
794 #endif
795
796 //***************************************************************************************************************
797 int ChimeraSlayer::getChimeras(Sequence* query) {
798         try {
799                 
800                 trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
801                 printResults.trimQuery = trimQuery; 
802                 
803                 chimeraFlags = "no";
804                 printResults.flag = "no";
805                 
806                 querySeq = *query;
807                 
808                 //you must create a template
809                 vector<Sequence*> thisTemplate;
810                 vector<Sequence*> thisFilteredTemplate;
811                 if (templateFileName != "self") { thisTemplate = templateSeqs; thisFilteredTemplate = filteredTemplateSeqs; }
812                 else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
813                 
814                 if (m->control_pressed) {  return 0;  }
815                 if (thisTemplate.size() == 0) {  return 0; } //not chimeric
816                 
817                 //moved this out of maligner - 4/29/11
818                 vector<Sequence> refSeqs = getRefSeqs(*query, thisTemplate, thisFilteredTemplate);
819                 
820                 Maligner maligner(refSeqs, match, misMatch, divR, minSim, minCov); 
821                 Slayer slayer(window, increment, minSim, divR, iters, minSNP, minBS);
822                 
823                 if (templateFileName == "self") {
824                         if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
825                         else if (searchMethod == "blast") {  delete databaseLeft; }
826                 }
827         
828                 if (m->control_pressed) {  return 0;  }
829
830                 string chimeraFlag = maligner.getResults(*query, decalc);
831
832                 if (m->control_pressed) {  return 0;  }
833                 
834                 vector<results> Results = maligner.getOutput();
835                 
836                 //for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];        }
837                 
838                 if (chimeraFlag == "yes") {
839                         
840                         if (realign) {
841                                 vector<string> parents;
842                                 for (int i = 0; i < Results.size(); i++) {
843                                         parents.push_back(Results[i].parentAligned);
844                                 }
845                                 
846                                 ChimeraReAligner realigner;             
847                                 realigner.reAlign(query, parents);
848
849                         }
850                         
851 //                      cout << query->getAligned() << endl;
852                         //get sequence that were given from maligner results
853                         vector<SeqCompare> seqs;
854                         map<string, float> removeDups;
855                         map<string, float>::iterator itDup;
856                         map<string, string> parentNameSeq;
857                         map<string, string>::iterator itSeq;
858                         for (int j = 0; j < Results.size(); j++) {
859
860                                 float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
861                                 //only add if you are not a duplicate
862 //                              cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
863                                 
864                                 
865                                 if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
866                                 
867                                         itDup = removeDups.find(Results[j].parent);
868                                         if (itDup == removeDups.end()) { //this is not duplicate
869                                                 removeDups[Results[j].parent] = dist;
870                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
871                                         }else if (dist > itDup->second) { //is this a stronger number for this parent
872                                                 removeDups[Results[j].parent] = dist;
873                                                 parentNameSeq[Results[j].parent] = Results[j].parentAligned;
874                                         }
875                                 
876                                 }
877                                 
878                         }
879                         
880                         for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
881                                 itSeq = parentNameSeq.find(itDup->first);
882                                 Sequence seq(itDup->first, itSeq->second);
883                                 
884                                 SeqCompare member;
885                                 member.seq = seq;
886                                 member.dist = itDup->second;
887                                 seqs.push_back(member);
888                         }
889                         
890                         //limit number of parents to explore - default 3
891                         if (Results.size() > parents) {
892                                 //sort by distance
893                                 sort(seqs.begin(), seqs.end(), compareSeqCompare);
894                                 //prioritize larger more similiar sequence fragments
895                                 reverse(seqs.begin(), seqs.end());
896                                 
897                                 //for (int k = seqs.size()-1; k > (parents-1); k--)  {  
898                                 //      delete seqs[k].seq;
899                                         //seqs.pop_back();      
900                                 //}
901                         }
902                 
903                         //put seqs into vector to send to slayer
904                         
905 //                      cout << query->getAligned() << endl;
906                         vector<Sequence> seqsForSlayer;
907                         for (int k = 0; k < seqs.size(); k++) {  
908 //                              cout << seqs[k].seq->getAligned() << endl;
909                                 seqsForSlayer.push_back(seqs[k].seq);   
910 //                              cout << seqs[k].seq->getName() << endl;
911                         }
912                         
913                         if (m->control_pressed) {  return 0;  }
914
915                         //send to slayer
916                         chimeraFlags = slayer.getResults(*query, seqsForSlayer);
917                         if (m->control_pressed) {  return 0;  }
918                         chimeraResults = slayer.getOutput();
919                         
920                         printResults.flag = chimeraFlags;
921                         printResults.results = chimeraResults;
922                         
923                         //free memory
924                         //for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
925                 }
926                 //cout << endl << endl;
927                 return 0;
928         }
929         catch(exception& e) {
930                 m->errorOut(e, "ChimeraSlayer", "getChimeras");
931                 exit(1);
932         }
933 }
934 //***************************************************************************************************************
935 void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
936         try {
937                 out << querySeq.getName() << '\t';
938                 out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
939         
940                 out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
941                 out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
942                 
943                 out << flag << '\t' << data.winLStart << "-" << data.winLEnd << '\t' << data.winRStart << "-" << data.winREnd << '\t';
944                 
945         }
946         catch(exception& e) {
947                 m->errorOut(e, "ChimeraSlayer", "printBlock");
948                 exit(1);
949         }
950 }
951 //***************************************************************************************************************
952 void ChimeraSlayer::printBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag, ostream& out){
953         try {
954                 
955                 if ((leftChimeric) && (!rightChimeric)) { //print left
956                         out << querySeq.getName() << '\t';
957                         out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
958                         
959                         out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
960                         out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
961                 
962                         out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
963                 
964                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
965                         out << querySeq.getName() << '\t';
966                         out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
967                         
968                         out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
969                         out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
970                         
971                         out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
972                         
973                 }else  { //print both results
974                         if (leftdata.flag == "yes") {
975                                 out << querySeq.getName() + "_LEFT" << '\t';
976                                 out << leftdata.results[0].parentA.getName() << "\t" << leftdata.results[0].parentB.getName()  << '\t';
977                                 
978                                 out << leftdata.results[0].divr_qla_qrb << '\t' << leftdata.results[0].qla_qrb << '\t' << leftdata.results[0].bsa << '\t';
979                                 out << leftdata.results[0].divr_qlb_qra << '\t' << leftdata.results[0].qlb_qra << '\t' << leftdata.results[0].bsb << '\t';
980                                 
981                                 out << flag << '\t' << leftdata.results[0].winLStart << "-" << leftdata.results[0].winLEnd << '\t' << leftdata.results[0].winRStart << "-" << leftdata.results[0].winREnd << '\t';
982                         }
983                         
984                         if (rightdata.flag == "yes") {
985                                 if (leftdata.flag == "yes") { out << endl; }
986                                 
987                                 out << querySeq.getName() + "_RIGHT"<< '\t';
988                                 out << rightdata.results[0].parentA.getName() << "\t" << rightdata.results[0].parentB.getName()  << '\t';
989                                 
990                                 out << rightdata.results[0].divr_qla_qrb << '\t' << rightdata.results[0].qla_qrb << '\t' << rightdata.results[0].bsa << '\t';
991                                 out << rightdata.results[0].divr_qlb_qra << '\t' << rightdata.results[0].qlb_qra << '\t' << rightdata.results[0].bsb << '\t';
992                                 
993                                 out << flag << '\t' << rightdata.results[0].winLStart << "-" << rightdata.results[0].winLEnd << '\t' << rightdata.results[0].winRStart << "-" << rightdata.results[0].winREnd << '\t';                  
994                 
995                         }
996                 }
997         }
998         catch(exception& e) {
999                 m->errorOut(e, "ChimeraSlayer", "printBlock");
1000                 exit(1);
1001         }
1002 }
1003 //***************************************************************************************************************
1004 string ChimeraSlayer::getBlock(data_results leftdata, data_results rightdata, bool leftChimeric, bool rightChimeric, string flag){
1005         try {
1006                 
1007                 string out = "";
1008                 
1009                 if ((leftChimeric) && (!rightChimeric)) { //get left
1010                         out += querySeq.getName() + "\t";
1011                         out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1012                         
1013                         out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1014                         out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1015                         
1016                         out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1017                         
1018                 }else if ((!leftChimeric) && (rightChimeric)) {  //print right
1019                         out += querySeq.getName() + "\t";
1020                         out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
1021                         
1022                         out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1023                         out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1024                         
1025                         out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
1026                         
1027                 }else  { //print both results
1028                         
1029                         if (leftdata.flag == "yes") {
1030                                 out += querySeq.getName() + "_LEFT\t";
1031                                 out += leftdata.results[0].parentA.getName() + "\t" + leftdata.results[0].parentB.getName() + "\t";
1032                                 
1033                                 out += toString(leftdata.results[0].divr_qla_qrb) + "\t" + toString(leftdata.results[0].qla_qrb) + "\t" + toString(leftdata.results[0].bsa) + "\t";
1034                                 out += toString(leftdata.results[0].divr_qlb_qra) + "\t" + toString(leftdata.results[0].qlb_qra) + "\t" + toString(leftdata.results[0].bsb) + "\t";
1035                                 
1036                                 out += flag + "\t" + toString(leftdata.results[0].winLStart) + "-" + toString(leftdata.results[0].winLEnd) + "\t" + toString(leftdata.results[0].winRStart) + "-" + toString(leftdata.results[0].winREnd) + "\t";
1037                         }
1038                         
1039                         if (rightdata.flag == "yes") {
1040                                 if (leftdata.flag == "yes") { out += "\n"; }
1041                                 out +=  querySeq.getName() + "_RIGHT\t";
1042                                 out += rightdata.results[0].parentA.getName() + "\t" + rightdata.results[0].parentB.getName()  + "\t";
1043                                 
1044                                 out += toString(rightdata.results[0].divr_qla_qrb) + "\t" + toString(rightdata.results[0].qla_qrb) + "\t" + toString(rightdata.results[0].bsa) + "\t";
1045                                 out += toString(rightdata.results[0].divr_qlb_qra) + "\t" + toString(rightdata.results[0].qlb_qra) + "\t" + toString(rightdata.results[0].bsb) + "\t";
1046                                 
1047                                 out += flag + "\t" + toString(rightdata.results[0].winLStart) + "-" + toString(rightdata.results[0].winLEnd) + "\t" + toString(rightdata.results[0].winRStart) + "-" + toString(rightdata.results[0].winREnd) + "\t";                   
1048                         }
1049                 }
1050                 
1051                 return out;
1052                 
1053         }
1054         catch(exception& e) {
1055                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1056                 exit(1);
1057         }
1058 }
1059 //***************************************************************************************************************
1060 string ChimeraSlayer::getBlock(data_struct data, string flag){
1061         try {
1062                 
1063                 string outputString = "";
1064                 
1065                 outputString += querySeq.getName() + "\t";
1066                 outputString += data.parentA.getName() + "\t" + data.parentB.getName()  + "\t";
1067                         
1068                 outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
1069                 outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
1070                 
1071                 outputString += flag + "\t" + toString(data.winLStart) + "-" + toString(data.winLEnd) + "\t" + toString(data.winRStart) + "-" + toString(data.winREnd) + "\t";
1072                 
1073                 return outputString;
1074         }
1075         catch(exception& e) {
1076                 m->errorOut(e, "ChimeraSlayer", "getBlock");
1077                 exit(1);
1078         }
1079 }
1080 //***************************************************************************************************************
1081 vector<Sequence> ChimeraSlayer::getRefSeqs(Sequence q, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate){
1082         try {
1083                 
1084                 vector<Sequence> refSeqs;
1085                 
1086                 if (searchMethod == "distance") {
1087                         //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
1088                         Sequence* newSeq = new Sequence(q.getName(), q.getAligned());
1089                         runFilter(newSeq);
1090                         refSeqs = decalc.findClosest(*newSeq, thisTemplate, thisFilteredTemplate, numWanted, minSim);
1091                         delete newSeq;
1092                 }else if (searchMethod == "blast")  {
1093                         refSeqs = getBlastSeqs(q, thisTemplate, numWanted); //fills indexes
1094                 }else if (searchMethod == "kmer") {
1095                         refSeqs = getKmerSeqs(q, thisTemplate, numWanted); //fills indexes
1096                 }else { m->mothurOut("not valid search."); exit(1);  } //should never get here
1097                 
1098                 return refSeqs;
1099         }
1100         catch(exception& e) {
1101                 m->errorOut(e, "ChimeraSlayer", "getRefSeqs");
1102                 exit(1);
1103         }
1104 }
1105 //***************************************************************************************************************/
1106 vector<Sequence> ChimeraSlayer::getBlastSeqs(Sequence q, vector<Sequence*>& db, int num) {
1107         try {   
1108                 
1109                 vector<Sequence> refResults;
1110                 
1111                 //get parts of query
1112                 string queryUnAligned = q.getUnaligned();
1113                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1114                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1115 //cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;    
1116                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1117                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1118                 
1119                 vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
1120                 vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
1121                                 
1122                 
1123                 //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
1124 //              vector<int> smaller;
1125 //              vector<int> larger;
1126 //              
1127 //              if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
1128 //              else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
1129                 
1130                 //merge results         
1131                 map<int, int> seen;
1132                 map<int, int>::iterator it;
1133                 vector<int> mergedResults;
1134                 
1135                 int index = 0;
1136 //              for (int i = 0; i < smaller.size(); i++) {
1137                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1138                         
1139                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1140         
1141                         //add left if you havent already
1142                         it = seen.find(tempIndexesLeft[index]);
1143                         if (it == seen.end()) {  
1144                                 mergedResults.push_back(tempIndexesLeft[index]);
1145                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1146                         }
1147                         
1148                         //add right if you havent already
1149                         it = seen.find(tempIndexesRight[index]);
1150                         if (it == seen.end()) {  
1151                                 mergedResults.push_back(tempIndexesRight[index]);
1152                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1153                         }
1154                         index++;
1155                 }
1156
1157                 
1158                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1159                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1160                         
1161                         //add right if you havent already
1162                         it = seen.find(tempIndexesLeft[i]);
1163                         if (it == seen.end()) {  
1164                                 mergedResults.push_back(tempIndexesLeft[i]);
1165                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1166                         }
1167                 }
1168
1169                 for (int i = index; i < tempIndexesRight.size(); i++) {
1170                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1171                         
1172                         //add right if you havent already
1173                         it = seen.find(tempIndexesRight[i]);
1174                         if (it == seen.end()) {  
1175                                 mergedResults.push_back(tempIndexesRight[i]);
1176                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1177                         }
1178                 }
1179                 //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
1180                 //cout << qname << endl;        
1181                 
1182                 if (mergedResults.size() == 0) { numNoParents++; }
1183                 
1184                 for (int i = 0; i < mergedResults.size(); i++) {
1185                         //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1186                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1187                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1188                                 refResults.push_back(temp);
1189                         }
1190                 }
1191                 //cout << endl << endl;
1192
1193                 delete queryRight;
1194                 delete queryLeft;
1195                 
1196                 return refResults;
1197         }
1198         catch(exception& e) {
1199                 m->errorOut(e, "ChimeraSlayer", "getBlastSeqs");
1200                 exit(1);
1201         }
1202 }
1203 //***************************************************************************************************************
1204 vector<Sequence> ChimeraSlayer::getKmerSeqs(Sequence q, vector<Sequence*>& db, int num) {
1205         try {   
1206                 vector<Sequence> refResults;
1207                 
1208                 //get parts of query
1209                 string queryUnAligned = q.getUnaligned();
1210                 string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
1211                 string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
1212                 
1213                 Sequence* queryLeft = new Sequence(q.getName(), leftQuery);
1214                 Sequence* queryRight = new Sequence(q.getName(), rightQuery);
1215                 
1216                 vector<int> tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, num);
1217                 vector<int> tempIndexesRight = databaseRight->findClosestSequences(queryRight, num);
1218                 
1219                 //merge results         
1220                 map<int, int> seen;
1221                 map<int, int>::iterator it;
1222                 vector<int> mergedResults;
1223                 
1224                 int index = 0;
1225                 //              for (int i = 0; i < smaller.size(); i++) {
1226                 while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
1227                         
1228                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1229                         
1230                         //add left if you havent already
1231                         it = seen.find(tempIndexesLeft[index]);
1232                         if (it == seen.end()) {  
1233                                 mergedResults.push_back(tempIndexesLeft[index]);
1234                                 seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
1235                         }
1236                         
1237                         //add right if you havent already
1238                         it = seen.find(tempIndexesRight[index]);
1239                         if (it == seen.end()) {  
1240                                 mergedResults.push_back(tempIndexesRight[index]);
1241                                 seen[tempIndexesRight[index]] = tempIndexesRight[index];
1242                         }
1243                         index++;
1244                 }
1245                 
1246                 
1247                 for (int i = index; i < tempIndexesLeft.size(); i++) {
1248                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1249                         
1250                         //add right if you havent already
1251                         it = seen.find(tempIndexesLeft[i]);
1252                         if (it == seen.end()) {  
1253                                 mergedResults.push_back(tempIndexesLeft[i]);
1254                                 seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
1255                         }
1256                 }
1257                 
1258                 for (int i = index; i < tempIndexesRight.size(); i++) {
1259                         if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
1260                         
1261                         //add right if you havent already
1262                         it = seen.find(tempIndexesRight[i]);
1263                         if (it == seen.end()) {  
1264                                 mergedResults.push_back(tempIndexesRight[i]);
1265                                 seen[tempIndexesRight[i]] = tempIndexesRight[i];
1266                         }
1267                 }
1268                 
1269                 for (int i = 0; i < mergedResults.size(); i++) {
1270                         //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
1271                         if (db[mergedResults[i]]->getName() != q.getName()) { 
1272                                 Sequence temp(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
1273                                 refResults.push_back(temp);
1274                                 
1275                         }
1276                 }
1277
1278                 //cout << endl;         
1279                 delete queryRight;
1280                 delete queryLeft;
1281                 
1282                 return refResults;
1283         }
1284         catch(exception& e) {
1285                 m->errorOut(e, "ChimeraSlayer", "getKmerSeqs");
1286                 exit(1);
1287         }
1288 }
1289 //***************************************************************************************************************
1290
1291