]> git.donarmstrong.com Git - mothur.git/blob - myPerseus.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / myPerseus.cpp
1 /*
2  *  myPerseus.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 9/5/11.
6  *  Copyright 2011 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "myPerseus.h"
11
12 /**************************************************************************************************/
13 int PERSEUSMAXINT = numeric_limits<int>::max();
14 /**************************************************************************************************/
15
16 vector<vector<double> > Perseus::binomial(int maxOrder){
17         try {
18                 vector<vector<double> > binomial(maxOrder+1);
19                 
20                 for(int i=0;i<=maxOrder;i++){
21                         binomial[i].resize(maxOrder+1);
22                         binomial[i][0]=1;
23                         binomial[0][i]=0;
24                 }
25                 binomial[0][0]=1;
26                 
27                 binomial[1][0]=1;
28                 binomial[1][1]=1;
29                 
30                 for(int i=2;i<=maxOrder;i++){
31                         binomial[1][i]=0;
32                 }
33                 
34                 for(int i=2;i<=maxOrder;i++){
35                         for(int j=1;j<=maxOrder;j++){
36                                 if(i==j){       binomial[i][j]=1;                                                                       }
37                                 if(j>i) {       binomial[i][j]=0;                                                                       }
38                                 else    {       binomial[i][j]=binomial[i-1][j-1]+binomial[i-1][j];     }
39                         }
40                 }
41                 
42                 return binomial;
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "Perseus", "binomial");
46                 exit(1);
47         }
48 }
49
50 /**************************************************************************************************/
51 double Perseus::basicPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, pwModel model){
52         try {
53                 double GAP = model.GAP_OPEN;
54                 double MATCH = model.MATCH;
55                 double MISMATCH = model.MISMATCH;
56                 
57                 int queryLength = query.size();
58                 int refLength = reference.size();
59                 
60                 vector<vector<double> > alignMatrix(queryLength + 1);
61                 vector<vector<char> > alignMoves(queryLength + 1);
62                 
63                 for(int i=0;i<=queryLength;i++){
64                         if (m->control_pressed) { return 0; }
65                         alignMatrix[i].resize(refLength + 1, 0);
66                         alignMoves[i].resize(refLength + 1, 'x');
67                 }
68                 
69                 for(int i=0;i<=queryLength;i++){
70                         if (m->control_pressed) { return 0; }
71                         alignMatrix[i][0] = GAP * i;
72                         alignMoves[i][0] = 'u';
73                 }
74                 
75                 for(int i=0;i<=refLength;i++){
76                         if (m->control_pressed) { return 0; }
77                         alignMatrix[0][i] = GAP * i;
78                         alignMoves[0][i] = 'l';
79                 }
80                 
81                 for(int i=1;i<=queryLength;i++){
82                         
83                         if (m->control_pressed) { return 0; }
84                         
85                         for(int j=1;j<=refLength;j++){
86                                 
87                                 double nogapScore;              
88                                 if(query[i-1] == reference[j-1]){       nogapScore = alignMatrix[i-1][j-1] + MATCH;             }
89                                 else                                                    {       nogapScore = alignMatrix[i-1][j-1] + MISMATCH;  }
90                                 
91                                 double leftScore;
92                                 if(i == queryLength)                    {       leftScore = alignMatrix[i][j-1];                                }
93                                 else                                                    {       leftScore = alignMatrix[i][j-1] + GAP;                  }
94                                 
95                                 
96                                 double upScore;
97                                 if(j == refLength)                              {       upScore = alignMatrix[i-1][j];                                  }
98                                 else                                                    {       upScore = alignMatrix[i-1][j] + GAP;                    }
99                                 
100                                 if(nogapScore > leftScore){
101                                         if(nogapScore > upScore){
102                                                 alignMoves[i][j] = 'd';
103                                                 alignMatrix[i][j] = nogapScore;
104                                         }
105                                         else{
106                                                 alignMoves[i][j] = 'u';
107                                                 alignMatrix[i][j] = upScore;
108                                         }
109                                 }
110                                 else{
111                                         if(leftScore > upScore){
112                                                 alignMoves[i][j] = 'l';
113                                                 alignMatrix[i][j] = leftScore;
114                                         }
115                                         else{
116                                                 alignMoves[i][j] = 'u';
117                                                 alignMatrix[i][j] = upScore;
118                                         }
119                                 }
120                         }
121                 }
122                 
123                 int i = queryLength;
124                 int j = refLength;
125                 
126                 qAlign = "";
127                 rAlign = "";
128                         
129                 int diffs = 0;
130                 int length = 0;
131                 
132                 while(i > 0 && j > 0){
133                         
134                         if (m->control_pressed) { return 0; }
135                         
136                         if(alignMoves[i][j] == 'd'){
137                                 qAlign = query[i-1] + qAlign;
138                                 rAlign = reference[j-1] + rAlign;
139
140                                 if(query[i-1] != reference[j-1]){       diffs++;        }
141                                 length++;
142                                 
143                                 i--;
144                                 j--;
145                         }
146                         else if(alignMoves[i][j] == 'u'){
147                                 qAlign = query[i-1] + qAlign;
148                                 
149                                 if(j != refLength)      {       rAlign = '-' + rAlign;  diffs++;        length++;       }
150                                 else                            {       rAlign = '.' + rAlign;  }
151                                 i--;
152                         }
153                         else if(alignMoves[i][j] == 'l'){
154                                 rAlign = reference[j-1] + rAlign;
155                                 
156                                 if(i != queryLength){   qAlign = '-' + qAlign;  diffs++;        length++;       }
157                                 else                            {       qAlign = '.' + qAlign;  }
158                                 j--;
159                         }
160                 }
161                 
162                 while(i>0){
163                         
164                         if (m->control_pressed) { return 0; }
165                         
166                         rAlign = '.' + rAlign;
167                         qAlign = query[i-1] + qAlign;
168                         i--;
169                 }
170                 
171                 while(j>0){
172                         
173                         if (m->control_pressed) { return 0; }
174                         
175                         rAlign = reference[j-1] + rAlign;
176                         qAlign = '.' + qAlign;
177                         j--;
178                 }
179                 
180                 
181
182                 return double(diffs)/double(length);
183         }
184         catch(exception& e) {
185                 m->errorOut(e, "Perseus", "basicPairwiseAlignSeqs");
186                 exit(1);
187         }
188         
189 }
190 /**************************************************************************************************/
191 int Perseus::getDiffs(string qAlign, string rAlign, vector<int>& leftDiffs, vector<int>& leftMap, vector<int>& rightDiffs, vector<int>& rightMap){
192         try {
193                 int alignLength = qAlign.length();
194                 
195                 int lDiffs = 0;
196                 int lCount = 0;
197                 for(int l=0;l<alignLength;l++){
198                         
199                         if (m->control_pressed) { return 0; }
200                         
201                         if(qAlign[l] == '-'){
202                                 lDiffs++;               
203                         }
204                         else if(qAlign[l] != '.'){
205                                 
206                                 if(rAlign[l] == '-'){
207                                         lDiffs++;
208                                 }
209                                 else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
210                                         lDiffs++;
211                                 }
212                                 leftDiffs[lCount] = lDiffs;
213                                 leftMap[lCount] = l;
214                                 
215                                 lCount++;
216                         }                       
217                 }
218                 
219                 int rDiffs = 0;
220                 int rCount = 0;
221                 for(int l=alignLength-1;l>=0;l--){
222                         
223                         if (m->control_pressed) { return 0; }
224                         
225                         if(qAlign[l] == '-'){
226                                 rDiffs++;               
227                         }
228                         else if(qAlign[l] != '.'){
229                                 
230                                 if(rAlign[l] == '-'){
231                                         rDiffs++;
232                                 }
233                                 else if(qAlign[l] != rAlign[l] && rAlign[l] != '.'){
234                                         rDiffs++;
235                                 }
236                                 
237                                 rightDiffs[rCount] = rDiffs;
238                                 rightMap[rCount] = l;
239                                 rCount++;
240                                 
241                         }
242                         
243                 }
244                 
245                 return 0;
246         }
247         catch(exception& e) {
248                 m->errorOut(e, "Perseus", "getDiffs");
249                 exit(1);
250         }
251 }
252 /**************************************************************************************************/
253 int Perseus::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, string& seqA, string& seqB){
254         try {
255                 char nullReturn = -1;
256                 
257                 while(i>=1 && j>=1){
258                         
259                         if (m->control_pressed) { return 0; }
260                         
261                         if(direction == 'd'){
262                                 if(seqA[i-1] == seqB[j-1])      {       return seqA[i-1];       }
263                                 else                                            {       return nullReturn;      }
264                         }
265                         
266                         else if(direction == 'l')               {       j--;                            }
267                         else                                                    {       i--;                            }
268                         
269                         direction = alignMoves[i][j];
270                 }
271                 
272                 return nullReturn;
273         }
274         catch(exception& e) {
275                 m->errorOut(e, "Perseus", "getLastMatch");
276                 exit(1);
277         }
278 }
279
280 /**************************************************************************************************/
281
282 int Perseus::toInt(char b){
283         try {
284                 if(b == 'A')            {       return 0;       }
285                 else if(b == 'C')       {       return 1;       }
286                 else if(b == 'T')       {       return 2;       }
287                 else if(b == 'G')       {       return 3;       }
288                 else { m->mothurOut("[ERROR]: " + toString(b) + " is not ATGC."); m->mothurOutEndLine(); return -1; }
289         }
290         catch(exception& e) {
291                 m->errorOut(e, "Perseus", "toInt");
292                 exit(1);
293         }
294 }
295
296 /**************************************************************************************************/
297
298 double Perseus::modeledPairwiseAlignSeqs(string query, string reference, string& qAlign, string& rAlign, vector<vector<double> >& correctMatrix){
299         try {
300                 int queryLength = query.size();
301                 int refLength = reference.size();
302                 
303                 vector<vector<double> > alignMatrix(queryLength + 1);
304                 vector<vector<char> > alignMoves(queryLength + 1);
305                 
306                 for(int i=0;i<=queryLength;i++){
307                         if (m->control_pressed) { return 0; }
308                         alignMatrix[i].resize(refLength + 1, 0);
309                         alignMoves[i].resize(refLength + 1, 'x');
310                 }
311                 
312                 for(int i=0;i<=queryLength;i++){
313                         if (m->control_pressed) { return 0; }
314                         alignMatrix[i][0] = 15.0 * i;
315                         alignMoves[i][0] = 'u';
316                 }
317                 
318                 for(int i=0;i<=refLength;i++){
319                         if (m->control_pressed) { return 0; }
320                         alignMatrix[0][i] = 15.0 * i;
321                         alignMoves[0][i] = 'l';
322                 }
323                 
324                 for(int i=1;i<=queryLength;i++){
325                         
326                         if (m->control_pressed) { return 0; }
327                         
328                         for(int j=1;j<=refLength;j++){
329                                 
330                                 double nogap;           
331                                 nogap = alignMatrix[i-1][j-1] + correctMatrix[toInt(query[i-1])][toInt(reference[j-1])];                        
332                                 
333                                 double gap;
334                                 
335                                 double left;
336                                 if(i == queryLength){ //terminal gap
337                                         left = alignMatrix[i][j-1];
338                                 }
339                                 else{
340                                         if(reference[j-1] == getLastMatch('l', alignMoves, i, j, query, reference)){
341                                                 gap = 4.0;
342                                         }
343                                         else{
344                                                 gap = 15.0;
345                                         }
346                                         
347                                         left = alignMatrix[i][j-1] + gap;
348                                 }
349                                 
350                                 double up;
351                                 if(j == refLength){ //terminal gap
352                                         up = alignMatrix[i-1][j];
353                                 }
354                                 else{
355                                         
356                                         if(query[i-1] == getLastMatch('u', alignMoves, i, j, query, reference)){
357                                                 gap = 4.0;
358                                         }
359                                         else{
360                                                 gap = 15.0;
361                                         }
362                                         
363                                         up = alignMatrix[i-1][j] + gap;
364                                 }
365                                 
366                                 
367                                 if(nogap < left){
368                                         if(nogap < up){
369                                                 alignMoves[i][j] = 'd';
370                                                 alignMatrix[i][j] = nogap;
371                                         }
372                                         else{
373                                                 alignMoves[i][j] = 'u';
374                                                 alignMatrix[i][j] = up;
375                                         }
376                                 }
377                                 else{
378                                         if(left < up){
379                                                 alignMoves[i][j] = 'l';
380                                                 alignMatrix[i][j] = left;
381                                         }
382                                         else{
383                                                 alignMoves[i][j] = 'u';
384                                                 alignMatrix[i][j] = up;
385                                         }
386                                 }
387                         }
388                 }
389
390                 int i = queryLength;
391                 int j = refLength;
392                 
393                 int alignLength = 0;
394                 
395                 while(i > 0 && j > 0){
396                         
397                         if (m->control_pressed) { return 0; }
398                         
399                         if(alignMoves[i][j] == 'd'){
400                                 qAlign = query[i-1] + qAlign;
401                                 rAlign = reference[j-1] + rAlign;
402                                 alignLength++;
403                                 i--;
404                                 j--;
405                         }
406                         else if(alignMoves[i][j] == 'u'){
407                                 if(j != refLength){
408                                         qAlign = query[i-1] + qAlign;
409                                         rAlign = '-' + rAlign;
410                                         alignLength++;
411                                 }
412                                 
413                                 i--;
414                         }
415                         else if(alignMoves[i][j] == 'l'){
416                                 if(i != queryLength){
417                                         qAlign = '-' + qAlign;
418                                         rAlign = reference[j-1] + rAlign;
419                                         alignLength++;                          
420                                 }
421                                 
422                                 j--;
423                         }
424                 }
425
426                 return alignMatrix[queryLength][refLength] / (double)alignLength;
427         }
428         catch(exception& e) {
429                 m->errorOut(e, "Perseus", "modeledPairwiseAlignSeqs");
430                 exit(1);
431         }
432 }
433
434 /**************************************************************************************************/
435 int Perseus::getAlignments(int curSequenceIndex, vector<seqData> sequences, vector<pwAlign>& alignments, vector<vector<int> >& leftDiffs, vector<vector<int> >& leftMaps, vector<vector<int> >& rightDiffs, vector<vector<int> >& rightMaps, int& bestRefSeq, int& bestRefDiff, vector<bool>& restricted){
436         try {
437                 int numSeqs = sequences.size();
438                 //int bestSequenceMismatch = PERSEUSMAXINT;
439
440                 string curSequence = sequences[curSequenceIndex].sequence;
441                 int curFrequency = sequences[curSequenceIndex].frequency; 
442
443                 bestRefSeq = -1;
444                 
445                 int bestIndex = -1;
446                 int bestDiffs = PERSEUSMAXINT;
447                 int comparisons = 0;
448                         
449                 pwModel model(0, -1, -1.5);
450                 
451                 for(int i=0;i<numSeqs;i++){
452                         
453                         if (m->control_pressed) { return 0; }
454                         
455                         if(i != curSequenceIndex && restricted[i] != 1 && sequences[i].frequency >= 2 * curFrequency){
456                                 string refSequence = sequences[i].sequence;
457                                 
458                                 leftDiffs[i].assign(curSequence.length(), 0);
459                                 leftMaps[i].assign(curSequence.length(), 0);
460                                 rightDiffs[i].assign(curSequence.length(), 0);
461                                 rightMaps[i].assign(curSequence.length(), 0);
462                                 
463                                 basicPairwiseAlignSeqs(curSequence, refSequence, alignments[i].query, alignments[i].reference, model);
464                                 
465
466                                 getDiffs(alignments[i].query, alignments[i].reference, leftDiffs[i], leftMaps[i], rightDiffs[i], rightMaps[i]);
467                                 
468                                 int diffs = rightDiffs[i][curSequence.length()-1];                                                      
469
470                                 if(diffs < bestDiffs){
471                                         bestDiffs = diffs;
472                                         bestIndex = i;
473                                 }
474                                 comparisons++;
475                                 restricted[i] = 0;
476                         }
477                         else{
478                                 restricted[i] = 1;
479                         }
480                 }
481
482                 bestRefSeq = bestIndex;
483                 bestRefDiff = bestDiffs;
484                 
485                 return comparisons;
486         }
487         catch(exception& e) {
488                 m->errorOut(e, "Perseus", "getAlignments");
489                 exit(1);
490         }
491 }
492 /**************************************************************************************************/
493 int Perseus::getChimera(vector<seqData> sequences,
494                            vector<vector<int> >& leftDiffs, 
495                            vector<vector<int> >& rightDiffs,
496                            int& leftParent, 
497                            int& rightParent, 
498                            int& breakPoint,
499                            vector<int>& singleLeft, 
500                            vector<int>& bestLeft, 
501                            vector<int>& singleRight, 
502                            vector<int>& bestRight, 
503                            vector<bool> restricted){
504         try {
505                 int numRefSeqs = restricted.size();
506                 int seqLength = leftDiffs[0].size();
507                 
508                 singleLeft.resize(seqLength, PERSEUSMAXINT);
509                 bestLeft.resize(seqLength, -1);
510
511                 for(int l=0;l<seqLength;l++){
512                         
513                         if (m->control_pressed) { return 0; }
514                         
515                         for(int i=0;i<numRefSeqs;i++){
516                                 
517                                 if(restricted[i] == 0){
518                                         if(((leftDiffs[i][l] < singleLeft[l]) && sequences[i].frequency) || ((leftDiffs[i][l] == singleLeft[l]) && (sequences[i].frequency > sequences[bestLeft[l]].frequency))){
519                                                 singleLeft[l] = leftDiffs[i][l];
520                                                 bestLeft[l] = i;
521                                         }
522                                 }
523                         }
524                 }
525                 
526                 singleRight.resize(seqLength, PERSEUSMAXINT);
527                 bestRight.resize(seqLength, -1);
528                 
529                 for(int l=0;l<seqLength;l++){
530                         
531                         if (m->control_pressed) { return 0; }
532                         
533                         for(int i=0;i<numRefSeqs;i++){
534                                 
535                                 if(restricted[i] == 0){
536                                         if((rightDiffs[i][l] < singleRight[l] && sequences[i].frequency) || ((rightDiffs[i][l] == singleRight[l] && sequences[i].frequency > sequences[bestRight[l]].frequency))){
537                                                 singleRight[l] = rightDiffs[i][l];
538                                                 bestRight[l] = i;
539                                         }
540                                 }
541                         }
542                 }
543
544                 
545                 
546                 int bestChimeraMismatches = PERSEUSMAXINT;
547                 leftParent = -1;
548                 rightParent = -1;
549                 breakPoint = -1;
550                 
551                 for(int l=0;l<seqLength-1;l++){
552                         
553                         if (m->control_pressed) { return 0; }
554                         
555                         int chimera = singleLeft[l] + singleRight[seqLength - l - 2];
556                         if(chimera < bestChimeraMismatches){
557                                 bestChimeraMismatches = chimera;
558                                 breakPoint = l;
559                                 leftParent = bestLeft[l];
560                                 rightParent = bestRight[seqLength - l - 2];
561                         }
562                 }
563                 
564                 return bestChimeraMismatches;
565         }
566         catch(exception& e) {
567                 m->errorOut(e, "Perseus", "getChimera");
568                 exit(1);
569         }
570 }
571
572 /**************************************************************************************************/
573
574 string Perseus::stitchBimera(vector<pwAlign>& alignments, int leftParent, int rightParent, int breakPoint, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
575         try {
576                 int breakLeft = leftMaps[leftParent][breakPoint];
577                 int breakRight = rightMaps[rightParent][rightMaps[rightParent].size() - breakPoint - 2];
578                 
579                 string left = alignments[leftParent].reference;
580                 string right = alignments[rightParent].reference;
581                 string chimera = "";
582                 
583                 for(int i=0;i<=breakLeft;i++){
584                         
585                         if (m->control_pressed) { return 0; }
586                         
587                         if(left[i] != '-' && left[i] != '.'){
588                                 chimera += left[i];
589                         }
590                 }
591                 
592
593                 for(int i=breakRight;i<right.length();i++){
594                         
595                         if (m->control_pressed) { return 0; }
596                         
597                         if(right[i] != '-' && right[i] != '.'){
598                                 chimera += right[i];
599                         }
600                 }
601                 
602                 return chimera;
603         }
604         catch(exception& e) {
605                 m->errorOut(e, "Perseus", "stitchBimera");
606                 exit(1);
607         }
608 }
609 /**************************************************************************************************/
610 int Perseus::getTrimera(vector<seqData>& sequences,
611                            vector<vector<int> >& leftDiffs,
612                            int& leftParent,
613                            int& middleParent,
614                            int& rightParent,
615                            int& breakPointA,
616                            int& breakPointB,
617                            vector<int>& singleLeft,
618                            vector<int>& bestLeft, 
619                            vector<int>& singleRight,
620                            vector<int>& bestRight,
621                            vector<bool> restricted){
622         try {
623                 int numRefSeqs = leftDiffs.size();
624                 int alignLength = leftDiffs[0].size();
625                 int bestTrimeraMismatches = PERSEUSMAXINT;
626                 
627                 leftParent = -1;
628                 middleParent = -1;
629                 rightParent = -1;
630                 
631                 breakPointA = -1;
632                 breakPointB = -1;
633                 
634                 vector<vector<int> > minDelta(alignLength);
635                 vector<vector<int> > minDeltaSeq(alignLength);
636                 
637                 for(int i=0;i<alignLength;i++){
638                         if (m->control_pressed) { return 0; }
639                         minDelta[i].assign(alignLength, PERSEUSMAXINT);
640                         minDeltaSeq[i].assign(alignLength, -1);
641                 }
642                 
643                 for(int x=0;x<alignLength;x++){
644                         for(int y=x;y<alignLength-1;y++){
645                                 for(int i=0;i<numRefSeqs;i++){
646                                         
647                                         if (m->control_pressed) { return 0; }
648                                         
649                                         if(restricted[i] == 0){
650                                                 int delta = leftDiffs[i][y] - leftDiffs[i][x];
651
652                                                 if(delta < minDelta[x][y] || (delta == minDelta[x][y] && sequences[i].frequency > sequences[minDeltaSeq[x][y]].frequency)){
653                                                         minDelta[x][y] = delta;
654                                                         minDeltaSeq[x][y] = i;                                  
655                                                 }                               
656                                         }
657                                 }
658                                 minDelta[x][y] += singleLeft[x] + singleRight[alignLength - y - 2];
659                                 
660                                 if(minDelta[x][y] < bestTrimeraMismatches){
661                                         bestTrimeraMismatches = minDelta[x][y];
662                                         
663                                         breakPointA = x;
664                                         breakPointB = y;
665                                         
666                                         leftParent = bestLeft[x];
667                                         middleParent = minDeltaSeq[x][y];
668                                         rightParent = bestRight[alignLength - y - 2];                           
669                                 }
670                         }               
671                 }
672                 
673                 return bestTrimeraMismatches;
674         }
675         catch(exception& e) {
676                 m->errorOut(e, "Perseus", "getTrimera");
677                 exit(1);
678         }
679 }
680
681 /**************************************************************************************************/
682
683 string Perseus::stitchTrimera(vector<pwAlign> alignments, int leftParent, int middleParent, int rightParent, int breakPointA, int breakPointB, vector<vector<int> >& leftMaps, vector<vector<int> >& rightMaps){
684         try {
685                 int p1SplitPoint = leftMaps[leftParent][breakPointA];
686                 int p2SplitPoint = leftMaps[middleParent][breakPointB];
687                 int p3SplitPoint = rightMaps[rightParent][rightMaps[rightParent].size() - breakPointB - 2];
688                 
689                 string chimeraRefSeq;
690                 for(int i=0;i<=p1SplitPoint;i++){
691                         if (m->control_pressed) { return chimeraRefSeq; }
692                         if(alignments[leftParent].reference[i] != '-' && alignments[leftParent].reference[i] != '.'){
693                                 chimeraRefSeq += alignments[leftParent].reference[i];
694                         }
695                 }
696                 
697                 for(int i=p1SplitPoint+1;i<=p2SplitPoint;i++){
698                         if (m->control_pressed) { return chimeraRefSeq; }
699                         if(alignments[middleParent].reference[i] != '-' && alignments[middleParent].reference[i] != '.'){
700                                 chimeraRefSeq += alignments[middleParent].reference[i];
701                         }
702                 }
703                 
704                 for(int i=p3SplitPoint;i<alignments[rightParent].reference.length();i++){
705                         if (m->control_pressed) { return chimeraRefSeq; }
706                         if(alignments[rightParent].reference[i] != '-' && alignments[rightParent].reference[i] != '.'){
707                                 chimeraRefSeq += alignments[rightParent].reference[i];
708                         }
709                 }
710
711                 return chimeraRefSeq;
712         }
713         catch(exception& e) {
714                 m->errorOut(e, "Perseus", "stitchTrimera");
715                 exit(1);
716         }
717 }
718
719 /**************************************************************************************************/
720
721 int Perseus::threeWayAlign(string query, string parent1, string parent2, string& qAlign, string& aAlign, string& bAlign){
722         try {
723                 pwModel model(1.0, -1.0, -5.0);
724                 
725                 string qL, rL;
726                 string qR, rR;
727
728                 basicPairwiseAlignSeqs(query, parent1, qL, rL, model);  
729                 basicPairwiseAlignSeqs(query, parent2, qR, rR, model);
730
731                 int lLength = qL.length();
732                 int rLength = qR.length();
733                 
734                 string qLNew, rLNew;
735                 string qRNew, rRNew;
736                 
737                 int lIndex = 0;
738                 int rIndex = 0;
739                 
740                 while(lIndex<lLength || rIndex<rLength){
741                         
742                         if (m->control_pressed) { return 0; }
743                         
744                         if(qL[lIndex] == qR[rIndex]){
745                                 qLNew += qL[lIndex];
746                                 rLNew += rL[lIndex];
747                                 lIndex++;
748
749                                 qRNew += qR[rIndex];
750                                 rRNew += rR[rIndex];
751                                 rIndex++;
752                         }
753                         else if(qL[lIndex] == '-' || qL[lIndex] == '.'){
754                                 //insert a gap into the right sequences
755                                 qLNew += qL[lIndex];
756                                 rLNew += rL[lIndex];
757                                 lIndex++;
758                                 
759                                 if(rIndex != rLength){
760                                         qRNew += '-';
761                                         rRNew += '-';
762                                 }
763                                 else{
764                                         qRNew += '.';
765                                         rRNew += '.';
766                                 }
767                         }
768                         else if(qR[rIndex] == '-' || qR[rIndex] == '.'){
769                                 //insert a gap into the left sequences
770                                 qRNew += qR[rIndex];
771                                 rRNew += rR[rIndex];
772                                 rIndex++;
773                                 
774
775                                 if(lIndex != lLength){
776                                         qLNew += '-';
777                                         rLNew += '-';
778                                 }
779                                 else{
780                                         qLNew += '.';
781                                         rLNew += '.';
782                                 }
783                                 
784                         }
785                 }
786                 
787                 qAlign = qLNew;
788                 aAlign = rLNew;
789                 bAlign = rRNew;
790                 
791                 bool qStart = 0;
792                 bool aStart = 0;
793                 bool bStart = 0;
794                 
795                 for(int i=0;i<qAlign.length();i++){
796                         
797                         if (m->control_pressed) { return 0; }
798                         
799                         if(qStart == 0){
800                                 if(qAlign[i] == '-')    {       qAlign[i] = '.';        }
801                                 else                                    {       qStart = 1;                     }
802                         }
803                         if(aStart == 0){
804                                 if(aAlign[i] == '-')    {       aAlign[i] = '.';        }
805                                 else                                    {       aStart = 1;                     }
806                         }
807                         if(bStart == 0){
808                                 if(bAlign[i] == '-')    {       bAlign[i] = '.';        }
809                                 else                                    {       bStart = 1;                     }
810                         }
811                         if(aStart == 1 && bStart == 1 && qStart == 1){
812                                 break;
813                         }
814                 }
815                 return 0;
816         }
817         catch(exception& e) {
818                 m->errorOut(e, "Perseus", "threeWayAlign");
819                 exit(1);
820         }
821 }
822
823 /**************************************************************************************************/
824
825 double Perseus::calcLoonIndex(string query, string parent1, string parent2, int breakPoint, vector<vector<double> >& binMatrix){
826         try {
827                 string queryAln, leftParentAln, rightParentAln;
828                 threeWayAlign(query, parent1, parent2, queryAln, leftParentAln, rightParentAln);
829                 
830                 int alignLength = queryAln.length();
831
832                 int endPos = alignLength;
833                 for(int i=alignLength-1;i>=0; i--){
834                         if(queryAln[i] != '.' && leftParentAln[i] != '.' && rightParentAln[i] != '.'){
835                                 endPos = i + 1;
836                                 break;
837                         }
838                 }
839                 
840                 int diffToLeftCount = 0;
841                 vector<int> diffToLeftMap(alignLength, 0);
842                 
843                 int diffToRightCount = 0;
844                 vector<int> diffToRightMap(alignLength, 0);
845                 
846                 for(int i=0;i<endPos;i++){
847                         
848                         if (m->control_pressed) { return 0; }
849                         
850                         if(queryAln[i] != leftParentAln[i]){
851                                 diffToLeftMap[diffToLeftCount] = i;
852                                 diffToLeftCount++;
853                         }
854                         
855                         if(queryAln[i] != rightParentAln[i]){
856                                 diffToRightMap[diffToRightCount] = i;
857                                 diffToRightCount++;
858                         }
859                 }
860                 
861                 
862                 
863                 diffToLeftMap[diffToLeftCount] = endPos;
864                 diffToRightMap[diffToRightCount] = endPos;
865                 
866                 int indexL = 0;
867                 int indexR = 0;
868                 int indexS = 0;
869                 
870                 vector<int> diffs;
871                 vector<int> splits;
872                 
873                 splits.push_back(-1);
874                 diffs.push_back(diffToRightCount);
875                 indexS++;
876                         
877                 while(indexL < diffToLeftCount || indexR < diffToRightCount){
878                         
879                         if (m->control_pressed) { return 0; }
880                         
881                         if(diffToLeftMap[indexL] <= diffToRightMap[indexR]){
882                                 diffs.push_back(diffs[indexS - 1] + 1);
883                                 splits.push_back(diffToLeftMap[indexL]);
884                                 
885                                 indexL++;
886                                 indexS++;                       
887                         }
888                         else if(diffToLeftMap[indexL] > diffToRightMap[indexR]) {
889                                 diffs.push_back(diffs[indexS - 1] - 1);
890                                 splits.push_back(diffToRightMap[indexR]);
891                                 
892                                 indexR++;
893                                 indexS++;                       
894                         }
895                 }
896                 
897                 int minDiff = PERSEUSMAXINT;
898                 int minIndex = -1;
899                 for(int i=0;i<indexS;i++){
900                         
901                         if (m->control_pressed) { return 0; }
902                         
903                         if(diffs[i] < minDiff){
904                                 minDiff = diffs[i];
905                                 minIndex = i;
906                         }
907                 }
908                 
909                 int splitPos = endPos;
910                 if(minIndex < indexS - 1){
911                         splitPos = (splits[minIndex]+splits[minIndex+1]) / 2;
912                 }
913                 
914                 int diffToChimera = 0;
915                 int leftDiffToP1 = 0;
916                 int rightDiffToP1 = 0;
917                 int leftDiffToP2 = 0;
918                 int rightDiffToP2 = 0;
919                 
920                 for(int i=0;i<endPos;i++){
921                         
922                         if (m->control_pressed) { return 0; }
923                         
924                         char bQuery = queryAln[i];
925                         char bP1 = leftParentAln[i];
926                         char bP2 = rightParentAln[i];
927                         
928                         char bConsensus = bQuery;
929                         if(bP1 == bP2){ bConsensus = bP1;       }
930                         
931                         if(bConsensus != bQuery){
932                                 diffToChimera++;
933                         }
934                         
935                         if(bConsensus != bP1){
936                                 if(i <= splitPos){
937                                         leftDiffToP1++;
938                                 }
939                                 else{
940                                         rightDiffToP1++;                                
941                                 }
942                         }
943                         if(bConsensus != bP2){
944                                 if(i <= splitPos){
945                                         leftDiffToP2++;
946                                 }
947                                 else{
948                                         rightDiffToP2++;                                
949                                 }
950                         }
951                 }               
952                 
953
954                 int diffToClosestParent, diffToFurtherParent;
955                 int xA, xB, yA, yB;
956                 double aFraction, bFraction;
957                 
958                 if(diffToLeftCount <= diffToRightCount){        //if parent 1 is closer
959
960                         diffToClosestParent = leftDiffToP1 + rightDiffToP1;
961                         xA = leftDiffToP1;
962                         xB = rightDiffToP1;
963                         
964                         diffToFurtherParent = leftDiffToP2 + rightDiffToP2;
965                         yA = leftDiffToP2;
966                         yB = rightDiffToP2;
967                         
968                         aFraction = double(splitPos + 1)/(double) endPos;
969                         bFraction = 1 - aFraction;
970                         
971                 }
972                 else{                                                                                           //if parent 2 is closer
973
974                         diffToClosestParent = leftDiffToP2 + rightDiffToP2;
975                         xA = rightDiffToP2;
976                         xB = leftDiffToP2;
977                         
978                         diffToFurtherParent = leftDiffToP1 + rightDiffToP1;
979                         yA = rightDiffToP1;
980                         yB = leftDiffToP1;
981                         
982                         bFraction = double(splitPos + 1)/(double) endPos;
983                         aFraction = 1 - bFraction;
984
985                 }
986                 
987                 double loonIndex = 0;
988                 
989                 int totalDifference = diffToClosestParent + diffToChimera;
990                 
991                 if(totalDifference > 0){
992                         double prob = 0;
993                         
994                         for(int i=diffToClosestParent;i<=totalDifference;i++){
995                                 prob += binMatrix[totalDifference][i] * pow(0.50, i) * pow(0.50, totalDifference - i);
996                         }
997                         loonIndex += -log(prob);
998                 }
999                 
1000                 if(diffToFurtherParent > 0){
1001                         double prob = 0;
1002                         
1003                         for(int i=yA;i<=diffToFurtherParent;i++){
1004                                 prob += binMatrix[diffToFurtherParent][i] * pow(aFraction, i) * pow(1-aFraction, diffToFurtherParent - i);
1005                         }
1006                         loonIndex += -log(prob);
1007                 }
1008                 
1009                 if(diffToClosestParent > 0){
1010                         double prob = 0;
1011                         
1012                         for(int i=xB;i<=diffToClosestParent;i++){
1013                                 prob += binMatrix[diffToClosestParent][i] * pow(bFraction, i) * pow(1-bFraction, diffToClosestParent - i);
1014                         }
1015                         loonIndex += -log(prob);
1016                 }
1017                 
1018                 return loonIndex;
1019         }
1020         catch(exception& e) {
1021                 m->errorOut(e, "Perseus", "calcLoonIndex");
1022                 exit(1);
1023         }
1024 }
1025
1026 /**************************************************************************************************/
1027
1028 double Perseus::calcBestDistance(string query, string reference){
1029         try {
1030                 int alignLength = query.length();
1031                 int mismatch = 0;
1032                 int counter = 0;
1033                 
1034                 for(int i=0;i<alignLength;i++){
1035                         
1036                         if (m->control_pressed) { return 0; }
1037                         
1038                         if((query[i] != '.' || reference[i] != '.') && (query[i] != '-' && reference[i] != '-')){
1039                                 if(query[i] != reference[i]){   mismatch++;     }
1040                                 counter++;                      
1041                         }
1042                 }
1043                 
1044                 return (double)mismatch / (double)counter;
1045         }
1046         catch(exception& e) {
1047                 m->errorOut(e, "Perseus", "calcBestDistance");
1048                 exit(1);
1049         }
1050 }
1051
1052 /**************************************************************************************************/
1053
1054 double Perseus::classifyChimera(double singleDist, double cIndex, double loonIndex, double alpha, double beta){
1055         try {
1056                 double difference = cIndex - singleDist;        //y
1057                 double probability;
1058                 
1059                 if(cIndex >= 0.15 || difference > 0.00){
1060                         probability = 0.0000;
1061                 }
1062                 else{
1063                         probability = 1.0 / (1.0 + exp(-(alpha + beta * loonIndex)));
1064                 }
1065                 
1066                 return probability;
1067         }
1068         catch(exception& e) {
1069                 m->errorOut(e, "Perseus", "classifyChimera");
1070                 exit(1);
1071         }
1072 }
1073 /**************************************************************************************************/