9 void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,
\r
10 const string &PathQA, const string &PathQB,
\r
11 string &Q3, string &A3, string &B3);
\r
13 void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,
\r
14 const string &QLabel, const string &ALabel, const string &BLabel,
\r
17 double GetScore2(double Y, double N, double A)
\r
19 return Y/(opt_xn*(N + opt_dn) + opt_xa*A);
\r
22 void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,
\r
23 const string &QLabel, const string &ALabel, const string &BLabel,
\r
27 Hit.QLabel = QLabel;
\r
29 const byte *Q3Seq = (const byte *) Q3.c_str();
\r
30 const byte *A3Seq = (const byte *) A3.c_str();
\r
31 const byte *B3Seq = (const byte *) B3.c_str();
\r
33 const unsigned ColCount = SIZE(Q3);
\r
34 asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);
\r
37 Log("Q %5u %*.*s\n", ColCount, ColCount, ColCount, Q3Seq);
\r
38 Log("A %5u %*.*s\n", ColCount, ColCount, ColCount, A3Seq);
\r
39 Log("B %5u %*.*s\n", ColCount, ColCount, ColCount, B3Seq);
\r
42 // Discard terminal gaps
\r
43 unsigned ColLo = UINT_MAX;
\r
44 unsigned ColHi = UINT_MAX;
\r
45 for (unsigned Col = 2; Col + 2 < ColCount; ++Col)
\r
47 char q = Q3Seq[Col];
\r
48 char a = A3Seq[Col];
\r
49 char b = B3Seq[Col];
\r
51 if (isacgt(q) && isacgt(a) && isacgt(b))
\r
53 if (ColLo == UINT_MAX)
\r
59 if (ColLo == UINT_MAX)
\r
65 unsigned DiffCount = 0;
\r
67 vector<unsigned> ColToQPos(ColLo, UINT_MAX);
\r
68 vector<unsigned> AccumCount(ColLo, UINT_MAX);
\r
69 vector<unsigned> AccumSameA(ColLo, UINT_MAX);
\r
70 vector<unsigned> AccumSameB(ColLo, UINT_MAX);
\r
71 vector<unsigned> AccumForA(ColLo, UINT_MAX);
\r
72 vector<unsigned> AccumForB(ColLo, UINT_MAX);
\r
73 vector<unsigned> AccumAbstain(ColLo, UINT_MAX);
\r
74 vector<unsigned> AccumAgainst(ColLo, UINT_MAX);
\r
76 unsigned SumSameA = 0;
\r
77 unsigned SumSameB = 0;
\r
78 unsigned SumSameAB = 0;
\r
80 unsigned SumForA = 0;
\r
81 unsigned SumForB = 0;
\r
82 unsigned SumAbstain = 0;
\r
83 unsigned SumAgainst = 0;
\r
84 for (unsigned Col = ColLo; Col <= ColHi; ++Col)
\r
86 char q = Q3Seq[Col];
\r
87 char a = A3Seq[Col];
\r
88 char b = B3Seq[Col];
\r
90 if (isacgt(q) && isacgt(a) && isacgt(b))
\r
98 if (q == a && q != b)
\r
100 if (q == b && q != a)
\r
102 if (a == b && q != a)
\r
104 if (q != a && q != b)
\r
109 ColToQPos.push_back(QPos);
\r
110 AccumSameA.push_back(SumSameA);
\r
111 AccumSameB.push_back(SumSameB);
\r
112 AccumCount.push_back(Sum);
\r
113 AccumForA.push_back(SumForA);
\r
114 AccumForB.push_back(SumForB);
\r
115 AccumAbstain.push_back(SumAbstain);
\r
116 AccumAgainst.push_back(SumAgainst);
\r
126 asserta(SIZE(ColToQPos) == ColHi+1);
\r
127 asserta(SIZE(AccumSameA) == ColHi+1);
\r
128 asserta(SIZE(AccumSameB) == ColHi+1);
\r
129 asserta(SIZE(AccumAbstain) == ColHi+1);
\r
130 asserta(SIZE(AccumAgainst) == ColHi+1);
\r
132 double IdQA = double(SumSameA)/Sum;
\r
133 double IdQB = double(SumSameB)/Sum;
\r
134 double IdAB = double(SumSameAB)/Sum;
\r
135 double MaxId = max(IdQA, IdQB);
\r
138 Log("IdQA=%.1f%% IdQB=%.1f%% IdAB=%.1f\n", IdQA*100.0, IdQB*100.0, IdAB*100.0);
\r
140 Log(" x AQB IdAL IdBL IdAR IdBR DivAB DivBA YAL YBL YAR YBR AbL AbR ScoreAB ScoreAB XLo Xhi\n");
\r
141 Log("----- --- ----- ----- ----- ----- ------ ------ ----- ----- ----- ----- ----- ----- ------- ------- ----- -----\n");
\r
143 unsigned BestXLo = UINT_MAX;
\r
144 unsigned BestXHi = UINT_MAX;
\r
145 double BestDiv = 0.0;
\r
146 double BestIdQM = 0.0;
\r
147 double BestScore = 0.0;
\r
149 // Find range of cols BestXLo..BestXHi that maximizes score
\r
150 bool FirstA = false;
\r
152 // NOTE: Must be < ColHi not <= because use Col+1 below
\r
153 for (unsigned Col = ColLo; Col < ColHi; ++Col)
\r
155 char q = Q3Seq[Col];
\r
156 char a = A3Seq[Col];
\r
157 char b = B3Seq[Col];
\r
159 unsigned SameAL = AccumSameA[Col];
\r
160 unsigned SameBL = AccumSameB[Col];
\r
161 unsigned SameAR = SumSameA - AccumSameA[Col];
\r
162 unsigned SameBR = SumSameB - AccumSameB[Col];
\r
164 double IdAB = double(SameAL + SameBR)/Sum;
\r
165 double IdBA = double(SameBL + SameAR)/Sum;
\r
167 unsigned ForAL = AccumForA[Col];
\r
168 unsigned ForBL = AccumForB[Col];
\r
169 unsigned ForAR = SumForA - AccumForA[Col+1];
\r
170 unsigned ForBR = SumForB - AccumForB[Col+1];
\r
171 unsigned AbL = AccumAbstain[Col];
\r
172 unsigned AbR = SumAbstain - AccumAbstain[Col+1];
\r
174 double ScoreAB = GetScore2(ForAL, ForBL, AbL)*GetScore2(ForBR, ForAR, AbR);
\r
175 double ScoreBA = GetScore2(ForBL, ForAL, AbL)*GetScore2(ForAR, ForBR, AbR);
\r
177 double DivAB = IdAB/MaxId;
\r
178 double DivBA = IdBA/MaxId;
\r
179 double MaxDiv = max(DivAB, DivBA);
\r
181 //if (MaxDiv > BestDiv)
\r
183 // BestDiv = MaxDiv;
\r
186 // FirstA = (DivAB > DivBA);
\r
188 // BestIdQM = IdAB;
\r
190 // BestIdQM = IdBA;
\r
192 //else if (MaxDiv == BestDiv)
\r
195 double MaxScore = max(ScoreAB, ScoreBA);
\r
196 if (MaxScore > BestScore)
\r
198 BestScore = MaxScore;
\r
201 FirstA = (ScoreAB > ScoreBA);
\r
206 if (MaxDiv > BestDiv)
\r
209 else if (MaxScore == BestScore)
\r
212 if (MaxDiv > BestDiv)
\r
219 char q = Q3Seq[Col];
\r
220 char a = A3Seq[Col];
\r
221 char b = B3Seq[Col];
\r
222 Log(" %c%c%c", a, q, b);
\r
223 Log(" %5u", SameAL);
\r
224 Log(" %5u", SameBL);
\r
225 Log(" %5u", SameAR);
\r
226 Log(" %5u", SameBR);
\r
227 Log(" %5.4f", DivAB);
\r
228 Log(" %5.4f", DivBA);
\r
229 Log(" %5u", ForAL);
\r
230 Log(" %5u", ForBL);
\r
231 Log(" %5u", ForAR);
\r
232 Log(" %5u", ForBR);
\r
235 Log(" %7.4f", ScoreAB);
\r
236 Log(" %7.4f", ScoreBA);
\r
237 if (BestXLo != UINT_MAX)
\r
238 Log(" %5u", BestXLo);
\r
239 if (BestXHi != UINT_MAX)
\r
240 Log(" %5u", BestXHi);
\r
246 if (BestXLo == UINT_MAX)
\r
250 Log("No crossover found.\n");
\r
255 Log("BestX col %u - %u\n", BestXLo, BestXHi);
\r
258 // Find maximum region of identity within BestXLo..BestXHi
\r
259 unsigned ColXLo = (BestXLo + BestXHi)/2;
\r
260 unsigned ColXHi = ColXLo;
\r
261 unsigned SegLo = UINT_MAX;
\r
262 unsigned SegHi = UINT_MAX;
\r
263 for (unsigned Col = BestXLo; Col <= BestXHi; ++Col)
\r
265 char q = Q3Seq[Col];
\r
266 char a = A3Seq[Col];
\r
267 char b = B3Seq[Col];
\r
269 if (q == a && q == b)
\r
271 if (SegLo == UINT_MAX)
\r
277 unsigned SegLength = SegHi - SegLo + 1;
\r
278 unsigned BestSegLength = ColXHi - ColXLo + 1;
\r
279 if (SegLength > BestSegLength)
\r
288 unsigned SegLength = SegHi - SegLo + 1;
\r
289 unsigned BestSegLength = ColXHi - ColXLo + 1;
\r
290 if (SegLength > BestSegLength)
\r
297 for (unsigned x = 0; x < ColCount; ++x)
\r
301 else if (x == ColXHi)
\r
311 Hit.ColXLo = ColXLo;
\r
312 Hit.ColXHi = ColXHi;
\r
316 // Hit.LY = AccumForA[ColXLo];
\r
317 // Hit.LN = AccumForB[ColXLo];
\r
319 // Hit.RY = SumForB - AccumForB[ColXHi];
\r
320 // Hit.RN = SumForA - AccumForA[ColXHi];
\r
324 // Hit.LY = AccumForB[ColXLo];
\r
325 // Hit.LN = AccumForA[ColXLo];
\r
326 // Hit.RY = SumForA - AccumForA[ColXHi];
\r
327 // Hit.RN = SumForB - AccumForB[ColXHi];
\r
330 //Hit.LA = AccumAgainst[ColXLo];
\r
331 //Hit.LD = AccumAbstain[ColXLo];
\r
333 //Hit.RA = SumAgainst - AccumAgainst[ColXHi];
\r
334 //Hit.RD = SumAbstain - AccumAbstain[ColXHi];
\r
336 Hit.PctIdAB = IdAB*100.0;
\r
337 Hit.PctIdQM = BestIdQM*100.0;
\r
339 Hit.Div = (BestDiv - 1.0)*100.0;
\r
343 Hit.QLabel = QLabel;
\r
348 //Hit.PathQA = PathQA;
\r
349 //Hit.PathQB = PathQB;
\r
352 Hit.ALabel = ALabel;
\r
353 Hit.BLabel = BLabel;
\r
354 Hit.PctIdQA = IdQA*100.0;
\r
355 Hit.PctIdQB = IdQB*100.0;
\r
361 Hit.ALabel = BLabel;
\r
362 Hit.BLabel = ALabel;
\r
363 Hit.PctIdQA = IdQB*100.0;
\r
364 Hit.PctIdQB = IdQA*100.0;
\r
375 //vector<float> Cons;
\r
376 //for (unsigned Col = 0; Col < ColCount; ++Col)
\r
378 // char q = Q3Seq[Col];
\r
379 // char a = A3Seq[Col];
\r
380 // char b = B3Seq[Col];
\r
381 // if (q == a && q == b && a == b)
\r
383 // Cons.push_back(1.0f);
\r
387 // bool gapq = isgap(q);
\r
388 // bool gapa = isgap(a);
\r
389 // bool gapb = isgap(b);
\r
391 // if (!gapq && !gapa && !gapb)
\r
393 // if (q == a || q == b || a == b)
\r
394 // Cons.push_back(0.75);
\r
396 // Cons.push_back(0.5);
\r
400 // if (!gapa && (a == b || a == q))
\r
401 // Cons.push_back(0.5f);
\r
402 // else if (!gapb && b == q)
\r
403 // Cons.push_back(0.5f);
\r
405 // Cons.push_back(0.0f);
\r
409 //float fLY = 0.0f;
\r
410 //float fLN = 0.0f;
\r
411 //float fLA = 0.0f;
\r
412 //float fRY = 0.0f;
\r
413 //float fRN = 0.0f;
\r
414 //float fRA = 0.0f;
\r
415 for (unsigned Col = ColLo; Col <= ColHi; ++Col)
\r
417 char q = Q3Seq[Col];
\r
418 char a = A3Seq[Col];
\r
419 char b = B3Seq[Col];
\r
420 if (q == a && q == b && a == b)
\r
423 unsigned ngaps = 0;
\r
445 //float AvgCons = (Cons[Col-2] + Cons[Col-1] + Cons[Col+1] + Cons[Col+2])/4;
\r
446 //if (Col < ColXLo)
\r
448 // if (q == a && q != b)
\r
450 // else if (q == b && q != a)
\r
455 //else if (Col > ColXHi)
\r
457 // if (q == b && q != a)
\r
459 // else if (q == a && q != b)
\r
467 if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))
\r
469 if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))
\r
473 //if (Col > 0 && isgap(Q3Seq[Col-1]))
\r
475 //if (Col + 1 < ColCount && isgap(Q3Seq[Col+1]))
\r
480 if (q == a && q != b)
\r
482 else if (q == b && q != a)
\r
487 else if (Col > ColXHi)
\r
489 if (q == b && q != a)
\r
491 else if (q == a && q != b)
\r
498 double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);
\r
499 double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);
\r
500 Hit.Score = ScoreL*ScoreR;
\r
502 extern bool g_UchimeDeNovo;
\r
504 //if (0)//g_UchimeDeNovo)
\r
506 // double AbQ = GetAbFromLabel(QLabel.c_str());
\r
507 // double AbA = GetAbFromLabel(ALabel.c_str());
\r
508 // double AbB = GetAbFromLabel(BLabel.c_str());
\r
509 // if (AbQ > 0.0 && AbA > 0.0 && AbB > 0.0)
\r
511 // double MinAb = min(AbA, AbB);
\r
512 // double Ratio = MinAb/AbQ;
\r
513 // double t = Ratio - opt_abx;
\r
514 // // double Factor = 2.0/(1.0 + exp(-t));
\r
515 // double Factor = min(Ratio, opt_abx)/opt_abx;
\r
516 // if (opt_verbose)
\r
517 // Log("Score %.4f Ab factor %.4f >%s\n", Hit.Score, Factor, QLabel.c_str());
\r
518 // Hit.Score *= Factor;
\r
522 extern FILE *g_fUChimeAlns;
\r
523 if (g_fUChimeAlns != 0 && Hit.Div > 0.0)
\r
525 void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit);
\r
526 WriteChimeHitX(g_fUChimeAlns, Hit);
\r
530 void AlignChime3(const string &Q3, const string &A3, const string &B3,
\r
531 const string &QLabel, const string &ALabel, const string &BLabel,
\r
535 AlignChimeLocal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);
\r
537 AlignChimeGlobal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);
\r
540 static void StripGaps(const byte *Seq, unsigned L, string &s)
\r
543 for (unsigned i = 0; i < L; ++i)
\r
551 static void StripGapsAlloc(const SeqData &SDIn, SeqData &SDOut)
\r
554 byte *s = myalloc(byte, SDIn.L);
\r
556 for (unsigned i = 0; i < SDIn.L; ++i)
\r
558 char c = SDIn.Seq[i];
\r
560 s[k++] = toupper(c);
\r
566 void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,
\r
567 const string &PathQA, const string &PathQB, ChimeHit2 &Hit)
\r
571 // AlignChimeLocal(QSD, ASD, BSD, PathQA, PathQB, Hit);
\r
578 Make3Way(QSD, ASD, BSD, PathQA, PathQB, Q3, A3, B3);
\r
580 AlignChime3(Q3, A3, B3, QSD.Label, ASD.Label, BSD.Label, Hit);
\r
583 void AlignChime3SDRealign(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,
\r
589 StripGapsAlloc(QSD3, QSD);
\r
590 StripGapsAlloc(ASD3, ASD);
\r
591 StripGapsAlloc(BSD3, BSD);
\r
595 bool FoundQA = GlobalAlign(QSD, ASD, PathQA);
\r
596 bool FoundQB = GlobalAlign(QSD, BSD, PathQB);
\r
597 if (!FoundQA || !FoundQB)
\r
600 Hit.QLabel = QSD3.Label;
\r
604 AlignChime(QSD, ASD, BSD, PathQA, PathQB, Hit);
\r
606 myfree((void *) QSD.Seq);
\r
607 myfree((void *) ASD.Seq);
\r
608 myfree((void *) BSD.Seq);
\r
611 void AlignChime3SD(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,
\r
616 AlignChime3SDRealign(QSD3, ASD3, BSD3, Hit);
\r
624 const unsigned ColCount = QSD3.L;
\r
625 asserta(ASD3.L == ColCount && BSD3.L == ColCount);
\r
627 Q3.reserve(ColCount);
\r
628 A3.reserve(ColCount);
\r
629 B3.reserve(ColCount);
\r
631 const byte *QS = QSD3.Seq;
\r
632 const byte *AS = ASD3.Seq;
\r
633 const byte *BS = BSD3.Seq;
\r
634 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
636 byte q = toupper(QS[Col]);
\r
637 byte a = toupper(AS[Col]);
\r
638 byte b = toupper(BS[Col]);
\r
640 if (isgap(q) && isgap(a) && isgap(b))
\r
648 AlignChime3(Q3, A3, B3, QSD3.Label, ASD3.Label, BSD3.Label, Hit);
\r