--- /dev/null
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+\r
+#define TRACE 0\r
+\r
+/***\r
+Let:\r
+ S[i] = Score of col i: 0=no SNP, +1 = Y, -3 = N or A.\r
+\r
+ V[k] = Best segment score from j, j+1 .. k for all possible j\r
+ max(j) Sum i=j..k S[i]\r
+\r
+Recursion relation:\r
+ V[k] = S[k] + max (V[k-1], 0)\r
+***/\r
+\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+ const string &QLabel, const string &ALabel, const string &BLabel,\r
+ ChimeHit2 &Hit);\r
+\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+ const string &PathQA, const string &PathQB,\r
+ string &Q3, string &A3, string &B3);\r
+\r
+double GetScore2(double Y, double N, double A);\r
+\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+ const string &QLabel, const string &ALabel, const string &BLabel,\r
+ ChimeHit2 &Hit)\r
+ {\r
+ Hit.Clear();\r
+\r
+ const byte *Q3Seq = (const byte *) Q3.c_str();\r
+ const byte *A3Seq = (const byte *) A3.c_str();\r
+ const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+ const unsigned ColCount = SIZE(Q3);\r
+ asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+ vector<float> ColScoresA(ColCount, 0.0f);\r
+ vector<float> ColScoresB(ColCount, 0.0f);\r
+\r
+ float ScoreN = -(float) opt_xn;\r
+ unsigned QL = 0;\r
+ for (unsigned Col = 0; Col < ColCount; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+\r
+ if (!isgap(q))\r
+ ++QL;\r
+\r
+ if (q == a && q == b && a == b)\r
+ continue;\r
+\r
+ if (isgap(q) || isgap(a) || isgap(b))\r
+ continue;\r
+\r
+ if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+ continue;\r
+\r
+ if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+ continue;\r
+\r
+ if (q == a && q != b)\r
+ ColScoresA[Col] = 1;\r
+ else\r
+ ColScoresA[Col] = ScoreN;\r
+\r
+ if (q == b && q != a)\r
+ ColScoresB[Col] = 1;\r
+ else\r
+ ColScoresB[Col] = ScoreN;\r
+ }\r
+\r
+ vector<float> LVA(ColCount, 0.0f);\r
+ vector<float> LVB(ColCount, 0.0f);\r
+\r
+ LVA[0] = ColScoresA[0];\r
+ LVB[0] = ColScoresB[0];\r
+ for (unsigned Col = 1; Col < ColCount; ++Col)\r
+ {\r
+ LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col];\r
+ LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col];\r
+ }\r
+\r
+ vector<float> RVA(ColCount, 0.0f);\r
+ vector<float> RVB(ColCount, 0.0f);\r
+\r
+ RVA[ColCount-1] = ColScoresA[ColCount-1];\r
+ RVB[ColCount-1] = ColScoresB[ColCount-1];\r
+ for (int Col = ColCount-2; Col >= 0; --Col)\r
+ {\r
+ RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col];\r
+ RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col];\r
+ }\r
+\r
+ bool FirstA = true;\r
+ float MaxSum = 0.0;\r
+ unsigned ColX = UINT_MAX;\r
+ for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+ {\r
+ float Sum = LVA[Col] + RVB[Col+1];\r
+ if (Sum > MaxSum)\r
+ {\r
+ FirstA = true;\r
+ MaxSum = Sum;\r
+ ColX = Col;\r
+ }\r
+ }\r
+\r
+ for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+ {\r
+ float Sum = LVB[Col] + RVA[Col+1];\r
+ if (Sum > MaxSum)\r
+ {\r
+ FirstA = false;\r
+ MaxSum = Sum;\r
+ ColX = Col;\r
+ }\r
+ }\r
+ if (ColX == UINT_MAX)\r
+ return;\r
+\r
+ unsigned ColLo = UINT_MAX;\r
+ unsigned ColHi = UINT_MAX;\r
+ if (FirstA)\r
+ {\r
+ float Sum = 0.0f;\r
+ for (int Col = ColX; Col >= 0; --Col)\r
+ {\r
+ Sum += ColScoresA[Col];\r
+ if (Sum >= LVA[ColX])\r
+ {\r
+ ColLo = Col;\r
+ break;\r
+ }\r
+ }\r
+ asserta(Sum >= LVA[ColX]);\r
+ Sum = 0.0f;\r
+ for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+ {\r
+ Sum += ColScoresB[Col];\r
+ if (Sum >= RVB[ColX])\r
+ {\r
+ ColHi = Col;\r
+ break;\r
+ }\r
+ }\r
+ asserta(Sum >= RVB[ColX]);\r
+ }\r
+ else\r
+ {\r
+ float Sum = 0.0f;\r
+ for (int Col = ColX; Col >= 0; --Col)\r
+ {\r
+ Sum += ColScoresB[Col];\r
+ if (Sum >= LVB[ColX])\r
+ {\r
+ ColLo = Col;\r
+ break;\r
+ }\r
+ }\r
+ asserta(Sum >= LVB[ColX]);\r
+ Sum = 0.0f;\r
+ for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+ {\r
+ Sum += ColScoresA[Col];\r
+ if (Sum >= RVA[ColX])\r
+ {\r
+ ColHi = Col;\r
+ break;\r
+ }\r
+ }\r
+ asserta(Sum >= RVA[ColX]);\r
+ }\r
+\r
+ unsigned ColXHi = ColX;\r
+ for (unsigned Col = ColX + 1; Col < ColCount; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+ \r
+ if (q == a && q == b && !isgap(q))\r
+ ColXHi = Col;\r
+ else\r
+ break;\r
+ }\r
+\r
+ unsigned ColXLo = ColX;\r
+ for (int Col = (int) ColX - 1; Col >= 0; --Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+ \r
+ if (q == a && q == b && !isgap(q))\r
+ ColXLo = Col;\r
+ else\r
+ break;\r
+ }\r
+\r
+ unsigned IdQA = 0;\r
+ unsigned IdQB = 0;\r
+ unsigned IdAB = 0;\r
+ unsigned NQA = 0;\r
+ unsigned NQB = 0;\r
+ unsigned NAB = 0;\r
+ for (unsigned Col = 0; Col < ColCount; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+\r
+ if (!isgap(q) && !isgap(a))\r
+ {\r
+ ++NQA;\r
+ if (q == a)\r
+ ++IdQA;\r
+ }\r
+\r
+ if (!isgap(q) && !isgap(b))\r
+ {\r
+ ++NQB;\r
+ if (q == b)\r
+ ++IdQB;\r
+ }\r
+\r
+ if (!isgap(a) && !isgap(b))\r
+ {\r
+ ++NAB;\r
+ if (a == b)\r
+ ++IdAB;\r
+ }\r
+ }\r
+\r
+ Hit.PctIdQA = Pct(IdQA, NQA);\r
+ Hit.PctIdQB = Pct(IdQB, NQB);\r
+ Hit.PctIdAB = Pct(IdAB, NAB);\r
+\r
+ unsigned LIdQA = 0;\r
+ unsigned LIdQB = 0;\r
+ for (unsigned Col = ColLo; Col < ColXLo; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+\r
+ if (!isgap(q) && !isgap(a))\r
+ {\r
+ if (q == a)\r
+ ++LIdQA;\r
+ }\r
+\r
+ if (!isgap(q) && !isgap(b))\r
+ {\r
+ if (q == b)\r
+ ++LIdQB;\r
+ }\r
+ }\r
+\r
+ unsigned RIdQA = 0;\r
+ unsigned RIdQB = 0;\r
+ for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+\r
+ if (!isgap(q) && !isgap(a))\r
+ {\r
+ if (q == a)\r
+ ++RIdQA;\r
+ }\r
+\r
+ if (!isgap(q) && !isgap(b))\r
+ {\r
+ if (q == b)\r
+ ++RIdQB;\r
+ }\r
+ }\r
+\r
+ unsigned IdDiffL = max(LIdQA, LIdQB) - min(LIdQA, LIdQB);\r
+ unsigned IdDiffR = max(RIdQA, RIdQB) - min(RIdQA, RIdQB);\r
+ unsigned MinIdDiff = min(IdDiffL, IdDiffR);\r
+ unsigned ColRange = ColHi - ColLo + 1;\r
+ if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract)\r
+ return;\r
+\r
+// double Div = Pct(MinIdDiff, QSD.L);\r
+\r
+#if TRACE\r
+ {\r
+ Log(" Col A Q B ScoreA ScoreB LVA LVB RVA RVB\n");\r
+ Log("----- - - - ------- ------- ------- ------- ------- -------\n");\r
+ for (unsigned Col = 0; Col < ColCount; ++Col)\r
+ {\r
+ if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0)\r
+ continue;\r
+\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+ Log("%5u %c %c %c", Col, a, q, b);\r
+\r
+ if (ColScoresA[Col] == 0.0)\r
+ Log(" %7.7s", "");\r
+ else\r
+ Log(" %7.1f", ColScoresA[Col]);\r
+\r
+ if (ColScoresB[Col] == 0.0)\r
+ Log(" %7.7s", "");\r
+ else\r
+ Log(" %7.1f", ColScoresB[Col]);\r
+\r
+ Log(" %7.1f %7.1f %7.1f %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]);\r
+\r
+ Log("\n");\r
+ }\r
+ Log("\n");\r
+ Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n",\r
+ MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA));\r
+ Log(" LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB);\r
+ }\r
+#endif\r
+\r
+ string Q3L;\r
+ string A3L;\r
+ string B3L;\r
+ for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+ {\r
+ char q = Q3[Col];\r
+ char a = A3[Col];\r
+ char b = B3[Col];\r
+\r
+ Q3L += q;\r
+ A3L += a;\r
+ B3L += b;\r
+ }\r
+\r
+ AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit);\r
+\r
+#if 0\r
+// CS SNPs\r
+ Hit.CS_LY = 0;\r
+ Hit.CS_LN = 0;\r
+ Hit.CS_RY = 0;\r
+ Hit.CS_RN = 0;\r
+ Hit.CS_LA = 0;\r
+ Hit.CS_RA = 0;\r
+ for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+ if (q == a && q == b && a == b)\r
+ continue;\r
+ if (isgap(q) || isgap(a) || isgap(b))\r
+ continue;\r
+ if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+ continue;\r
+ if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+ continue;\r
+\r
+ if (!FirstA)\r
+ swap(a, b);\r
+\r
+ if (Col < ColXLo)\r
+ {\r
+ if (q == a && q != b)\r
+ ++Hit.CS_LY;\r
+ else if (q == b && q != a)\r
+ ++Hit.CS_LN;\r
+ else\r
+ ++Hit.CS_LA;\r
+ }\r
+ else if (Col > ColXHi)\r
+ {\r
+ if (q == b && q != a)\r
+ ++Hit.CS_RY;\r
+ else if (q == a && q != b)\r
+ ++Hit.CS_RN;\r
+ else\r
+ ++Hit.CS_RA;\r
+ }\r
+ }\r
+\r
+ double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+ double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+ Hit.Score = ScoreL*ScoreR;\r
+\r
+ //Hit.QSD = QSD;\r
+ //if (FirstA)\r
+ // {\r
+ // Hit.ASD = ASD;\r
+ // Hit.BSD = BSD;\r
+ // Hit.PathQA = PathQA;\r
+ // Hit.PathQB = PathQB;\r
+ // }\r
+ //else\r
+ // {\r
+ // Hit.ASD = BSD;\r
+ // Hit.BSD = ASD;\r
+ // }\r
+\r
+ //Hit.ColLo = ColLo;\r
+ //Hit.ColXLo = ColXLo;\r
+ //Hit.ColXHi = ColXHi;\r
+ //Hit.ColHi = ColHi;\r
+ //Hit.Div = Div;\r
+\r
+// Hit.LogMe();\r
+#endif\r
+ }\r