]> git.donarmstrong.com Git - mothur.git/blobdiff - uchime_src/alignchimel.cpp
added uchime_src folder. added biom parameter to make.shared. added biom as a current...
[mothur.git] / uchime_src / alignchimel.cpp
diff --git a/uchime_src/alignchimel.cpp b/uchime_src/alignchimel.cpp
new file mode 100644 (file)
index 0000000..ae152af
--- /dev/null
@@ -0,0 +1,417 @@
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+\r
+#define        TRACE   0\r
+\r
+/***\r
+Let:\r
+       S[i] =  Score of col i: 0=no SNP, +1 = Y, -3 = N or A.\r
+\r
+       V[k] =  Best segment score from j, j+1 .. k for all possible j\r
+                       max(j) Sum i=j..k S[i]\r
+\r
+Recursion relation:\r
+       V[k] =  S[k] + max (V[k-1], 0)\r
+***/\r
+\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit);\r
+\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3);\r
+\r
+double GetScore2(double Y, double N, double A);\r
+\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+       {\r
+       Hit.Clear();\r
+\r
+       const byte *Q3Seq = (const byte *) Q3.c_str();\r
+       const byte *A3Seq = (const byte *) A3.c_str();\r
+       const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+       const unsigned ColCount = SIZE(Q3);\r
+       asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+       vector<float> ColScoresA(ColCount, 0.0f);\r
+       vector<float> ColScoresB(ColCount, 0.0f);\r
+\r
+       float ScoreN = -(float) opt_xn;\r
+       unsigned QL = 0;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q))\r
+                       ++QL;\r
+\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+\r
+               if (isgap(q) || isgap(a) || isgap(b))\r
+                       continue;\r
+\r
+               if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                       continue;\r
+\r
+               if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                       continue;\r
+\r
+               if (q == a && q != b)\r
+                       ColScoresA[Col] = 1;\r
+               else\r
+                       ColScoresA[Col] = ScoreN;\r
+\r
+               if (q == b && q != a)\r
+                       ColScoresB[Col] = 1;\r
+               else\r
+                       ColScoresB[Col] = ScoreN;\r
+               }\r
+\r
+       vector<float> LVA(ColCount, 0.0f);\r
+       vector<float> LVB(ColCount, 0.0f);\r
+\r
+       LVA[0] = ColScoresA[0];\r
+       LVB[0] = ColScoresB[0];\r
+       for (unsigned Col = 1; Col < ColCount; ++Col)\r
+               {\r
+               LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col];\r
+               LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col];\r
+               }\r
+\r
+       vector<float> RVA(ColCount, 0.0f);\r
+       vector<float> RVB(ColCount, 0.0f);\r
+\r
+       RVA[ColCount-1] = ColScoresA[ColCount-1];\r
+       RVB[ColCount-1] = ColScoresB[ColCount-1];\r
+       for (int Col = ColCount-2; Col >= 0; --Col)\r
+               {\r
+               RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col];\r
+               RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col];\r
+               }\r
+\r
+       bool FirstA = true;\r
+       float MaxSum = 0.0;\r
+       unsigned ColX = UINT_MAX;\r
+       for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+               {\r
+               float Sum = LVA[Col] + RVB[Col+1];\r
+               if (Sum > MaxSum)\r
+                       {\r
+                       FirstA = true;\r
+                       MaxSum = Sum;\r
+                       ColX = Col;\r
+                       }\r
+               }\r
+\r
+       for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+               {\r
+               float Sum = LVB[Col] + RVA[Col+1];\r
+               if (Sum > MaxSum)\r
+                       {\r
+                       FirstA = false;\r
+                       MaxSum = Sum;\r
+                       ColX = Col;\r
+                       }\r
+               }\r
+       if (ColX == UINT_MAX)\r
+               return;\r
+\r
+       unsigned ColLo = UINT_MAX;\r
+       unsigned ColHi = UINT_MAX;\r
+       if (FirstA)\r
+               {\r
+               float Sum = 0.0f;\r
+               for (int Col = ColX; Col >= 0; --Col)\r
+                       {\r
+                       Sum += ColScoresA[Col];\r
+                       if (Sum >= LVA[ColX])\r
+                               {\r
+                               ColLo = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= LVA[ColX]);\r
+               Sum = 0.0f;\r
+               for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+                       {\r
+                       Sum += ColScoresB[Col];\r
+                       if (Sum >= RVB[ColX])\r
+                               {\r
+                               ColHi = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= RVB[ColX]);\r
+               }\r
+       else\r
+               {\r
+               float Sum = 0.0f;\r
+               for (int Col = ColX; Col >= 0; --Col)\r
+                       {\r
+                       Sum += ColScoresB[Col];\r
+                       if (Sum >= LVB[ColX])\r
+                               {\r
+                               ColLo = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= LVB[ColX]);\r
+               Sum = 0.0f;\r
+               for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+                       {\r
+                       Sum += ColScoresA[Col];\r
+                       if (Sum >= RVA[ColX])\r
+                               {\r
+                               ColHi = Col;\r
+                               break;\r
+                               }\r
+                       }\r
+               asserta(Sum >= RVA[ColX]);\r
+               }\r
+\r
+       unsigned ColXHi = ColX;\r
+       for (unsigned Col = ColX + 1; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               \r
+               if (q == a && q == b && !isgap(q))\r
+                       ColXHi = Col;\r
+               else\r
+                       break;\r
+               }\r
+\r
+       unsigned ColXLo = ColX;\r
+       for (int Col = (int) ColX - 1; Col >= 0; --Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               \r
+               if (q == a && q == b && !isgap(q))\r
+                       ColXLo = Col;\r
+               else\r
+                       break;\r
+               }\r
+\r
+       unsigned IdQA = 0;\r
+       unsigned IdQB = 0;\r
+       unsigned IdAB = 0;\r
+       unsigned NQA = 0;\r
+       unsigned NQB = 0;\r
+       unsigned NAB = 0;\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       ++NQA;\r
+                       if (q == a)\r
+                               ++IdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       ++NQB;\r
+                       if (q == b)\r
+                               ++IdQB;\r
+                       }\r
+\r
+               if (!isgap(a) && !isgap(b))\r
+                       {\r
+                       ++NAB;\r
+                       if (a == b)\r
+                               ++IdAB;\r
+                       }\r
+               }\r
+\r
+       Hit.PctIdQA = Pct(IdQA, NQA);\r
+       Hit.PctIdQB = Pct(IdQB, NQB);\r
+       Hit.PctIdAB = Pct(IdAB, NAB);\r
+\r
+       unsigned LIdQA = 0;\r
+       unsigned LIdQB = 0;\r
+       for (unsigned Col = ColLo; Col < ColXLo; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       if (q == a)\r
+                               ++LIdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       if (q == b)\r
+                               ++LIdQB;\r
+                       }\r
+               }\r
+\r
+       unsigned RIdQA = 0;\r
+       unsigned RIdQB = 0;\r
+       for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+\r
+               if (!isgap(q) && !isgap(a))\r
+                       {\r
+                       if (q == a)\r
+                               ++RIdQA;\r
+                       }\r
+\r
+               if (!isgap(q) && !isgap(b))\r
+                       {\r
+                       if (q == b)\r
+                               ++RIdQB;\r
+                       }\r
+               }\r
+\r
+       unsigned IdDiffL = max(LIdQA, LIdQB) - min(LIdQA, LIdQB);\r
+       unsigned IdDiffR = max(RIdQA, RIdQB) - min(RIdQA, RIdQB);\r
+       unsigned MinIdDiff = min(IdDiffL, IdDiffR);\r
+       unsigned ColRange = ColHi - ColLo + 1;\r
+       if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract)\r
+               return;\r
+\r
+//     double Div = Pct(MinIdDiff, QSD.L);\r
+\r
+#if    TRACE\r
+       {\r
+       Log("  Col  A Q B   ScoreA   ScoreB      LVA      LVB      RVA      RVB\n");\r
+       Log("-----  - - -  -------  -------  -------  -------  -------  -------\n");\r
+       for (unsigned Col = 0; Col < ColCount; ++Col)\r
+               {\r
+               if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0)\r
+                       continue;\r
+\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               Log("%5u  %c %c %c", Col, a, q, b);\r
+\r
+               if (ColScoresA[Col] == 0.0)\r
+                       Log("  %7.7s", "");\r
+               else\r
+                       Log("  %7.1f", ColScoresA[Col]);\r
+\r
+               if (ColScoresB[Col] == 0.0)\r
+                       Log("  %7.7s", "");\r
+               else\r
+                       Log("  %7.1f", ColScoresB[Col]);\r
+\r
+               Log("  %7.1f  %7.1f  %7.1f  %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]);\r
+\r
+               Log("\n");\r
+               }\r
+       Log("\n");\r
+       Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n",\r
+         MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA));\r
+       Log("  LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB);\r
+       }\r
+#endif\r
+\r
+       string Q3L;\r
+       string A3L;\r
+       string B3L;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3[Col];\r
+               char a = A3[Col];\r
+               char b = B3[Col];\r
+\r
+               Q3L += q;\r
+               A3L += a;\r
+               B3L += b;\r
+               }\r
+\r
+       AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit);\r
+\r
+#if    0\r
+// CS SNPs\r
+       Hit.CS_LY = 0;\r
+       Hit.CS_LN = 0;\r
+       Hit.CS_RY = 0;\r
+       Hit.CS_RN = 0;\r
+       Hit.CS_LA = 0;\r
+       Hit.CS_RA = 0;\r
+       for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+               {\r
+               char q = Q3Seq[Col];\r
+               char a = A3Seq[Col];\r
+               char b = B3Seq[Col];\r
+               if (q == a && q == b && a == b)\r
+                       continue;\r
+               if (isgap(q) || isgap(a) || isgap(b))\r
+                       continue;\r
+               if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+                       continue;\r
+               if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+                       continue;\r
+\r
+               if (!FirstA)\r
+                       swap(a, b);\r
+\r
+               if (Col < ColXLo)\r
+                       {\r
+                       if (q == a && q != b)\r
+                               ++Hit.CS_LY;\r
+                       else if (q == b && q != a)\r
+                               ++Hit.CS_LN;\r
+                       else\r
+                               ++Hit.CS_LA;\r
+                       }\r
+               else if (Col > ColXHi)\r
+                       {\r
+                       if (q == b && q != a)\r
+                               ++Hit.CS_RY;\r
+                       else if (q == a && q != b)\r
+                               ++Hit.CS_RN;\r
+                       else\r
+                               ++Hit.CS_RA;\r
+                       }\r
+               }\r
+\r
+       double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+       double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+       Hit.Score = ScoreL*ScoreR;\r
+\r
+       //Hit.QSD = QSD;\r
+       //if (FirstA)\r
+       //      {\r
+       //      Hit.ASD = ASD;\r
+       //      Hit.BSD = BSD;\r
+       //      Hit.PathQA = PathQA;\r
+       //      Hit.PathQB = PathQB;\r
+       //      }\r
+       //else\r
+       //      {\r
+       //      Hit.ASD = BSD;\r
+       //      Hit.BSD = ASD;\r
+       //      }\r
+\r
+       //Hit.ColLo = ColLo;\r
+       //Hit.ColXLo = ColXLo;\r
+       //Hit.ColXHi = ColXHi;\r
+       //Hit.ColHi = ColHi;\r
+       //Hit.Div = Div;\r
+\r
+//     Hit.LogMe();\r
+#endif\r
+       }\r