]> git.donarmstrong.com Git - mothur.git/blobdiff - uchime_src/alnparams.cpp
added uchime_src folder. added biom parameter to make.shared. added biom as a current...
[mothur.git] / uchime_src / alnparams.cpp
diff --git a/uchime_src/alnparams.cpp b/uchime_src/alnparams.cpp
new file mode 100644 (file)
index 0000000..d1b9036
--- /dev/null
@@ -0,0 +1,414 @@
+#include "myutils.h"\r
+#include <float.h>     // for FLT_MAX\r
+#include "mx.h"\r
+#include "alnparams.h"\r
+#include "hsp.h"\r
+\r
+#define TEST   0\r
+\r
+void SetBLOSUM62();
+void SetNucSubstMx(double Match, double Mismatch);\r
+void ReadSubstMx(const string &FileName, Mx<float> &Mxf);\r
+
+extern Mx<float> g_SubstMxf;
+extern float **g_SubstMx;
+\r
+void AlnParams::Clear()\r
+       {\r
+       SubstMxName = 0;\r
+       LocalOpen = OBVIOUSLY_WRONG_PENALTY;\r
+       LocalExt = OBVIOUSLY_WRONG_PENALTY;\r
+       OpenA = OBVIOUSLY_WRONG_PENALTY;\r
+       OpenB = OBVIOUSLY_WRONG_PENALTY;\r
+       ExtA = OBVIOUSLY_WRONG_PENALTY;\r
+       ExtB = OBVIOUSLY_WRONG_PENALTY;\r
+       LOpenA = OBVIOUSLY_WRONG_PENALTY;\r
+       LOpenB = OBVIOUSLY_WRONG_PENALTY;\r
+       ROpenA = OBVIOUSLY_WRONG_PENALTY;\r
+       ROpenB = OBVIOUSLY_WRONG_PENALTY;\r
+       LExtA = OBVIOUSLY_WRONG_PENALTY;\r
+       LExtB = OBVIOUSLY_WRONG_PENALTY;\r
+       RExtA = OBVIOUSLY_WRONG_PENALTY;\r
+       RExtB = OBVIOUSLY_WRONG_PENALTY;\r
+       Nucleo = false;\r
+       NucleoSet = false;\r
+       }\r
+\r
+bool AlnParams::Is2() const\r
+       {\r
+       float g = OpenA;\r
+       float e = ExtA;\r
+       if (OpenB != g || LOpenA != g || LOpenB != g || ROpenA != g || ROpenB != g)\r
+               return false;\r
+       if (ExtB != e || LExtA != e || LExtB != e || RExtA != e || RExtB != e)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+bool AlnParams::Is4() const\r
+       {\r
+       float g = OpenA;\r
+       float tg = LOpenA;\r
+       float e = ExtA;\r
+       float te = LExtA;\r
+       if (OpenB != g || LOpenA != tg || LOpenB != tg || ROpenA != tg || ROpenB != tg)\r
+               return false;\r
+       if (ExtB != e || LExtA != te || LExtB != te || RExtA != te || RExtB != te)\r
+               return false;\r
+       return true;\r
+       }\r
+\r
+const char *AlnParams::GetType() const\r
+       {\r
+       if (Is2())\r
+               return "2";\r
+       else if (Is4())\r
+               return "4";\r
+       return "12";\r
+       }\r
+\r
+void AlnParams::Init2(const float * const *Mx, float Open, float Ext)\r
+       {\r
+       SubstMx = Mx;\r
+       OpenA = OpenB = LOpenA = LOpenB = ROpenA = ROpenB = Open;\r
+       ExtA = ExtB = LExtA = LExtB = RExtA = RExtB = Ext;\r
+       }\r
+\r
+void AlnParams::SetLocal(float Open, float Ext)\r
+       {\r
+       LocalOpen = Open;\r
+       LocalExt = Ext;\r
+       }\r
+\r
+void AlnParams::Init4(const float * const *Mx, float Open, float Ext,\r
+  float TermOpen, float TermExt)\r
+       {\r
+       SubstMx = Mx;\r
+       OpenA = OpenB = Open;\r
+       LOpenA = LOpenB = ROpenA = ROpenB = TermOpen;\r
+       ExtA = ExtB = Ext;\r
+       LExtA = LExtB = RExtA = RExtB = TermExt;\r
+       }\r
+\r
+void AlnParams::Init(const AlnParams &AP, const HSPData &HSP,\r
+  unsigned LA, unsigned LB)\r
+       {\r
+       SubstMx = AP.SubstMx;\r
+       OpenA = AP.OpenA;\r
+       OpenB = AP.OpenB;\r
+       ExtA = AP.ExtA;\r
+       ExtB = AP.ExtB;\r
+\r
+       if (HSP.LeftA())\r
+               {\r
+               LOpenA = AP.LOpenA;\r
+               LExtA = AP.LExtA;\r
+               }\r
+       else\r
+               {\r
+               LOpenA = AP.OpenA;\r
+               LExtA = AP.ExtA;\r
+               }\r
+\r
+       if (HSP.LeftB())\r
+               {\r
+               LOpenB = AP.LOpenB;\r
+               LExtB = AP.LExtB;\r
+               }\r
+       else\r
+               {\r
+               LOpenB = AP.OpenB;\r
+               LExtB = AP.ExtB;\r
+               }\r
+\r
+       if (HSP.RightA(LA))\r
+               {\r
+               ROpenA = AP.ROpenA;\r
+               RExtA = AP.RExtA;\r
+               }\r
+       else\r
+               {\r
+               ROpenA = AP.OpenA;\r
+               RExtA = AP.ExtA;\r
+               }\r
+\r
+       if (HSP.RightB(LB))\r
+               {\r
+               ROpenB = AP.ROpenB;\r
+               RExtB = AP.RExtB;\r
+               }\r
+       else\r
+               {\r
+               ROpenB = AP.OpenB;\r
+               RExtB = AP.ExtB;\r
+               }\r
+       }\r
+\r
+void AlnParams::LogMe() const\r
+       {\r
+       Log("AlnParams(%s)", GetType());\r
+       if (Is2())\r
+               Log(" g=%.1f e=%.1f", -OpenA, -ExtA);\r
+       else if (Is4())\r
+               Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -ExtA, -LOpenA, -LExtA);\r
+       else\r
+               Log(\r
+" gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f",\r
+                 OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB);\r
+       Log("\n");\r
+       }\r
+\r
+/***\r
+Open/Ext format string is one or more:\r
+       [<flag><flag>...]<value>\r
+\r
+Value is (positive) penalty or * (disabled).\r
+Flag is:\r
+       Q               Query.\r
+       T               Target sequence.\r
+       I               Internal gaps (defafault internal and terminal).\r
+       E               End gaps (default internal and terminal).\r
+       L               Left end.\r
+       R               Right end.\r
+***/\r
+\r
+static void ParseGapStr(const string &s,\r
+  float &QI, float &QL, float &QR,\r
+  float &TI, float &TL, float &TR)\r
+       {\r
+       if (s.empty())\r
+               return;\r
+\r
+       bool Q = false;\r
+       bool T = false;\r
+       bool I = false;\r
+       bool E = false;\r
+       bool L = false;\r
+       bool R = false;\r
+\r
+       const unsigned K = SIZE(s);\r
+       unsigned Dec = 0;\r
+       float Value = FLT_MAX;\r
+       for (unsigned i = 0; i <= K; ++i)\r
+               {\r
+               char c = s.c_str()[i];\r
+               if (c == 0 || c == '/')\r
+                       {\r
+                       if (Value == FLT_MAX)\r
+                               Die("Invalid gap penalty string, missing penalty '%s'", s.c_str());\r
+                       if (!Q && !T && !I && !E && !L && !R)\r
+                               {\r
+                               Q = true;\r
+                               T = true;\r
+                               L = true;\r
+                               R = true;\r
+                               I = true;\r
+                               }\r
+\r
+                       if (!E && !I && !L && !R)\r
+                               {\r
+                               E = false;\r
+                               I = true;\r
+                               L = true;\r
+                               R = true;\r
+                               }\r
+\r
+                       if (E)\r
+                               {\r
+                               if (L || R)\r
+                                       Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str());\r
+                               L = true;\r
+                               R = true;\r
+                               }\r
+\r
+                       if (!Q && !T)\r
+                               {\r
+                               Q = true;\r
+                               T = true;\r
+                               }\r
+\r
+                       if (Q && L)\r
+                               QL = -Value;\r
+                       if (Q && R)\r
+                               QR = -Value;\r
+                       if (Q && I)\r
+                               QI = -Value;\r
+                       if (T && L)\r
+                               TL = -Value;\r
+                       if (T && R)\r
+                               TR = -Value;\r
+                       if (T && I)\r
+                               TI = -Value;\r
+                       \r
+                       Value = FLT_MAX;\r
+                       Dec = 0;\r
+                       Q = false;\r
+                       T = false;\r
+                       I = false;\r
+                       E = false;\r
+                       L = false;\r
+                       R = false;\r
+                       }\r
+               else if (c == '*')\r
+                       {\r
+                       if (Value != FLT_MAX)\r
+                               Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+                       Value = -MINUS_INFINITY;\r
+                       }\r
+               else if (isdigit(c))\r
+                       {\r
+                       if (Value == -MINUS_INFINITY)\r
+                               Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+                       if (Value == FLT_MAX)\r
+                               Value = 0.0;\r
+                       if (Dec > 0)\r
+                               {\r
+                               Dec *= 10;\r
+                               Value += float(c - '0')/Dec;\r
+                               }\r
+                       else\r
+                               Value = Value*10 + (c - '0');\r
+                       }\r
+               else if (c == '.')\r
+                       {\r
+                       if (Dec > 0)\r
+                               Die("Invalid gap penalty (two decimal points) '%s'", s.c_str());\r
+                       Dec = 1;\r
+                       }\r
+               else\r
+                       {\r
+                       switch (c)\r
+                               {\r
+                       case 'Q':\r
+                               Q = true;\r
+                               break;\r
+                       case 'T':\r
+                               T = true;\r
+                               break;\r
+                       case 'I':\r
+                               I = true;\r
+                               break;\r
+                       case 'L':\r
+                               L = true;\r
+                               break;\r
+                       case 'R':\r
+                               R = true;\r
+                               break;\r
+                       case 'E':\r
+                               E = true;\r
+                               break;\r
+                       default:\r
+                               Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str());\r
+                               }\r
+                       }\r
+               }\r
+       }\r
+\r
+void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr)\r
+       {\r
+       ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB);\r
+       ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB);\r
+       }\r
+\r
+void AlnParams::SetMxFromCmdLine(bool IsNucleo)\r
+       {\r
+       if (IsNucleo)\r
+               SetNucSubstMx(opt_match, opt_mismatch);
+       else\r
+               {\r
+               if (opt_matrix == "")\r
+                       {\r
+                       SubstMxName = "BLOSUM62";\r
+                       SetBLOSUM62();
+                       }
+               else\r
+                       {\r
+                       ReadSubstMx(opt_matrix, g_SubstMxf);\r
+                       g_SubstMx = g_SubstMxf.GetData();\r
+                       g_SubstMxf.LogMe();\r
+                       SubstMxName = opt_matrix.c_str();\r
+                       }\r
+               }\r
+       SubstMx = g_SubstMx;\r
+       asserta(SubstMx != 0);\r
+       }\r
+\r
+void AlnParams::InitFromCmdLine(bool IsNucleo)\r
+       {\r
+       Clear();\r
+       Nucleo = IsNucleo;\r
+       NucleoSet = true;\r
+\r
+       SetMxFromCmdLine(IsNucleo);\r
+\r
+// Local\r
+       if (optset_lopen || optset_lext)\r
+               {\r
+               if (!optset_lopen || !optset_lext)\r
+                       Die("Must set both --lopen and --lext");\r
+               if (opt_lopen < 0.0 || opt_lext < 0.0)\r
+                       Die("Invalid --lopen/--lext, gap penalties must be >= 0");\r
+               SetLocal(float(-opt_lopen), float(-opt_lext));\r
+               }\r
+       else\r
+               {\r
+       // Same penalties, if-statement to note could differ.\r
+               if (IsNucleo)\r
+                       SetLocal(-10.0f, -1.0f);\r
+               else\r
+                       SetLocal(-10.0f, -1.0f);\r
+               }\r
+\r
+// Global\r
+       if (IsNucleo)\r
+               Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5);
+       else\r
+               Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5);
+       SetPenalties(opt_gapopen, opt_gapext);\r
+       }\r
+\r
+float AlnParams::GetLocalOpen() const\r
+       {\r
+       return LocalOpen;\r
+       }\r
+\r
+float AlnParams::GetLocalExt() const\r
+       {\r
+       return LocalExt;\r
+       }\r
+\r
+bool AlnParams::GetIsNucleo() const\r
+       {\r
+       asserta(NucleoSet);\r
+       return Nucleo;\r
+       }\r
+\r
+unsigned GetWindexWordLength(bool Nucleo)\r
+       {\r
+       if (optset_w)\r
+               return opt_w;\r
+\r
+       if (Nucleo)\r
+               return 8;\r
+       else\r
+               return 5;\r
+       }\r
+\r
+#if    TEST\r
+static void Test1(const string &os, const string &es)\r
+       {\r
+       AlnParams AP;\r
+       Log("\n");\r
+       Log("OpenStr %s\n", os.c_str());\r
+       Log(" ExtStr %s\n", es.c_str());\r
+       AP.SetPenalties(os, es);\r
+       AP.LogMe();\r
+       }\r
+\r
+void TestGapStr()\r
+       {\r
+       Test1("17I/0.5E", "1I/0.5E");\r
+       Test1("17I/0.5L/0.4R", "1Q/2T");\r
+       Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI");\r
+       }\r
+#endif // TEST\r