]> git.donarmstrong.com Git - mothur.git/blob - uchime_src/alnparams.cpp
changes while testing
[mothur.git] / uchime_src / alnparams.cpp
1 #include "myutils.h"\r
2 #include <float.h>      // for FLT_MAX\r
3 #include "mx.h"\r
4 #include "alnparams.h"\r
5 #include "hsp.h"\r
6 \r
7 #define TEST    0\r
8 \r
9 void SetBLOSUM62();
10 void SetNucSubstMx(double Match, double Mismatch);\r
11 void ReadSubstMx(const string &FileName, Mx<float> &Mxf);\r
12
13 extern Mx<float> g_SubstMxf;
14 extern float **g_SubstMx;
15 \r
16 void AlnParams::Clear()\r
17         {\r
18         SubstMxName = 0;\r
19         LocalOpen = OBVIOUSLY_WRONG_PENALTY;\r
20         LocalExt = OBVIOUSLY_WRONG_PENALTY;\r
21         OpenA = OBVIOUSLY_WRONG_PENALTY;\r
22         OpenB = OBVIOUSLY_WRONG_PENALTY;\r
23         ExtA = OBVIOUSLY_WRONG_PENALTY;\r
24         ExtB = OBVIOUSLY_WRONG_PENALTY;\r
25         LOpenA = OBVIOUSLY_WRONG_PENALTY;\r
26         LOpenB = OBVIOUSLY_WRONG_PENALTY;\r
27         ROpenA = OBVIOUSLY_WRONG_PENALTY;\r
28         ROpenB = OBVIOUSLY_WRONG_PENALTY;\r
29         LExtA = OBVIOUSLY_WRONG_PENALTY;\r
30         LExtB = OBVIOUSLY_WRONG_PENALTY;\r
31         RExtA = OBVIOUSLY_WRONG_PENALTY;\r
32         RExtB = OBVIOUSLY_WRONG_PENALTY;\r
33         Nucleo = false;\r
34         NucleoSet = false;\r
35         }\r
36 \r
37 bool AlnParams::Is2() const\r
38         {\r
39         float g = OpenA;\r
40         float e = ExtA;\r
41         if (OpenB != g || LOpenA != g || LOpenB != g || ROpenA != g || ROpenB != g)\r
42                 return false;\r
43         if (ExtB != e || LExtA != e || LExtB != e || RExtA != e || RExtB != e)\r
44                 return false;\r
45         return true;\r
46         }\r
47 \r
48 bool AlnParams::Is4() const\r
49         {\r
50         float g = OpenA;\r
51         float tg = LOpenA;\r
52         float e = ExtA;\r
53         float te = LExtA;\r
54         if (OpenB != g || LOpenA != tg || LOpenB != tg || ROpenA != tg || ROpenB != tg)\r
55                 return false;\r
56         if (ExtB != e || LExtA != te || LExtB != te || RExtA != te || RExtB != te)\r
57                 return false;\r
58         return true;\r
59         }\r
60 \r
61 const char *AlnParams::GetType() const\r
62         {\r
63         if (Is2())\r
64                 return "2";\r
65         else if (Is4())\r
66                 return "4";\r
67         return "12";\r
68         }\r
69 \r
70 void AlnParams::Init2(const float * const *Mx, float Open, float Ext)\r
71         {\r
72         SubstMx = Mx;\r
73         OpenA = OpenB = LOpenA = LOpenB = ROpenA = ROpenB = Open;\r
74         ExtA = ExtB = LExtA = LExtB = RExtA = RExtB = Ext;\r
75         }\r
76 \r
77 void AlnParams::SetLocal(float Open, float Ext)\r
78         {\r
79         LocalOpen = Open;\r
80         LocalExt = Ext;\r
81         }\r
82 \r
83 void AlnParams::Init4(const float * const *Mx, float Open, float Ext,\r
84   float TermOpen, float TermExt)\r
85         {\r
86         SubstMx = Mx;\r
87         OpenA = OpenB = Open;\r
88         LOpenA = LOpenB = ROpenA = ROpenB = TermOpen;\r
89         ExtA = ExtB = Ext;\r
90         LExtA = LExtB = RExtA = RExtB = TermExt;\r
91         }\r
92 \r
93 void AlnParams::Init(const AlnParams &AP, const HSPData &HSP,\r
94   unsigned LA, unsigned LB)\r
95         {\r
96         SubstMx = AP.SubstMx;\r
97         OpenA = AP.OpenA;\r
98         OpenB = AP.OpenB;\r
99         ExtA = AP.ExtA;\r
100         ExtB = AP.ExtB;\r
101 \r
102         if (HSP.LeftA())\r
103                 {\r
104                 LOpenA = AP.LOpenA;\r
105                 LExtA = AP.LExtA;\r
106                 }\r
107         else\r
108                 {\r
109                 LOpenA = AP.OpenA;\r
110                 LExtA = AP.ExtA;\r
111                 }\r
112 \r
113         if (HSP.LeftB())\r
114                 {\r
115                 LOpenB = AP.LOpenB;\r
116                 LExtB = AP.LExtB;\r
117                 }\r
118         else\r
119                 {\r
120                 LOpenB = AP.OpenB;\r
121                 LExtB = AP.ExtB;\r
122                 }\r
123 \r
124         if (HSP.RightA(LA))\r
125                 {\r
126                 ROpenA = AP.ROpenA;\r
127                 RExtA = AP.RExtA;\r
128                 }\r
129         else\r
130                 {\r
131                 ROpenA = AP.OpenA;\r
132                 RExtA = AP.ExtA;\r
133                 }\r
134 \r
135         if (HSP.RightB(LB))\r
136                 {\r
137                 ROpenB = AP.ROpenB;\r
138                 RExtB = AP.RExtB;\r
139                 }\r
140         else\r
141                 {\r
142                 ROpenB = AP.OpenB;\r
143                 RExtB = AP.ExtB;\r
144                 }\r
145         }\r
146 \r
147 void AlnParams::LogMe() const\r
148         {\r
149         Log("AlnParams(%s)", GetType());\r
150         if (Is2())\r
151                 Log(" g=%.1f e=%.1f", -OpenA, -ExtA);\r
152         else if (Is4())\r
153                 Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -ExtA, -LOpenA, -LExtA);\r
154         else\r
155                 Log(\r
156 " gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f",\r
157                   OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB);\r
158         Log("\n");\r
159         }\r
160 \r
161 /***\r
162 Open/Ext format string is one or more:\r
163         [<flag><flag>...]<value>\r
164 \r
165 Value is (positive) penalty or * (disabled).\r
166 Flag is:\r
167         Q               Query.\r
168         T               Target sequence.\r
169         I               Internal gaps (defafault internal and terminal).\r
170         E               End gaps (default internal and terminal).\r
171         L               Left end.\r
172         R               Right end.\r
173 ***/\r
174 \r
175 static void ParseGapStr(const string &s,\r
176   float &QI, float &QL, float &QR,\r
177   float &TI, float &TL, float &TR)\r
178         {\r
179         if (s.empty())\r
180                 return;\r
181 \r
182         bool Q = false;\r
183         bool T = false;\r
184         bool I = false;\r
185         bool E = false;\r
186         bool L = false;\r
187         bool R = false;\r
188 \r
189         const unsigned K = SIZE(s);\r
190         unsigned Dec = 0;\r
191         float Value = FLT_MAX;\r
192         for (unsigned i = 0; i <= K; ++i)\r
193                 {\r
194                 char c = s.c_str()[i];\r
195                 if (c == 0 || c == '/')\r
196                         {\r
197                         if (Value == FLT_MAX)\r
198                                 Die("Invalid gap penalty string, missing penalty '%s'", s.c_str());\r
199                         if (!Q && !T && !I && !E && !L && !R)\r
200                                 {\r
201                                 Q = true;\r
202                                 T = true;\r
203                                 L = true;\r
204                                 R = true;\r
205                                 I = true;\r
206                                 }\r
207 \r
208                         if (!E && !I && !L && !R)\r
209                                 {\r
210                                 E = false;\r
211                                 I = true;\r
212                                 L = true;\r
213                                 R = true;\r
214                                 }\r
215 \r
216                         if (E)\r
217                                 {\r
218                                 if (L || R)\r
219                                         Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str());\r
220                                 L = true;\r
221                                 R = true;\r
222                                 }\r
223 \r
224                         if (!Q && !T)\r
225                                 {\r
226                                 Q = true;\r
227                                 T = true;\r
228                                 }\r
229 \r
230                         if (Q && L)\r
231                                 QL = -Value;\r
232                         if (Q && R)\r
233                                 QR = -Value;\r
234                         if (Q && I)\r
235                                 QI = -Value;\r
236                         if (T && L)\r
237                                 TL = -Value;\r
238                         if (T && R)\r
239                                 TR = -Value;\r
240                         if (T && I)\r
241                                 TI = -Value;\r
242                         \r
243                         Value = FLT_MAX;\r
244                         Dec = 0;\r
245                         Q = false;\r
246                         T = false;\r
247                         I = false;\r
248                         E = false;\r
249                         L = false;\r
250                         R = false;\r
251                         }\r
252                 else if (c == '*')\r
253                         {\r
254                         if (Value != FLT_MAX)\r
255                                 Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
256                         Value = -MINUS_INFINITY;\r
257                         }\r
258                 else if (isdigit(c))\r
259                         {\r
260                         if (Value == -MINUS_INFINITY)\r
261                                 Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
262                         if (Value == FLT_MAX)\r
263                                 Value = 0.0;\r
264                         if (Dec > 0)\r
265                                 {\r
266                                 Dec *= 10;\r
267                                 Value += float(c - '0')/Dec;\r
268                                 }\r
269                         else\r
270                                 Value = Value*10 + (c - '0');\r
271                         }\r
272                 else if (c == '.')\r
273                         {\r
274                         if (Dec > 0)\r
275                                 Die("Invalid gap penalty (two decimal points) '%s'", s.c_str());\r
276                         Dec = 1;\r
277                         }\r
278                 else\r
279                         {\r
280                         switch (c)\r
281                                 {\r
282                         case 'Q':\r
283                                 Q = true;\r
284                                 break;\r
285                         case 'T':\r
286                                 T = true;\r
287                                 break;\r
288                         case 'I':\r
289                                 I = true;\r
290                                 break;\r
291                         case 'L':\r
292                                 L = true;\r
293                                 break;\r
294                         case 'R':\r
295                                 R = true;\r
296                                 break;\r
297                         case 'E':\r
298                                 E = true;\r
299                                 break;\r
300                         default:\r
301                                 Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str());\r
302                                 }\r
303                         }\r
304                 }\r
305         }\r
306 \r
307 void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr)\r
308         {\r
309         ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB);\r
310         ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB);\r
311         }\r
312 \r
313 void AlnParams::SetMxFromCmdLine(bool IsNucleo)\r
314         {\r
315         if (IsNucleo)\r
316                 SetNucSubstMx(opt_match, opt_mismatch);
317         else\r
318                 {\r
319                 if (opt_matrix == "")\r
320                         {\r
321                         SubstMxName = "BLOSUM62";\r
322                         SetBLOSUM62();
323                         }
324                 else\r
325                         {\r
326                         ReadSubstMx(opt_matrix, g_SubstMxf);\r
327                         g_SubstMx = g_SubstMxf.GetData();\r
328                         g_SubstMxf.LogMe();\r
329                         SubstMxName = opt_matrix.c_str();\r
330                         }\r
331                 }\r
332         SubstMx = g_SubstMx;\r
333         asserta(SubstMx != 0);\r
334         }\r
335 \r
336 void AlnParams::InitFromCmdLine(bool IsNucleo)\r
337         {\r
338         Clear();\r
339         Nucleo = IsNucleo;\r
340         NucleoSet = true;\r
341 \r
342         SetMxFromCmdLine(IsNucleo);\r
343 \r
344 // Local\r
345         if (optset_lopen || optset_lext)\r
346                 {\r
347                 if (!optset_lopen || !optset_lext)\r
348                         Die("Must set both --lopen and --lext");\r
349                 if (opt_lopen < 0.0 || opt_lext < 0.0)\r
350                         Die("Invalid --lopen/--lext, gap penalties must be >= 0");\r
351                 SetLocal(float(-opt_lopen), float(-opt_lext));\r
352                 }\r
353         else\r
354                 {\r
355         // Same penalties, if-statement to note could differ.\r
356                 if (IsNucleo)\r
357                         SetLocal(-10.0f, -1.0f);\r
358                 else\r
359                         SetLocal(-10.0f, -1.0f);\r
360                 }\r
361 \r
362 // Global\r
363         if (IsNucleo)\r
364                 Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5);
365         else\r
366                 Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5);
367         SetPenalties(opt_gapopen, opt_gapext);\r
368         }\r
369 \r
370 float AlnParams::GetLocalOpen() const\r
371         {\r
372         return LocalOpen;\r
373         }\r
374 \r
375 float AlnParams::GetLocalExt() const\r
376         {\r
377         return LocalExt;\r
378         }\r
379 \r
380 bool AlnParams::GetIsNucleo() const\r
381         {\r
382         asserta(NucleoSet);\r
383         return Nucleo;\r
384         }\r
385 \r
386 unsigned GetWindexWordLength(bool Nucleo)\r
387         {\r
388         if (optset_w)\r
389                 return opt_w;\r
390 \r
391         if (Nucleo)\r
392                 return 8;\r
393         else\r
394                 return 5;\r
395         }\r
396 \r
397 #if     TEST\r
398 static void Test1(const string &os, const string &es)\r
399         {\r
400         AlnParams AP;\r
401         Log("\n");\r
402         Log("OpenStr %s\n", os.c_str());\r
403         Log(" ExtStr %s\n", es.c_str());\r
404         AP.SetPenalties(os, es);\r
405         AP.LogMe();\r
406         }\r
407 \r
408 void TestGapStr()\r
409         {\r
410         Test1("17I/0.5E", "1I/0.5E");\r
411         Test1("17I/0.5L/0.4R", "1Q/2T");\r
412         Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI");\r
413         }\r
414 #endif // TEST\r