]> git.donarmstrong.com Git - mothur.git/blob - alnparams.cpp
41ee738027921b1a217544a67a449ff0dc5fb761
[mothur.git] / alnparams.cpp
1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.\r
2 \r
3 #include "myutils.h"\r
4 #include <float.h>      // for FLT_MAX\r
5 #include "mx.h"\r
6 #include "alnparams.h"\r
7 #include "hsp.h"\r
8 \r
9 #define TEST    0\r
10 \r
11 void SetBLOSUM62();\r
12 void SetNucSubstMx(double Match, double Mismatch);\r
13 void ReadSubstMx(const string &FileName, Mx<float> &Mxf);\r
14 \r
15 extern Mx<float> g_SubstMxf;\r
16 extern float **g_SubstMx;\r
17 \r
18 void AlnParams::Clear()\r
19         {\r
20         SubstMxName = 0;\r
21         LocalOpen = OBVIOUSLY_WRONG_PENALTY;\r
22         LocalExt = OBVIOUSLY_WRONG_PENALTY;\r
23         OpenA = OBVIOUSLY_WRONG_PENALTY;\r
24         OpenB = OBVIOUSLY_WRONG_PENALTY;\r
25         ExtA = OBVIOUSLY_WRONG_PENALTY;\r
26         ExtB = OBVIOUSLY_WRONG_PENALTY;\r
27         LOpenA = OBVIOUSLY_WRONG_PENALTY;\r
28         LOpenB = OBVIOUSLY_WRONG_PENALTY;\r
29         ROpenA = OBVIOUSLY_WRONG_PENALTY;\r
30         ROpenB = OBVIOUSLY_WRONG_PENALTY;\r
31         LExtA = OBVIOUSLY_WRONG_PENALTY;\r
32         LExtB = OBVIOUSLY_WRONG_PENALTY;\r
33         RExtA = OBVIOUSLY_WRONG_PENALTY;\r
34         RExtB = OBVIOUSLY_WRONG_PENALTY;\r
35         Nucleo = false;\r
36         NucleoSet = false;\r
37         }\r
38 \r
39 bool AlnParams::Is2() const\r
40         {\r
41         float g = OpenA;\r
42         float e = ExtA;\r
43         if (OpenB != g || LOpenA != g || LOpenB != g || ROpenA != g || ROpenB != g)\r
44                 return false;\r
45         if (ExtB != e || LExtA != e || LExtB != e || RExtA != e || RExtB != e)\r
46                 return false;\r
47         return true;\r
48         }\r
49 \r
50 bool AlnParams::Is4() const\r
51         {\r
52         float g = OpenA;\r
53         float tg = LOpenA;\r
54         float e = ExtA;\r
55         float te = LExtA;\r
56         if (OpenB != g || LOpenA != tg || LOpenB != tg || ROpenA != tg || ROpenB != tg)\r
57                 return false;\r
58         if (ExtB != e || LExtA != te || LExtB != te || RExtA != te || RExtB != te)\r
59                 return false;\r
60         return true;\r
61         }\r
62 \r
63 const char *AlnParams::GetType() const\r
64         {\r
65         if (Is2())\r
66                 return "2";\r
67         else if (Is4())\r
68                 return "4";\r
69         return "12";\r
70         }\r
71 \r
72 void AlnParams::Init2(const float * const *Mx, float Open, float Ext)\r
73         {\r
74         SubstMx = Mx;\r
75         OpenA = OpenB = LOpenA = LOpenB = ROpenA = ROpenB = Open;\r
76         ExtA = ExtB = LExtA = LExtB = RExtA = RExtB = Ext;\r
77         }\r
78 \r
79 void AlnParams::SetLocal(float Open, float Ext)\r
80         {\r
81         LocalOpen = Open;\r
82         LocalExt = Ext;\r
83         }\r
84 \r
85 void AlnParams::Init4(const float * const *Mx, float Open, float Ext,\r
86   float TermOpen, float TermExt)\r
87         {\r
88         SubstMx = Mx;\r
89         OpenA = OpenB = Open;\r
90         LOpenA = LOpenB = ROpenA = ROpenB = TermOpen;\r
91         ExtA = ExtB = Ext;\r
92         LExtA = LExtB = RExtA = RExtB = TermExt;\r
93         }\r
94 \r
95 void AlnParams::Init(const AlnParams &AP, const HSPData &HSP,\r
96   unsigned LA, unsigned LB)\r
97         {\r
98         SubstMx = AP.SubstMx;\r
99         OpenA = AP.OpenA;\r
100         OpenB = AP.OpenB;\r
101         ExtA = AP.ExtA;\r
102         ExtB = AP.ExtB;\r
103 \r
104         if (HSP.LeftA())\r
105                 {\r
106                 LOpenA = AP.LOpenA;\r
107                 LExtA = AP.LExtA;\r
108                 }\r
109         else\r
110                 {\r
111                 LOpenA = AP.OpenA;\r
112                 LExtA = AP.ExtA;\r
113                 }\r
114 \r
115         if (HSP.LeftB())\r
116                 {\r
117                 LOpenB = AP.LOpenB;\r
118                 LExtB = AP.LExtB;\r
119                 }\r
120         else\r
121                 {\r
122                 LOpenB = AP.OpenB;\r
123                 LExtB = AP.ExtB;\r
124                 }\r
125 \r
126         if (HSP.RightA(LA))\r
127                 {\r
128                 ROpenA = AP.ROpenA;\r
129                 RExtA = AP.RExtA;\r
130                 }\r
131         else\r
132                 {\r
133                 ROpenA = AP.OpenA;\r
134                 RExtA = AP.ExtA;\r
135                 }\r
136 \r
137         if (HSP.RightB(LB))\r
138                 {\r
139                 ROpenB = AP.ROpenB;\r
140                 RExtB = AP.RExtB;\r
141                 }\r
142         else\r
143                 {\r
144                 ROpenB = AP.OpenB;\r
145                 RExtB = AP.ExtB;\r
146                 }\r
147         }\r
148 \r
149 void AlnParams::LogMe() const\r
150         {\r
151         Log("AlnParams(%s)", GetType());\r
152         if (Is2())\r
153                 Log(" g=%.1f e=%.1f", -OpenA, -ExtA);\r
154         else if (Is4())\r
155                 Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -ExtA, -LOpenA, -LExtA);\r
156         else\r
157                 Log(\r
158 " gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f",\r
159                   OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB);\r
160         Log("\n");\r
161         }\r
162 \r
163 /***\r
164 Open/Ext format string is one or more:\r
165         [<flag><flag>...]<value>\r
166 \r
167 Value is (positive) penalty or * (disabled).\r
168 Flag is:\r
169         Q               Query.\r
170         T               Target sequence.\r
171         I               Internal gaps (defafault internal and terminal).\r
172         E               End gaps (default internal and terminal).\r
173         L               Left end.\r
174         R               Right end.\r
175 ***/\r
176 \r
177 static void ParseGapStr(const string &s,\r
178   float &QI, float &QL, float &QR,\r
179   float &TI, float &TL, float &TR)\r
180         {\r
181         if (s.empty())\r
182                 return;\r
183 \r
184         bool Q = false;\r
185         bool T = false;\r
186         bool I = false;\r
187         bool E = false;\r
188         bool L = false;\r
189         bool R = false;\r
190 \r
191         const unsigned K = SIZE(s);\r
192         unsigned Dec = 0;\r
193         float Value = FLT_MAX;\r
194         for (unsigned i = 0; i <= K; ++i)\r
195                 {\r
196                 char c = s.c_str()[i];\r
197                 if (c == 0 || c == '/')\r
198                         {\r
199                         if (Value == FLT_MAX)\r
200                                 Die("Invalid gap penalty string, missing penalty '%s'", s.c_str());\r
201                         if (!Q && !T && !I && !E && !L && !R)\r
202                                 {\r
203                                 Q = true;\r
204                                 T = true;\r
205                                 L = true;\r
206                                 R = true;\r
207                                 I = true;\r
208                                 }\r
209 \r
210                         if (!E && !I && !L && !R)\r
211                                 {\r
212                                 E = false;\r
213                                 I = true;\r
214                                 L = true;\r
215                                 R = true;\r
216                                 }\r
217 \r
218                         if (E)\r
219                                 {\r
220                                 if (L || R)\r
221                                         Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str());\r
222                                 L = true;\r
223                                 R = true;\r
224                                 }\r
225 \r
226                         if (!Q && !T)\r
227                                 {\r
228                                 Q = true;\r
229                                 T = true;\r
230                                 }\r
231 \r
232                         if (Q && L)\r
233                                 QL = -Value;\r
234                         if (Q && R)\r
235                                 QR = -Value;\r
236                         if (Q && I)\r
237                                 QI = -Value;\r
238                         if (T && L)\r
239                                 TL = -Value;\r
240                         if (T && R)\r
241                                 TR = -Value;\r
242                         if (T && I)\r
243                                 TI = -Value;\r
244                         \r
245                         Value = FLT_MAX;\r
246                         Dec = 0;\r
247                         Q = false;\r
248                         T = false;\r
249                         I = false;\r
250                         E = false;\r
251                         L = false;\r
252                         R = false;\r
253                         }\r
254                 else if (c == '*')\r
255                         {\r
256                         if (Value != FLT_MAX)\r
257                                 Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
258                         Value = -MINUS_INFINITY;\r
259                         }\r
260                 else if (isdigit(c))\r
261                         {\r
262                         if (Value == -MINUS_INFINITY)\r
263                                 Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
264                         if (Value == FLT_MAX)\r
265                                 Value = 0.0;\r
266                         if (Dec > 0)\r
267                                 {\r
268                                 Dec *= 10;\r
269                                 Value += float(c - '0')/Dec;\r
270                                 }\r
271                         else\r
272                                 Value = Value*10 + (c - '0');\r
273                         }\r
274                 else if (c == '.')\r
275                         {\r
276                         if (Dec > 0)\r
277                                 Die("Invalid gap penalty (two decimal points) '%s'", s.c_str());\r
278                         Dec = 1;\r
279                         }\r
280                 else\r
281                         {\r
282                         switch (c)\r
283                                 {\r
284                         case 'Q':\r
285                                 Q = true;\r
286                                 break;\r
287                         case 'T':\r
288                                 T = true;\r
289                                 break;\r
290                         case 'I':\r
291                                 I = true;\r
292                                 break;\r
293                         case 'L':\r
294                                 L = true;\r
295                                 break;\r
296                         case 'R':\r
297                                 R = true;\r
298                                 break;\r
299                         case 'E':\r
300                                 E = true;\r
301                                 break;\r
302                         default:\r
303                                 Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str());\r
304                                 }\r
305                         }\r
306                 }\r
307         }\r
308 \r
309 void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr)\r
310         {\r
311         ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB);\r
312         ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB);\r
313         }\r
314 \r
315 void AlnParams::SetMxFromCmdLine(bool IsNucleo)\r
316         {\r
317         if (IsNucleo)\r
318                 SetNucSubstMx(opt_match, opt_mismatch);\r
319         else\r
320                 {\r
321                 if (opt_matrix == "")\r
322                         {\r
323                         SubstMxName = "BLOSUM62";\r
324                         SetBLOSUM62();\r
325                         }\r
326                 else\r
327                         {\r
328                         ReadSubstMx(opt_matrix, g_SubstMxf);\r
329                         g_SubstMx = g_SubstMxf.GetData();\r
330                         g_SubstMxf.LogMe();\r
331                         SubstMxName = opt_matrix.c_str();\r
332                         }\r
333                 }\r
334         SubstMx = g_SubstMx;\r
335         asserta(SubstMx != 0);\r
336         }\r
337 \r
338 void AlnParams::InitFromCmdLine(bool IsNucleo)\r
339         {\r
340         Clear();\r
341         Nucleo = IsNucleo;\r
342         NucleoSet = true;\r
343 \r
344         SetMxFromCmdLine(IsNucleo);\r
345 \r
346 // Local\r
347         if (optset_lopen || optset_lext)\r
348                 {\r
349                 if (!optset_lopen || !optset_lext)\r
350                         Die("Must set both --lopen and --lext");\r
351                 if (opt_lopen < 0.0 || opt_lext < 0.0)\r
352                         Die("Invalid --lopen/--lext, gap penalties must be >= 0");\r
353                 SetLocal(float(-opt_lopen), float(-opt_lext));\r
354                 }\r
355         else\r
356                 {\r
357         // Same penalties, if-statement to note could differ.\r
358                 if (IsNucleo)\r
359                         SetLocal(-10.0f, -1.0f);\r
360                 else\r
361                         SetLocal(-10.0f, -1.0f);\r
362                 }\r
363 \r
364 // Global\r
365         if (IsNucleo)\r
366                 Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5);\r
367         else\r
368                 Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5);\r
369         SetPenalties(opt_gapopen, opt_gapext);\r
370         }\r
371 \r
372 float AlnParams::GetLocalOpen() const\r
373         {\r
374         return LocalOpen;\r
375         }\r
376 \r
377 float AlnParams::GetLocalExt() const\r
378         {\r
379         return LocalExt;\r
380         }\r
381 \r
382 bool AlnParams::GetIsNucleo() const\r
383         {\r
384         asserta(NucleoSet);\r
385         return Nucleo;\r
386         }\r
387 \r
388 unsigned GetWindexWordLength(bool Nucleo)\r
389         {\r
390         if (optset_w)\r
391                 return opt_w;\r
392 \r
393         if (Nucleo)\r
394                 return 8;\r
395         else\r
396                 return 5;\r
397         }\r
398 \r
399 #if     TEST\r
400 static void Test1(const string &os, const string &es)\r
401         {\r
402         AlnParams AP;\r
403         Log("\n");\r
404         Log("OpenStr %s\n", os.c_str());\r
405         Log(" ExtStr %s\n", es.c_str());\r
406         AP.SetPenalties(os, es);\r
407         AP.LogMe();\r
408         }\r
409 \r
410 void TestGapStr()\r
411         {\r
412         Test1("17I/0.5E", "1I/0.5E");\r
413         Test1("17I/0.5L/0.4R", "1Q/2T");\r
414         Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI");\r
415         }\r
416 #endif // TEST\r