]> git.donarmstrong.com Git - mothur.git/blob - uchime_src/fractid.cpp
changes while testing
[mothur.git] / uchime_src / fractid.cpp
1 #include "myutils.h"\r
2 #include "alpha.h"\r
3 \r
4 //unsigned g_MaxL = 0;\r
5 \r
6 static bool *g_IsChar = g_IsAminoChar;\r
7 \r
8 // Term gaps allowed in query (A) only\r
9 static double GetFractIdGivenPathDerep(const byte *A, const byte *B, const char *Path,\r
10   char *ptrDesc)\r
11         {\r
12         if (*Path == 'D')\r
13                 {\r
14                 if (ptrDesc != 0)\r
15                         sprintf(ptrDesc, "(term gap in Query)");\r
16                 return 0;\r
17                 }\r
18 \r
19         const char *LastM = 0;\r
20         for (const char *p = Path; *p; ++p)\r
21                 if (*p == 'M')\r
22                         LastM = p;\r
23 \r
24         unsigned PosA = 0;\r
25         unsigned PosB = 0;\r
26         unsigned Ids = 0;\r
27         unsigned Diffs = 0;\r
28         unsigned Cols = 0;\r
29         for (const char *p = Path; *p && p != LastM; ++p)\r
30                 {\r
31                 ++Cols;\r
32                 char c = *p;\r
33                 if (c == 'M')\r
34                         {\r
35                         byte a = toupper(A[PosA]);\r
36                         byte b = toupper(B[PosB]);\r
37                         if (g_IsChar[a] && g_IsChar[b])\r
38                                 {\r
39                                 if (a == b)\r
40                                         ++Ids;\r
41                                 else\r
42                                         ++Diffs;\r
43                                 }\r
44                         else\r
45                                 --Cols;\r
46                         }\r
47                 if (c == 'D' || c == 'I')\r
48                         ++Diffs;\r
49                 if (c == 'M' || c == 'D')\r
50                         ++PosA;\r
51                 if (c == 'M' || c == 'I')\r
52                         ++PosB;\r
53                 }\r
54 \r
55         double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
56         if (ptrDesc != 0)\r
57                 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
58         return FractId;\r
59         }\r
60 \r
61 static double GetFractIdGivenPathAllDiffs(const byte *A, const byte *B, const char *Path,\r
62   char *ptrDesc)\r
63         {\r
64         unsigned PosA = 0;\r
65         unsigned PosB = 0;\r
66         unsigned Ids = 0;\r
67         unsigned Diffs = 0;\r
68         unsigned Cols = 0;\r
69         for (const char *p = Path; *p; ++p)\r
70                 {\r
71                 ++Cols;\r
72                 char c = *p;\r
73                 if (c == 'M')\r
74                         {\r
75                         byte a = toupper(A[PosA]);\r
76                         byte b = toupper(B[PosB]);\r
77                         if (g_IsChar[a] && g_IsChar[b])\r
78                                 {\r
79                                 if (a == b)\r
80                                         ++Ids;\r
81                                 else\r
82                                         ++Diffs;\r
83                                 }\r
84                         else\r
85                                 --Cols;\r
86                         }\r
87                 if (c == 'D' || c == 'I')\r
88                         ++Diffs;\r
89                 if (c == 'M' || c == 'D')\r
90                         ++PosA;\r
91                 if (c == 'M' || c == 'I')\r
92                         ++PosB;\r
93                 }\r
94 \r
95         double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
96         if (ptrDesc != 0)\r
97                 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
98         return FractId;\r
99         }\r
100 \r
101 static double GetFractIdGivenPathInternalDiffs(const byte *A, const byte *B,\r
102   const char *Path, char *ptrDesc)\r
103         {\r
104         unsigned i = 0;\r
105         unsigned FirstM = UINT_MAX;\r
106         unsigned LastM = UINT_MAX;\r
107         for (const char *p = Path; *p; ++p)\r
108                 {\r
109                 if (*p == 'M')\r
110                         {\r
111                         if (FirstM == UINT_MAX)\r
112                                 FirstM = i;\r
113                         LastM = i;\r
114                         }\r
115                 ++i;\r
116                 }\r
117         if (FirstM == UINT_MAX)\r
118                 {\r
119                 if (ptrDesc != 0)\r
120                         strcpy(ptrDesc, "(no matches)");\r
121                 return 0.0;\r
122                 }\r
123 \r
124         unsigned PosA = 0;\r
125         unsigned PosB = 0;\r
126         unsigned Ids = 0;\r
127         unsigned Diffs = 0;\r
128         unsigned Cols = 0;\r
129         for (unsigned i = 0; i < FirstM; ++i)\r
130                 {\r
131                 char c = Path[i];\r
132                 if (c == 'M' || c == 'D')\r
133                         ++PosA;\r
134                 if (c == 'M' || c == 'I')\r
135                         ++PosB;\r
136                 }\r
137 \r
138         for (unsigned i = FirstM; i <= LastM; ++i)\r
139                 {\r
140                 ++Cols;\r
141                 char c = Path[i];\r
142                 if (c == 'M')\r
143                         {\r
144                         byte a = toupper(A[PosA]);\r
145                         byte b = toupper(B[PosB]);\r
146                         if (g_IsChar[a] && g_IsChar[b])\r
147                                 {\r
148                                 if (a == b)\r
149                                         ++Ids;\r
150                                 else\r
151                                         ++Diffs;\r
152                                 }\r
153                         else\r
154                                 --Cols;\r
155                         }\r
156                 if (c == 'D' || c == 'I')\r
157                         ++Diffs;\r
158                 if (c == 'M' || c == 'D')\r
159                         ++PosA;\r
160                 if (c == 'M' || c == 'I')\r
161                         ++PosB;\r
162                 }\r
163 \r
164         double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
165         if (ptrDesc != 0)\r
166                 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
167         return FractId;\r
168         }\r
169 \r
170 static double GetFractIdGivenPathMBL(const byte *A, const byte *B, const char *Path,\r
171   char *ptrDesc)\r
172         {\r
173         unsigned PosA = 0;\r
174         unsigned PosB = 0;\r
175         unsigned Mismatches = 0;\r
176         unsigned Gaps = 0;\r
177         for (const char *p = Path; *p; ++p)\r
178                 {\r
179                 char c = *p;\r
180                 if (c == 'M' && toupper(A[PosA]) != toupper(B[PosB]))\r
181                         ++Mismatches;\r
182                 if (c == 'D' || c == 'I' && (p == Path || p[-1] == 'M'))\r
183                         ++Gaps;\r
184                 if (c == 'M' || c == 'D')\r
185                         ++PosA;\r
186                 if (c == 'M' || c == 'I')\r
187                         ++PosB;\r
188                 }\r
189         unsigned Diffs = Gaps + Mismatches;\r
190         double FractDiffs = (PosB == 0 ? 0.0 : double(Diffs)/double(PosB));\r
191         if (ptrDesc != 0)\r
192                 sprintf(ptrDesc, "Gap opens %u, Id=1 - [(diffs=%u)/(target_length=%u)]",\r
193                   Gaps, Diffs, PosB);\r
194         double FractId = 1.0 - FractDiffs;\r
195         if (FractId < 0.0)\r
196                 return 0.0;\r
197         return FractId;\r
198         }\r
199 \r
200 static double GetFractIdGivenPathBLAST(const byte *A, const byte *B, const char *Path,\r
201   char *ptrDesc)\r
202         {\r
203         unsigned PosA = 0;\r
204         unsigned PosB = 0;\r
205         unsigned Ids = 0;\r
206         unsigned Wilds = 0;\r
207         unsigned Cols = 0;\r
208         for (const char *p = Path; *p; ++p)\r
209                 {\r
210                 ++Cols;\r
211                 char c = *p;\r
212                 if (c == 'M')\r
213                         {\r
214                         byte a = toupper(A[PosA]);\r
215                         byte b = toupper(B[PosB]);\r
216                         if (g_IsChar[a] && g_IsChar[b])\r
217                                 {\r
218                                 if (a == b)\r
219                                         ++Ids;\r
220                                 }\r
221                         else\r
222                                 ++Wilds;\r
223                         }\r
224                 if (c == 'M' || c == 'D')\r
225                         ++PosA;\r
226                 if (c == 'M' || c == 'I')\r
227                         ++PosB;\r
228                 }\r
229         asserta(Cols >= Wilds);\r
230         Cols -= Wilds;\r
231         double FractId = Cols == 0 ? 0.0f : float(Ids)/float(Cols);\r
232         if (ptrDesc != 0)\r
233                 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
234         return FractId;\r
235         }\r
236 \r
237 static double GetFractIdGivenPathDefault(const byte *A, const byte *B, const char *Path,\r
238   char *ptrDesc)\r
239         {\r
240         unsigned PosA = 0;\r
241         unsigned PosB = 0;\r
242         unsigned Ids = 0;\r
243         unsigned Wilds = 0;\r
244         for (const char *p = Path; *p; ++p)\r
245                 {\r
246                 char c = *p;\r
247                 if (c == 'M')\r
248                         {\r
249                         byte a = toupper(A[PosA]);\r
250                         byte b = toupper(B[PosB]);\r
251                         if (g_IsChar[a] && g_IsChar[b])\r
252                                 {\r
253                                 if (a == b)\r
254                                         ++Ids;\r
255                                 }\r
256                         else\r
257                                 ++Wilds;\r
258                         }\r
259                 if (c == 'M' || c == 'D')\r
260                         ++PosA;\r
261                 if (c == 'M' || c == 'I')\r
262                         ++PosB;\r
263                 }\r
264         unsigned MinLen = min(PosA, PosB) - Wilds;\r
265         double FractId = (MinLen == 0 ? 0.0 : double(Ids)/double(MinLen));\r
266         if (ptrDesc != 0)\r
267                 sprintf(ptrDesc, "(ids=%u/shorter_length=%u)", Ids, MinLen);\r
268         return FractId;\r
269         }\r
270 \r
271 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
272   bool Nucleo, char *ptrDesc, unsigned IdDef)\r
273         {\r
274         if (Nucleo)\r
275                 g_IsChar = g_IsACGTU;\r
276         else\r
277                 g_IsChar = g_IsAminoChar;\r
278 \r
279         if (Path == 0)\r
280                 {\r
281                 if (ptrDesc != 0)\r
282                         strcpy(ptrDesc, "(NULL path)");\r
283                 return 0.0;\r
284                 }\r
285 \r
286         unsigned ColCount = (unsigned) strlen(Path);\r
287         if (ColCount == 0)\r
288                 return 0.0;\r
289 \r
290         if (opt_leftjust)\r
291                 {\r
292                 if (Path[0] != 'M' || Path[ColCount-1] == 'D')\r
293                         {\r
294                         if (ptrDesc != 0)\r
295                                 strcpy(ptrDesc, "(leftjust)");\r
296                         return 0.0;\r
297                         }\r
298                 }\r
299 \r
300         if (opt_rightjust)\r
301                 {\r
302                 if (Path[0] == 'D' || Path[ColCount-1] != 'M')\r
303                         {\r
304                         if (ptrDesc != 0)\r
305                                 strcpy(ptrDesc, "(rightjust)");\r
306                         return 0.0;\r
307                         }\r
308                 }\r
309 \r
310         double FractId = 0.0;\r
311         //if (opt_idprefix > 0)\r
312         //      {\r
313         //      for (unsigned i = 0; i < opt_idprefix; ++i)\r
314         //              {\r
315         //              char c = Path[i];\r
316         //              if (c != 'M' || toupper(A[i]) != toupper(B[i]))\r
317         //                      {\r
318         //                      if (ptrDesc != 0)\r
319         //                              sprintf(ptrDesc, "Prefix ids %u < idprefix(%u)",\r
320         //                                i, opt_idprefix);\r
321         //                      return 0.0;\r
322         //                      }\r
323         //              }\r
324         //      }\r
325 \r
326         //if (opt_idsuffix > 0)\r
327         //      {\r
328         //      unsigned Cols = strlen(Path);\r
329         //      for (unsigned i = 0; i < opt_idsuffix && i > Cols; ++i)\r
330         //              {\r
331         //              unsigned k = Cols - 1 - i;\r
332         //              char c = Path[k];\r
333         //              if (c != 'M' || toupper(A[k]) != toupper(B[k]))\r
334         //                      {\r
335         //                      if (ptrDesc != 0)\r
336         //                              sprintf(ptrDesc, "Suffix ids %u < idsuffix(%u)",\r
337         //                                i, opt_idsuffix);\r
338         //                      return 0.0;\r
339         //                      }\r
340         //              }\r
341         //      }\r
342 \r
343         if (opt_maxqgap > 0 || opt_maxtgap > 0)\r
344                 {\r
345                 unsigned L = 0;\r
346                 const char *LastM = 0;\r
347                 for (const char *p = Path; *p; ++p)\r
348                         if (*p == 'M')\r
349                                 LastM = p;\r
350 \r
351 //              g_MaxL = 0;\r
352                 for (const char *p = Path; *p && p != LastM; ++p)\r
353                         {\r
354                         char c = *p;\r
355                         switch (c)\r
356                                 {\r
357                         case 'M':\r
358                                 if (L > 0)\r
359                                         {\r
360                                         if (p[-1] == 'D')\r
361                                                 {\r
362                                                 if (L > opt_maxtgap)\r
363                                                         {\r
364                                                         if (ptrDesc != 0)\r
365                                                                 sprintf(ptrDesc, "(maxtgap)");\r
366                                                         return 0.0;\r
367                                                         }\r
368                                                 }\r
369                                         else if (p[-1] == 'I')\r
370                                                 {\r
371                                                 if (L > opt_maxqgap)\r
372                                                         {\r
373                                                         if (ptrDesc != 0)\r
374                                                                 sprintf(ptrDesc, "(maxqgap)");\r
375                                                         return 0.0;\r
376                                                         }\r
377                                                 }\r
378                                         else\r
379                                                 asserta(false);\r
380                                         }\r
381                                 L = 0;\r
382                                 break;\r
383 \r
384                         case 'D':\r
385                         case 'I':\r
386                                 ++L;\r
387                                 //if (L > g_MaxL)\r
388                                 //      g_MaxL = L;\r
389                                 break;\r
390 \r
391                         default:\r
392                                 asserta(false);\r
393                                 }\r
394                         }\r
395                 }\r
396 \r
397         switch (IdDef)\r
398                 {\r
399         case 0:\r
400                 FractId = GetFractIdGivenPathDefault(A, B, Path, ptrDesc);\r
401                 break;\r
402 \r
403         case 1:\r
404                 FractId = GetFractIdGivenPathAllDiffs(A, B, Path, ptrDesc);\r
405                 break;\r
406 \r
407         case 2:\r
408                 FractId = GetFractIdGivenPathInternalDiffs(A, B, Path, ptrDesc);\r
409                 break;\r
410 \r
411         case 3:\r
412                 FractId = GetFractIdGivenPathMBL(A, B, Path, ptrDesc);\r
413                 break;\r
414 \r
415         case 4:\r
416                 FractId = GetFractIdGivenPathBLAST(A, B, Path, ptrDesc);\r
417                 break;\r
418 \r
419         case 5:\r
420                 FractId = GetFractIdGivenPathDerep(A, B, Path, ptrDesc);\r
421                 break;\r
422 \r
423         default:\r
424                 Die("--iddef %u invalid", opt_iddef);\r
425                 }\r
426 \r
427         return FractId;\r
428         }\r
429 \r
430 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
431   bool Nucleo, char *ptrDesc)\r
432         {\r
433         return GetFractIdGivenPath(A, B, Path, Nucleo, ptrDesc, opt_iddef);\r
434         }\r
435 \r
436 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo)\r
437         {\r
438         return GetFractIdGivenPath(A, B, Path, Nucleo, (char *) 0);\r
439         }\r
440 \r
441 double GetFractIdGivenPath(const byte *A, const byte *B, const string &Path)\r
442         {\r
443         return GetFractIdGivenPath(A, B, Path.c_str(), true);\r
444         }\r
445 \r
446 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path)\r
447         {\r
448         return GetFractIdGivenPath(A, B, Path, true);\r
449         }\r