1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
\r
6 //unsigned g_MaxL = 0;
\r
8 static bool *g_IsChar = g_IsAminoChar;
\r
10 // Term gaps allowed in query (A) only
\r
11 static double GetFractIdGivenPathDerep(const byte *A, const byte *B, const char *Path,
\r
17 sprintf(ptrDesc, "(term gap in Query)");
\r
21 const char *LastM = 0;
\r
22 for (const char *p = Path; *p; ++p)
\r
31 for (const char *p = Path; *p && p != LastM; ++p)
\r
37 byte a = toupper(A[PosA]);
\r
38 byte b = toupper(B[PosB]);
\r
39 if (g_IsChar[a] && g_IsChar[b])
\r
49 if (c == 'D' || c == 'I')
\r
51 if (c == 'M' || c == 'D')
\r
53 if (c == 'M' || c == 'I')
\r
57 double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));
\r
59 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);
\r
63 static double GetFractIdGivenPathAllDiffs(const byte *A, const byte *B, const char *Path,
\r
71 for (const char *p = Path; *p; ++p)
\r
77 byte a = toupper(A[PosA]);
\r
78 byte b = toupper(B[PosB]);
\r
79 if (g_IsChar[a] && g_IsChar[b])
\r
89 if (c == 'D' || c == 'I')
\r
91 if (c == 'M' || c == 'D')
\r
93 if (c == 'M' || c == 'I')
\r
97 double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));
\r
99 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);
\r
103 static double GetFractIdGivenPathInternalDiffs(const byte *A, const byte *B,
\r
104 const char *Path, char *ptrDesc)
\r
107 unsigned FirstM = UINT_MAX;
\r
108 unsigned LastM = UINT_MAX;
\r
109 for (const char *p = Path; *p; ++p)
\r
113 if (FirstM == UINT_MAX)
\r
119 if (FirstM == UINT_MAX)
\r
122 strcpy(ptrDesc, "(no matches)");
\r
129 unsigned Diffs = 0;
\r
131 for (unsigned i = 0; i < FirstM; ++i)
\r
134 if (c == 'M' || c == 'D')
\r
136 if (c == 'M' || c == 'I')
\r
140 for (unsigned i = FirstM; i <= LastM; ++i)
\r
146 byte a = toupper(A[PosA]);
\r
147 byte b = toupper(B[PosB]);
\r
148 if (g_IsChar[a] && g_IsChar[b])
\r
158 if (c == 'D' || c == 'I')
\r
160 if (c == 'M' || c == 'D')
\r
162 if (c == 'M' || c == 'I')
\r
166 double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));
\r
168 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);
\r
172 static double GetFractIdGivenPathMBL(const byte *A, const byte *B, const char *Path,
\r
177 unsigned Mismatches = 0;
\r
179 for (const char *p = Path; *p; ++p)
\r
182 if (c == 'M' && toupper(A[PosA]) != toupper(B[PosB]))
\r
184 if (c == 'D' || c == 'I' && (p == Path || p[-1] == 'M'))
\r
186 if (c == 'M' || c == 'D')
\r
188 if (c == 'M' || c == 'I')
\r
191 unsigned Diffs = Gaps + Mismatches;
\r
192 double FractDiffs = (PosB == 0 ? 0.0 : double(Diffs)/double(PosB));
\r
194 sprintf(ptrDesc, "Gap opens %u, Id=1 - [(diffs=%u)/(target_length=%u)]",
\r
195 Gaps, Diffs, PosB);
\r
196 double FractId = 1.0 - FractDiffs;
\r
202 static double GetFractIdGivenPathBLAST(const byte *A, const byte *B, const char *Path,
\r
208 unsigned Wilds = 0;
\r
210 for (const char *p = Path; *p; ++p)
\r
216 byte a = toupper(A[PosA]);
\r
217 byte b = toupper(B[PosB]);
\r
218 if (g_IsChar[a] && g_IsChar[b])
\r
226 if (c == 'M' || c == 'D')
\r
228 if (c == 'M' || c == 'I')
\r
231 asserta(Cols >= Wilds);
\r
233 double FractId = Cols == 0 ? 0.0f : float(Ids)/float(Cols);
\r
235 sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);
\r
239 static double GetFractIdGivenPathDefault(const byte *A, const byte *B, const char *Path,
\r
245 unsigned Wilds = 0;
\r
246 for (const char *p = Path; *p; ++p)
\r
251 byte a = toupper(A[PosA]);
\r
252 byte b = toupper(B[PosB]);
\r
253 if (g_IsChar[a] && g_IsChar[b])
\r
261 if (c == 'M' || c == 'D')
\r
263 if (c == 'M' || c == 'I')
\r
266 unsigned MinLen = min(PosA, PosB) - Wilds;
\r
267 double FractId = (MinLen == 0 ? 0.0 : double(Ids)/double(MinLen));
\r
269 sprintf(ptrDesc, "(ids=%u/shorter_length=%u)", Ids, MinLen);
\r
273 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,
\r
274 bool Nucleo, char *ptrDesc, unsigned IdDef)
\r
277 g_IsChar = g_IsACGTU;
\r
279 g_IsChar = g_IsAminoChar;
\r
284 strcpy(ptrDesc, "(NULL path)");
\r
288 unsigned ColCount = (unsigned) strlen(Path);
\r
294 if (Path[0] != 'M' || Path[ColCount-1] == 'D')
\r
297 strcpy(ptrDesc, "(leftjust)");
\r
304 if (Path[0] == 'D' || Path[ColCount-1] != 'M')
\r
307 strcpy(ptrDesc, "(rightjust)");
\r
312 double FractId = 0.0;
\r
313 //if (opt_idprefix > 0)
\r
315 // for (unsigned i = 0; i < opt_idprefix; ++i)
\r
317 // char c = Path[i];
\r
318 // if (c != 'M' || toupper(A[i]) != toupper(B[i]))
\r
320 // if (ptrDesc != 0)
\r
321 // sprintf(ptrDesc, "Prefix ids %u < idprefix(%u)",
\r
322 // i, opt_idprefix);
\r
328 //if (opt_idsuffix > 0)
\r
330 // unsigned Cols = strlen(Path);
\r
331 // for (unsigned i = 0; i < opt_idsuffix && i > Cols; ++i)
\r
333 // unsigned k = Cols - 1 - i;
\r
334 // char c = Path[k];
\r
335 // if (c != 'M' || toupper(A[k]) != toupper(B[k]))
\r
337 // if (ptrDesc != 0)
\r
338 // sprintf(ptrDesc, "Suffix ids %u < idsuffix(%u)",
\r
339 // i, opt_idsuffix);
\r
345 if (opt_maxqgap > 0 || opt_maxtgap > 0)
\r
348 const char *LastM = 0;
\r
349 for (const char *p = Path; *p; ++p)
\r
354 for (const char *p = Path; *p && p != LastM; ++p)
\r
364 if (L > opt_maxtgap)
\r
367 sprintf(ptrDesc, "(maxtgap)");
\r
371 else if (p[-1] == 'I')
\r
373 if (L > opt_maxqgap)
\r
376 sprintf(ptrDesc, "(maxqgap)");
\r
402 FractId = GetFractIdGivenPathDefault(A, B, Path, ptrDesc);
\r
406 FractId = GetFractIdGivenPathAllDiffs(A, B, Path, ptrDesc);
\r
410 FractId = GetFractIdGivenPathInternalDiffs(A, B, Path, ptrDesc);
\r
414 FractId = GetFractIdGivenPathMBL(A, B, Path, ptrDesc);
\r
418 FractId = GetFractIdGivenPathBLAST(A, B, Path, ptrDesc);
\r
422 FractId = GetFractIdGivenPathDerep(A, B, Path, ptrDesc);
\r
426 Die("--iddef %u invalid", opt_iddef);
\r
432 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,
\r
433 bool Nucleo, char *ptrDesc)
\r
435 return GetFractIdGivenPath(A, B, Path, Nucleo, ptrDesc, opt_iddef);
\r
438 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo)
\r
440 return GetFractIdGivenPath(A, B, Path, Nucleo, (char *) 0);
\r
443 double GetFractIdGivenPath(const byte *A, const byte *B, const string &Path)
\r
445 return GetFractIdGivenPath(A, B, Path.c_str(), true);
\r
448 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path)
\r
450 return GetFractIdGivenPath(A, B, Path, true);
\r