X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fdist_dna.c;h=9057b9be5bfe8190c41d08063496728e910fccb6;hb=a03a8c554a6fde0dc4313688e3248bfae2e521e4;hp=d56c794334beee45f113cef278d5c38dea5a5147;hpb=f3426364b40c7c0e6aadf6ea2690716425abdfc9;p=ape.git diff --git a/src/dist_dna.c b/src/dist_dna.c index d56c794..9057b9b 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,6 +1,6 @@ -/* dist_dna.c 2008-12-22 */ +/* dist_dna.c 2010-03-16 */ -/* Copyright 2005-2008 Emmanuel Paradis +/* Copyright 2005-2010 Emmanuel Paradis /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -12,7 +12,7 @@ #define LN4 1.386294361119890572454 /* returns 8 if the base is known surely, 0 otherwise */ -#define KnownBase(a) a & 8 +#define KnownBase(a) (a & 8) /* returns 1 if the base is adenine surely, 0 otherwise */ #define IsAdenine(a) a == 136 @@ -350,14 +350,14 @@ void distDNA_K81_pairdel(unsigned char *x, int *n, int *s, double *d, #define COMPUTE_DIST_F84\ P = ((double) Ns/L);\ Q = ((double) (Nd - Ns)/L);\ - d[target] = -2*A*log(1 - (P/(2*A) - (A - B)*Q/(2*A*C))) + 2*(A - B - C)*log(1 - Q/(2*C));\ + d[target] = -2*A*log(1 - P/(2*A) - (A - B)*Q/(2*A*C)) + 2*(A - B - C)*log(1 - Q/(2*C));\ if (*variance) {\ t1 = A*C;\ t2 = C*P/2;\ t3 = (A - B)*Q/2;\ a = t1/(t1 - t2 - t3);\ b = A*(A - B)/(t1 - t2 - t3) - (A - B - C)/(C - Q/2);\ - var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/2;\ + var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/L;\ } void distDNA_F84(unsigned char *x, int *n, int *s, double *d, @@ -800,6 +800,67 @@ void distDNA_BH87(unsigned char *x, int *n, int *s, double *d, for (i2 = i1 + 1; i2 <= *n; i2++) { for (k = 0; k < 16; k++) Ntab[k] = 0; for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { +#define PREPARE_BF_F84\ + A = (BF[0]*BF[2])/(BF[0] + BF[2]) + (BF[1]*BF[3])/(BF[1] + BF[3]);\ + B = BF[0]*BF[2] + BF[1]*BF[3];\ + C = (BF[0] + BF[2])*(BF[1] + BF[3]); + +#define COMPUTE_DIST_F84\ + P = ((double) Ns/L);\ + Q = ((double) (Nd - Ns)/L);\ + d[target] = -2*A*log(1 - P/(2*A) - (A - B)*Q/(2*A*C)) + 2*(A - B - C)*log(1 - Q/(2*C));\ + if (*variance) {\ + t1 = A*C;\ + t2 = C*P/2;\ + t3 = (A - B)*Q/2;\ + a = t1/(t1 - t2 - t3);\ + b = A*(A - B)/(t1 - t2 - t3) - (A - B - C)/(C - Q/2);\ + var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/2;\ + } + +void distDNA_F84(unsigned char *x, int *n, int *s, double *d, + double *BF, int *variance, double *var) +{ + int i1, i2, Nd, Ns, L, target, s1, s2; + double P, Q, A, B, C, a, b, t1, t2, t3; + + PREPARE_BF_F84 + L = *s; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = Ns = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { + COUNT_TS_TV + } + COMPUTE_DIST_F84 + target++; + } + } +} + +void distDNA_F84_pairdel(unsigned char *x, int *n, int *s, double *d, + double *BF, int *variance, double *var) +{ + int i1, i2, Nd, Ns, L, target, s1, s2; + double P, Q, A, B, C, a, b, t1, t2, t3; + + PREPARE_BF_F84 + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = Ns = L = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { + CHECK_PAIRWISE_DELETION + COUNT_TS_TV + } + COMPUTE_DIST_F84 + target++; + } + } +} DO_CONTINGENCY_NUCLEOTIDES } @@ -951,7 +1012,7 @@ void distDNA_ParaLin_pairdel(unsigned char *x, int *n, int *s, double *d, } } -void BaseProportion(unsigned char *x, int *n, double *BF) +void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) { int i, m; @@ -967,7 +1028,7 @@ void BaseProportion(unsigned char *x, int *n, double *BF) } } } - for (i = 0; i < 4; i++) BF[i] /= m; + if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; } void SegSites(unsigned char *x, int *n, int *s, int *seg) @@ -981,7 +1042,7 @@ void SegSites(unsigned char *x, int *n, int *s, int *seg) basis = x[i]; i++; while (i < *n * (j + 1)) { - if (x[i] == basis) i++; + if (!KnownBase(x[i]) || x[i] == basis) i++; else { seg[j] = 1; break;