X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fdist_dna.c;h=7174af6d013d961c80dba33b453c78ad5e3c8bfe;hb=1ad48c7a70983375138a6500372db588c8a3a134;hp=2aa6da6be9531dd1c24ba7d5c90e66c9afe27cc1;hpb=8883719a9139685f26d4c0c4cb26872d5f6d0d96;p=ape.git diff --git a/src/dist_dna.c b/src/dist_dna.c index 2aa6da6..7174af6 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,6 +1,6 @@ -/* dist_dna.c 2009-09-16 */ +/* dist_dna.c 2012-01-10 */ -/* Copyright 2005-2008 Emmanuel Paradis +/* Copyright 2005-2012 Emmanuel Paradis /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -64,21 +64,49 @@ double detFourByFour(double *x) if (KnownBase(x[s1]) && KnownBase(x[s2])) L++;\ else continue; -void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled) +#define COUNT_TS_TV\ + if (SameBase(x[s1], x[s2])) continue;\ + Nd++;\ + if (IsPurine(x[s1]) && IsPurine(x[s2])) {\ + Ns++;\ + continue;\ + }\ + if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++; + +void distDNA_TsTv(unsigned char *x, int *n, int *s, double *d, int Ts, int pairdel) { - int i1, i2, s1, s2, target, Nd; + int i1, i2, s1, s2, target, Nd, Ns; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = Ns = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { + if (pairdel && !(KnownBase(x[s1]) && KnownBase(x[s2]))) continue; + COUNT_TS_TV + } + if (Ts) d[target] = ((double) Ns); /* output number of transitions */ + else d[target] = ((double) Nd - Ns); /* output number of transversions */ + target++; + } + } +} - target = 0; - for (i1 = 1; i1 < *n; i1++) { - for (i2 = i1 + 1; i2 <= *n; i2++) { - Nd = 0; - for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) - if (DifferentBase(x[s1], x[s2])) Nd++; - if (scaled) d[target] = ((double) Nd / *s); - else d[target] = ((double) Nd); - target++; +void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled) +{ + int i1, i2, s1, s2, target, Nd; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) + if (DifferentBase(x[s1], x[s2])) Nd++; + if (scaled) d[target] = ((double) Nd / *s); + else d[target] = ((double) Nd); + target++; + } } - } } void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d, int scaled) @@ -150,15 +178,6 @@ void distDNA_JC69_pairdel(unsigned char *x, int *n, int *s, double *d, } } -#define COUNT_TS_TV\ - if (SameBase(x[s1], x[s2])) continue;\ - Nd++;\ - if (IsPurine(x[s1]) && IsPurine(x[s2])) {\ - Ns++;\ - continue;\ - }\ - if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++; - #define COMPUTE_DIST_K80\ P = ((double) Ns/L);\ Q = ((double) (Nd - Ns)/L);\ @@ -350,14 +369,14 @@ void distDNA_K81_pairdel(unsigned char *x, int *n, int *s, double *d, #define COMPUTE_DIST_F84\ P = ((double) Ns/L);\ Q = ((double) (Nd - Ns)/L);\ - d[target] = -2*A*log(1 - (P/(2*A) - (A - B)*Q/(2*A*C))) + 2*(A - B - C)*log(1 - Q/(2*C));\ + d[target] = -2*A*log(1 - P/(2*A) - (A - B)*Q/(2*A*C)) + 2*(A - B - C)*log(1 - Q/(2*C));\ if (*variance) {\ t1 = A*C;\ t2 = C*P/2;\ t3 = (A - B)*Q/2;\ a = t1/(t1 - t2 - t3);\ b = A*(A - B)/(t1 - t2 - t3) - (A - B - C)/(C - Q/2);\ - var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/2;\ + var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/L;\ } void distDNA_F84(unsigned char *x, int *n, int *s, double *d, @@ -516,8 +535,8 @@ void distDNA_TN93(unsigned char *x, int *n, int *s, double *d, double *BF, int *variance, double *var, int *gamma, double *alpha) { - int i1, i2, k, Nd, Ns1, Ns2, L, target, s1, s2; - double P1, P2, Q, A, B, C, gR, gY, k1, k2, k3, k4, w1, w2, w3, c1, c2, c3, c4, b; + int i1, i2, Nd, Ns1, Ns2, L, target, s1, s2; + double P1, P2, Q, gR, gY, k1, k2, k3, k4, w1, w2, w3, c1, c2, c3, c4, b; L = *s; @@ -540,8 +559,8 @@ void distDNA_TN93_pairdel(unsigned char *x, int *n, int *s, double *d, double *BF, int *variance, double *var, int *gamma, double *alpha) { - int i1, i2, k, Nd, Ns1, Ns2, L, target, s1, s2; - double P1, P2, Q, A, B, C, gR, gY, k1, k2, k3, k4, w1, w2, w3, c1, c2, c3, c4, b; + int i1, i2, Nd, Ns1, Ns2, L, target, s1, s2; + double P1, P2, Q, gR, gY, k1, k2, k3, k4, w1, w2, w3, c1, c2, c3, c4, b; PREPARE_BF_TN93 @@ -723,7 +742,7 @@ void distDNA_GG95_pairdel(unsigned char *x, int *n, int *s, double *d, #define COMPUTE_DIST_LogDet\ for (k = 0; k < 16; k++) Ftab[k] = ((double) Ntab[k]/L);\ - d[target] = (-log(detFourByFour(Ftab))/4 - LN4)/4;\ + d[target] = -log(detFourByFour(Ftab))/4 - LN4;\ if (*variance) {\ /* For the inversion, we first make U an identity matrix */\ for (k = 1; k < 15; k++) U[k] = 0;\ @@ -793,8 +812,8 @@ void distDNA_BH87(unsigned char *x, int *n, int *s, double *d, currently not available. */ { - int i1, i2, k, kb, s1, s2, m, Ntab[16], ROWsums[4], ndim = 4, info, ipiv[16]; - double P12[16], P21[16], U[16]; + int i1, i2, k, kb, s1, s2, m, Ntab[16], ROWsums[4]; + double P12[16], P21[16]; for (i1 = 1; i1 < *n; i1++) { for (i2 = i1 + 1; i2 <= *n; i2++) { @@ -951,23 +970,79 @@ void distDNA_ParaLin_pairdel(unsigned char *x, int *n, int *s, double *d, } } -void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) +/* void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) */ +/* { */ +/* int i, m; */ + +/* m = 0; */ +/* for (i = 0; i < *n; i++) { */ +/* if (KnownBase(x[i])) { */ +/* m++; */ +/* switch (x[i]) { */ +/* case 136 : BF[0]++; break; */ +/* case 40 : BF[1]++; break; */ +/* case 72 : BF[2]++; break; */ +/* case 24 : BF[3]++; break; */ +/* } */ +/* } */ +/* } */ +/* if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; */ +/* } */ + +/* void BaseProportion(unsigned char *x, int *n, double *BF) */ +/* { */ +/* int i; */ + +/* for (i = 0; i < *n; i++) { */ +/* switch (x[i]) { */ +/* case 136 : BF[0]++; break; */ +/* case 40 : BF[1]++; break; */ +/* case 72 : BF[2]++; break; */ +/* case 24 : BF[3]++; break; */ +/* case 192 : BF[4]++; break; */ +/* case 160 : BF[5]++; break; */ +/* case 144 : BF[6]++; break; */ +/* case 96 : BF[7]++; break; */ +/* case 80 : BF[8]++; break; */ +/* case 48 : BF[9]++; break; */ +/* case 224 : BF[10]++; break; */ +/* case 176 : BF[11]++; break; */ +/* case 208 : BF[12]++; break; */ +/* case 112 : BF[13]++; break; */ +/* case 240 : BF[14]++; break; */ +/* case 4 : BF[15]++; break; */ +/* case 2 : BF[16]++; break; */ +/* } */ +/* } */ +/* } */ + +/* a hash table is much faster than switch (2012-01-10) */ +void BaseProportion(unsigned char *x, int *n, double *BF) { - int i, m; - - m = 0; - for (i = 0; i < *n; i++) { - if (KnownBase(x[i])) { - m++; - switch (x[i]) { - case 136 : BF[0]++; break; - case 40 : BF[1]++; break; - case 72 : BF[2]++; break; - case 24 : BF[3]++; break; - } - } - } - if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; + int i; + double count[256]; + + memset(count, 0, 256*sizeof(double)); + + for (i = 0; i < *n; i++) count[x[i]]++; + + BF[0] = count[136]; + BF[1] = count[40]; + BF[2] = count[72]; + BF[3] = count[24]; + BF[4] = count[192]; + BF[5] = count[160]; + BF[6] = count[144]; + BF[7] = count[96]; + BF[8] = count[80]; + BF[9] = count[48]; + BF[10] = count[224]; + BF[11] = count[176]; + BF[12] = count[208]; + BF[13] = count[112]; + BF[14] = count[240]; + BF[15] = count[4]; + BF[16] = count[2]; } void SegSites(unsigned char *x, int *n, int *s, int *seg) @@ -1013,39 +1088,33 @@ void dist_dna(unsigned char *x, int *n, int *s, int *model, double *d, switch (*model) { case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 1); else distDNA_raw(x, n, s, d, 1); break; - case 2 : if (pairdel) distDNA_JC69_pairdel(x, n, s, d, variance, var, gamma, alpha); else distDNA_JC69(x, n, s, d, variance, var, gamma, alpha); break; - case 3 : if (pairdel) distDNA_K80_pairdel(x, n, s, d, variance, var, gamma, alpha); else distDNA_K80(x, n, s, d, variance, var, gamma, alpha); break; - case 4 : if (pairdel) distDNA_F81_pairdel(x, n, s, d, BF, variance, var, gamma, alpha); else distDNA_F81(x, n, s, d, BF, variance, var, gamma, alpha); break; - case 5 : if (pairdel) distDNA_K81_pairdel(x, n, s, d, variance, var); else distDNA_K81(x, n, s, d, variance, var); break; - case 6 : if (pairdel) distDNA_F84_pairdel(x, n, s, d, BF, variance, var); else distDNA_F84(x, n, s, d, BF, variance, var); break; - case 7 : if (pairdel) distDNA_T92_pairdel(x, n, s, d, BF, variance, var); else distDNA_T92(x, n, s, d, BF, variance, var); break; - case 8 : if (pairdel) distDNA_TN93_pairdel(x, n, s, d, BF, variance, var, gamma, alpha); else distDNA_TN93(x, n, s, d, BF, variance, var, gamma, alpha); break; - case 9 : if (pairdel) distDNA_GG95_pairdel(x, n, s, d, variance, var); else distDNA_GG95(x, n, s, d, variance, var); break; - case 10 : if (pairdel) distDNA_LogDet_pairdel(x, n, s, d, variance, var); else distDNA_LogDet(x, n, s, d, variance, var); break; - case 11 : distDNA_BH87(x, n, s, d, variance, var); break; - case 12 : if (pairdel) distDNA_ParaLin_pairdel(x, n, s, d, variance, var); else distDNA_ParaLin(x, n, s, d, variance, var); break; case 13 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 0); else distDNA_raw(x, n, s, d, 0); break; + case 14 : if (pairdel) distDNA_TsTv(x, n, s, d, 1, 1); + else distDNA_TsTv(x, n, s, d, 1, 0); break; + case 15 : if (pairdel) distDNA_TsTv(x, n, s, d, 0, 1); + else distDNA_TsTv(x, n, s, d, 0, 0); break; + } }