X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fdist_dna.c;h=b8797e955b0647d88ed547e68c22119c9a0112fd;hb=50470fdeb74ae1e235e61ed6acfae81dd51f655c;hp=9057b9be5bfe8190c41d08063496728e910fccb6;hpb=a03a8c554a6fde0dc4313688e3248bfae2e521e4;p=ape.git diff --git a/src/dist_dna.c b/src/dist_dna.c index 9057b9b..b8797e9 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,4 +1,4 @@ -/* dist_dna.c 2010-03-16 */ +/* dist_dna.c 2010-09-16 */ /* Copyright 2005-2010 Emmanuel Paradis @@ -64,21 +64,49 @@ double detFourByFour(double *x) if (KnownBase(x[s1]) && KnownBase(x[s2])) L++;\ else continue; -void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled) +#define COUNT_TS_TV\ + if (SameBase(x[s1], x[s2])) continue;\ + Nd++;\ + if (IsPurine(x[s1]) && IsPurine(x[s2])) {\ + Ns++;\ + continue;\ + }\ + if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++; + +void distDNA_TsTv(unsigned char *x, int *n, int *s, double *d, int Ts, int pairdel) { - int i1, i2, s1, s2, target, Nd; + int i1, i2, s1, s2, target, Nd, Ns; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = Ns = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { + if (pairdel && !(KnownBase(x[s1]) && KnownBase(x[s2]))) continue; + COUNT_TS_TV + } + if (Ts) d[target] = ((double) Ns); /* output number of transitions */ + else d[target] = ((double) Nd - Ns); /* output number of transversions */ + target++; + } + } +} - target = 0; - for (i1 = 1; i1 < *n; i1++) { - for (i2 = i1 + 1; i2 <= *n; i2++) { - Nd = 0; - for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) - if (DifferentBase(x[s1], x[s2])) Nd++; - if (scaled) d[target] = ((double) Nd / *s); - else d[target] = ((double) Nd); - target++; +void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled) +{ + int i1, i2, s1, s2, target, Nd; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) + if (DifferentBase(x[s1], x[s2])) Nd++; + if (scaled) d[target] = ((double) Nd / *s); + else d[target] = ((double) Nd); + target++; + } } - } } void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d, int scaled) @@ -150,15 +178,6 @@ void distDNA_JC69_pairdel(unsigned char *x, int *n, int *s, double *d, } } -#define COUNT_TS_TV\ - if (SameBase(x[s1], x[s2])) continue;\ - Nd++;\ - if (IsPurine(x[s1]) && IsPurine(x[s2])) {\ - Ns++;\ - continue;\ - }\ - if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++; - #define COMPUTE_DIST_K80\ P = ((double) Ns/L);\ Q = ((double) (Nd - Ns)/L);\ @@ -723,7 +742,7 @@ void distDNA_GG95_pairdel(unsigned char *x, int *n, int *s, double *d, #define COMPUTE_DIST_LogDet\ for (k = 0; k < 16; k++) Ftab[k] = ((double) Ntab[k]/L);\ - d[target] = (-log(detFourByFour(Ftab))/4 - LN4)/4;\ + d[target] = -log(detFourByFour(Ftab))/4 - LN4;\ if (*variance) {\ /* For the inversion, we first make U an identity matrix */\ for (k = 1; k < 15; k++) U[k] = 0;\ @@ -800,67 +819,6 @@ void distDNA_BH87(unsigned char *x, int *n, int *s, double *d, for (i2 = i1 + 1; i2 <= *n; i2++) { for (k = 0; k < 16; k++) Ntab[k] = 0; for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { -#define PREPARE_BF_F84\ - A = (BF[0]*BF[2])/(BF[0] + BF[2]) + (BF[1]*BF[3])/(BF[1] + BF[3]);\ - B = BF[0]*BF[2] + BF[1]*BF[3];\ - C = (BF[0] + BF[2])*(BF[1] + BF[3]); - -#define COMPUTE_DIST_F84\ - P = ((double) Ns/L);\ - Q = ((double) (Nd - Ns)/L);\ - d[target] = -2*A*log(1 - P/(2*A) - (A - B)*Q/(2*A*C)) + 2*(A - B - C)*log(1 - Q/(2*C));\ - if (*variance) {\ - t1 = A*C;\ - t2 = C*P/2;\ - t3 = (A - B)*Q/2;\ - a = t1/(t1 - t2 - t3);\ - b = A*(A - B)/(t1 - t2 - t3) - (A - B - C)/(C - Q/2);\ - var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/2;\ - } - -void distDNA_F84(unsigned char *x, int *n, int *s, double *d, - double *BF, int *variance, double *var) -{ - int i1, i2, Nd, Ns, L, target, s1, s2; - double P, Q, A, B, C, a, b, t1, t2, t3; - - PREPARE_BF_F84 - L = *s; - - target = 0; - for (i1 = 1; i1 < *n; i1++) { - for (i2 = i1 + 1; i2 <= *n; i2++) { - Nd = Ns = 0; - for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { - COUNT_TS_TV - } - COMPUTE_DIST_F84 - target++; - } - } -} - -void distDNA_F84_pairdel(unsigned char *x, int *n, int *s, double *d, - double *BF, int *variance, double *var) -{ - int i1, i2, Nd, Ns, L, target, s1, s2; - double P, Q, A, B, C, a, b, t1, t2, t3; - - PREPARE_BF_F84 - - target = 0; - for (i1 = 1; i1 < *n; i1++) { - for (i2 = i1 + 1; i2 <= *n; i2++) { - Nd = Ns = L = 0; - for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { - CHECK_PAIRWISE_DELETION - COUNT_TS_TV - } - COMPUTE_DIST_F84 - target++; - } - } -} DO_CONTINGENCY_NUCLEOTIDES } @@ -1074,39 +1032,33 @@ void dist_dna(unsigned char *x, int *n, int *s, int *model, double *d, switch (*model) { case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 1); else distDNA_raw(x, n, s, d, 1); break; - case 2 : if (pairdel) distDNA_JC69_pairdel(x, n, s, d, variance, var, gamma, alpha); else distDNA_JC69(x, n, s, d, variance, var, gamma, alpha); break; - case 3 : if (pairdel) distDNA_K80_pairdel(x, n, s, d, variance, var, gamma, alpha); else distDNA_K80(x, n, s, d, variance, var, gamma, alpha); break; - case 4 : if (pairdel) distDNA_F81_pairdel(x, n, s, d, BF, variance, var, gamma, alpha); else distDNA_F81(x, n, s, d, BF, variance, var, gamma, alpha); break; - case 5 : if (pairdel) distDNA_K81_pairdel(x, n, s, d, variance, var); else distDNA_K81(x, n, s, d, variance, var); break; - case 6 : if (pairdel) distDNA_F84_pairdel(x, n, s, d, BF, variance, var); else distDNA_F84(x, n, s, d, BF, variance, var); break; - case 7 : if (pairdel) distDNA_T92_pairdel(x, n, s, d, BF, variance, var); else distDNA_T92(x, n, s, d, BF, variance, var); break; - case 8 : if (pairdel) distDNA_TN93_pairdel(x, n, s, d, BF, variance, var, gamma, alpha); else distDNA_TN93(x, n, s, d, BF, variance, var, gamma, alpha); break; - case 9 : if (pairdel) distDNA_GG95_pairdel(x, n, s, d, variance, var); else distDNA_GG95(x, n, s, d, variance, var); break; - case 10 : if (pairdel) distDNA_LogDet_pairdel(x, n, s, d, variance, var); else distDNA_LogDet(x, n, s, d, variance, var); break; - case 11 : distDNA_BH87(x, n, s, d, variance, var); break; - case 12 : if (pairdel) distDNA_ParaLin_pairdel(x, n, s, d, variance, var); else distDNA_ParaLin(x, n, s, d, variance, var); break; case 13 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 0); else distDNA_raw(x, n, s, d, 0); break; + case 14 : if (pairdel) distDNA_TsTv(x, n, s, d, 1, 1); + else distDNA_TsTv(x, n, s, d, 1, 0); break; + case 15 : if (pairdel) distDNA_TsTv(x, n, s, d, 0, 1); + else distDNA_TsTv(x, n, s, d, 0, 0); break; + } }