X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fdist_dna.c;h=a289f159e5bafad3c353845b7a364c3c99d3c6e2;hb=82dd3702485179ba5408f1e3e57eb856d025e16c;hp=d56c794334beee45f113cef278d5c38dea5a5147;hpb=f3426364b40c7c0e6aadf6ea2690716425abdfc9;p=ape.git diff --git a/src/dist_dna.c b/src/dist_dna.c index d56c794..a289f15 100644 --- a/src/dist_dna.c +++ b/src/dist_dna.c @@ -1,6 +1,6 @@ -/* dist_dna.c 2008-12-22 */ +/* dist_dna.c 2010-07-14 */ -/* Copyright 2005-2008 Emmanuel Paradis +/* Copyright 2005-2010 Emmanuel Paradis /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ @@ -12,7 +12,7 @@ #define LN4 1.386294361119890572454 /* returns 8 if the base is known surely, 0 otherwise */ -#define KnownBase(a) a & 8 +#define KnownBase(a) (a & 8) /* returns 1 if the base is adenine surely, 0 otherwise */ #define IsAdenine(a) a == 136 @@ -64,21 +64,49 @@ double detFourByFour(double *x) if (KnownBase(x[s1]) && KnownBase(x[s2])) L++;\ else continue; -void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled) +#define COUNT_TS_TV\ + if (SameBase(x[s1], x[s2])) continue;\ + Nd++;\ + if (IsPurine(x[s1]) && IsPurine(x[s2])) {\ + Ns++;\ + continue;\ + }\ + if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++; + +void distDNA_TsTv(unsigned char *x, int *n, int *s, double *d, int Ts, int pairdel) { - int i1, i2, s1, s2, target, Nd; + int i1, i2, s1, s2, target, Nd, Ns; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = Ns = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) { + if (pairdel && !(KnownBase(x[s1]) && KnownBase(x[s2]))) continue; + COUNT_TS_TV + } + if (Ts) d[target] = ((double) Ns); /* output number of transitions */ + else d[target] = ((double) Nd - Ns); /* output number of transversions */ + target++; + } + } +} - target = 0; - for (i1 = 1; i1 < *n; i1++) { - for (i2 = i1 + 1; i2 <= *n; i2++) { - Nd = 0; - for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) - if (DifferentBase(x[s1], x[s2])) Nd++; - if (scaled) d[target] = ((double) Nd / *s); - else d[target] = ((double) Nd); - target++; +void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled) +{ + int i1, i2, s1, s2, target, Nd; + + target = 0; + for (i1 = 1; i1 < *n; i1++) { + for (i2 = i1 + 1; i2 <= *n; i2++) { + Nd = 0; + for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) + if (DifferentBase(x[s1], x[s2])) Nd++; + if (scaled) d[target] = ((double) Nd / *s); + else d[target] = ((double) Nd); + target++; + } } - } } void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d, int scaled) @@ -150,15 +178,6 @@ void distDNA_JC69_pairdel(unsigned char *x, int *n, int *s, double *d, } } -#define COUNT_TS_TV\ - if (SameBase(x[s1], x[s2])) continue;\ - Nd++;\ - if (IsPurine(x[s1]) && IsPurine(x[s2])) {\ - Ns++;\ - continue;\ - }\ - if (IsPyrimidine(x[s1]) && IsPyrimidine(x[s2])) Ns++; - #define COMPUTE_DIST_K80\ P = ((double) Ns/L);\ Q = ((double) (Nd - Ns)/L);\ @@ -350,14 +369,14 @@ void distDNA_K81_pairdel(unsigned char *x, int *n, int *s, double *d, #define COMPUTE_DIST_F84\ P = ((double) Ns/L);\ Q = ((double) (Nd - Ns)/L);\ - d[target] = -2*A*log(1 - (P/(2*A) - (A - B)*Q/(2*A*C))) + 2*(A - B - C)*log(1 - Q/(2*C));\ + d[target] = -2*A*log(1 - P/(2*A) - (A - B)*Q/(2*A*C)) + 2*(A - B - C)*log(1 - Q/(2*C));\ if (*variance) {\ t1 = A*C;\ t2 = C*P/2;\ t3 = (A - B)*Q/2;\ a = t1/(t1 - t2 - t3);\ b = A*(A - B)/(t1 - t2 - t3) - (A - B - C)/(C - Q/2);\ - var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/2;\ + var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/L;\ } void distDNA_F84(unsigned char *x, int *n, int *s, double *d, @@ -951,7 +970,7 @@ void distDNA_ParaLin_pairdel(unsigned char *x, int *n, int *s, double *d, } } -void BaseProportion(unsigned char *x, int *n, double *BF) +void BaseProportion(unsigned char *x, int *n, double *BF, int *freq) { int i, m; @@ -967,7 +986,7 @@ void BaseProportion(unsigned char *x, int *n, double *BF) } } } - for (i = 0; i < 4; i++) BF[i] /= m; + if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m; } void SegSites(unsigned char *x, int *n, int *s, int *seg) @@ -981,7 +1000,7 @@ void SegSites(unsigned char *x, int *n, int *s, int *seg) basis = x[i]; i++; while (i < *n * (j + 1)) { - if (x[i] == basis) i++; + if (!KnownBase(x[i]) || x[i] == basis) i++; else { seg[j] = 1; break; @@ -1013,39 +1032,33 @@ void dist_dna(unsigned char *x, int *n, int *s, int *model, double *d, switch (*model) { case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 1); else distDNA_raw(x, n, s, d, 1); break; - case 2 : if (pairdel) distDNA_JC69_pairdel(x, n, s, d, variance, var, gamma, alpha); else distDNA_JC69(x, n, s, d, variance, var, gamma, alpha); break; - case 3 : if (pairdel) distDNA_K80_pairdel(x, n, s, d, variance, var, gamma, alpha); else distDNA_K80(x, n, s, d, variance, var, gamma, alpha); break; - case 4 : if (pairdel) distDNA_F81_pairdel(x, n, s, d, BF, variance, var, gamma, alpha); else distDNA_F81(x, n, s, d, BF, variance, var, gamma, alpha); break; - case 5 : if (pairdel) distDNA_K81_pairdel(x, n, s, d, variance, var); else distDNA_K81(x, n, s, d, variance, var); break; - case 6 : if (pairdel) distDNA_F84_pairdel(x, n, s, d, BF, variance, var); else distDNA_F84(x, n, s, d, BF, variance, var); break; - case 7 : if (pairdel) distDNA_T92_pairdel(x, n, s, d, BF, variance, var); else distDNA_T92(x, n, s, d, BF, variance, var); break; - case 8 : if (pairdel) distDNA_TN93_pairdel(x, n, s, d, BF, variance, var, gamma, alpha); else distDNA_TN93(x, n, s, d, BF, variance, var, gamma, alpha); break; - case 9 : if (pairdel) distDNA_GG95_pairdel(x, n, s, d, variance, var); else distDNA_GG95(x, n, s, d, variance, var); break; - case 10 : if (pairdel) distDNA_LogDet_pairdel(x, n, s, d, variance, var); else distDNA_LogDet(x, n, s, d, variance, var); break; - case 11 : distDNA_BH87(x, n, s, d, variance, var); break; - case 12 : if (pairdel) distDNA_ParaLin_pairdel(x, n, s, d, variance, var); else distDNA_ParaLin(x, n, s, d, variance, var); break; case 13 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 0); else distDNA_raw(x, n, s, d, 0); break; + case 14 : if (pairdel) distDNA_TsTv(x, n, s, d, 1, 1); + else distDNA_TsTv(x, n, s, d, 1, 0); break; + case 15 : if (pairdel) distDNA_TsTv(x, n, s, d, 0, 1); + else distDNA_TsTv(x, n, s, d, 0, 0); break; + } }