-/* dist_dna.c 2007-12-01 */
+/* dist_dna.c 2010-03-16 */
-/* Copyright 2005-2007 Emmanuel Paradis
+/* Copyright 2005-2010 Emmanuel Paradis
/* This file is part of the R-package `ape'. */
/* See the file ../COPYING for licensing issues. */
#define LN4 1.386294361119890572454
/* returns 8 if the base is known surely, 0 otherwise */
-#define KnownBase(a) a & 8
+#define KnownBase(a) (a & 8)
/* returns 1 if the base is adenine surely, 0 otherwise */
#define IsAdenine(a) a == 136
if (KnownBase(x[s1]) && KnownBase(x[s2])) L++;\
else continue;
-void distDNA_raw(unsigned char *x, int *n, int *s, double *d)
+void distDNA_raw(unsigned char *x, int *n, int *s, double *d, int scaled)
{
int i1, i2, s1, s2, target, Nd;
Nd = 0;
for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n)
if (DifferentBase(x[s1], x[s2])) Nd++;
- d[target] = ((double) Nd / *s);
+ if (scaled) d[target] = ((double) Nd / *s);
+ else d[target] = ((double) Nd);
target++;
}
}
}
-void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d)
+void distDNA_raw_pairdel(unsigned char *x, int *n, int *s, double *d, int scaled)
{
int i1, i2, s1, s2, target, Nd, L;
CHECK_PAIRWISE_DELETION
if (DifferentBase(x[s1], x[s2])) Nd++;
}
- d[target] = ((double) Nd/L);
+ if (scaled) d[target] = ((double) Nd/L);
+ else d[target] = ((double) Nd);
target++;
}
}
#define COMPUTE_DIST_F84\
P = ((double) Ns/L);\
Q = ((double) (Nd - Ns)/L);\
- d[target] = -2*A*log(1 - (P/(2*A) - (A - B)*Q/(2*A*C))) + 2*(A - B - C)*log(1 - Q/(2*C));\
+ d[target] = -2*A*log(1 - P/(2*A) - (A - B)*Q/(2*A*C)) + 2*(A - B - C)*log(1 - Q/(2*C));\
if (*variance) {\
t1 = A*C;\
t2 = C*P/2;\
t3 = (A - B)*Q/2;\
a = t1/(t1 - t2 - t3);\
b = A*(A - B)/(t1 - t2 - t3) - (A - B - C)/(C - Q/2);\
- var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/2;\
+ var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/L;\
}
void distDNA_F84(unsigned char *x, int *n, int *s, double *d,
for (i2 = i1 + 1; i2 <= *n; i2++) {
for (k = 0; k < 16; k++) Ntab[k] = 0;
for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) {
+#define PREPARE_BF_F84\
+ A = (BF[0]*BF[2])/(BF[0] + BF[2]) + (BF[1]*BF[3])/(BF[1] + BF[3]);\
+ B = BF[0]*BF[2] + BF[1]*BF[3];\
+ C = (BF[0] + BF[2])*(BF[1] + BF[3]);
+
+#define COMPUTE_DIST_F84\
+ P = ((double) Ns/L);\
+ Q = ((double) (Nd - Ns)/L);\
+ d[target] = -2*A*log(1 - P/(2*A) - (A - B)*Q/(2*A*C)) + 2*(A - B - C)*log(1 - Q/(2*C));\
+ if (*variance) {\
+ t1 = A*C;\
+ t2 = C*P/2;\
+ t3 = (A - B)*Q/2;\
+ a = t1/(t1 - t2 - t3);\
+ b = A*(A - B)/(t1 - t2 - t3) - (A - B - C)/(C - Q/2);\
+ var[target] = (a*a*P + b*b*Q - pow(a*P + b*Q, 2))/2;\
+ }
+
+void distDNA_F84(unsigned char *x, int *n, int *s, double *d,
+ double *BF, int *variance, double *var)
+{
+ int i1, i2, Nd, Ns, L, target, s1, s2;
+ double P, Q, A, B, C, a, b, t1, t2, t3;
+
+ PREPARE_BF_F84
+ L = *s;
+
+ target = 0;
+ for (i1 = 1; i1 < *n; i1++) {
+ for (i2 = i1 + 1; i2 <= *n; i2++) {
+ Nd = Ns = 0;
+ for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) {
+ COUNT_TS_TV
+ }
+ COMPUTE_DIST_F84
+ target++;
+ }
+ }
+}
+
+void distDNA_F84_pairdel(unsigned char *x, int *n, int *s, double *d,
+ double *BF, int *variance, double *var)
+{
+ int i1, i2, Nd, Ns, L, target, s1, s2;
+ double P, Q, A, B, C, a, b, t1, t2, t3;
+
+ PREPARE_BF_F84
+
+ target = 0;
+ for (i1 = 1; i1 < *n; i1++) {
+ for (i2 = i1 + 1; i2 <= *n; i2++) {
+ Nd = Ns = L = 0;
+ for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) {
+ CHECK_PAIRWISE_DELETION
+ COUNT_TS_TV
+ }
+ COMPUTE_DIST_F84
+ target++;
+ }
+ }
+}
DO_CONTINGENCY_NUCLEOTIDES
}
}
}
-void BaseProportion(unsigned char *x, int *n, double *BF)
+void BaseProportion(unsigned char *x, int *n, double *BF, int *freq)
{
int i, m;
}
}
}
- for (i = 0; i < 4; i++) BF[i] /= m;
+ if (! *freq) for (i = 0; i < 4; i++) BF[i] /= m;
}
void SegSites(unsigned char *x, int *n, int *s, int *seg)
basis = x[i];
i++;
while (i < *n * (j + 1)) {
- if (x[i] == basis) i++;
+ if (!KnownBase(x[i]) || x[i] == basis) i++;
else {
seg[j] = 1;
break;
}
}
-void NucleotideDiversity(unsigned char *x, int *n, int *s,
- int *pairdel, double *ans)
-{
- int i1, i2, s1, s2, Nd, L;
-
- if (!*pairdel) L = *s;
-
- for (i1 = 1; i1 < *n; i1++) {
- for (i2 = i1 + 1; i2 <= *n; i2++) {
- Nd = 0;
- if (*pairdel) L = 0;
- for (s1 = i1 - 1, s2 = i2 - 1; s1 < i1 + *n*(*s - 1); s1+= *n, s2 += *n) {
- CHECK_PAIRWISE_DELETION
- if (DifferentBase(x[s1], x[s2])) Nd++;
- }
- *ans += ((double) Nd/L);
- }
- }
- *ans /= (*n * (*n - 1)/2);
-}
-
void GlobalDeletionDNA(unsigned char *x, int *n, int *s, int *keep)
{
int i, j;
int *gamma, double *alpha)
{
switch (*model) {
- case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d);
- else distDNA_raw(x, n, s, d); break;
+ case 1 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 1);
+ else distDNA_raw(x, n, s, d, 1); break;
case 2 : if (pairdel) distDNA_JC69_pairdel(x, n, s, d, variance, var, gamma, alpha);
else distDNA_JC69(x, n, s, d, variance, var, gamma, alpha); break;
case 12 : if (pairdel) distDNA_ParaLin_pairdel(x, n, s, d, variance, var);
else distDNA_ParaLin(x, n, s, d, variance, var); break;
+ case 13 : if (pairdel) distDNA_raw_pairdel(x, n, s, d, 0);
+ else distDNA_raw(x, n, s, d, 0); break;
}
}