1 /* BIONJ.c 2012-04-30 */
3 /* Copyright 2007-2008 Olivier Gascuel, Hoa Sien Cuong,
4 R port by Vincent Lefort and Emmanuel Paradis */
6 /* This file is part of the R-package `ape'. */
7 /* See the file ../COPYING for licensing issues. */
13 GERAD - Montreal- Canada
14 olivierg@crt.umontreal.ca
16 LIRMM - Montpellier- France
19 UNIX version, written in C
20 by Hoa Sien Cuong (Univ. Montreal) */
24 void Initialize(float **delta, double *X, int n);
25 void bionj(double *X, int *N, int *edge1, int *edge2, double *el);
26 float Distance(int i, int j, float **delta);
27 float Variance(int i, int j, float **delta);
28 int Emptied(int i, float **delta);
29 float Sum_S(int i, float **delta);
30 void Compute_sums_Sx(float **delta, int n);
31 void Best_pair(float **delta, int r, int *a, int *b, int n);
32 float Agglomerative_criterion(int i, int j, float **delta, int r);
33 float Branch_length(int a, int b, float **delta, int r);
34 float Reduction4(int a, float la, int b, float lb, int i, float lamda, float **delta);
35 float Reduction10(int a, int b, int i, float lamda, float vab, float **delta);
36 float Lamda(int a, int b, float vab, float **delta, int n, int r);
38 /* INPUT, OUTPUT, INITIALIZATION
40 The lower-half of the delta matrix is occupied by
41 dissimilarities. The upper-half of the matrix is
42 occupied by variances. The first column
43 is initialized as 0; during the algorithm some
44 indices are no more used, and the corresponding
45 positions in the first column are set to 1. */
49 This function reads an input data and returns the delta matrix
51 input: float **delta : delta matrix
52 double *X : distances sent from R as a lower triangle matrix
53 int n : number of taxa
55 output: float **delta : delta matrix initialized */
57 void Initialize(float **delta, double *X, int n)
59 int i, j; /* matrix line and column indices */
60 int k = 0; /* index along X */
62 for (i = 1; i < n; i++)
63 for (j = i + 1; j <= n; j++)
64 delta[i][j] = delta[j][i] = X[k++];
66 for (i = 1; i <= n; i++)
67 delta[i][i] = delta[i][0] = 0;
70 void bionj(double *X, int *N, int *edge1, int *edge2, double *el)
72 int *a, *b; /* pair to be agglomerated */
73 float **delta; /* delta matrix */
74 float la; /* first taxon branch-length */
75 float lb; /* second taxon branch-length */
76 float vab; /* variance of Dab */
79 int r; /* number of subtrees */
80 int n; /* number of taxa */
81 int i, x, y, curnod, k;
83 int *ilab; /* indices of the tips (used as "labels") */
85 a = (int*)R_alloc(1, sizeof(int));
86 b = (int*)R_alloc(1, sizeof(int));
90 /* Create the delta matrix */
91 delta = (float **)R_alloc(n + 1, sizeof(float*));
92 for (i = 1; i <= n; i++)
93 delta[i] = (float *)R_alloc(n + 1, sizeof(float));
98 Initialize(delta, X, n);
100 ilab = (int *)R_alloc(n + 1, sizeof(int));
101 for (i = 1; i <= n; i++) ilab[i] = i;
108 Compute_sums_Sx(delta, n); /* compute the sum Sx */
109 Best_pair(delta, r, a, b, n); /* find the best pair by */
110 vab = Variance(*a, *b, delta); /* minimizing (1) */
111 la = Branch_length(*a, *b, delta, r); /* compute branch-lengths */
112 lb = Branch_length(*b, *a, delta, r); /* using formula (2) */
114 lamda = Lamda(*a, *b, vab, delta, n, r); /* compute lambda* using (9)*/
116 edge1[k] = edge1[k + 1] = curnod;
118 edge2[k + 1] = ilab[*b];
123 for (i = 1; i <= n; i++) {
124 if (Emptied(i,delta) || (i == *a) || (i == *b)) continue;
133 /* apply reduction formulae 4 and 10 to delta */
134 delta[x][y] = Reduction4(*a, la, *b, lb, i, lamda, delta);
135 delta[y][x] = Reduction10(*a, *b, i, lamda, vab, delta);
138 delta[*b][0] = 1.0; /* make the b line empty */
144 /* finalise the three basal edges */
149 if (!Emptied(i, delta)) last[k++] = i;
153 for (i = 0, k = 2 * n - 4; i < 3; i++, k--) {
154 edge1[k] = curnod; /* <- the root at this stage */
155 edge2[k] = ilab[last[i]];
159 D[0] = Distance(last[0], last[1], delta);
160 D[1] = Distance(last[0], last[2], delta);
161 D[2] = Distance(last[1], last[2], delta);
163 el[2 * n - 4] = (D[0] + D[1] - D[2])/2;
164 el[2 * n - 5] = (D[0] + D[2] - D[1])/2;
165 el[2 * n - 6] = (D[2] + D[1] - D[0])/2;
170 This function retrieves and returns the distance between taxa
171 i and j from the delta matrix.
173 input: int i : taxon i
175 float **delta : the delta matrix
177 output: float distance : dissimilarity between the two taxa */
179 float Distance(int i, int j, float **delta)
181 if (i > j) return(delta[i][j]);
182 else return(delta[j][i]);
187 This function retrieves and returns the variance of the
188 distance between i and j, from the delta matrix.
190 input: int i : taxon i
192 float **delta : the delta matrix
194 output: float distance : the variance of Dij */
196 float Variance(int i, int j, float **delta)
198 if (i > j) return(delta[j][i]);
199 else return(delta[i][j]);
205 This function tests if a line is emptied or not.
207 input: int i : subtree (or line) i
208 float **delta : the delta matrix
210 output: 0 : if not emptied
213 int Emptied(int i, float **delta)
215 return((int)delta[i][0]);
220 This function retrieves the sum Sx from the diagonal
223 input: int i : subtree i
224 float **delta : the delta matrix
226 output: float delta[i][i] : sum Si */
228 float Sum_S(int i, float **delta)
233 /* -- Compute_sums_Sx --
235 This function computes the sums Sx and stores them in the
236 diagonal the delta matrix.
238 input: float **delta : the delta matrix
239 int n : the number of taxa */
242 void Compute_sums_Sx(float **delta, int n)
247 for (i = 1; i <= n ; i++) {
248 if (Emptied(i, delta)) continue;
250 for (j = 1; j <= n; j++) {
251 if (i == j || Emptied(j, delta)) continue;
252 sum += Distance(i, j, delta); /* compute the sum Si */
261 This function finds the best pair to be agglomerated by
262 minimizing the agglomerative criterion (1).
264 input: float **delta : the delta matrix
265 int r : number of subtrees
266 int *a : contain the first taxon of the pair
267 int *b : contain the second taxon of the pair
268 int n : number of taxa
270 output: int *a : the first taxon of the pair
271 int *b : the second taxon of the pair */
274 void Best_pair(float **delta, int r, int *a, int *b, int n)
276 float Qxy; /* value of the criterion calculated */
277 int x, y; /* the pair which is tested */
278 float Qmin; /* current minimun of the criterion */
281 for (x = 1; x <= n; x++) {
282 if (Emptied(x, delta)) continue;
283 for (y = 1; y < x; y++) {
284 if (Emptied(y, delta)) continue;
285 Qxy = Agglomerative_criterion(x, y, delta, r);
286 if (Qxy < Qmin - 0.000001) {
298 float Agglomerative_criterion(int i, int j, float **delta, int r)
300 return((r - 2) * Distance(i, j, delta) -
301 Sum_S(i, delta) - Sum_S(j, delta));
305 float Branch_length(int a, int b, float **delta, int r)
307 return(0.5 * (Distance(a, b, delta) +
308 (Sum_S(a, delta) - Sum_S(b, delta))/(r - 2)));
312 float Reduction4(int a, float la, int b, float lb, int i, float lamda, float **delta)
314 return(lamda * (Distance(a, i, delta) - la) +
315 (1 - lamda) * (Distance(b, i, delta) - lb));
319 float Reduction10(int a, int b, int i, float lamda, float vab, float **delta)
321 return(lamda * Variance(a, i, delta) + (1 - lamda) * Variance(b, i, delta)
322 - lamda * (1 - lamda) * vab);
325 float Lamda(int a, int b, float vab, float **delta, int n, int r)
330 if (vab == 0.0) lamda = 0.5; else {
331 for (i = 1; i <= n ; i++) {
332 if (a == i || b == i || Emptied(i, delta)) continue;
333 lamda += (Variance(b, i, delta) - Variance(a, i, delta));
335 lamda = 0.5 + lamda/(2 * (r - 2) * vab); /* Formula (9) */
337 if (lamda > 1.0) lamda = 1.0; /* force 0 < lamda < 1 */
338 if (lamda < 0.0) lamda = 0.0;