3 /* Copyright 2006 Emmanuel Paradis
5 /* This file is part of the R-package `ape'. */
6 /* See the file ../COPYING for licensing issues. */
10 #define DINDEX(i, j) n*(i - 1) - i*(i - 1)/2 + j - i - 1
12 int give_index(int i, int j, int n)
14 if (i > j) return(DINDEX(j, i));
15 else return(DINDEX(i, j));
18 double sum_dist_to_i(int n, double *D, int i)
19 /* returns the sum of all distances D_ij between i and j
20 with j between 1, and n and j != i */
28 for (j = 1; j < i; j++)
29 sum += D[DINDEX(j, i)];
33 for (j = i + 1; j <= n; j++)
34 sum += D[DINDEX(i, j)];
41 /* Find the 'R' indices of the two corresponding OTUs */ \
42 /* The indices of the first element of the pair in the \
43 distance matrix are n-1 times 1, n-2 times 2, n-3 times 3, \
44 ..., once n-1. Given this, the algorithm below is quite \
47 for (OTU1 = 1; OTU1 < n; OTU1++) { \
49 if (i >= smallest + 1) break; \
51 /* Finding the second OTU is easier! */ \
52 OTU2 = smallest + 1 + OTU1 - n*(OTU1 - 1) + OTU1*(OTU1 - 1)/2;
55 /* give the node and tip numbers to edge */ \
56 edge2[k] = otu_label[OTU1 - 1]; \
57 edge2[k + 1] = otu_label[OTU2 - 1]; \
58 edge1[k] = edge1[k + 1] = cur_nod;
60 void nj(double *D, int *N, int *edge1, int *edge2, double *edge_length)
62 double SUMD, Sdist, *S, Ndist, *new_dist, A, B, *DI, d_i, x, y;
63 int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
73 S = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
74 new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
75 otu_label = (int*)R_alloc(n, sizeof(int));
76 DI = (double*)R_alloc(n - 2, sizeof(double));
78 for (i = 0; i < n; i++) otu_label[i] = i + 1;
81 /* First, look if there are distances equal to 0. */
82 /* Since there may be multichotomies, we loop
83 through the OTUs instead of the distances. */
89 if (D[DINDEX(OTU1, OTU2)] == 0) {
91 edge_length[k] = edge_length[k + 1] = 0.;
95 /* We remove the second tip label: */
97 for (i = OTU2; i < n; i++)
98 otu_label[i - 1] = otu_label[i];
102 for (i = 1; i < n; i++) {
103 if (i == OTU2) continue;
104 for (j = i + 1; j <= n; j++) {
105 if (j == OTU2) continue;
106 new_dist[ij] = D[DINDEX(i, j)];
111 for (i = 0; i < n*(n - 1)/2; i++) D[i] = new_dist[i];
113 otu_label[OTU1 - 1] = cur_nod;
114 /* to avoid adjusting the internal branch at the end: */
115 DI[cur_nod - *N - 1] = 0;
125 for (i = 0; i < n*(n - 1)/2; i++) SUMD += D[i];
127 for (i = 1; i < n; i++) {
128 for (j = i + 1; j <= n; j++) {
129 /* we know that i < j, so: */
131 A = sum_dist_to_i(n, D, i) - D[ij];
132 B = sum_dist_to_i(n, D, j) - D[ij];
133 S[ij] = (A + B)/(2*n - 4) + 0.5*D[ij] + (SUMD - A - B - D[ij])/(n - 2);
137 /* find the 'C' index of the smallest value of S */
139 for (i = 1; i < n*(n - 1)/2; i++)
140 if (S[smallest] > S[i]) smallest = i;
146 /* get the distances between all OTUs but the 2 selected ones
148 a) get the sum for both
149 b) compute the distances for the new OTU */
151 for (i = 1; i <= n; i++) {
152 if (i == OTU1 || i == OTU2) continue;
153 x = D[give_index(i, OTU1, n)]; /* distance between OTU1 and i */
154 y = D[give_index(i, OTU2, n)]; /* distance between OTU2 and i */
155 new_dist[ij] = (x + y)/2;
160 /* compute the branch lengths */
163 edge_length[k] = (D[smallest] + A - B)/2;
164 edge_length[k + 1] = (D[smallest] + B - A)/2;
165 DI[cur_nod - *N - 1] = D[smallest];
167 /* update before the next loop */
168 if (OTU1 > OTU2) { /* make sure that OTU1 < OTU2 */
174 for (i = OTU1 - 1; i > 0; i--) otu_label[i] = otu_label[i - 1];
176 for (i = OTU2; i <= n; i++) otu_label[i - 1] = otu_label[i];
177 otu_label[0] = cur_nod;
179 for (i = 1; i < n; i++) {
180 if (i == OTU1 || i == OTU2) continue;
181 for (j = i + 1; j <= n; j++) {
182 if (j == OTU1 || j == OTU2) continue;
183 new_dist[ij] = D[DINDEX(i, j)];
189 for (i = 0; i < n*(n - 1)/2; i++) D[i] = new_dist[i];
195 for (i = 0; i < 3; i++) {
196 edge1[*N*2 - 4 - i] = cur_nod;
197 edge2[*N*2 - 4 - i] = otu_label[i];
200 edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
201 edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
202 edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
204 for (i = 0; i < *N*2 - 3; i++) {
205 if (edge2[i] <= *N) continue;
206 /* In case there are zero branch lengths (see above): */
207 if (DI[edge2[i] - *N - 1] == 0) continue;
208 edge_length[i] -= DI[edge2[i] - *N - 1]/2;