X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fnj.c;h=21b4c1e00f0cbbd164a9b10986262d54da8fb26c;hb=fab4946bb5d41cd408dffd4b66aae8a697690cfa;hp=af53abbd91ce9669d3038b704be47d7cba8d4ff5;hpb=100e0d086372a87bc2506873bdf86d3fcdda9922;p=ape.git diff --git a/src/nj.c b/src/nj.c index af53abb..21b4c1e 100644 --- a/src/nj.c +++ b/src/nj.c @@ -1,19 +1,11 @@ -/* nj.c 2009-07-09 */ +/* nj.c 2011-10-20 */ -/* Copyright 2006-2009 Emmanuel Paradis +/* Copyright 2006-2011 Emmanuel Paradis /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ -#include - -#define DINDEX(i, j) n*(i - 1) - i*(i - 1)/2 + j - i - 1 - -int give_index(int i, int j, int n) -{ - if (i > j) return(DINDEX(j, i)); - else return(DINDEX(i, j)); -} +#include "ape.h" double sum_dist_to_i(int n, double *D, int i) /* returns the sum of all distances D_ij between i and j @@ -32,8 +24,8 @@ j 4 2 6 9 5 3 7 10 12 6 4 8 11 13 14 - so that we sum the values of the ith column-1st loop-and those of - (i - 1)th row (labelled 'i')-2nd loop */ + so that we sum the values of the ith column--1st loop--and those of + (i - 1)th row (labelled 'i')--2nd loop */ double sum = 0; int j, start, end; @@ -58,103 +50,79 @@ j 4 2 6 9 return(sum); } -#define GET_I_AND_J \ -/* Find the 'R' indices of the two corresponding OTUs */ \ -/* The indices of the first element of the pair in the \ - distance matrix are n-1 times 1, n-2 times 2, n-3 times 3, \ - ..., once n-1. Given this, the algorithm below is quite \ - straightforward.*/ \ - i = 0; \ - for (OTU1 = 1; OTU1 < n; OTU1++) { \ - i += n - OTU1; \ - if (i >= smallest + 1) break; \ - } \ - /* Finding the second OTU is easier! */ \ - OTU2 = smallest + 1 + OTU1 - n*(OTU1 - 1) + OTU1*(OTU1 - 1)/2 - -#define SET_CLADE \ -/* give the node and tip numbers to edge */ \ - edge2[k] = otu_label[OTU1 - 1]; \ - edge2[k + 1] = otu_label[OTU2 - 1]; \ - edge1[k] = edge1[k + 1] = cur_nod - void nj(double *D, int *N, int *edge1, int *edge2, double *edge_length) { - double SUMD, *S, Sdist, Ndist, *new_dist, A, B, *DI, d_i, x, y; + double *S, Sdist, Ndist, *new_dist, A, B, smallest_S, x, y; int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label; S = &Sdist; new_dist = &Ndist; otu_label = &o_l; - DI = &d_i; n = *N; cur_nod = 2*n - 2; - S = (double*)R_alloc(n*(n - 1)/2, sizeof(double)); + S = (double*)R_alloc(n + 1, sizeof(double)); new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double)); - otu_label = (int*)R_alloc(n, sizeof(int)); - DI = (double*)R_alloc(n - 2, sizeof(double)); + otu_label = (int*)R_alloc(n + 1, sizeof(int)); + + for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */ - for (i = 0; i < n; i++) otu_label[i] = i + 1; k = 0; while (n > 3) { - SUMD = 0; - for (i = 0; i < n*(n - 1)/2; i++) SUMD += D[i]; + for (i = 1; i <= n; i++) + S[i] = sum_dist_to_i(n, D, i); /* S[0] is not used */ ij = 0; + smallest_S = 1e50; + B = n - 2; for (i = 1; i < n; i++) { for (j = i + 1; j <= n; j++) { - A = sum_dist_to_i(n, D, i) - D[ij]; - B = sum_dist_to_i(n, D, j) - D[ij]; - S[ij] = (A + B)/(2*n - 4) + 0.5*D[ij] - + (SUMD - A - B - D[ij])/(n - 2); + A = B*D[ij] - S[i] - S[j]; + if (A < smallest_S) { + OTU1 = i; + OTU2 = j; + smallest_S = A; + smallest = ij; + } ij++; } } - /* find the 'C' index of the smallest value of S */ - smallest = 0; - for (i = 1; i < n*(n - 1)/2; i++) - if (S[smallest] > S[i]) smallest = i; - - GET_I_AND_J; - SET_CLADE; + edge2[k] = otu_label[OTU1]; + edge2[k + 1] = otu_label[OTU2]; + edge1[k] = edge1[k + 1] = cur_nod; /* get the distances between all OTUs but the 2 selected ones and the latter: a) get the sum for both b) compute the distances for the new OTU */ - A = B = ij = 0; + + A = D[smallest]; + ij = 0; for (i = 1; i <= n; i++) { if (i == OTU1 || i == OTU2) continue; x = D[give_index(i, OTU1, n)]; /* dist between OTU1 and i */ y = D[give_index(i, OTU2, n)]; /* dist between OTU2 and i */ - new_dist[ij] = (x + y)/2; - A += x; - B += y; + new_dist[ij] = (x + y - A)/2; ij++; } /* compute the branch lengths */ - A /= n - 2; - B /= n - 2; - edge_length[k] = (D[smallest] + A - B)/2; - edge_length[k + 1] = (D[smallest] + B - A)/2; - DI[cur_nod - *N - 1] = D[smallest]; - - /* update before the next loop */ - if (OTU1 > OTU2) { /* make sure that OTU1 < OTU2 */ - i = OTU1; - OTU1 = OTU2; - OTU2 = i; - } + B = (S[OTU1] - S[OTU2])/B; /* don't need B anymore */ + edge_length[k] = (A + B)/2; + edge_length[k + 1] = (A - B)/2; + + /* update before the next loop + (we are sure that OTU1 < OTU2) */ if (OTU1 != 1) - for (i = OTU1 - 1; i > 0; i--) otu_label[i] = otu_label[i - 1]; + for (i = OTU1; i > 1; i--) + otu_label[i] = otu_label[i - 1]; if (OTU2 != n) - for (i = OTU2; i <= n; i++) otu_label[i - 1] = otu_label[i]; - otu_label[0] = cur_nod; + for (i = OTU2; i < n; i++) + otu_label[i] = otu_label[i + 1]; + otu_label[1] = cur_nod; for (i = 1; i < n; i++) { if (i == OTU1 || i == OTU2) continue; @@ -174,17 +142,10 @@ void nj(double *D, int *N, int *edge1, int *edge2, double *edge_length) for (i = 0; i < 3; i++) { edge1[*N*2 - 4 - i] = cur_nod; - edge2[*N*2 - 4 - i] = otu_label[i]; + edge2[*N*2 - 4 - i] = otu_label[i + 1]; } edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2; edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2; edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2; - - for (i = 0; i < *N*2 - 3; i++) { - if (edge2[i] <= *N) continue; - /* In case there are zero branch lengths: */ - if (DI[edge2[i] - *N - 1] == 0) continue; - edge_length[i] -= DI[edge2[i] - *N - 1]/2; - } }