From eccd02caa284c933ccabf54c2b52d2c871cbd8e8 Mon Sep 17 00:00:00 2001 From: paradis Date: Thu, 16 Aug 2012 08:12:38 +0000 Subject: [PATCH] new dist.nodes() with C code git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@192 6e262413-ae40-0410-9e79-b911bd7a66b7 --- DESCRIPTION | 4 +-- NEWS | 9 +++++ R/cophenetic.phylo.R | 79 +++++++++----------------------------------- R/reorder.phylo.R | 28 +++++++++------- src/dist_nodes.c | 41 +++++++++++++++++++++++ src/reorder_phylo.c | 19 ++++------- 6 files changed, 91 insertions(+), 89 deletions(-) create mode 100644 src/dist_nodes.c diff --git a/DESCRIPTION b/DESCRIPTION index 66a9e92..8f0658f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape -Version: 3.0-5 -Date: 2012-06-22 +Version: 3.0-6 +Date: 2012-08-14 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/NEWS b/NEWS index 1c1ae9c..12aabcc 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,12 @@ + CHANGES IN APE VERSION 3.0-6 + + +OTHER CHANGES + + o dist.nodes() is now 6 to 10 times faster. + + + CHANGES IN APE VERSION 3.0-5 diff --git a/R/cophenetic.phylo.R b/R/cophenetic.phylo.R index bdcbfcf..a4f00be 100644 --- a/R/cophenetic.phylo.R +++ b/R/cophenetic.phylo.R @@ -1,74 +1,27 @@ -## cophenetic.phylo.R (2010-11-15) +## cophenetic.phylo.R (2012-08-14) ## Pairwise Distances from a Phylogenetic Tree -## Copyright 2006-2010 Emmanuel Paradis +## Copyright 2006-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -dist.nodes <- function(x) +dist.nodes <- function(phy) { - if (is.null(x$edge.length)) - stop("your tree has no branch lengths") - - trim <- FALSE - if (!is.binary.tree(x) || !is.rooted(x)) { - trim <- TRUE - x <- makeNodeLabel(x) - x <- multi2di(x, random = FALSE) - } - - n <- length(x$tip.label) - n.node <- x$Nnode - N <- n + n.node - x <- reorder(x, order = "pruningwise") - - res <- matrix(NA, N, N) - res[cbind(1:N, 1:N)] <- 0 # implicit mode conversion - - ## I like the simplicity of this one: - res[x$edge] <- res[x$edge[, 2:1]] <- x$edge.length - - ## compute the distances ... - for (i in seq(from = 1, by = 2, length.out = n.node)) { - j <- i + 1 - anc <- x$edge[i, 1] - des1 <- x$edge[i, 2] - des2 <- x$edge[j, 2] - - ## If `des1' is a node, we look for the nodes and tips for - ## which the distance up to `des1' has already been - ## computed, including `des1' itself. For all these, we can - ## compute the distance up to `anc' and all node(s) and - ## tip(s) in `des2'. - if (des1 > n) des1 <- which(!is.na(res[des1, ])) - - ## id. for `des2' - if (des2 > n) des2 <- which(!is.na(res[des2, ])) - - ## The following expression is vectorized only on `des2' and - ## not on `des1' because they may be of different lengths. - for (y in des1) - res[y, des2] <- res[des2, y] <- res[anc, y] + res[anc, des2] - ## compute the distances between the tip(s) and node(s) - ## in `des2' and the ancestor of `anc'; id. for `des2' - ## (only if it is not the root) - if (anc != n + 1) { - ind <- which(x$edge[, 2] == anc) - nod <- x$edge[ind, 1] # the ancestor of `anc' - l <- x$edge.length[ind] - res[des2, nod] <- res[nod, des2] <- res[anc, des2] + l - res[des1, nod] <- res[nod, des1] <- res[anc, des1] + l - } - } - if (trim) { - i <- which(x$node.label == "") + n - res <- res[-i, -i] - N <- dim(res)[1] - } - dimnames(res)[1:2] <- list(1:N) - res + phy <- reorder(phy) + n <- Ntip(phy) + m <- phy$Nnode + + d <- .C("dist_nodes", as.integer(n), as.integer(m), + as.integer(phy$edge[, 1] - 1L), as.integer(phy$edge[, 2] - 1L), + as.double(phy$edge.length), as.integer(Nedge(phy)), + double(nm * nm), DUP = FALSE, NAOK = TRUE, + PACKAGE = "ape")[[7]] + nm <- n + m + dim(d) <- c(nm, nm) + dimnames(d) <- list(1:nm, 1:nm) + d } cophenetic.phylo <- function(x) diff --git a/R/reorder.phylo.R b/R/reorder.phylo.R index e06b26b..226adb9 100644 --- a/R/reorder.phylo.R +++ b/R/reorder.phylo.R @@ -1,8 +1,8 @@ -## reorder.phylo.R (2010-04-02) +## reorder.phylo.R (2012-08-01) ## Internal Reordering of Trees -## Copyright 2006-2010 Emmanuel Paradis +## Copyright 2006-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -16,16 +16,20 @@ reorder.phylo <- function(x, order = "cladewise", ...) if (nb.node == 1) return(x) nb.tip <- length(x$tip.label) nb.edge <- dim(x$edge)[1] - neworder <- if (order == "cladewise") - .C("neworder_cladewise", as.integer(nb.tip), - as.integer(x$edge[, 1]), as.integer(x$edge[, 2]), - as.integer(nb.edge), integer(nb.edge), - PACKAGE = "ape")[[5]] - else - .C("neworder_pruningwise", as.integer(nb.tip), - as.integer(nb.node), as.integer(x$edge[, 1]), - as.integer(x$edge[, 2]), as.integer(nb.edge), - integer(nb.edge), PACKAGE = "ape")[[6]] + if (order == "cladewise") { + neworder <- + .C("neworder_cladewise", as.integer(nb.tip), + as.integer(x$edge[, 1]), as.integer(x$edge[, 2]), + as.integer(nb.edge), integer(nb.edge), + PACKAGE = "ape")[[5]] + } else { + ##node.degree <- tabulate(x$edge[, 1]) + neworder <- + .C("neworder_pruningwise", as.integer(nb.tip), + as.integer(nb.node), as.integer(x$edge[, 1]), + as.integer(x$edge[, 2]), as.integer(nb.edge), + integer(nb.edge), PACKAGE = "ape")[[6]] + } x$edge <- x$edge[neworder, ] if (!is.null(x$edge.length)) x$edge.length <- x$edge.length[neworder] diff --git a/src/dist_nodes.c b/src/dist_nodes.c new file mode 100644 index 0000000..b0c106a --- /dev/null +++ b/src/dist_nodes.c @@ -0,0 +1,41 @@ +/* dist_nodes.c 2012-08-14 */ + +/* Copyright 2012 Emmanuel Paradis + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + +#include + +#define DINDEX2(i, j) i + NM * j + +/* The algorithm is pretty simple: the tree must be in cladewise order + because the edges are visited contiguously. Each edge gives trivially + one distance, then by moving up along the edge matrix, one finds nodes + that have already been visited and the distance matrix can be updated. */ + +void dist_nodes(int *n, int *m, int *e1, int *e2, double *el, int *N, double *D) +/* n: nb of tips, m: nb of nodes, N: nb of edges */ +{ + int i, j, k, a, d, NM = *n + *m, ROOT; + double x; + + ROOT = e1[0]; d = e2[0]; /* the 2 nodes of the 1st edge */ + D[DINDEX2(ROOT, d)] = D[DINDEX2(d, ROOT)] = el[0]; /* the 1st edge gives the 1st distance */ + +/* go down along the edge matrix + starting at the 2nd edge: */ + for (i = 1; i < *N; i++) { + a = e1[i]; d = e2[i]; x = el[i]; /* get the i-th nodes and branch length */ + D[DINDEX2(a, d)] = D[DINDEX2(d, a)] = x; + /* then go up along the edge matrix from the i-th edge + to visit the nodes already visited and update the distances: */ + for (j = i - 1; j >= 0; j--) { + k = e2[j]; + if (k == a) continue; + D[DINDEX2(k, d)] = D[DINDEX2(d, k)] = D[DINDEX2(a, k)] + x; + } + if (k != ROOT) + D[DINDEX2(ROOT, d)] = D[DINDEX2(d, ROOT)] = D[DINDEX2(ROOT, a)] + x; + } +} diff --git a/src/reorder_phylo.c b/src/reorder_phylo.c index c54ea52..979b23e 100644 --- a/src/reorder_phylo.c +++ b/src/reorder_phylo.c @@ -1,12 +1,11 @@ -/* reorder_phylo.c 2008-03-17 */ +/* reorder_phylo.c 2012-08-01 */ -/* Copyright 2008 Emmanuel Paradis */ +/* Copyright 2008-2012 Emmanuel Paradis */ /* This file is part of the R-package `ape'. */ /* See the file ../COPYING for licensing issues. */ #include -#include void neworder_cladewise(int *n, int *edge1, int *edge2, int *N, int *neworder) @@ -61,23 +60,19 @@ void neworder_cladewise(int *n, int *edge1, int *edge2, void neworder_pruningwise(int *ntip, int *nnode, int *edge1, int *edge2, int *nedge, int *neworder) { - int *Ndegr, degree, *ready, rdy, i, j, node, nextI, n; - Ndegr = °ree; - ready = &rdy; + int *ready, *Ndegr, i, j, node, nextI, n; - ready = (int*)R_alloc(*nedge, sizeof(int)); - - /* use `nextI' temporarily because need an address for R_tabulate */ nextI = *ntip + *nnode; Ndegr = (int*)R_alloc(nextI, sizeof(int)); memset(Ndegr, 0, nextI*sizeof(int)); - R_tabulate(edge1, nedge, &nextI, Ndegr); + for (i = 0; i < *nedge; i++) (Ndegr[edge1[i] - 1])++; + + ready = (int*)R_alloc(*nedge, sizeof(int)); /* `ready' indicates whether an edge is ready to be */ /* collected; only the terminal edges are initially ready */ for (i = 0; i < *nedge; i++) - if (edge2[i] <= *ntip) ready[i] = 1; - else ready[i] = 0; + ready[i] = (edge2[i] <= *ntip) ? 1 : 0; /* `n' counts the number of times a node has been seen. */ /* This algo will work if the tree is in cladewise order, */ -- 2.39.2