changes in reorder(, "cladewise")

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Mon, 20 Aug 2012 08:45:48 +0000 (08:45 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Mon, 20 Aug 2012 08:45:48 +0000 (08:45 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Mon, 20 Aug 2012 08:45:48 +0000 (08:45 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Mon, 20 Aug 2012 08:45:48 +0000 (08:45 +0000)
diff --git a/DESCRIPTION b/DESCRIPTION

index 8f0658ffbac77416a7817e30ca8301729e0c7137..03b6257e9bc727907847fc62821b247b5e9e722b 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: ape
  Version: 3.0-6
-Date: 2012-08-14
+Date: 2012-08-17
  Title: Analyses of Phylogenetics and Evolution
  Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne
  Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
diff --git a/NEWS b/NEWS

index 12aabcc184d0d156963862a6206b735260ff5131..812bb6c414cdbf2a9e8f3b54bbbb63b0e711c441 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,10 @@ OTHER CHANGES
  
      o dist.nodes() is now 6 to 10 times faster.
  
+    o reorder(, "cladewise") is now faster. The change is not very
+      visible for small trees (n < 1000) but this can be more than
+      1000 faster for big trees (n >= 1e4).
+
  
  
                 CHANGES IN APE VERSION 3.0-5
diff --git a/R/cophenetic.phylo.R b/R/cophenetic.phylo.R

index a4f00beedb2ea32c692a77710b8b1dd0bfdbd054..a6669786814bcdb79ea941f3a2bce3937ab7b9cd 100644 (file)
--- a/R/cophenetic.phylo.R
+++ b/R/cophenetic.phylo.R
@@ -7,18 +7,18 @@
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
  
-dist.nodes <- function(phy)
+dist.nodes <- function(x)
  {
-    phy <- reorder(phy)
-    n <- Ntip(phy)
-    m <- phy$Nnode
+    x <- reorder(x) # required for the C code
+    n <- Ntip(x)
+    m <- x$Nnode
+    nm <- n + m
  
      d <- .C("dist_nodes", as.integer(n), as.integer(m),
-            as.integer(phy$edge[, 1] - 1L), as.integer(phy$edge[, 2] - 1L),
-            as.double(phy$edge.length), as.integer(Nedge(phy)),
+            as.integer(x$edge[, 1] - 1L), as.integer(x$edge[, 2] - 1L),
+            as.double(x$edge.length), as.integer(Nedge(x)),
              double(nm * nm), DUP = FALSE, NAOK = TRUE,
              PACKAGE = "ape")[[7]]
-    nm <- n + m
      dim(d) <- c(nm, nm)
      dimnames(d) <- list(1:nm, 1:nm)
      d
diff --git a/R/reorder.phylo.R b/R/reorder.phylo.R

index 226adb946c334f048382f0f04083429de3d4078d..c7e11d3bb5c4816d4dadccaf9fc69935060fd4ed 100644 (file)
--- a/R/reorder.phylo.R
+++ b/R/reorder.phylo.R
@@ -1,4 +1,4 @@
-## reorder.phylo.R (2012-08-01)
+## reorder.phylo.R (2012-08-17)
  
  ##   Internal Reordering of Trees
  
@@ -21,7 +21,7 @@ reorder.phylo <- function(x, order = "cladewise", ...)
              .C("neworder_cladewise", as.integer(nb.tip),
                 as.integer(x$edge[, 1]), as.integer(x$edge[, 2]),
                 as.integer(nb.edge), integer(nb.edge),
-               PACKAGE = "ape")[[5]]
+               DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")[[5]]
      } else {
          ##node.degree <- tabulate(x$edge[, 1])
          neworder <-
@@ -29,7 +29,7 @@ reorder.phylo <- function(x, order = "cladewise", ...)
                 as.integer(nb.node), as.integer(x$edge[, 1]),
                 as.integer(x$edge[, 2]), as.integer(nb.edge),
                 integer(nb.edge), PACKAGE = "ape")[[6]]
-  }
+    }
      x$edge <- x$edge[neworder, ]
      if (!is.null(x$edge.length))
        x$edge.length <- x$edge.length[neworder]
diff --git a/src/reorder_phylo.c b/src/reorder_phylo.c

index 979b23e3e31233a69b47cb182ee08093652dbc6b..c9c0fabe9a6f37cc01810792ec6db6ec4b9ecc01 100644 (file)
--- a/src/reorder_phylo.c
+++ b/src/reorder_phylo.c
@@ -1,4 +1,4 @@
-/* reorder_phylo.c       2012-08-01 */
+/* reorder_phylo.c       2012-08-17 */
  
  /* Copyright 2008-2012 Emmanuel Paradis */
  
@@ -7,44 +7,62 @@
  
  #include <R.h>
  
-void neworder_cladewise(int *n, int *edge1, int *edge2,
-                       int *N, int *neworder)
-/* n: nb of tips, N: nb of edges */
+static int iii;
+
+void foo_reorder(int node, int n, int m, int *e1, int *e2, int *neworder, int *L, int *pos)
  {
-    int i, j, k, node, *done, dn, *node_back, eb;
-    done = &dn;
-    node_back = &eb;
-
-    /* done: indicates whether an edge has been collected
-       node_back: the series of node from the root to `node'
-       node: the current node */
-
-    done = (int*)R_alloc(*N, sizeof(int));
-    node_back = (int*)R_alloc(*N + 2 - *n, sizeof(int));
-    memset(done, 0, *N * sizeof(int));
-
-    j = k = 0;
-    node = *n + 1;
-    while (j < *N) {
-        for (i = 0; i < *N; i++) {
-           if (done[i] || edge1[i] != node) continue;
-           neworder[j] = i + 1;
-           j++;
-           done[i] = 1;
-           if (edge2[i] > *n) {
-               node_back[k] = node;
-               k++;
-               node = edge2[i];
-               /* if found a new node, reset the loop */
-               i = -1;
-           }
+       int i = node - n - 1, j, k;
+
+/* 'i' is the C index corresponding to 'node' */
+
+       for (j = 0; j < pos[i]; j++) {
+               k = L[i + m * j];
+               neworder[iii++] = k + 1;
+               if (e2[k] > n) /* is it an internal edge? */
+                       foo_reorder(e2[k], n, m, e1, e2, neworder, L, pos);
         }
-       /* if arrived at the end of `edge', go down one node */
-       k--;
-       node = node_back[k];
-    }
  }
  
+void neworder_cladewise(int *n, int *e1, int *e2, int *N, int *neworder)
+/* n: nb of tips
+   m: nb of nodes
+   N: nb of edges */
+{
+       int i, j, k, *L, *pos, m = *N - *n + 1, degrmax = *n - m + 1;
+
+/* degrmax is the largest value that a node degree can be */
+
+/* L is a 1-d array storing, for each node, the C indices of the rows of
+   the edge matrix where the node is ancestor (i.e., present in the 1st
+   column). It is used in the same way than a matrix (which is actually
+   a vector) is used in R as a 2-d structure. */
+
+       L = (int*)R_alloc(m * degrmax, sizeof(int));
+
+/* pos gives the position for each 'row' of L, that is the number of elements
+   which have already been stored for that 'row'. */
+
+       pos = (int*)R_alloc(m, sizeof(int));
+       memset(pos, 0, m * sizeof(int));
+
+/* we now go down along the edge matrix */
+
+       for (i = 0; i < *N; i++) {
+               k = e1[i] - *n - 1; /* k is the 'row' index in L corresponding to node e1[i] */
+               j = pos[k]; /* the current 'column' position corresping to k */
+               pos[k]++; /* increment in case the same node is found in another row of the edge matrix */
+               L[k + m * j] = i;
+       }
+
+/* L is now ready: we can start the recursive calls. */
+/* We start with the root 'n + 1': its index will be changed into
+   the corresponding C index inside the recursive function. */
+
+       iii = 0;
+       foo_reorder(*n + 1, *n, m, e1, e2, neworder, L, pos);
+}
+
+
  #define DO_NODE_PRUNING\
      /* go back down in `edge' to set `neworder' */\
      for (j = 0; j <= i; j++) {\
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Mon, 20 Aug 2012 08:45:48 +0000 (08:45 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Mon, 20 Aug 2012 08:45:48 +0000 (08:45 +0000)
DESCRIPTION		patch \| blob \| history
NEWS		patch \| blob \| history
R/cophenetic.phylo.R		patch \| blob \| history
R/reorder.phylo.R		patch \| blob \| history
src/reorder_phylo.c		patch \| blob \| history