From: paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Date: Thu, 8 May 2008 05:14:48 +0000 (+0000)
Subject: final wrap of ape 2.2
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=3f8df9b013dc4ed297c9b242cd833698ce7d015a;p=ape.git

final wrap of ape 2.2

git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@30 6e262413-ae40-0410-9e79-b911bd7a66b7
---

diff --git a/Changes b/Changes
index 9633e88..05cbfab 100644
--- a/Changes
+++ b/Changes
@@ -8,12 +8,18 @@ NEW FEATURES
       subtreeplot), and to return the graphical coordinates of tree
       (without plotting).
 
-    o The new function corPagel() implements the Pagel's "lambda"
-      correlation structure.
+    o The new functions corPagel and corBlomberg implement the Pagel's
+      "lambda" and Blomberg et al.'s "ACDC" correlation structures.
 
     o chronopl() has been improved and gains several options: see its
       help page for details.
 
+    o boot.phylo() has now an option 'trees' to possibly return the
+      bootstraped trees (the default is FALSE).
+
+    o prop.part() has been improved and should now be faster in all
+      situations.
+
 
 BUG FIXES
 
@@ -29,10 +35,13 @@ BUG FIXES
     o zoom() failed when tip labels were used instead of their numbers
       (thanks to Yan Wong for the fix).
 
-    o drop.tip() failed with some trees  (fixed by Yan Wong).
+    o drop.tip() failed with some trees (fixed by Yan Wong).
 
     o seg.sites() failed with a list.
 
+    o consensus() failed in some cases. The function has been improved
+      as well and is faster.
+
 
 
 		CHANGES IN APE VERSION 2.1-3
diff --git a/DESCRIPTION b/DESCRIPTION
index abf1906..c4bf8f7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ape
 Version: 2.2
-Date: 2008-05-02
+Date: 2008-05-07
 Title: Analyses of Phylogenetics and Evolution
 Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong,
   Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel,
diff --git a/R/PGLS.R b/R/PGLS.R
index 8c5ab74..df3e6b1 100644
--- a/R/PGLS.R
+++ b/R/PGLS.R
@@ -1,4 +1,4 @@
-## PGLS.R (2008-04-18)
+## PGLS.R (2008-05-08)
 
 ##   Phylogenetic Generalized Least Squares
 
@@ -236,3 +236,43 @@ coef.corPagel <- function(object, unconstrained = TRUE, ...)
     names(aux) <- "lambda"
     aux
 }
+
+corBlomberg <- function(value, phy, form = ~1, fixed = FALSE)
+{
+    if (value <= 0)
+        stop("the value of g must be greater than 0.")
+    if (!("phylo" %in% class(phy)))
+        stop('object "phy" is not of class "phylo"')
+    attr(value, "formula") <- form
+    attr(value, "fixed") <- fixed
+    attr(value, "tree") <- phy
+    class(value) <- c("corBlomberg", "corPhyl", "corStruct")
+    value
+}
+
+corMatrix.corBlomberg <-
+    function(object, covariate = getCovariate(object), corr = TRUE, ...)
+{
+    index <- attr(object, "index")
+    if (is.null(index))
+        stop("object has not been initialized")
+    if (object[1] <= 0)
+        stop("the optimization has reached a value <= 0 for parameter 'g':
+probably need to set 'fixed = TRUE' in corBlomberg().")
+    phy <- attr(object, "tree")
+    d <- (dist.nodes(phy)[length(phy$tip.label) + 1, ])^(1/object[1])
+    phy$edge.length <- d[phy$edge[, 2]] - d[phy$edge[, 1]]
+    mat <- vcv.phylo(phy, cor = corr)
+    mat[index, index]
+}
+
+coef.corBlomberg <- function(object, unconstrained = TRUE, ...)
+{
+    if (unconstrained) {
+        if (attr(object, "fixed")) return(numeric(0))
+        else return(object[1])
+    }
+    aux <- object[1]
+    names(aux) <- "g"
+    aux
+}
diff --git a/R/dist.topo.R b/R/dist.topo.R
index 703f92d..cf1899a 100644
--- a/R/dist.topo.R
+++ b/R/dist.topo.R
@@ -1,4 +1,4 @@
-## dist.topo.R (2008-02-27)
+## dist.topo.R (2008-05-07)
 
 ##      Topological Distances, Tree Bipartitions,
 ##   Consensus Trees, and Bootstrapping Phylogenies
@@ -54,47 +54,50 @@ dist.topo <- function(x, y, method = "PH85")
     dT
 }
 
+.compressTipLabel <- function(x)
+{
+    ## 'x' is a list of objects of class "phylo" possibly with no class
+    if (!is.null(attr(x, "TipLabel"))) return(x)
+    ref <- x[[1]]$tip.label
+    if (any(table(ref) != 1))
+        stop("some tip labels are duplicated in tree no. 1")
+    n <- length(ref)
+    for (i in 2:length(x)) {
+        if (identical(x[[i]]$tip.label, ref)) next
+        ilab <- match(x[[i]]$tip.label, ref)
+        ## can use tabulate here because 'ilab' contains integers
+        if (any(tabulate(ilab) > 1))
+            stop(paste("some tip labels are duplicated in tree no.", i))
+        if (any(is.na(ilab)))
+            stop(paste("tree no.", i, "has different tip labels"))
+        ie <- match(1:n, x[[i]]$edge[, 2])
+        x[[i]]$edge[ie, 2] <- ilab
+    }
+    for (i in 1:length(x)) x[[i]]$tip.label <- NULL
+    attr(x, "TipLabel") <- ref
+    x
+}
+
 prop.part <- function(..., check.labels = FALSE)
 {
     obj <- list(...)
     if (length(obj) == 1 && class(obj[[1]]) != "phylo")
-      obj <- unlist(obj, recursive = FALSE)
+        obj <- obj[[1]]
+    ## <FIXME>
+    ## class(obj) <- NULL # needed?
+    ## </FIXME>
     ntree <- length(obj)
-    if (!check.labels) {
-        for (i in 1:ntree) storage.mode(obj[[i]]$Nnode) <- "integer"
-        clades <- .Call("prop_part", obj, ntree, TRUE, PACKAGE = "ape")
-        attr(clades, "number") <- attr(clades, "number")[1:length(clades)]
-        attr(clades, "labels") <- obj[[1]]$tip.label
-    } else {
-        bp <- .Call("bipartition", obj[[1]]$edge, length(obj[[1]]$tip.label),
-                    obj[[1]]$Nnode, PACKAGE = "ape")
-        clades <- lapply(bp, function(xx) sort(obj[[1]]$tip.label[xx]))
-        no <- rep(1, length(clades))
-
-        if (ntree > 1) {
-            for (k in 2:ntree) {
-                bp <- .Call("bipartition", obj[[k]]$edge,
-                            length(obj[[k]]$tip.label), obj[[k]]$Nnode,
-                            PACKAGE = "ape")
-                bp <- lapply(bp, function(xx) sort(obj[[k]]$tip.label[xx]))
-                for (i in 1:length(bp)) {
-                    done <- FALSE
-                    for (j in 1:length(clades)) {
-                        if (identical(all.equal(bp[[i]], clades[[j]]), TRUE)) {
-                            no[j] <- no[j] + 1
-                            done <- TRUE
-                            break
-                        }
-                    }
-                    if (!done) {
-                        clades <- c(clades, bp[i])
-                        no <- c(no, 1)
-                    }
-                }
-            }
-        }
-        attr(clades, "number") <- no
-    }
+    if (check.labels) obj <- .compressTipLabel(obj)
+    for (i in 1:ntree) storage.mode(obj[[i]]$Nnode) <- "integer"
+    ## <FIXME>
+    ## The 1st must have tip labels
+    ## Maybe simply pass the number of tips to the C code??
+    if (!is.null(attr(obj, "TipLabel")))
+        for (i in 1:ntree) obj[[i]]$tip.label <- attr(obj, "TipLabel")
+    ## </FIXME>
+    clades <- .Call("prop_part", obj, ntree, TRUE, PACKAGE = "ape")
+    attr(clades, "number") <- attr(clades, "number")[1:length(clades)]
+    attr(clades, "labels") <- obj[[1]]$tip.label
     class(clades) <- "prop.part"
     clades
 }
@@ -162,7 +165,7 @@ prop.clades <- function(phy, ..., part = NULL)
     n
 }
 
-boot.phylo <- function(phy, x, FUN, B = 100, block = 1)
+boot.phylo <- function(phy, x, FUN, B = 100, block = 1, trees = FALSE)
 {
     if (is.list(x)) {
         if (class(x) == "DNAbin") x <- as.matrix(x)
@@ -189,42 +192,71 @@ boot.phylo <- function(phy, x, FUN, B = 100, block = 1)
     }
     for (i in 1:B) storage.mode(boot.tree[[i]]$Nnode) <- "integer"
     storage.mode(phy$Nnode) <- "integer"
-    attr(.Call("prop_part", c(list(phy), boot.tree), B + 1, FALSE,
-               PACKAGE = "ape"), "number") - 1
+    ans <- attr(.Call("prop_part", c(list(phy), boot.tree),
+                      B + 1, FALSE, PACKAGE = "ape"), "number") - 1
+    if (trees) ans <- list(BP = ans, trees = boot.tree)
+    ans
 }
 
-consensus <- function(..., p = 1)
+consensus <- function(..., p = 1, check.labels = FALSE)
 {
+    foo <- function(ic, node) {
+        ## ic: index of 'pp'
+        ## node: node number in the final tree
+        pool <- pp[[ic]]
+        if (ic < m) {
+            for (j in (ic + 1):m) {
+                wh <- match(pp[[j]], pool)
+                if (!any(is.na(wh))) {
+                    edge[pos, 1] <<- node
+                    pool <- pool[-wh]
+                    edge[pos, 2] <<- nextnode <<- nextnode + 1L
+                    pos <<- pos + 1L
+                    foo(j, nextnode)
+                }
+            }
+        }
+        size <- length(pool)
+        if (size) {
+            ind <- pos:(pos + size - 1)
+            edge[ind, 1] <<- node
+            edge[ind, 2] <<- pool
+            pos <<- pos + size
+        }
+    }
     obj <- list(...)
-    if (length(obj) == 1 && class(obj[[1]]) != "phylo")
-      obj <- unlist(obj, recursive = FALSE)
+    if (length(obj) == 1) {
+        ## better than unlist(obj, recursive = FALSE)
+        ## because "[[" keeps the class of 'obj':
+        obj <- obj[[1]]
+        if (class(obj) == "phylo") return(obj)
+    }
+    if (!is.null(attr(obj, "TipLabel")))
+        labels <- attr(obj, "TipLabel")
+    else {
+        labels <- obj[[1]]$tip.label
+        if (check.labels) obj <- .compressTipLabel(obj)
+    }
     ntree <- length(obj)
     ## Get all observed partitions and their frequencies:
-    pp <- prop.part(obj, check.labels = TRUE)
+    pp <- prop.part(obj, check.labels = FALSE)
     ## Drop the partitions whose frequency is less than 'p':
     pp <- pp[attr(pp, "number") >= p * ntree]
     ## Get the order of the remaining partitions by decreasing size:
-    ind <- rev(sort(unlist(lapply(pp, length)),
-                    index.return = TRUE)$ix)
-    pp <- lapply(pp, function(xx) paste("IMPROBABLE_PREFIX", xx,
-                                        "IMPROBABLE_SUFFIX", sep = "_"))
-    STRING <- paste(pp[[1]], collapse = ",")
-    STRING <- paste("(", STRING, ");", sep = "")
-    for (i in ind[-1]) {
-        ## 1. Delete all tips in the focus partition:
-        STRING <- unlist(strsplit(STRING, paste(pp[[i]], collapse = "|")))
-        ## 2. Put the partition in any of the created gaps:
-        STRING <- c(STRING[1],
-                    paste("(", paste(pp[[i]], collapse = ","), ")", sep = ""),
-                    STRING[-1])
-        ## 3. Stick back the Newick string:
-        STRING <- paste(STRING, collapse = "")
+    ind <- sort(unlist(lapply(pp, length)), decreasing = TRUE,
+                index.return = TRUE)$ix
+    pp <- pp[ind]
+    n <- length(labels)
+    m <- length(pp)
+    edge <- matrix(0L, n + m - 1, 2)
+    if (m == 1) {
+        edge[, 1] <- n + 1L
+        edge[, 2] <- 1:n
+    } else {
+        nextnode <- n + 1L
+        pos <- 1L
+        foo(1, nextnode)
     }
-    ## Remove the extra commas:
-    STRING <- gsub(",{2,}", ",", STRING)
-    STRING <- gsub("\\(,", "\\(", STRING)
-    STRING <- gsub(",\\)", "\\)", STRING)
-    STRING <- gsub("IMPROBABLE_PREFIX_", "", STRING)
-    STRING <- gsub("_IMPROBABLE_SUFFIX", "", STRING)
-    read.tree(text = STRING)
+    structure(list(edge = edge, tip.label = labels,
+              Nnode = m), class = "phylo")
 }
diff --git a/R/plot.phylo.R b/R/plot.phylo.R
index 7d3ad8d..ca3ff1c 100644
--- a/R/plot.phylo.R
+++ b/R/plot.phylo.R
@@ -1,4 +1,4 @@
-## plot.phylo.R (2008-03-28)
+## plot.phylo.R (2008-05-08)
 
 ##   Plot Phylogenies
 
@@ -246,7 +246,7 @@ plot.phylo <- function(x, type = "phylogram", use.edge.length = TRUE,
         if (direction == "downwards") y.lim[2] <- y.lim[2] + x$root.edge
     }
     ## fix by Klaus Schliep (2008-03-28):
-    asp <- if (type %in% c("fan", "radial")) NA else 1
+    asp <- if (type %in% c("fan", "radial")) 1 else NA
     plot(0, type = "n", xlim = x.lim, ylim = y.lim, xlab = "",
          ylab = "", xaxt = "n", yaxt = "n", bty = "n", asp = asp, ...)
     if (is.null(adj))
diff --git a/R/rtree.R b/R/rtree.R
index 1c2052c..9425dad 100644
--- a/R/rtree.R
+++ b/R/rtree.R
@@ -1,4 +1,4 @@
-## rtree.R (2008-05-06)
+## rtree.R (2008-05-07)
 
 ##   Generates Random Trees
 
@@ -14,39 +14,39 @@ rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, ...)
         n2 <- n - n1
         po2 <- pos + 2*n1 - 1
         edge[c(pos, po2), 1] <<- nod
-        nod <<- nod + 1
+        nod <<- nod + 1L
         if (n1 > 2) {
             edge[pos, 2] <<- nod
             foo(n1, pos + 1)
         } else if (n1 == 2) {
             edge[c(pos + 1, pos + 2), 1] <<- edge[pos, 2] <<- nod
-            nod <<- nod + 1
+            nod <<- nod + 1L
         }
         if (n2 > 2) {
             edge[po2, 2] <<- nod
             foo(n2, po2 + 1)
         } else if (n2 == 2) {
             edge[c(po2 + 1, po2 + 2), 1] <<- edge[po2, 2] <<- nod
-            nod <<- nod + 1
+            nod <<- nod + 1L
         }
     }
 
     if (n < 2) stop("a tree must have at least 2 tips.")
-    nbr <- 2 * n - 2
-    if (!rooted) nbr <- nbr - 1
+    nbr <- 2 * n - 3 + rooted
     edge <- matrix(NA, nbr, 2)
 
+    n <- as.integer(n)
     if (n == 2) {
-        if (rooted) edge[] <- c(3, 3, 1, 2)
+        if (rooted) edge[] <- c(3L, 3L, 1L, 2L)
         else stop("an unrooted tree must have at least 3 tips.")
     } else if (n == 3) {
         edge[] <-
-          if (rooted) c(4, 5, 5, 4, 5, 1:3)
-          else c(4, 4, 4, 1:3)
+          if (rooted) c(4L, 5L, 5L, 4L, 5L, 1:3)
+          else c(4L, 4L, 4L, 1:3)
     } else if (n == 4 && !rooted) {
-        edge[] <- c(5, 6, 6, 5, 5, 6, 1:4)
+        edge[] <- c(5L, 6L, 6L, 5L, 5L, 6L, 1:4)
     } else {
-        nod <- n + 1
+        nod <- n + 1L
         if (rooted) { # n > 3
             foo(n, 1)
             ## The following is slightly more efficient than affecting the
@@ -70,21 +70,21 @@ rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, ...)
                 foo(n1, 2)
             } else if (n1 == 2) {
                 edge[2:3, 1] <- edge[1, 2] <- nod
-                nod <- nod + 1
+                nod <- nod + 1L
             }
             if (n2 > 2) {
                 edge[po2, 2] <- nod
                 foo(n2, po2 + 1)
             } else if (n2 == 2) {
                 edge[c(po2 + 1, po2 + 2), 1] <- edge[po2, 2] <- nod
-                nod <- nod + 1
+                nod <- nod + 1L
             }
             if (n3 > 2) {
                 edge[po3, 2] <- nod
                 foo(n3, po3 + 1)
             } else if (n3 == 2) {
                 edge[c(po3 + 1, po3 + 2), 1] <- edge[po3, 2] <- nod
-                ## nod <- nod + 1
+                ## nod <- nod + 1L
             }
             i <- which(is.na(edge[, 2]))
             edge[i, 2] <- 1:n
@@ -95,30 +95,31 @@ rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, ...)
       if (is.null(tip.label)) paste("t", sample(n), sep = "")
       else sample(tip.label)
     if (is.function(br)) phy$edge.length <- br(nbr, ...)
-    phy$Nnode <- n - 2 + rooted
+    phy$Nnode <- n - 2L + rooted
     class(phy) <- "phylo"
     phy
 }
 
 rcoal <- function(n, tip.label = NULL, br = "coalescent", ...)
 {
+    n <- as.integer(n)
     nbr <- 2*n - 2
     edge <- matrix(NA, nbr, 2)
     ## coalescence times by default:
     x <- if (is.character(br)) 2*rexp(n - 1)/(n:2 * (n - 1):1)
     else br(n - 1, ...)
     if (n == 2) {
-        edge[] <- c(3, 3, 1:2)
+        edge[] <- c(3L, 3L, 1:2)
         edge.length <- rep(x, 2)
     } else if (n == 3) {
-        edge[] <- c(4, 5, 5, 4, 5, 1:3)
+        edge[] <- c(4L, 5L, 5L, 4L, 5L, 1:3)
         edge.length <- c(x[2], x[1], x[1], sum(x))
     } else {
         edge.length <- numeric(nbr)
         h <- numeric(2*n - 1) # initialized with 0's
         node.height <- cumsum(x)
         pool <- 1:n
-        nextnode <- 2*n - 1
+        nextnode <- 2L*n - 1L
         for (i in 1:(n - 1)) {
             y <- sample(pool, size = 2)
             ind <- (i - 1)*2 + 1:2
@@ -127,14 +128,14 @@ rcoal <- function(n, tip.label = NULL, br = "coalescent", ...)
             edge.length[ind] <- node.height[i] - h[y]
             h[nextnode] <- node.height[i]
             pool <- c(pool[! pool %in% y], nextnode)
-            nextnode <- nextnode - 1
+            nextnode <- nextnode - 1L
         }
     }
     phy <- list(edge = edge, edge.length = edge.length)
     phy$tip.label <-
       if (is.null(tip.label)) paste("t", 1:n, sep = "")
       else tip.label
-    phy$Nnode <- n - 1
+    phy$Nnode <- n - 1L
     class(phy) <- "phylo"
     ##reorder(phy)
     ## to avoid crossings when converting with as.hclust:
diff --git a/man/boot.phylo.Rd b/man/boot.phylo.Rd
index fa39eb7..5c05fd1 100644
--- a/man/boot.phylo.Rd
+++ b/man/boot.phylo.Rd
@@ -7,7 +7,7 @@
 \alias{plot.prop.part}
 \title{Tree Bipartition and Bootstrapping Phylogenies}
 \usage{
-boot.phylo(phy, x, FUN, B = 100, block = 1)
+boot.phylo(phy, x, FUN, B = 100, block = 1, trees = FALSE)
 prop.part(..., check.labels = FALSE)
 prop.clades(phy, ..., part = NULL)
 \method{print}{prop.part}(x, ...)
@@ -23,6 +23,8 @@ prop.clades(phy, ..., part = NULL)
   \item{B}{the number of bootstrap replicates.}
   \item{block}{the number of columns in \code{x} that will be resampled
     together (see details).}
+  \item{trees}{a logical specifying whether to return the bootstraped
+    trees (\code{FALSE} by default).}
   \item{\dots}{either (i) a single object of class \code{"phylo"}, (ii) a
     series of such objects separated by commas, or (iii) a list
     containing such objects. In the case of \code{plot} further
@@ -95,7 +97,10 @@ prop.clades(phy, ..., part = NULL)
 
   \code{prop.clades} and \code{boot.phylo} return a numeric vector
   which \emph{i}th element is the number associated to the \emph{i}th
-  node of \code{phy}.
+  node of \code{phy}. If \code{trees = TRUE}, \code{boot.phylo} returns
+  a list whose first element (named \code{"BP"}) is like before, and the
+  second element (\code{"trees"}) is a list with the bootstraped
+  trees.
 
   \code{summary} returns a numeric vector.
 }
diff --git a/man/consensus.Rd b/man/consensus.Rd
index 281eb46..c99dd36 100644
--- a/man/consensus.Rd
+++ b/man/consensus.Rd
@@ -2,7 +2,7 @@
 \alias{consensus}
 \title{Concensus Trees}
 \usage{
-consensus(..., p = 1)
+consensus(..., p = 1, check.labels = FALSE)
 }
 \arguments{
   \item{...}{either (i) a single object of class \code{"phylo"}, (ii) a
@@ -10,6 +10,10 @@ consensus(..., p = 1)
     containing such objects.}
   \item{p}{a numeric value between 0.5 and 1 giving the proportion for a
     clade to be represented in the consensus tree.}
+  \item{check.labels}{a logical specifying whether to check the labels
+    of each tree. If \code{FALSE} (the default), it is assumed that all
+    trees have the same tip labels, and that they are in the same order
+    (see details).}
 }
 \description{
   Given a series of trees, this function returns the consensus tree. By
@@ -17,6 +21,13 @@ consensus(..., p = 1)
   majority-rule consensus tree, use \code{p = 0.5}. Any value between
   0.5 and 1 can be used.
 }
+\details{
+  Using (the default) \code{check.labels = FALSE} results in
+  considerable decrease in computing times. This requires that (i) all
+  trees have the same tip labels, \emph{and} (ii) these labels are
+  ordered similarly in all trees (in other words, the element
+  \code{tip.label} are identical in all trees).
+}
 \value{
   an object of class \code{"phylo"}.
 }
diff --git a/man/corClasses.Rd b/man/corClasses.Rd
index c7679be..cd1e0c4 100644
--- a/man/corClasses.Rd
+++ b/man/corClasses.Rd
@@ -11,6 +11,7 @@
     (1997)}
   \item{corGrafen}{The covariance matrix defined in Grafen (1989)}
   \item{corPagel}{The covariance matrix defined in Freckelton et al. (2002)}
+  \item{corBlomberg}{The covariance matrix defined in Blomberg et al. (2003)}
 
   See the help page of each class for references and detailed
   description.
@@ -19,7 +20,7 @@
   \code{\link[nlme]{corClasses}} and \code{\link[nlme]{gls}} in the
   \pkg{nlme} librarie, \code{\link{corBrownian}},
   \code{\link{corMartins}}, \code{\link{corGrafen}},
-  \code{\link{corPagel}}
+  \code{\link{corPagel}}, \code{\link{corBlomberg}}
 }
 \author{Julien Dutheil \email{julien.dutheil@univ-montp2.fr}, Emmanuel
   Paradis}