From: paradis Date: Thu, 8 May 2008 05:14:48 +0000 (+0000) Subject: final wrap of ape 2.2 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=3f8df9b013dc4ed297c9b242cd833698ce7d015a;p=ape.git final wrap of ape 2.2 git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@30 6e262413-ae40-0410-9e79-b911bd7a66b7 --- diff --git a/Changes b/Changes index 9633e88..05cbfab 100644 --- a/Changes +++ b/Changes @@ -8,12 +8,18 @@ NEW FEATURES subtreeplot), and to return the graphical coordinates of tree (without plotting). - o The new function corPagel() implements the Pagel's "lambda" - correlation structure. + o The new functions corPagel and corBlomberg implement the Pagel's + "lambda" and Blomberg et al.'s "ACDC" correlation structures. o chronopl() has been improved and gains several options: see its help page for details. + o boot.phylo() has now an option 'trees' to possibly return the + bootstraped trees (the default is FALSE). + + o prop.part() has been improved and should now be faster in all + situations. + BUG FIXES @@ -29,10 +35,13 @@ BUG FIXES o zoom() failed when tip labels were used instead of their numbers (thanks to Yan Wong for the fix). - o drop.tip() failed with some trees (fixed by Yan Wong). + o drop.tip() failed with some trees (fixed by Yan Wong). o seg.sites() failed with a list. + o consensus() failed in some cases. The function has been improved + as well and is faster. + CHANGES IN APE VERSION 2.1-3 diff --git a/DESCRIPTION b/DESCRIPTION index abf1906..c4bf8f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 2.2 -Date: 2008-05-02 +Date: 2008-05-07 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, diff --git a/R/PGLS.R b/R/PGLS.R index 8c5ab74..df3e6b1 100644 --- a/R/PGLS.R +++ b/R/PGLS.R @@ -1,4 +1,4 @@ -## PGLS.R (2008-04-18) +## PGLS.R (2008-05-08) ## Phylogenetic Generalized Least Squares @@ -236,3 +236,43 @@ coef.corPagel <- function(object, unconstrained = TRUE, ...) names(aux) <- "lambda" aux } + +corBlomberg <- function(value, phy, form = ~1, fixed = FALSE) +{ + if (value <= 0) + stop("the value of g must be greater than 0.") + if (!("phylo" %in% class(phy))) + stop('object "phy" is not of class "phylo"') + attr(value, "formula") <- form + attr(value, "fixed") <- fixed + attr(value, "tree") <- phy + class(value) <- c("corBlomberg", "corPhyl", "corStruct") + value +} + +corMatrix.corBlomberg <- + function(object, covariate = getCovariate(object), corr = TRUE, ...) +{ + index <- attr(object, "index") + if (is.null(index)) + stop("object has not been initialized") + if (object[1] <= 0) + stop("the optimization has reached a value <= 0 for parameter 'g': +probably need to set 'fixed = TRUE' in corBlomberg().") + phy <- attr(object, "tree") + d <- (dist.nodes(phy)[length(phy$tip.label) + 1, ])^(1/object[1]) + phy$edge.length <- d[phy$edge[, 2]] - d[phy$edge[, 1]] + mat <- vcv.phylo(phy, cor = corr) + mat[index, index] +} + +coef.corBlomberg <- function(object, unconstrained = TRUE, ...) +{ + if (unconstrained) { + if (attr(object, "fixed")) return(numeric(0)) + else return(object[1]) + } + aux <- object[1] + names(aux) <- "g" + aux +} diff --git a/R/dist.topo.R b/R/dist.topo.R index 703f92d..cf1899a 100644 --- a/R/dist.topo.R +++ b/R/dist.topo.R @@ -1,4 +1,4 @@ -## dist.topo.R (2008-02-27) +## dist.topo.R (2008-05-07) ## Topological Distances, Tree Bipartitions, ## Consensus Trees, and Bootstrapping Phylogenies @@ -54,47 +54,50 @@ dist.topo <- function(x, y, method = "PH85") dT } +.compressTipLabel <- function(x) +{ + ## 'x' is a list of objects of class "phylo" possibly with no class + if (!is.null(attr(x, "TipLabel"))) return(x) + ref <- x[[1]]$tip.label + if (any(table(ref) != 1)) + stop("some tip labels are duplicated in tree no. 1") + n <- length(ref) + for (i in 2:length(x)) { + if (identical(x[[i]]$tip.label, ref)) next + ilab <- match(x[[i]]$tip.label, ref) + ## can use tabulate here because 'ilab' contains integers + if (any(tabulate(ilab) > 1)) + stop(paste("some tip labels are duplicated in tree no.", i)) + if (any(is.na(ilab))) + stop(paste("tree no.", i, "has different tip labels")) + ie <- match(1:n, x[[i]]$edge[, 2]) + x[[i]]$edge[ie, 2] <- ilab + } + for (i in 1:length(x)) x[[i]]$tip.label <- NULL + attr(x, "TipLabel") <- ref + x +} + prop.part <- function(..., check.labels = FALSE) { obj <- list(...) if (length(obj) == 1 && class(obj[[1]]) != "phylo") - obj <- unlist(obj, recursive = FALSE) + obj <- obj[[1]] + ## + ## class(obj) <- NULL # needed? + ## ntree <- length(obj) - if (!check.labels) { - for (i in 1:ntree) storage.mode(obj[[i]]$Nnode) <- "integer" - clades <- .Call("prop_part", obj, ntree, TRUE, PACKAGE = "ape") - attr(clades, "number") <- attr(clades, "number")[1:length(clades)] - attr(clades, "labels") <- obj[[1]]$tip.label - } else { - bp <- .Call("bipartition", obj[[1]]$edge, length(obj[[1]]$tip.label), - obj[[1]]$Nnode, PACKAGE = "ape") - clades <- lapply(bp, function(xx) sort(obj[[1]]$tip.label[xx])) - no <- rep(1, length(clades)) - - if (ntree > 1) { - for (k in 2:ntree) { - bp <- .Call("bipartition", obj[[k]]$edge, - length(obj[[k]]$tip.label), obj[[k]]$Nnode, - PACKAGE = "ape") - bp <- lapply(bp, function(xx) sort(obj[[k]]$tip.label[xx])) - for (i in 1:length(bp)) { - done <- FALSE - for (j in 1:length(clades)) { - if (identical(all.equal(bp[[i]], clades[[j]]), TRUE)) { - no[j] <- no[j] + 1 - done <- TRUE - break - } - } - if (!done) { - clades <- c(clades, bp[i]) - no <- c(no, 1) - } - } - } - } - attr(clades, "number") <- no - } + if (check.labels) obj <- .compressTipLabel(obj) + for (i in 1:ntree) storage.mode(obj[[i]]$Nnode) <- "integer" + ## + ## The 1st must have tip labels + ## Maybe simply pass the number of tips to the C code?? + if (!is.null(attr(obj, "TipLabel"))) + for (i in 1:ntree) obj[[i]]$tip.label <- attr(obj, "TipLabel") + ## + clades <- .Call("prop_part", obj, ntree, TRUE, PACKAGE = "ape") + attr(clades, "number") <- attr(clades, "number")[1:length(clades)] + attr(clades, "labels") <- obj[[1]]$tip.label class(clades) <- "prop.part" clades } @@ -162,7 +165,7 @@ prop.clades <- function(phy, ..., part = NULL) n } -boot.phylo <- function(phy, x, FUN, B = 100, block = 1) +boot.phylo <- function(phy, x, FUN, B = 100, block = 1, trees = FALSE) { if (is.list(x)) { if (class(x) == "DNAbin") x <- as.matrix(x) @@ -189,42 +192,71 @@ boot.phylo <- function(phy, x, FUN, B = 100, block = 1) } for (i in 1:B) storage.mode(boot.tree[[i]]$Nnode) <- "integer" storage.mode(phy$Nnode) <- "integer" - attr(.Call("prop_part", c(list(phy), boot.tree), B + 1, FALSE, - PACKAGE = "ape"), "number") - 1 + ans <- attr(.Call("prop_part", c(list(phy), boot.tree), + B + 1, FALSE, PACKAGE = "ape"), "number") - 1 + if (trees) ans <- list(BP = ans, trees = boot.tree) + ans } -consensus <- function(..., p = 1) +consensus <- function(..., p = 1, check.labels = FALSE) { + foo <- function(ic, node) { + ## ic: index of 'pp' + ## node: node number in the final tree + pool <- pp[[ic]] + if (ic < m) { + for (j in (ic + 1):m) { + wh <- match(pp[[j]], pool) + if (!any(is.na(wh))) { + edge[pos, 1] <<- node + pool <- pool[-wh] + edge[pos, 2] <<- nextnode <<- nextnode + 1L + pos <<- pos + 1L + foo(j, nextnode) + } + } + } + size <- length(pool) + if (size) { + ind <- pos:(pos + size - 1) + edge[ind, 1] <<- node + edge[ind, 2] <<- pool + pos <<- pos + size + } + } obj <- list(...) - if (length(obj) == 1 && class(obj[[1]]) != "phylo") - obj <- unlist(obj, recursive = FALSE) + if (length(obj) == 1) { + ## better than unlist(obj, recursive = FALSE) + ## because "[[" keeps the class of 'obj': + obj <- obj[[1]] + if (class(obj) == "phylo") return(obj) + } + if (!is.null(attr(obj, "TipLabel"))) + labels <- attr(obj, "TipLabel") + else { + labels <- obj[[1]]$tip.label + if (check.labels) obj <- .compressTipLabel(obj) + } ntree <- length(obj) ## Get all observed partitions and their frequencies: - pp <- prop.part(obj, check.labels = TRUE) + pp <- prop.part(obj, check.labels = FALSE) ## Drop the partitions whose frequency is less than 'p': pp <- pp[attr(pp, "number") >= p * ntree] ## Get the order of the remaining partitions by decreasing size: - ind <- rev(sort(unlist(lapply(pp, length)), - index.return = TRUE)$ix) - pp <- lapply(pp, function(xx) paste("IMPROBABLE_PREFIX", xx, - "IMPROBABLE_SUFFIX", sep = "_")) - STRING <- paste(pp[[1]], collapse = ",") - STRING <- paste("(", STRING, ");", sep = "") - for (i in ind[-1]) { - ## 1. Delete all tips in the focus partition: - STRING <- unlist(strsplit(STRING, paste(pp[[i]], collapse = "|"))) - ## 2. Put the partition in any of the created gaps: - STRING <- c(STRING[1], - paste("(", paste(pp[[i]], collapse = ","), ")", sep = ""), - STRING[-1]) - ## 3. Stick back the Newick string: - STRING <- paste(STRING, collapse = "") + ind <- sort(unlist(lapply(pp, length)), decreasing = TRUE, + index.return = TRUE)$ix + pp <- pp[ind] + n <- length(labels) + m <- length(pp) + edge <- matrix(0L, n + m - 1, 2) + if (m == 1) { + edge[, 1] <- n + 1L + edge[, 2] <- 1:n + } else { + nextnode <- n + 1L + pos <- 1L + foo(1, nextnode) } - ## Remove the extra commas: - STRING <- gsub(",{2,}", ",", STRING) - STRING <- gsub("\\(,", "\\(", STRING) - STRING <- gsub(",\\)", "\\)", STRING) - STRING <- gsub("IMPROBABLE_PREFIX_", "", STRING) - STRING <- gsub("_IMPROBABLE_SUFFIX", "", STRING) - read.tree(text = STRING) + structure(list(edge = edge, tip.label = labels, + Nnode = m), class = "phylo") } diff --git a/R/plot.phylo.R b/R/plot.phylo.R index 7d3ad8d..ca3ff1c 100644 --- a/R/plot.phylo.R +++ b/R/plot.phylo.R @@ -1,4 +1,4 @@ -## plot.phylo.R (2008-03-28) +## plot.phylo.R (2008-05-08) ## Plot Phylogenies @@ -246,7 +246,7 @@ plot.phylo <- function(x, type = "phylogram", use.edge.length = TRUE, if (direction == "downwards") y.lim[2] <- y.lim[2] + x$root.edge } ## fix by Klaus Schliep (2008-03-28): - asp <- if (type %in% c("fan", "radial")) NA else 1 + asp <- if (type %in% c("fan", "radial")) 1 else NA plot(0, type = "n", xlim = x.lim, ylim = y.lim, xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", asp = asp, ...) if (is.null(adj)) diff --git a/R/rtree.R b/R/rtree.R index 1c2052c..9425dad 100644 --- a/R/rtree.R +++ b/R/rtree.R @@ -1,4 +1,4 @@ -## rtree.R (2008-05-06) +## rtree.R (2008-05-07) ## Generates Random Trees @@ -14,39 +14,39 @@ rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, ...) n2 <- n - n1 po2 <- pos + 2*n1 - 1 edge[c(pos, po2), 1] <<- nod - nod <<- nod + 1 + nod <<- nod + 1L if (n1 > 2) { edge[pos, 2] <<- nod foo(n1, pos + 1) } else if (n1 == 2) { edge[c(pos + 1, pos + 2), 1] <<- edge[pos, 2] <<- nod - nod <<- nod + 1 + nod <<- nod + 1L } if (n2 > 2) { edge[po2, 2] <<- nod foo(n2, po2 + 1) } else if (n2 == 2) { edge[c(po2 + 1, po2 + 2), 1] <<- edge[po2, 2] <<- nod - nod <<- nod + 1 + nod <<- nod + 1L } } if (n < 2) stop("a tree must have at least 2 tips.") - nbr <- 2 * n - 2 - if (!rooted) nbr <- nbr - 1 + nbr <- 2 * n - 3 + rooted edge <- matrix(NA, nbr, 2) + n <- as.integer(n) if (n == 2) { - if (rooted) edge[] <- c(3, 3, 1, 2) + if (rooted) edge[] <- c(3L, 3L, 1L, 2L) else stop("an unrooted tree must have at least 3 tips.") } else if (n == 3) { edge[] <- - if (rooted) c(4, 5, 5, 4, 5, 1:3) - else c(4, 4, 4, 1:3) + if (rooted) c(4L, 5L, 5L, 4L, 5L, 1:3) + else c(4L, 4L, 4L, 1:3) } else if (n == 4 && !rooted) { - edge[] <- c(5, 6, 6, 5, 5, 6, 1:4) + edge[] <- c(5L, 6L, 6L, 5L, 5L, 6L, 1:4) } else { - nod <- n + 1 + nod <- n + 1L if (rooted) { # n > 3 foo(n, 1) ## The following is slightly more efficient than affecting the @@ -70,21 +70,21 @@ rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, ...) foo(n1, 2) } else if (n1 == 2) { edge[2:3, 1] <- edge[1, 2] <- nod - nod <- nod + 1 + nod <- nod + 1L } if (n2 > 2) { edge[po2, 2] <- nod foo(n2, po2 + 1) } else if (n2 == 2) { edge[c(po2 + 1, po2 + 2), 1] <- edge[po2, 2] <- nod - nod <- nod + 1 + nod <- nod + 1L } if (n3 > 2) { edge[po3, 2] <- nod foo(n3, po3 + 1) } else if (n3 == 2) { edge[c(po3 + 1, po3 + 2), 1] <- edge[po3, 2] <- nod - ## nod <- nod + 1 + ## nod <- nod + 1L } i <- which(is.na(edge[, 2])) edge[i, 2] <- 1:n @@ -95,30 +95,31 @@ rtree <- function(n, rooted = TRUE, tip.label = NULL, br = runif, ...) if (is.null(tip.label)) paste("t", sample(n), sep = "") else sample(tip.label) if (is.function(br)) phy$edge.length <- br(nbr, ...) - phy$Nnode <- n - 2 + rooted + phy$Nnode <- n - 2L + rooted class(phy) <- "phylo" phy } rcoal <- function(n, tip.label = NULL, br = "coalescent", ...) { + n <- as.integer(n) nbr <- 2*n - 2 edge <- matrix(NA, nbr, 2) ## coalescence times by default: x <- if (is.character(br)) 2*rexp(n - 1)/(n:2 * (n - 1):1) else br(n - 1, ...) if (n == 2) { - edge[] <- c(3, 3, 1:2) + edge[] <- c(3L, 3L, 1:2) edge.length <- rep(x, 2) } else if (n == 3) { - edge[] <- c(4, 5, 5, 4, 5, 1:3) + edge[] <- c(4L, 5L, 5L, 4L, 5L, 1:3) edge.length <- c(x[2], x[1], x[1], sum(x)) } else { edge.length <- numeric(nbr) h <- numeric(2*n - 1) # initialized with 0's node.height <- cumsum(x) pool <- 1:n - nextnode <- 2*n - 1 + nextnode <- 2L*n - 1L for (i in 1:(n - 1)) { y <- sample(pool, size = 2) ind <- (i - 1)*2 + 1:2 @@ -127,14 +128,14 @@ rcoal <- function(n, tip.label = NULL, br = "coalescent", ...) edge.length[ind] <- node.height[i] - h[y] h[nextnode] <- node.height[i] pool <- c(pool[! pool %in% y], nextnode) - nextnode <- nextnode - 1 + nextnode <- nextnode - 1L } } phy <- list(edge = edge, edge.length = edge.length) phy$tip.label <- if (is.null(tip.label)) paste("t", 1:n, sep = "") else tip.label - phy$Nnode <- n - 1 + phy$Nnode <- n - 1L class(phy) <- "phylo" ##reorder(phy) ## to avoid crossings when converting with as.hclust: diff --git a/man/boot.phylo.Rd b/man/boot.phylo.Rd index fa39eb7..5c05fd1 100644 --- a/man/boot.phylo.Rd +++ b/man/boot.phylo.Rd @@ -7,7 +7,7 @@ \alias{plot.prop.part} \title{Tree Bipartition and Bootstrapping Phylogenies} \usage{ -boot.phylo(phy, x, FUN, B = 100, block = 1) +boot.phylo(phy, x, FUN, B = 100, block = 1, trees = FALSE) prop.part(..., check.labels = FALSE) prop.clades(phy, ..., part = NULL) \method{print}{prop.part}(x, ...) @@ -23,6 +23,8 @@ prop.clades(phy, ..., part = NULL) \item{B}{the number of bootstrap replicates.} \item{block}{the number of columns in \code{x} that will be resampled together (see details).} + \item{trees}{a logical specifying whether to return the bootstraped + trees (\code{FALSE} by default).} \item{\dots}{either (i) a single object of class \code{"phylo"}, (ii) a series of such objects separated by commas, or (iii) a list containing such objects. In the case of \code{plot} further @@ -95,7 +97,10 @@ prop.clades(phy, ..., part = NULL) \code{prop.clades} and \code{boot.phylo} return a numeric vector which \emph{i}th element is the number associated to the \emph{i}th - node of \code{phy}. + node of \code{phy}. If \code{trees = TRUE}, \code{boot.phylo} returns + a list whose first element (named \code{"BP"}) is like before, and the + second element (\code{"trees"}) is a list with the bootstraped + trees. \code{summary} returns a numeric vector. } diff --git a/man/consensus.Rd b/man/consensus.Rd index 281eb46..c99dd36 100644 --- a/man/consensus.Rd +++ b/man/consensus.Rd @@ -2,7 +2,7 @@ \alias{consensus} \title{Concensus Trees} \usage{ -consensus(..., p = 1) +consensus(..., p = 1, check.labels = FALSE) } \arguments{ \item{...}{either (i) a single object of class \code{"phylo"}, (ii) a @@ -10,6 +10,10 @@ consensus(..., p = 1) containing such objects.} \item{p}{a numeric value between 0.5 and 1 giving the proportion for a clade to be represented in the consensus tree.} + \item{check.labels}{a logical specifying whether to check the labels + of each tree. If \code{FALSE} (the default), it is assumed that all + trees have the same tip labels, and that they are in the same order + (see details).} } \description{ Given a series of trees, this function returns the consensus tree. By @@ -17,6 +21,13 @@ consensus(..., p = 1) majority-rule consensus tree, use \code{p = 0.5}. Any value between 0.5 and 1 can be used. } +\details{ + Using (the default) \code{check.labels = FALSE} results in + considerable decrease in computing times. This requires that (i) all + trees have the same tip labels, \emph{and} (ii) these labels are + ordered similarly in all trees (in other words, the element + \code{tip.label} are identical in all trees). +} \value{ an object of class \code{"phylo"}. } diff --git a/man/corClasses.Rd b/man/corClasses.Rd index c7679be..cd1e0c4 100644 --- a/man/corClasses.Rd +++ b/man/corClasses.Rd @@ -11,6 +11,7 @@ (1997)} \item{corGrafen}{The covariance matrix defined in Grafen (1989)} \item{corPagel}{The covariance matrix defined in Freckelton et al. (2002)} + \item{corBlomberg}{The covariance matrix defined in Blomberg et al. (2003)} See the help page of each class for references and detailed description. @@ -19,7 +20,7 @@ \code{\link[nlme]{corClasses}} and \code{\link[nlme]{gls}} in the \pkg{nlme} librarie, \code{\link{corBrownian}}, \code{\link{corMartins}}, \code{\link{corGrafen}}, - \code{\link{corPagel}} + \code{\link{corPagel}}, \code{\link{corBlomberg}} } \author{Julien Dutheil \email{julien.dutheil@univ-montp2.fr}, Emmanuel Paradis}