From: paradis Date: Wed, 8 Feb 2012 13:01:16 +0000 (+0000) Subject: new files for ape 3.0 X-Git-Url: https://git.donarmstrong.com/?p=ape.git;a=commitdiff_plain;h=e9f75370481c37eb9d3d811ce7494f818b423136 new files for ape 3.0 git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@176 6e262413-ae40-0410-9e79-b911bd7a66b7 --- diff --git a/DESCRIPTION b/DESCRIPTION index d86b0c6..b16b849 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 3.0 -Date: 2012-01-13 +Date: 2012-02-03 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Andrei-Alin Popescu, Klaus Schliep, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/NEWS b/NEWS index 55d0adc..0ca6a14 100644 --- a/NEWS +++ b/NEWS @@ -3,8 +3,17 @@ NEW FEATURES + o The three functions dyule, dbd, and dbdTime calculate the + density probability (i.e., the distribution of the number of + species) for the Yule, the constant and the time-dependent + birth-beath models, respectively. These probabilities can be + conditional on no extinction and/or on a log-scale. + o plot.phylo() has a new option 'rotate.tree' to rotate unrooted, - fan or radial trees around the center of the plot. + fan, or radial trees around the center of the plot. + + o boot.phylo() and prop.clades() have a new option rooted = + FALSE. Note that the behaviour of prop.part() is unchanged. BUG FIXES @@ -15,7 +24,7 @@ BUG FIXES o extract.clade() sometimes shuffled some tip labels (thanks to Ludovic Mallet and Mahendra Mariadassou for the fix). - o clustal() should now by default find the executable under Windows. + o clustal() should now find by default the executable under Windows. OTHER CHANGES diff --git a/R/compute.brtime.R b/R/compute.brtime.R index 822f6f2..35a358a 100644 --- a/R/compute.brtime.R +++ b/R/compute.brtime.R @@ -16,7 +16,7 @@ compute.brtime <- m <- phy$Nnode N <- Nedge(phy) - ## x: branching times (aka, node ages or heights) + ## x: branching times (aka, node ages, depths, or heights) if (identical(method, "coalescent")) { # the default x <- 2 * rexp(m)/(as.double((m + 1):2) * as.double(m:1)) diff --git a/R/dbd.R b/R/dbd.R new file mode 100644 index 0000000..b6df418 --- /dev/null +++ b/R/dbd.R @@ -0,0 +1,121 @@ +## dbd.R (2012-02-01) + +## Probability Density Under Birth--Death Models + +## Copyright 2012 Emmanuel Paradis + +## This file is part of the R-package `ape'. +## See the file ../COPYING for licensing issues. + +dyule <- function(x, lambda = 0.1, t = 1, log = FALSE) +{ + tmp <- exp(-lambda * t) + res <- if (log) log(tmp) + (x - 1) * log(1 - tmp) else tmp * (1 - tmp)^(x - 1) + out.of.range <- x <= 0 + if (any(out.of.range)) + res[out.of.range] <- if (log) -Inf else 0 + res +} + +dbd <- function(x, lambda, mu, t, conditional = FALSE, log = FALSE) +{ + if (length(lambda) > 1) { + lambda <- lambda[1] + warning("only the first value of 'lambda' was considered") + } + if (length(mu) > 1) { + mu <- mu[1] + warning("only the first value of 'mu' was considered") + } + if (length(t) > 1) { + t <- t[1] + warning("only the first value of 't' was considered") + } + + if (mu == 0) return(ryule(x, lambda, t, log)) + + ## for the unconditional case, we have to consider x=0 separately: + if (!conditional) { + zero <- x == 0 + out.of.range <- x < 0 + } else { + out.of.range <- x <= 0 + } + + res <- numeric(length(x)) + + ## the situation were speciation and extinction probabilities are equal: + if (lambda == mu) { + tmp <- lambda * t + eta <- tmp/(1 + tmp) + if (conditional) { + res[] <- if (log) log(1 - eta) + (x - 1) * log(eta) else (1 - eta) * eta^(x - 1) + } else { # the unconditional case: + if (length(zero)) { + res[zero] <- eta + res[!zero] <- (1 - eta)^2 * eta^(x[!zero] - 1) + } else res[] <- (1 - eta)^2 * eta^(x - 1) + } + } else { # the general case with lambda != mu + + ## this expression is common to the conditional and unconditional cases: + Ent <- exp((lambda - mu) * t) + + if (conditional) { + if (log) { + res[] <- log(lambda - mu) - log(lambda * Ent - mu) + + (x - 1) * (log(lambda) + log(Ent - 1) - log(lambda * Ent - mu)) + } else { + eta <- lambda * (Ent - 1)/(lambda * Ent - mu) + res[] <- (1 - eta) * eta^(x - 1) + } + } else { # finally, the unconditional case: + eta <- lambda * (Ent - 1)/(lambda * Ent - mu) + if (length(zero)) { + res[zero] <- eta * mu / lambda + res[!zero] <- (1 - mu * eta / lambda) * (1 - eta) * eta^(x[!zero] - 1) + } else res[] <- (1 - mu * eta / lambda) * (1 - eta) * eta^(x - 1) + } + } + + if (any(out.of.range)) + res[out.of.range] <- if (log) -Inf else 0 + res +} + +dbdTime <- function(x, birth, death, t, conditional = FALSE, + BIRTH = NULL, DEATH = NULL, fast = FALSE) +{ + if (length(t) > 1) { + t <- t[1] + warning("only the first value of 't' was considered") + } + + if (conditional) { + PrNt <- function(t, T, x) { + tmp <- exp(-RHO(t, T)) + Wt <- tmp * (1 + INT(t)) + (1/Wt)*(1 - 1/Wt)^(x - 1) + } + } else { # the unconditional case: + PrNt <- function(t, T, x) + { + tmp <- exp(-RHO(t, T)) + Wt <- tmp * (1 + INT(t)) + out <- numeric(length(x)) + zero <- x == 0 + if (length(zero)) { + out[zero] <- 1 - tmp/Wt + out[!zero] <- (tmp/Wt^2)*(1 - 1/Wt)^(x[!zero] - 1) + } else out[] <- (tmp/Wt^2)*(1 - 1/Wt)^(x - 1) + out + } + } + case <- .getCase(birth, death, BIRTH, DEATH) + ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case = case, fast = fast) + RHO <- ff[[1]] + INT <- ff[[2]] + environment(RHO) <- environment(INT) <- environment() + Tmax <- t + PrNt(0, t, x) +} diff --git a/R/dist.topo.R b/R/dist.topo.R index 525178f..d4791c3 100644 --- a/R/dist.topo.R +++ b/R/dist.topo.R @@ -1,9 +1,9 @@ -## dist.topo.R (2011-07-13) +## dist.topo.R (2012-02-03) ## Topological Distances, Tree Bipartitions, ## Consensus Trees, and Bootstrapping Phylogenies -## Copyright 2005-2011 Emmanuel Paradis +## Copyright 2005-2012 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. @@ -157,24 +157,27 @@ plot.prop.part <- function(x, barcol = "blue", leftmar = 4, ...) mtext(attr(x, "labels"), side = 2, at = 1:n, las = 1) } -prop.clades <- function(phy, ..., part = NULL) +prop.clades <- function(phy, ..., part = NULL, rooted = FALSE) { if (is.null(part)) { + ## + ## Are we going to keep the '...' way of passing trees? obj <- list(...) if (length(obj) == 1 && class(obj[[1]]) != "phylo") - obj <- unlist(obj, recursive = FALSE) + obj <- unlist(obj, recursive = FALSE) + ## part <- prop.part(obj, check.labels = TRUE) } - bp <- .Call("bipartition", phy$edge, length(phy$tip.label), - phy$Nnode, PACKAGE = "ape") - if (!is.null(attr(part, "labels"))) - for (i in 1:length(part)) - part[[i]] <- sort(attr(part, "labels")[part[[i]]]) - bp <- lapply(bp, function(xx) sort(phy$tip.label[xx])) + + bp <- prop.part(phy) + if (!rooted) bp <- postprocess.prop.part(bp) + n <- numeric(phy$Nnode) - for (i in 1:phy$Nnode) { - for (j in 1:length(part)) { - if (identical(all.equal(bp[[i]], part[[j]]), TRUE)) { + for (i in seq_along(bp)) { + for (j in seq_along(part)) { + ## we rely on the fact the values returned by prop.part are + ## sorted and without attributes, so identical can be used: + if (identical(bp[[i]], part[[j]])) { n[i] <- attr(part, "number")[j] done <- TRUE break @@ -185,7 +188,7 @@ prop.clades <- function(phy, ..., part = NULL) } boot.phylo <- function(phy, x, FUN, B = 100, block = 1, - trees = FALSE, quiet = FALSE) + trees = FALSE, quiet = FALSE, rooted = FALSE) { if (is.list(x) && !is.data.frame(x)) { if (inherits(x, "DNAbin")) x <- as.matrix(x) @@ -207,7 +210,7 @@ boot.phylo <- function(phy, x, FUN, B = 100, block = 1, boot.samp <- numeric(ncol(x)) boot.samp[y] <- boot.i for (j in 1:(block - 1)) - boot.samp[y - j] <- boot.i - j + boot.samp[y - j] <- boot.i - j } else boot.samp <- sample(ncol(x), replace = TRUE) boot.tree[[i]] <- FUN(x[, boot.samp]) if (!quiet) utils::setTxtProgressBar(progbar, i/B) @@ -215,7 +218,11 @@ boot.phylo <- function(phy, x, FUN, B = 100, block = 1, if (!quiet) close(progbar) for (i in 1:B) storage.mode(boot.tree[[i]]$Nnode) <- "integer" storage.mode(phy$Nnode) <- "integer" - ans <- prop.clades(phy, boot.tree) + + pp <- prop.part(boot.tree) + if (!rooted) pp <- postprocess.prop.part(pp) + ans <- prop.clades(phy, part = pp, rooted = rooted) + ##ans <- attr(.Call("prop_part", c(list(phy), boot.tree), ## B + 1, FALSE, PACKAGE = "ape"), "number") - 1 if (trees) { @@ -225,6 +232,61 @@ boot.phylo <- function(phy, x, FUN, B = 100, block = 1, ans } +### The next function transforms an object of class "prop.part" so +### that the vectors which are identical in terms of split are aggregated. +### For instance if n = 5 tips, 1:2 and 3:5 actually represent the same +### split though they are different clades. The aggregation is done +### arbitrarily. The call to ONEwise() insures that all splits include +### the first tip. +postprocess.prop.part <- function(x) +{ + n <- length(x[[1]]) + N <- length(x) + w <- attr(x, "number") + + drop <- logical(N) + V <- numeric(n) + for (i in 2:(N - 1)) { + if (drop[i]) next + A <- x[[i]] + for (j in (i + 1):N) { + if (drop[j]) next + B <- x[[j]] + if (length(A) + length(B) != n) next + V[] <- 0L + V[A] <- 1L + V[B] <- 1L + if (all(V == 1L)) { + drop[j] <- TRUE + w[i] <- w[i] + w[j] + } + } + } + if (any(drop)) { + labels <- attr(x, "labels") + x <- x[!drop] + w <- w[!drop] + attr(x, "number") <- w + attr(x, "labels") <- labels + class(x) <- "prop.part" + } + ONEwise(x) +} + +### This function changes an object of class "prop.part" so that they +### all include the first tip. For instance if n = 5 tips, 3:5 is +### changed to 1:2. +ONEwise <- function(x) +{ + n <- length(x[[1L]]) + v <- 1:n + for (i in 2:length(x)) { + y <- x[[i]] + if (y[1] != 1) x[[i]] <- v[-y] + } + x +} + consensus <- function(..., p = 1, check.labels = TRUE) { foo <- function(ic, node) { diff --git a/man/ape-internal.Rd b/man/ape-internal.Rd index 73d53e6..838a3a3 100644 --- a/man/ape-internal.Rd +++ b/man/ape-internal.Rd @@ -29,11 +29,10 @@ \alias{plotPhyloCoor} \alias{integrateTrapeze} \alias{CDF.birth.death} +\alias{postprocess.prop.part} +\alias{ONEwise} \title{Internal Ape Functions} \description{ Internal \pkg{ape} functions. } -\note{ - These are not to be called by the user (unless you know what you're doing). -} \keyword{internal} diff --git a/man/boot.phylo.Rd b/man/boot.phylo.Rd index 6869f67..fcbf1f3 100644 --- a/man/boot.phylo.Rd +++ b/man/boot.phylo.Rd @@ -8,9 +8,9 @@ \title{Tree Bipartition and Bootstrapping Phylogenies} \usage{ boot.phylo(phy, x, FUN, B = 100, block = 1, - trees = FALSE, quiet = FALSE) + trees = FALSE, quiet = FALSE, rooted = FALSE) prop.part(..., check.labels = TRUE) -prop.clades(phy, ..., part = NULL) +prop.clades(phy, ..., part = NULL, rooted = FALSE) \method{print}{prop.part}(x, ...) \method{summary}{prop.part}(object, ...) \method{plot}{prop.part}(x, barcol = "blue", leftmar = 4, ...) @@ -27,6 +27,8 @@ prop.clades(phy, ..., part = NULL) \item{trees}{a logical specifying whether to return the bootstraped trees (\code{FALSE} by default).} \item{quiet}{a logical: a progress bar is displayed by default.} + \item{rooted}{a logical specifying whether the trees should be treated + as rooted or not (the default).} \item{\dots}{either (i) a single object of class \code{"phylo"}, (ii) a series of such objects separated by commas, or (iii) a list containing such objects. In the case of \code{plot} further diff --git a/man/dbd.Rd b/man/dbd.Rd new file mode 100644 index 0000000..f86b97b --- /dev/null +++ b/man/dbd.Rd @@ -0,0 +1,106 @@ +\name{dbd} +\alias{dyule} +\alias{dbd} +\alias{dbdTime} +\title{Probability Density Under Birth--Death Models} +\description{ + These functions compute the probability density under some + birth--death models, that is the probability of obtaining \emph{x} + species after a time \emph{t} giving how speciation and extinction + probabilities vary through time (these may be constant, or even equal + to zero for extinction). +} +\usage{ +dyule(x, lambda = 0.1, t = 1, log = FALSE) +dbd(x, lambda, mu, t, conditional = FALSE, log = FALSE) +dbdTime(x, birth, death, t, conditional = FALSE, + BIRTH = NULL, DEATH = NULL, fast = FALSE) +} +\arguments{ + \item{x}{a numeric vector of species numbers (see Details).} + \item{lambda}{a numerical value giving the probability of speciation; + can be a vector with several values for \code{dyule}.} + \item{mu}{id. for extinction.} + \item{t}{id. for the time(s).} + \item{log}{a logical value specifying whether the probabilities should + be returned log-transformed; the default is \code{FALSE}.} + \item{conditional}{a logical specifying whether the probabilities + should be computed conditional under the assumption of no extinction + after time \code{t}.} + \item{birth, death}{a (vectorized) function specifying how the + speciation or extinction probability changes through time (see + \code{\link{yule.time}} and below).} + \item{BIRTH, DEATH}{a (vectorized) function giving the primitive + of \code{birth} or \code{death}.} + \item{fast}{a logical value specifying whether to use faster + integration (see \code{\link{bd.time}}).} +} +\details{ + These three functions compute the probabilities to observe \code{x} + species starting from a single one after time units \code{t} (assumed + to be continuous). The first one is a short-cut for the second with + \code{mu = 0} and with default values for the two other parameters. + \code{dbdTime} is for time-varying \code{lambda} and \code{mu} + specified as \R functions. + + Only \code{dyule} is vectorized simultaneously on its three arguments + \code{x}, \code{lambda}, and \code{t}, according to \R's rules of + recycling arguments. The two others are vectorized only on \code{x}; + the other arguments are eventually shortened with a warning if + necessary. + + The returned value is, logically, zero for values of \code{x} out of + range, i.e., negative or zero for \code{dyule} or if \code{conditional + = TRUE}. However, it is not checked if the values of \code{x} are + non-integers and the probabilities are computed and returned. + + The details on the form of the arguments \code{birth}, \code{death}, + \code{BIRTH}, \code{DEATH}, and \code{fast} can be found in the links + below. +} +\note{ + If you use these functions to calculate a likelihood function, it is + strongly recommended to compute the log-likelihood with, for instance + in the case of a Yule process, \code{sum(dyule( , log = TRUE))} (see + examples). +} +\value{ + a numeric vector. +} +\references{ + Kendall, D. G. (1948) On the generalized ``birth-and-death'' + process. \emph{Annals of Mathematical Statistics}, \bold{19}, 1--15. +} +\author{Emmanuel Paradis} +\seealso{ + \code{\link{bd.time}}, \code{\link{yule.time}} +} +\examples{ +x <- 0:10 +plot(x, dyule(x), type = "h", main = "Density of the Yule process") +text(7, 0.85, expression(list(lambda == 0.1, t == 1))) + +y <- dbd(x, 0.1, 0.05, 10) +z <- dbd(x, 0.1, 0.05, 10, conditional = TRUE) +d <- rbind(y, z) +colnames(d) <- x +barplot(d, beside = TRUE, ylab = "Density", xlab = "Number of species", + legend = c("unconditional", "conditional on\nno extinction"), + args.legend = list(bty = "n")) +title("Density of the birth-death process") +text(17, 0.4, expression(list(lambda == 0.1, mu == 0.05, t == 10))) + +\dontrun{ +### generate 1000 values from a Yule process with lambda = 0.05 +x <- replicate(1e3, Ntip(rlineage(0.05, 0))) + +### the correct way to calculate the log-likelihood...: +sum(dyule(x, 0.05, 50, log = TRUE)) + +### ... and the wrong way: +log(prod(dyule(x, 0.05, 50))) + +### a third, less preferred, way: +sum(log(dyule(x, 0.05, 50))) +}} +\keyword{utilities}