o The new functions rlineage and rbdtree simulate phylogenies under
any user-defined time-dependent speciation-extinction model. They
- use new continuous time algorithms.
+ use continuous time algorithms.
o The new function drop.fossil removes the extinct species from a
phylogeny.
+ o The new function bd.time fits a user-defined time-dependent
+ birth-death model. It is a generalization of yule.time() taking
+ extinction into account.
+
o The new function MPR does most parsimonious reconstruction of
discrete characters.
Package: ape
Version: 2.6
-Date: 2010-09-16
+Date: 2010-09-27
Title: Analyses of Phylogenetics and Evolution
Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne
Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
-## CDF.birth.death.R (2010-08-09)
+## CDF.birth.death.R (2010-09-27)
## Functions to simulate and fit
## Time-Dependent Birth-Death Models
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
+integrateTrapeze <- function(FUN, from, to, nint = 10)
## compute an integral with a simple trapeze method
## (apparently, Vectorize doesn't give faster calculation)
-integrateTrapeze <- function(FUN, from, to, nint = 10)
{
x <- seq(from = from, to = to, length.out = nint + 1)
## reorganized to minimize the calls to FUN:
}
}
-###.getRHO <- function(birth, death, BIRTH = NULL, DEATH = NULL, case)
-###{
-### ## build the RHO(), \rho(t), function
-### switch (case,
-### function(t1, t2) (t2 - t1)*(death - birth), # case 1
-### function(t1, t2) # case 2
-### integrate(function(t) death(t) - birth(t), t1, t2)$value,
-### function(t1, t2)
-### DEATH(t2) - BIRTH(t2) - DEATH(t1) + BIRTH(t1), # case 3
-### function(t1, t2)
-### death - integrate(birth, t1, t2)$value, # case 4
-### function(t1, t2)
-### integrate(death, t1, t2)$value - birth, # case 5
-### function(t1, t2) death - BIRTH(t2) + BIRTH(t1), # case 6
-### function(t1, t2) DEATH(t2) - DEATH(t1) - birth # case 7
-### )
-###}
-
.getRHOetINT <- function(birth, death, BIRTH = NULL, DEATH = NULL, case, fast)
{
## build the RHO(), \rho(t), and INT(), I(t), functions
foo(1L, 0)
.makePhylo(edge[1:j, ], edge.length[1:j], i)
}
+
+bd.time <- function(phy, birth, death, BIRTH = NULL, DEATH = NULL,
+ ip, lower, upper, fast = FALSE, boot = 0, trace = 0)
+{
+ guess.bounds <- if (missing(lower)) TRUE else FALSE
+ BIG <- 1e10
+ PrNt <- function(t, T, x)
+ {
+ tmp <- exp(-RHO(t, T))
+ Wt <- tmp * (1 + INT(t))
+ out <- numeric(length(x))
+ zero <- x == 0
+ if (length(zero)) {
+ out[zero] <- 1 - tmp/Wt
+ out[!zero] <- (tmp/Wt^2)*(1 - 1/Wt)^(x[!zero] - 1)
+ } else out[] <- (tmp/Wt^2)*(1 - 1/Wt)^(x - 1)
+ out
+ }
+
+ case <- .getCase(birth, death, BIRTH, DEATH)
+
+ if (is.function(birth)) {
+ paranam <- names(formals(birth))
+ if (guess.bounds) {
+ upper <- rep(BIG, length(paranam))
+ lower <- -upper
+ }
+ formals(birth) <- alist(t=)
+ environment(birth) <- environment()
+ if (!is.null(BIRTH)) environment(BIRTH) <- environment()
+ } else {
+ paranam <- "birth"
+ if (guess.bounds) {
+ upper <- 1
+ lower <- 0
+ }
+ }
+
+ if (is.function(death)) {
+ tmp <- names(formals(death))
+ np2 <- length(tmp)
+ if (guess.bounds) {
+ upper <- c(upper, rep(BIG, np2))
+ lower <- c(lower, rep(-BIG, np2))
+ }
+ paranam <- c(paranam, tmp)
+ formals(death) <- alist(t=)
+ environment(death) <- environment()
+ if (!is.null(DEATH)) environment(DEATH) <- environment()
+ } else {
+ paranam <- c(paranam, "death")
+ if (guess.bounds) {
+ upper <- c(upper, .1)
+ lower <- c(lower, 0)
+ }
+ }
+
+ np <- length(paranam)
+
+ ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case = case, fast = fast)
+ RHO <- ff[[1]]
+ INT <- ff[[2]]
+ environment(RHO) <- environment(INT) <- environment()
+
+ x <- branching.times(phy)
+ n <- length(x)
+ Tmax <- x[1]
+ x <- Tmax - x # change the time scale so the root is t=0
+ x <- sort(x)
+
+ foo <- function(para) {
+ for (i in 1:np)
+ assign(paranam[i], para[i], pos = sys.frame(1))
+ p <- CDF.birth.death(birth, death, BIRTH, DEATH, Tmax = Tmax,
+ x = x, case = case, fast = fast)
+ ## w is the probability of the observed tree size (= number of tips)
+ w <- PrNt(0, Tmax, Ntip(phy))
+ ## p is the expected CDF of branching times
+ ## ecdf(x)(x) is the observed CDF
+ sum((1:n/n - p)^2)/w # faster than sum((ecdf(x)(x) - p)^2)/w
+ }
+
+ if (missing(ip)) ip <- (upper - lower)/2
+
+ out <- nlminb(ip, foo, control = list(trace = trace, eval.max = 500),
+ upper = upper, lower = lower)
+
+ names(out$par) <- paranam
+ names(out)[2] <- "SS"
+
+ if (boot) { # nonparametric version
+ PAR <- matrix(NA, boot, np)
+ i <- 1L
+ while (i <= boot) {
+ cat("i =", i, "\n")
+ x <- sort(sample(x, replace = TRUE))
+ o <- try(nlminb(ip, foo, control = list(trace = 0, eval.max = 500),
+ upper = upper, lower = lower))
+ if (class(o) == "list") {
+ PAR[i, ] <- o$par
+ i <- i + 1L
+ }
+ }
+ out$boot <- PAR
+ }
+ out
+}
citHeader("To cite ape in a publication use:")
citEntry(entry="Article",
- title = "APE: analyses of phylogenetics and evolution in {R} language",
+ title = "A{PE}: analyses of phylogenetics and evolution in {R} language",
author = personList(as.person("E. Paradis"),as.person("J. Claude"), as.person("K. Strimmer")),
journal = "Bioinformatics",
year = "2004",
\seealso{
\code{\link{birthdeath}}, \code{\link{branching.times}},
\code{\link{diversi.gof}}, \code{\link{diversi.time}},
- \code{\link{ltt.plot}}, \code{\link{yule}}, \code{\link{yule.cov}}
+ \code{\link{ltt.plot}}, \code{\link{yule}}, \code{\link{yule.cov}},
+ \code{\link{bd.time}}
}
\examples{
### An example from Paradis (2003) using the avian orders:
--- /dev/null
+\name{bd.time}
+\alias{bd.time}
+\title{Time-Dependent Birth-Death Models}
+\description{
+ This function fits a used-defined time-dependent birth-death
+ model.
+}
+\usage{
+bd.time(phy, birth, death, BIRTH = NULL, DEATH = NULL,
+ ip, lower, upper, fast = FALSE, boot = 0, trace = 0)
+}
+\arguments{
+ \item{phy}{an object of class \code{"phylo"}.}
+ \item{birth}{either a numeric (if speciation rate is assumed
+ constant), or a (vectorized) function specifying how the birth
+ (speciation) probability changes through time (see details).}
+ \item{death}{id. for extinction probability.}
+ \item{BIRTH}{(optional) a vectorized function giving the primitive
+ of \code{birth}.}
+ \item{DEATH}{id. for \code{death}.}
+ \item{ip}{a numeric vector used as initial values for the estimation
+ procedure. If missing, these values are guessed.}
+ \item{lower, upper}{the lower and upper bounds of the parameters. If
+ missing, these values are guessed too.}
+ \item{fast}{a logical value specifying whether to use faster
+ integration (see details).}
+ \item{boot}{the number of bootstrap replicates to assess the
+ confidence intervals of the parameters. Not run by default.}
+ \item{trace}{an integer value. If non-zero, the fitting procedure is
+ printed every \code{trace} steps. This can be helpful if convergence
+ is particularly slow.}
+}
+\details{
+ Details on how to specify the birth and death functions and their
+ primitives can be found in the help page of \code{\link{yule.time}}.
+
+ The model is fitted by minimizing the least squares deviation between
+ the observed and predicted distributions of branching times. These
+ computations rely heavily on numerical integrations. If \code{fast =
+ FALSE}, integrations are done with R's \code{\link[stats]{integrate}}
+ function. If \code{fast = TRUE}, a faster but less accurate function
+ provided in \pkg{ape} is used. If fitting a complex model to a large
+ phylogeny, a strategy might be to first use the latter option, and
+ then to use the estimates as starting values with \code{fast = FALSE}.
+}
+\value{
+ A list with the following components:
+
+\itemize{
+ \item{par}{a vector of estimates with names taken from the parameters
+ in the specified functions.}
+ \item{SS}{the minimized sum of squares.}
+ \item{convergence}{output convergence criterion from
+ \code{\link[stats]{nlminb}}.}
+ \item{message}{id.}
+ \item{iterations}{id.}
+ \item{evaluations}{id.}
+}}
+\references{
+ Paradis, E. (2010) Time-dependent speciation and extinction from
+ phylogenies: a least squares approach. (under revision)
+ %\emph{Evolution}, \bold{59}, 1--12.
+}
+\author{Emmanuel Paradis}
+\seealso{
+ \code{\link{ltt.plot}}, \code{\link{birthdeath}},
+ \code{\link{yule.time}}
+}
+\examples{
+set.seed(3)
+tr <- rbdtree(0.1, 0.02)
+bd.time(tr, 0, 0) # fits a simple BD model
+bd.time(tr, 0, 0, ip = c(.1, .01)) # 'ip' is useful here
+## the classic logistic:
+birth.logis <- function(a, b) 1/(1 + exp(-a*t - b))
+\dontrun{
+bd.time(tr, birth.logis, 0, ip = c(0, -2, 0.01))
+## slow to get:
+## $par
+## a b death
+## -0.003486961 -1.995983179 0.016496454
+##
+## $SS
+## [1] 20.73023
+}
+}
\seealso{
\code{\link{branching.times}}, \code{\link{diversi.gof}},
\code{\link{diversi.time}}, \code{\link{ltt.plot}},
- \code{\link{yule}}, \code{\link{bd.ext}}, \code{\link{yule.cov}}
+ \code{\link{yule}}, \code{\link{bd.ext}}, \code{\link{yule.cov}},
+ \code{\link{bd.time}}
}
\keyword{models}
\author{Emmanuel Paradis}
\seealso{
\code{\link{skyline}}, \code{\link{branching.times}},
- \code{\link{birthdeath}}, \code{\link{bd.ext}}, \code{\link{yule.cov}}
+ \code{\link{birthdeath}}, \code{\link{bd.ext}},
+ \code{\link{yule.cov}}, \code{\link{bd.time}}
\code{\link[graphics]{plot}} for the basic plotting function in R
}
\examples{
\author{Emmanuel Paradis}
\seealso{
\code{\link{branching.times}}, \code{\link{ltt.plot}},
- \code{\link{birthdeath}}, \code{\link{yule}}, \code{\link{yule.cov}}
+ \code{\link{birthdeath}}, \code{\link{yule}}, \code{\link{yule.cov}},
+ \code{\link{bd.time}}
}
\examples{
### define two models...