From: paradis Date: Mon, 27 Sep 2010 16:01:03 +0000 (+0000) Subject: adding bd.time X-Git-Url: https://git.donarmstrong.com/?p=ape.git;a=commitdiff_plain;h=f91082164111e17be175283d9fe8335e89424d42 adding bd.time git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@129 6e262413-ae40-0410-9e79-b911bd7a66b7 --- diff --git a/ChangeLog b/ChangeLog index 5a6817e..abb2616 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,11 +5,15 @@ NEW FEATURES o The new functions rlineage and rbdtree simulate phylogenies under any user-defined time-dependent speciation-extinction model. They - use new continuous time algorithms. + use continuous time algorithms. o The new function drop.fossil removes the extinct species from a phylogeny. + o The new function bd.time fits a user-defined time-dependent + birth-death model. It is a generalization of yule.time() taking + extinction into account. + o The new function MPR does most parsimonious reconstruction of discrete characters. diff --git a/DESCRIPTION b/DESCRIPTION index 410085c..dc7c895 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 2.6 -Date: 2010-09-16 +Date: 2010-09-27 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/R/CDF.birth.death.R b/R/CDF.birth.death.R index 6f58fa4..3608a35 100644 --- a/R/CDF.birth.death.R +++ b/R/CDF.birth.death.R @@ -1,4 +1,4 @@ -## CDF.birth.death.R (2010-08-09) +## CDF.birth.death.R (2010-09-27) ## Functions to simulate and fit ## Time-Dependent Birth-Death Models @@ -8,9 +8,9 @@ ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. +integrateTrapeze <- function(FUN, from, to, nint = 10) ## compute an integral with a simple trapeze method ## (apparently, Vectorize doesn't give faster calculation) -integrateTrapeze <- function(FUN, from, to, nint = 10) { x <- seq(from = from, to = to, length.out = nint + 1) ## reorganized to minimize the calls to FUN: @@ -41,24 +41,6 @@ integrateTrapeze <- function(FUN, from, to, nint = 10) } } -###.getRHO <- function(birth, death, BIRTH = NULL, DEATH = NULL, case) -###{ -### ## build the RHO(), \rho(t), function -### switch (case, -### function(t1, t2) (t2 - t1)*(death - birth), # case 1 -### function(t1, t2) # case 2 -### integrate(function(t) death(t) - birth(t), t1, t2)$value, -### function(t1, t2) -### DEATH(t2) - BIRTH(t2) - DEATH(t1) + BIRTH(t1), # case 3 -### function(t1, t2) -### death - integrate(birth, t1, t2)$value, # case 4 -### function(t1, t2) -### integrate(death, t1, t2)$value - birth, # case 5 -### function(t1, t2) death - BIRTH(t2) + BIRTH(t1), # case 6 -### function(t1, t2) DEATH(t2) - DEATH(t1) - birth # case 7 -### ) -###} - .getRHOetINT <- function(birth, death, BIRTH = NULL, DEATH = NULL, case, fast) { ## build the RHO(), \rho(t), and INT(), I(t), functions @@ -392,3 +374,110 @@ rbdtree <- foo(1L, 0) .makePhylo(edge[1:j, ], edge.length[1:j], i) } + +bd.time <- function(phy, birth, death, BIRTH = NULL, DEATH = NULL, + ip, lower, upper, fast = FALSE, boot = 0, trace = 0) +{ + guess.bounds <- if (missing(lower)) TRUE else FALSE + BIG <- 1e10 + PrNt <- function(t, T, x) + { + tmp <- exp(-RHO(t, T)) + Wt <- tmp * (1 + INT(t)) + out <- numeric(length(x)) + zero <- x == 0 + if (length(zero)) { + out[zero] <- 1 - tmp/Wt + out[!zero] <- (tmp/Wt^2)*(1 - 1/Wt)^(x[!zero] - 1) + } else out[] <- (tmp/Wt^2)*(1 - 1/Wt)^(x - 1) + out + } + + case <- .getCase(birth, death, BIRTH, DEATH) + + if (is.function(birth)) { + paranam <- names(formals(birth)) + if (guess.bounds) { + upper <- rep(BIG, length(paranam)) + lower <- -upper + } + formals(birth) <- alist(t=) + environment(birth) <- environment() + if (!is.null(BIRTH)) environment(BIRTH) <- environment() + } else { + paranam <- "birth" + if (guess.bounds) { + upper <- 1 + lower <- 0 + } + } + + if (is.function(death)) { + tmp <- names(formals(death)) + np2 <- length(tmp) + if (guess.bounds) { + upper <- c(upper, rep(BIG, np2)) + lower <- c(lower, rep(-BIG, np2)) + } + paranam <- c(paranam, tmp) + formals(death) <- alist(t=) + environment(death) <- environment() + if (!is.null(DEATH)) environment(DEATH) <- environment() + } else { + paranam <- c(paranam, "death") + if (guess.bounds) { + upper <- c(upper, .1) + lower <- c(lower, 0) + } + } + + np <- length(paranam) + + ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case = case, fast = fast) + RHO <- ff[[1]] + INT <- ff[[2]] + environment(RHO) <- environment(INT) <- environment() + + x <- branching.times(phy) + n <- length(x) + Tmax <- x[1] + x <- Tmax - x # change the time scale so the root is t=0 + x <- sort(x) + + foo <- function(para) { + for (i in 1:np) + assign(paranam[i], para[i], pos = sys.frame(1)) + p <- CDF.birth.death(birth, death, BIRTH, DEATH, Tmax = Tmax, + x = x, case = case, fast = fast) + ## w is the probability of the observed tree size (= number of tips) + w <- PrNt(0, Tmax, Ntip(phy)) + ## p is the expected CDF of branching times + ## ecdf(x)(x) is the observed CDF + sum((1:n/n - p)^2)/w # faster than sum((ecdf(x)(x) - p)^2)/w + } + + if (missing(ip)) ip <- (upper - lower)/2 + + out <- nlminb(ip, foo, control = list(trace = trace, eval.max = 500), + upper = upper, lower = lower) + + names(out$par) <- paranam + names(out)[2] <- "SS" + + if (boot) { # nonparametric version + PAR <- matrix(NA, boot, np) + i <- 1L + while (i <= boot) { + cat("i =", i, "\n") + x <- sort(sample(x, replace = TRUE)) + o <- try(nlminb(ip, foo, control = list(trace = 0, eval.max = 500), + upper = upper, lower = lower)) + if (class(o) == "list") { + PAR[i, ] <- o$par + i <- i + 1L + } + } + out$boot <- PAR + } + out +} diff --git a/inst/CITATION b/inst/CITATION index 9c7ef12..74c1a5a 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,7 +1,7 @@ citHeader("To cite ape in a publication use:") citEntry(entry="Article", - title = "APE: analyses of phylogenetics and evolution in {R} language", + title = "A{PE}: analyses of phylogenetics and evolution in {R} language", author = personList(as.person("E. Paradis"),as.person("J. Claude"), as.person("K. Strimmer")), journal = "Bioinformatics", year = "2004", diff --git a/man/bd.ext.Rd b/man/bd.ext.Rd index b26776f..2f246c9 100644 --- a/man/bd.ext.Rd +++ b/man/bd.ext.Rd @@ -43,7 +43,8 @@ bd.ext(phy, S) \seealso{ \code{\link{birthdeath}}, \code{\link{branching.times}}, \code{\link{diversi.gof}}, \code{\link{diversi.time}}, - \code{\link{ltt.plot}}, \code{\link{yule}}, \code{\link{yule.cov}} + \code{\link{ltt.plot}}, \code{\link{yule}}, \code{\link{yule.cov}}, + \code{\link{bd.time}} } \examples{ ### An example from Paradis (2003) using the avian orders: diff --git a/man/bd.time.Rd b/man/bd.time.Rd new file mode 100644 index 0000000..8cafdca --- /dev/null +++ b/man/bd.time.Rd @@ -0,0 +1,86 @@ +\name{bd.time} +\alias{bd.time} +\title{Time-Dependent Birth-Death Models} +\description{ + This function fits a used-defined time-dependent birth-death + model. +} +\usage{ +bd.time(phy, birth, death, BIRTH = NULL, DEATH = NULL, + ip, lower, upper, fast = FALSE, boot = 0, trace = 0) +} +\arguments{ + \item{phy}{an object of class \code{"phylo"}.} + \item{birth}{either a numeric (if speciation rate is assumed + constant), or a (vectorized) function specifying how the birth + (speciation) probability changes through time (see details).} + \item{death}{id. for extinction probability.} + \item{BIRTH}{(optional) a vectorized function giving the primitive + of \code{birth}.} + \item{DEATH}{id. for \code{death}.} + \item{ip}{a numeric vector used as initial values for the estimation + procedure. If missing, these values are guessed.} + \item{lower, upper}{the lower and upper bounds of the parameters. If + missing, these values are guessed too.} + \item{fast}{a logical value specifying whether to use faster + integration (see details).} + \item{boot}{the number of bootstrap replicates to assess the + confidence intervals of the parameters. Not run by default.} + \item{trace}{an integer value. If non-zero, the fitting procedure is + printed every \code{trace} steps. This can be helpful if convergence + is particularly slow.} +} +\details{ + Details on how to specify the birth and death functions and their + primitives can be found in the help page of \code{\link{yule.time}}. + + The model is fitted by minimizing the least squares deviation between + the observed and predicted distributions of branching times. These + computations rely heavily on numerical integrations. If \code{fast = + FALSE}, integrations are done with R's \code{\link[stats]{integrate}} + function. If \code{fast = TRUE}, a faster but less accurate function + provided in \pkg{ape} is used. If fitting a complex model to a large + phylogeny, a strategy might be to first use the latter option, and + then to use the estimates as starting values with \code{fast = FALSE}. +} +\value{ + A list with the following components: + +\itemize{ + \item{par}{a vector of estimates with names taken from the parameters + in the specified functions.} + \item{SS}{the minimized sum of squares.} + \item{convergence}{output convergence criterion from + \code{\link[stats]{nlminb}}.} + \item{message}{id.} + \item{iterations}{id.} + \item{evaluations}{id.} +}} +\references{ + Paradis, E. (2010) Time-dependent speciation and extinction from + phylogenies: a least squares approach. (under revision) + %\emph{Evolution}, \bold{59}, 1--12. +} +\author{Emmanuel Paradis} +\seealso{ + \code{\link{ltt.plot}}, \code{\link{birthdeath}}, + \code{\link{yule.time}} +} +\examples{ +set.seed(3) +tr <- rbdtree(0.1, 0.02) +bd.time(tr, 0, 0) # fits a simple BD model +bd.time(tr, 0, 0, ip = c(.1, .01)) # 'ip' is useful here +## the classic logistic: +birth.logis <- function(a, b) 1/(1 + exp(-a*t - b)) +\dontrun{ +bd.time(tr, birth.logis, 0, ip = c(0, -2, 0.01)) +## slow to get: +## $par +## a b death +## -0.003486961 -1.995983179 0.016496454 +## +## $SS +## [1] 20.73023 +} +} diff --git a/man/birthdeath.Rd b/man/birthdeath.Rd index 3bb5d4f..7e2cd5c 100644 --- a/man/birthdeath.Rd +++ b/man/birthdeath.Rd @@ -60,6 +60,7 @@ birthdeath(phy) \seealso{ \code{\link{branching.times}}, \code{\link{diversi.gof}}, \code{\link{diversi.time}}, \code{\link{ltt.plot}}, - \code{\link{yule}}, \code{\link{bd.ext}}, \code{\link{yule.cov}} + \code{\link{yule}}, \code{\link{bd.ext}}, \code{\link{yule.cov}}, + \code{\link{bd.time}} } \keyword{models} diff --git a/man/ltt.plot.Rd b/man/ltt.plot.Rd index 922fcfe..6f09500 100644 --- a/man/ltt.plot.Rd +++ b/man/ltt.plot.Rd @@ -72,7 +72,8 @@ mltt.plot(phy, ..., dcol = TRUE, dlty = FALSE, legend = TRUE, \author{Emmanuel Paradis} \seealso{ \code{\link{skyline}}, \code{\link{branching.times}}, - \code{\link{birthdeath}}, \code{\link{bd.ext}}, \code{\link{yule.cov}} + \code{\link{birthdeath}}, \code{\link{bd.ext}}, + \code{\link{yule.cov}}, \code{\link{bd.time}} \code{\link[graphics]{plot}} for the basic plotting function in R } \examples{ diff --git a/man/yule.time.Rd b/man/yule.time.Rd index fdbe0e7..91cbe04 100644 --- a/man/yule.time.Rd +++ b/man/yule.time.Rd @@ -49,7 +49,8 @@ yule.time(phy, birth, BIRTH = NULL, root.time = 0, opti = "nlm", start = 0.01) \author{Emmanuel Paradis} \seealso{ \code{\link{branching.times}}, \code{\link{ltt.plot}}, - \code{\link{birthdeath}}, \code{\link{yule}}, \code{\link{yule.cov}} + \code{\link{birthdeath}}, \code{\link{yule}}, \code{\link{yule.cov}}, + \code{\link{bd.time}} } \examples{ ### define two models...