From 24fc6c03893f85a3f9ab3d088201b3731f3035b4 Mon Sep 17 00:00:00 2001 From: paradis Date: Tue, 12 Jan 2010 10:32:11 +0000 Subject: [PATCH] new files for trait simulations git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@103 6e262413-ae40-0410-9e79-b911bd7a66b7 --- ChangeLog | 3 ++ DESCRIPTION | 2 +- R/ace.R | 12 +++-- R/rTrait.R | 127 +++++++++++++++++++++++++++++++++++++++++++++ man/ape-package.Rd | 2 +- man/rTraitCont.Rd | 97 ++++++++++++++++++++++++++++++++++ man/rTraitDisc.Rd | 91 ++++++++++++++++++++++++++++++++ src/rTrait.c | 30 +++++++++++ 8 files changed, 357 insertions(+), 7 deletions(-) create mode 100644 R/rTrait.R create mode 100644 man/rTraitCont.Rd create mode 100644 man/rTraitDisc.Rd create mode 100644 src/rTrait.c diff --git a/ChangeLog b/ChangeLog index 02164f8..fa76411 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ NEW FEATURES + o The new functions rTraitCont and rTraitDisc simulate continuous and + discrete traits under a wide range of evolutionary models. + o add.scale.bar() has a new option 'ask' to draw interactively. diff --git a/DESCRIPTION b/DESCRIPTION index 6609097..3d42a84 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: ape Version: 2.4-2 -Date: 2009-12-16 +Date: 2010-01-11 Title: Analyses of Phylogenetics and Evolution Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Gangolf Jobb, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne Maintainer: Emmanuel Paradis diff --git a/R/ace.R b/R/ace.R index 1dd0caa..c9a4511 100644 --- a/R/ace.R +++ b/R/ace.R @@ -131,9 +131,10 @@ did not match: the former were ignored in the analysis.') } if (model == "SYM") { np <- nl * (nl - 1)/2 - rate[col(rate) < row(rate)] <- 1:np + sel <- col(rate) < row(rate) + rate[sel] <- 1:np rate <- t(rate) - rate[col(rate) < row(rate)] <- 1:np + rate[sel] <- 1:np } } else { if (ncol(model) != nrow(model)) @@ -145,9 +146,10 @@ as the number of categories in `x'") np <- max(rate) } index.matrix <- rate - index.matrix[cbind(1:nl, 1:nl)] <- NA - rate[cbind(1:nl, 1:nl)] <- 0 - rate[rate == 0] <- np + 1 # to avoid 0's since we will use this an numeric indexing + tmp <- cbind(1:nl, 1:nl) + index.matrix[tmp] <- NA + rate[tmp] <- 0 + rate[rate == 0] <- np + 1 # to avoid 0's since we will use this as numeric indexing liks <- matrix(0, nb.tip + nb.node, nl) TIPS <- 1:nb.tip diff --git a/R/rTrait.R b/R/rTrait.R new file mode 100644 index 0000000..3359df6 --- /dev/null +++ b/R/rTrait.R @@ -0,0 +1,127 @@ +## rTrait.R (2010-01-11) + +## Trait Evolution + +## Copyright 2010 Emmanuel Paradis + +## This file is part of the R-package `ape'. +## See the file ../COPYING for licensing issues. + +rTraitDisc <- + function(phy, model = "ER", k = if (is.matrix(model)) ncol(model) else 2, + rate = 0.1, states = LETTERS[1:k], freq = rep(1/k, k), + ancestor = FALSE, root.value = 1) +{ + if (is.null(phy$edge.length)) + stop("tree has no branch length") + if (any(phy$edge.length < 0)) + stop("at least one branch length negative") + + if (is.character(model)) { + switch(model, "ER" = { + if (length(rate) != 1) + stop("`rate' must have one element") + Q <- matrix(rate, k, k) + }, "ARD" = { + if (length(rate) != k*(k - 1)) + stop("`rate' must have k(k - 1) elements") + Q <- matrix(0, k, k) + Q[col(Q) != row(Q)] <- rate + }, "SYM" = { + if (length(rate) != k*(k - 1)/2) + stop("`rate' must have k(k - 1)/2 elements") + Q <- matrix(0, k, k) + sel <- col(Q) < row(Q) + Q[sel] <- rate + Q <- t(Q) + Q[sel] <- rate + }) + } + if (is.matrix(model)) { + Q <- model + if (ncol(Q) != nrow(Q)) + stop("the matrix given as `model' must be square") + } + + phy <- reorder(phy, "pruningwise") + n <- length(phy$tip.label) + N <- dim(phy$edge)[1] + ROOT <- n + 1L + x <- integer(n + phy$Nnode) + x[ROOT] <- as.integer(root.value) + + anc <- phy$edge[, 1] + des <- phy$edge[, 2] + el <- phy$edge.length + + if (is.function(model)) { + environment(model) <- environment() # to find 'k' + for (i in N:1) x[des[i]] <- model(x[anc[i]], el[i]) + } else { + diag(Q) <- -rowSums(Q) + for (i in N:1) { + p <- matexpo(Q * freq * el[i])[x[anc[i]], ] + x[des[i]] <- .Internal(sample(k, size = 1, FALSE, prob = p)) + } + } + + if (ancestor) { + if (is.null(phy$node.label)) phy <- makeNodeLabel(phy) + names(x) <- c(phy$tip.label, phy$node.label) + } else { + x <- x[1:n] + names(x) <- phy$tip.label + } + class(x) <- "factor" + levels(x) <- states + x +} + +rTraitCont <- + function(phy, model = "BM", sigma = 0.1, alpha = 1, + theta = 0, ancestor = FALSE, root.value = 0) +{ + if (is.null(phy$edge.length)) + stop("tree has no branch length") + if (any(phy$edge.length < 0)) + stop("at least one branch length negative") + + phy <- reorder(phy, "pruningwise") + n <- length(phy$tip.label) + N <- dim(phy$edge)[1] + ROOT <- n + 1L + x <- numeric(n + phy$Nnode) + x[ROOT] <- root.value + + anc <- phy$edge[, 1] + des <- phy$edge[, 2] + el <- phy$edge.length + + if (is.function(model)) { + environment(model) <- environment() + for (i in N:1) x[des[i]] <- model(x[anc[i]], el[i]) + } else { + model <- pmatch(model, c("BM", "OU")) + if (length(sigma) == 1) sigma <- rep(sigma, N) + else if (length(sigma) != N) + stop("'sigma' must have one or Nedge(phy) elements") + if (model == 2) { # "OU" + if (length(alpha) == 1) alpha <- rep(alpha, N) + else if (length(alpha) != N) + stop("'alpha' must have one or Nedge(phy) elements") + if (length(theta) == 1) theta <- rep(theta, N) + else if (length(theta) != N) + stop("'theta' must have one or Nedge(phy) elements") + } + .C("rTraitCont", as.integer(model), as.integer(N), as.integer(anc - 1L), as.integer(des - 1L), el, sigma, alpha, theta, x, DUP = FALSE, NAOK = TRUE, PACKAGE = "ape") + } + + if (ancestor) { + if (is.null(phy$node.label)) phy <- makeNodeLabel(phy) + names(x) <- c(phy$tip.label, phy$node.label) + } else { + x <- x[1:n] + names(x) <- phy$tip.label + } + x +} diff --git a/man/ape-package.Rd b/man/ape-package.Rd index 803d5ef..d64b39b 100644 --- a/man/ape-package.Rd +++ b/man/ape-package.Rd @@ -29,7 +29,7 @@ Analyses of Phylogenetics and Evolution } \references{ Paradis, E. (2006) \emph{Analyses of Phylogenetics and Evolution with - R}. Springer, New York. + R.} New York: Springer. Paradis, E., Claude, J. and Strimmer, K. (2004) APE: analyses of phylogenetics and evolution in R language. \emph{Bioinformatics}, diff --git a/man/rTraitCont.Rd b/man/rTraitCont.Rd new file mode 100644 index 0000000..01d82e2 --- /dev/null +++ b/man/rTraitCont.Rd @@ -0,0 +1,97 @@ +\name{rTraitCont} +\alias{rTraitCont} +\title{Continuous Character Simulation} +\usage{ +rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1, + theta = 0, ancestor = FALSE, root.value = 0) +} +\arguments{ + \item{phy}{an object of class \code{"phylo"}.} + \item{model}{a character (either \code{"BM"} or \code{"OU"}) or a + function specifying the model (see details).} + \item{sigma}{a numeric vector giving the standard-deviation of the + random component for each branch (can be a single value).} + \item{alpha}{if \code{model = "OU"}, a numeric vector giving the + strength of the selective constraint for each branch (can be a + single value).} + \item{theta}{if \code{model = "OU"}, a numeric vector giving the + optimum for each branch (can be a single value).} + \item{ancestor}{a logical value specifying whether to return the + values at the nodes as well (by default, only the values at the tips + are returned).} + \item{root.value}{a numeric giving the value at the root.} +} +\description{ + This function simulates the evolution of a continuous character along a + phylogeny. The calculation is done recursively from the root. See + Paradis (2006, p. 151) for a brief introduction. +} +\details{ + There are three possibilities to specify \code{model}: + +\itemize{ + \item{\code{"BM"}:}{a Browian motion model is used. If the arguments + \code{sigma} has more than one value, its length must be equal to the + the branches of the tree. This allows to specify a model with variable + rates of evolution. You must be careful that branch numbering is done + with the tree in ``pruningwise'' order: to see the order of the branches + you can use: \code{tr <- reorder(tr, "p"); plor(tr); edgelabels()}. + The arguments \code{alpha} and \code{theta} are ignored.} + + \item{\code{"OU"}:}{an Ornstein-Uhlenbeck model is used. The above + indexing rule is used for the three parameters \code{sigma}, + \code{alpha}, and \code{theta}. This may be more interesting for the + last one to model varying phenotypic optima. Be careful that large + values of \code{alpha} may give unrealistic output.} + + \item{A function:}{it must be of the form \code{foo(x, l)} where + \code{x} is the trait of the ancestor and \code{l} is the branch + length. It must return the value of the descendant. The arguments + \code{sigma}, \code{alpha}, and \code{theta} are ignored.} +}} +\note{ + Currently, the OU model is a bit difficult to tune. Hopefully, this + may be improved in the future. +} +\value{ + A numeric vector with names taken from the tip labels of + \code{phy}. If \code{ancestor = TRUE}, the node labels are used if + present, otherwise, ``Node1'', ``Node2'', etc. +} +\references{ + Paradis, E. (2006) \emph{Analyses of Phylogenetics and Evolution with + R.} New York: Springer. +} +\author{Emmanuel Paradis} +\seealso{ + \code{\link{rTraitDisc}}, \code{\link{ace}} +} +\examples{ +data(bird.orders) +rTraitCont(bird.orders) # BM with sigma = 0.1 +### OU model with two optima: +tr <- reorder(bird.orders, "p") +plot(tr) +edgelabels() +theta <- rep(0, Nedge(tr)) +theta[c(1:4, 15:16, 23:24)] <- 2 +## sensitive to 'alpha' and 'sigma': +rTraitCont(tr, "OU", theta = theta, alpha=.1, sigma=.01) +### an imaginary model with stasis 0.5 time unit after a node, then +### BM evolution with sigma = 0.1: +foo <- function(x, l) { + if (l <= 0.5) return(x) + x + (l - 0.5)*rnorm(1, 0, 0.1) +} +tr <- rcoal(20, br = runif) +rTraitCont(tr, foo, ancestor = TRUE) +### a cumulative Poisson process: +bar <- function(x, l) x + rpois(1, l) +(x <- rTraitCont(tr, bar, ancestor = TRUE)) +plot(tr, show.tip.label = FALSE) +Y <- x[1:20] +A <- x[-(1:20)] +nodelabels(A) +tiplabels(Y) +} +\keyword{datagen} diff --git a/man/rTraitDisc.Rd b/man/rTraitDisc.Rd new file mode 100644 index 0000000..967042e --- /dev/null +++ b/man/rTraitDisc.Rd @@ -0,0 +1,91 @@ +\name{rTraitDisc} +\alias{rTraitDisc} +\title{Discrete Character Simulation} +\usage{ +rTraitDisc(phy, model = "ER", k = if (is.matrix(model)) ncol(model) else 2, + rate = 0.1, states = LETTERS[1:k], freq = rep(1/k, k), + ancestor = FALSE, root.value = 1) +} +\arguments{ + \item{phy}{an object of class \code{"phylo"}.} + \item{model}{a character, a square numeric matrix, or a function + specifying the model (see details).} + \item{k}{the number of states of the character.} + \item{rate}{the rate of change used if \code{model} is a character; it + is \emph{not} recycled if \code{model = "ARD"} of \code{model = + "SYM"}.} + \item{states}{the labels used for the states; by default ``A'', ``B'', + \dots} + \item{freq}{a numeric vector giving the equilibrium relative + frequencies of each state; by default the frequencies are equal.} + \item{ancestor}{a logical value specifying whether to return the + values at the nodes as well (by default, only the values at the tips + are returned).} + \item{root.value}{an integer giving the value at the root (by default, + it's the first state). To have a random value, use \code{root.value + = sample(k)}.} +} +\description{ + This function simulates the evolution of a discrete character along a + phylogeny. If \code{model} is a character or a matrix, evolution is + simulated with a Markovian model; the transition probabilities are + calculated for each branch with \eqn{P = e^{Qt}} where \eqn{Q} is the + rate matrix given by \code{model} and \eqn{t} is the branch length. + The calculation is done recursively from the root. See Paradis (2006, + p. 101) for a general introduction applied to evolution. +} +\details{ + There are three possibilities to specify \code{model}: + +\itemize{ + \item{A matrix:}{it must be a numeric square matrix; the diagonal is + always ignored. The arguments \code{k} and \code{rate} are ignored.} + + \item{A character:}{these are the same short-cuts than in the function + \link{\code{ace}}: \code{"ER"} is an equal-rates model, \code{"ARD"} + is an all-rates-different model, and \code{"SYM"} is a symmetrical + model. Note that the argument \code{rate} must be of the appropriate + length, i.e., 1, \eqn{k(k - 1)}, or \eqn{k(k - 1)/2} for the three models, + respectively. The rate matrix \eqn{Q} is then filled column-wise.} + + \item{A function:}{it must be of the form \code{foo(x, l)} where + \code{x} is the trait of the ancestor and \code{l} is the branch + length. It must return the value of the descendant as an integer.} +}} +\value{ + A factor with names taken from the tip labels of \code{phy}. If + \code{ancestor = TRUE}, the node labels are used if present, + otherwise, ``Node1'', ``Node2'', etc. +} +\references{ + Paradis, E. (2006) \emph{Analyses of Phylogenetics and Evolution with + R.} New York: Springer. +} +\author{Emmanuel Paradis} +\seealso{ + \code{\link{rTraitCont}}, \code{\link{ace}} +} +\examples{ +data(bird.orders) +### the two followings are the same: +rTraitDisc(bird.orders) +rTraitDisc(bird.orders, model = matrix(c(0, 0.1, 0.1, 0), 2)) +### two-state model with irreversibility: +rTraitDisc(bird.orders, model = matrix(c(0, 0, 0.1, 0), 2)) +### an imaginary model with stasis 0.5 time unit after a node, then +### random evolution: +foo <- function(x, l) { + if (l < 0.5) return(x) + sample(2, size = 1) +} +tr <- rcoal(20, br = runif) +x <- rTraitDisc(tr, foo, ancestor = TRUE) +plot(tr, show.tip.label = FALSE) +co <- c("blue", "yellow") +cot <- c("white", "black") +Y <- x[1:20] +A <- x[-(1:20)] +nodelabels(A, bg = co[A], col = cot[A]) +tiplabels(Y, bg = co[Y], col = cot[Y]) +} +\keyword{datagen} diff --git a/src/rTrait.c b/src/rTrait.c new file mode 100644 index 0000000..6441e16 --- /dev/null +++ b/src/rTrait.c @@ -0,0 +1,30 @@ +/* rTrait.c 2010-01-11 */ + +/* Copyright 2010 Emmanuel Paradis */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + +#include + +void rTraitCont(int *model, int *Nedge, int *edge1, int *edge2, double *el, + double *sigma, double *alpha, double *theta, double *x) +{ +/* The tree must be in pruningwise order */ + int i; + + switch(*model) { + case 1 : for (i = *Nedge - 1; i >= 0; i--) { + GetRNGstate(); + x[edge2[i]] = x[edge1[i]] + el[i] * sigma[i] * norm_rand(); + PutRNGstate(); + } + break; + case 2 : for (i = *Nedge - 1; i >= 0; i--) { + GetRNGstate(); + x[edge2[i]] = x[edge1[i]] + (sigma[i]*norm_rand() - alpha[i]*(x[edge1[i]] - theta[i])) * el[i]; + PutRNGstate(); + } + break; + } +} -- 2.39.2