- CHANGES IN APE VERSION 2.5-4
+ CHANGES IN APE VERSION 2.6
NEW FEATURES
+ o The new functions rlineage and rbdtree simulate phylogenies under
+ any user-defined time-dependent speciation-extinction model. They
+ use new continuous time algorithms.
+
+ o The new function drop.fossil removes the extinct species from a
+ phylogeny.
+
+ o The new function MPR does most parsimonious reconstruction of
+ discrete characters.
+
+ o The new function Ftab computes the contingency table of base
+ frequencies from a pair of sequences.
+
o There is now an 'as.list' method for the class "DNAbin".
o dist.dna() can compute the number of transitions or transversions
o compar.gee() has been improved with the new option 'corStruct' as an
alternative to 'phy' to specify the correlation structure, and
calculation of the QIC (Pan 2001, Biometrics). The display of the
- results have also been improved.
+ results has also been improved.
+
+ o read.GenBank() has a new option 'gene.names' to return the name of
+ the gene (FALSE by default).
BUG FIXES
o extract.clade() sometimes shuffled the tip labels.
+ o plot.phylo(type = "unrooted") did not force asp = 1 (thanks to Klaus
+ Schliep for the fix)
+
+ o dist.dna(model = "logdet") used to divide distances by 4. The
+ documentation has been clarified on the formulae used.
+
+
+OTHER CHANGES
+
+ o rTraitCont(model = "OU") has an option 'linear = TRUE' to possibly
+ change the parameterisation (see ?rTraitCont for details).
+
+ o pic() now returns a vector with the node labels of the tree (if
+ available) as names.
+
CHANGES IN APE VERSION 2.5-3
Package: ape
-Version: 2.5-4
-Date: 2010-07-13
+Version: 2.6
+Date: 2010-09-16
Title: Analyses of Phylogenetics and Evolution
Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne
Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
--- /dev/null
+## CDF.birth.death.R (2010-08-09)
+
+## Functions to simulate and fit
+## Time-Dependent Birth-Death Models
+
+## Copyright 2010 Emmanuel Paradis
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+## compute an integral with a simple trapeze method
+## (apparently, Vectorize doesn't give faster calculation)
+integrateTrapeze <- function(FUN, from, to, nint = 10)
+{
+ x <- seq(from = from, to = to, length.out = nint + 1)
+ ## reorganized to minimize the calls to FUN:
+ out <- FUN(x[1]) + FUN(x[length(x)])
+ for (i in 2:nint) out <- out + 2 * FUN(x[i])
+ (x[2] - x[1]) * out/2 # (x[2] - x[1]) is the width of the trapezes
+}
+
+## case:
+## 1: birth and death rates constant
+## 2: no primitive available
+## 3: primitives are available
+## 4: death rate constant, no primitive available
+## 5: birth rate constant, no primitive available
+## 6: death rate constant, primitive available for birth(t)
+## 7: birth rate constant, primitive available for death(t)
+
+.getCase <- function(birth, death, BIRTH = NULL, DEATH = NULL)
+{
+ if (is.numeric(birth)) {
+ if (is.numeric(death)) 1 else {
+ if (is.null(DEATH)) 5 else 7
+ }
+ } else {
+ if (is.numeric(death)) {
+ if (is.null(BIRTH)) 4 else 6
+ } else if (is.null(BIRTH) || is.null(DEATH)) 2 else 3
+ }
+}
+
+###.getRHO <- function(birth, death, BIRTH = NULL, DEATH = NULL, case)
+###{
+### ## build the RHO(), \rho(t), function
+### switch (case,
+### function(t1, t2) (t2 - t1)*(death - birth), # case 1
+### function(t1, t2) # case 2
+### integrate(function(t) death(t) - birth(t), t1, t2)$value,
+### function(t1, t2)
+### DEATH(t2) - BIRTH(t2) - DEATH(t1) + BIRTH(t1), # case 3
+### function(t1, t2)
+### death - integrate(birth, t1, t2)$value, # case 4
+### function(t1, t2)
+### integrate(death, t1, t2)$value - birth, # case 5
+### function(t1, t2) death - BIRTH(t2) + BIRTH(t1), # case 6
+### function(t1, t2) DEATH(t2) - DEATH(t1) - birth # case 7
+### )
+###}
+
+.getRHOetINT <- function(birth, death, BIRTH = NULL, DEATH = NULL, case, fast)
+{
+ ## build the RHO(), \rho(t), and INT(), I(t), functions
+ switch (case,
+ { # case 1:
+ RHO <- function(t1, t2) (t2 - t1)*(death - birth)
+ INT <- function(t) {
+ rho <- death - birth
+ death*(exp(rho*(Tmax - t)) - 1)/rho
+ }
+ },{ # case 2:
+ if (fast) {
+ RHO <- function(t1, t2)
+ integrateTrapeze(function(t) death(t) - birth(t), t1, t2)
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death(u)
+ integrateTrapeze(FOO, t, Tmax)
+ }
+ } else {
+ RHO <- function(t1, t2)
+ integrate(function(t) death(t) - birth(t), t1, t2)$value
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death(u)
+ integrate(Vectorize(FOO), t, Tmax)$value # Vectorize required
+ }
+ }
+ },{ # case 3:
+ RHO <- function(t1, t2)
+ DEATH(t2) - BIRTH(t2) - DEATH(t1) + BIRTH(t1)
+ INT <- function(t) { # vectorized
+ FOO <- function(u) exp(RHO(tt, u)) * death(u)
+ out <- t
+ for (i in 1:length(t)) {
+ tt <- t[i]
+ out[i] <- integrate(FOO, tt, Tmax)$value
+ }
+ out
+ }
+ },{ # case 4:
+ if (fast) {
+ RHO <- function(t1, t2)
+ death * (t2 - t1) - integrateTrapeze(birth, t1, t2)
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death
+ integrateTrapeze(Vectorize(FOO), t, Tmax)
+ }
+ } else {
+ RHO <- function(t1, t2)
+ death * (t2 - t1) - integrate(birth, t1, t2)$value
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death
+ integrate(Vectorize(FOO), t, Tmax)$value
+ }
+ }
+ },{ # case 5:
+ RHO <- function(t1, t2)
+ integrate(death, t1, t2)$value - birth * (t2 - t1)
+ if (fast) {
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death(u)
+ integrateTrapeze(FOO, t, Tmax)
+ }
+ } else {
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death(u)
+ integrate(Vectorize(FOO), t, Tmax)$value
+ }
+ }
+ },{ # case 6:
+ RHO <- function(t1, t2) death * (t2 - t1) - BIRTH(t2) + BIRTH(t1)
+ INT <- function(t) { # vectorized
+ FOO <- function(u) exp(RHO(tt, u)) * death
+ out <- t
+ for (i in 1:length(t)) {
+ tt <- t[i]
+ out[i] <- integrate(FOO, tt, Tmax)$value
+ }
+ out
+ }
+ },{ # case 7:
+ RHO <- function(t1, t2) DEATH(t2) - DEATH(t1) - birth * (t2 - t1)
+ if (fast) {
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death(u)
+ integrateTrapeze(FOO, t, Tmax)
+ }
+ } else {
+ INT <- function(t) {
+ FOO <- function(u) exp(RHO(t, u)) * death(u)
+ integrate(Vectorize(FOO), t, Tmax)$value
+ }
+ }
+ })
+ list(RHO, INT)
+}
+
+CDF.birth.death <-
+ function(birth, death, BIRTH = NULL, DEATH = NULL, Tmax, x, case, fast = FALSE)
+{
+ ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case, fast)
+ RHO <- ff[[1]]
+ INT <- ff[[2]]
+ environment(INT) <- environment() # so that INT() can find Tmax
+ .CDF.birth.death2(RHO, INT, birth, death, BIRTH, DEATH,
+ Tmax, x, case, fast)
+}
+
+.CDF.birth.death2 <-
+ function(RHO, INT, birth, death, BIRTH, DEATH, Tmax, x, case, fast)
+{
+ Pi <- if (case %in% c(1, 5, 7))
+ function(t) (1/(1 + INT(t)))^2 * 2 * exp(-RHO(0, t)) * birth
+ else
+ function(t) (1/(1 + INT(t)))^2 * 2 * exp(-RHO(0, t)) * birth(t)
+
+ if (!case %in% c(1, 3, 6)) Pi <- Vectorize(Pi)
+
+ denom <- if (fast) integrateTrapeze(Pi, 0, Tmax) else integrate(Pi, 0, Tmax)$value
+ n <- length(x)
+ p <- numeric(n)
+ if (fast) {
+ for (i in 1:n) p[i] <- integrateTrapeze(Pi, 0, x[i])
+ } else {
+ for (i in 1:n) p[i] <- integrate(Pi, 0, x[i])$value
+ }
+ p/denom
+}
+
+.makePhylo <- function(edge, edge.length, i)
+{
+ NODES <- edge > 0
+ edge[NODES] <- edge[NODES] + i + 1L
+ edge[!NODES] <- 1:(i + 1L)
+
+ phy <- list(edge = edge, edge.length = edge.length,
+ tip.label = paste("t", 1:(i + 1), sep = ""), Nnode = i)
+ class(phy) <- "phylo"
+ phy
+}
+
+rlineage <-
+ function(birth, death, Tmax = 50, BIRTH = NULL, DEATH = NULL, eps = 1e-6)
+{
+ case <- .getCase(birth, death, BIRTH, DEATH)
+
+ rTimeToEvent <- function(t)
+ {
+ ## CDF of the times to event (speciation or extinction):
+ switch (case,
+ { # case 1:
+ Foo <- function(t, x)
+ 1 - exp(-(birth + death)*(x - t))
+ },{ # case 2:
+ Foo <- function(t, x) {
+ if (t == x) return(0)
+ 1 - exp(-integrate(function(t) birth(t) + death(t),
+ t, x)$value)
+ }
+ },{ # case 3:
+ Foo <- function(t, x) {
+ if (t == x) return(0)
+ 1 - exp(-(BIRTH(x) - BIRTH(t) + DEATH(x) - DEATH(t)))
+ }
+ },{ # case 4:
+ Foo <- function(t, x) {
+ if (t == x) return(0)
+ 1 - exp(-(integrate(function(t) birth(t), t, x)$value
+ + death*(x - t)))
+ }
+
+ },{ # case 5:
+ Foo <- function(t, x) {
+ if (t == x) return(0)
+ 1 - exp(-(birth*(x - t) +
+ integrate(function(t) death(t), t, x)$value))
+ }
+
+ },{ # case 6:
+ Foo <- function(t, x) {
+ if (t == x) return(0)
+ 1 - exp(-(BIRTH(x) - BIRTH(t) + death*(x - t)))
+ }
+
+ },{ # case 7:
+ Foo <- function(t, x) {
+ if (t == x) return(0)
+ 1 - exp(-(birth*(x - t) + DEATH(x) - DEATH(t) ))
+ }
+ })
+
+ ## generate a random time to event by the inverse method:
+ P <- runif(1)
+ ## in case speciation probability is so low
+ ## that time to speciation is infinite:
+ if (Foo(t, Tmax) < P) return(Tmax + 1)
+ inc <- 10
+ x <- t + inc
+ while (inc > eps) { # la précision influe sur le temps de calcul
+ if (Foo(t, x) > P) {
+ x <- x - inc
+ inc <- inc/10
+ } else x <- x + inc
+ }
+ x - t
+ }
+
+ if (case == 1)
+ speORext <- function(t) birth/(birth + death)
+ if (case == 2 || case == 3)
+ speORext <- function(t) birth(t)/(birth(t) + death(t))
+ if (case == 4 || case == 6)
+ speORext <- function(t) birth(t)/(birth(t) + death)
+ if (case == 5 || case == 7)
+ speORext <- function(t) birth/(birth + death(t))
+
+ ## the recursive function implementing algorithm 1
+ foo <- function(node) {
+ for (k in 0:1) {
+ X <- rTimeToEvent(t[node])
+ tmp <- t[node] + X
+ ## is the event a speciation or an extinction?
+ if (tmp >= Tmax) {
+ Y <- 0
+ tmp <- Tmax
+ } else Y <- rbinom(1, size = 1, prob = speORext(tmp))
+ j <<- j + 1L
+ edge.length[j] <<- tmp - t[node]
+ if (Y) {
+ i <<- i + 1L
+ t[i] <<- tmp
+ ## set internal edge:
+ edge[j, ] <<- c(node, i)
+ foo(i)
+ } else
+ ## set terminal edge:
+ edge[j, ] <<- c(node, 0L)
+ }
+ }
+
+ edge <- matrix(0L, 1e5, 2)
+ edge.length <- numeric(1e5)
+ j <- 0L; i <- 1; t <- 0
+ foo(1L)
+ .makePhylo(edge[1:j, ], edge.length[1:j], i)
+}
+
+drop.fossil <- function(phy, tol = 0)
+{
+ n <- Ntip(phy)
+ x <- dist.nodes(phy)[n + 1, ][1:n]
+ drop.tip(phy, which(x < max(x) - tol))
+}
+
+rbdtree <-
+ function(birth, death, Tmax = 50, BIRTH = NULL, DEATH = NULL, eps = 1e-6)
+{
+ case <- .getCase(birth, death, BIRTH, DEATH)
+ ff <- .getRHOetINT(birth, death, BIRTH, DEATH, case, FALSE)
+ RHO <- ff[[1]]
+ INT <- ff[[2]]
+ ## so that RHO() and INT() can find Tmax:
+ environment(RHO) <- environment(INT) <- environment()
+
+ rtimetospe <- function(t)
+ {
+ ## CDF of the times to speciation:
+ Foo <- if (case %in% c(1, 5, 7))
+ function(t, x) 1 - exp(-birth*(x - t))
+ else {
+ if (case %in% c(3, 6))
+ function(t, x) 1 - exp(-(BIRTH(x) - BIRTH(t)))
+ else {
+ function(t, x) {
+ if (t == x) return(0)
+ 1 - exp(-integrate(birth, t, x)$value)
+ }
+ }
+ }
+ ## generate a random time to speciation by the inverse method:
+ P <- runif(1)
+ ## in case speciation probability is so low
+ ## that time to speciation is infinite:
+ if (Foo(t, Tmax) < P) return(Tmax + 1)
+ inc <- 10
+ x <- t + inc
+ while (inc > eps) { # la précision influe sur le temps de calcul
+ if (Foo(t, x) > P) {
+ x <- x - inc
+ inc <- inc/10
+ } else x <- x + inc
+ }
+ x - t
+ }
+
+ ## the recursive function implementing algorithm 2
+ foo <- function(node, start) {
+ node <- node # make a local copy
+ for (k in 0:1) {
+ tau <- start # because tau is changed below
+ NoDesc <- TRUE
+ X <- rtimetospe(tau)
+ while (X < Tmax - tau) {
+ tau <- tau + X
+ ## does the new lineage survive until Tmax?
+ Y <- rbinom(1, size = 1, prob = 1/(1 + INT(tau)))
+ if (Y) {
+ i <<- i + 1L
+ t[i] <<- tau
+ ## set internal edge:
+ j <<- j + 1L
+ edge[j, ] <<- c(node, i)
+ edge.length[j] <<- tau - t[node]
+ foo(i, t[i])
+ NoDesc <- FALSE
+ break
+ }
+ X <- rtimetospe(tau)
+ }
+ ## set terminal edge:
+ if (NoDesc) {
+ j <<- j + 1L
+ edge[j, 1] <<- node # the 2nd column is already set to 0
+ edge.length[j] <<- Tmax - t[node]
+ }
+ }
+ }
+
+ edge <- matrix(0L, 1e5, 2)
+ edge.length <- numeric(1e5)
+ j <- 0L; i <- 1L; t <- 0
+ foo(1L, 0)
+ .makePhylo(edge[1:j, ], edge.length[1:j], i)
+}
-## DNA.R (2010-07-14)
+## DNA.R (2010-09-01)
## Manipulations and Comparisons of DNA Sequences
BF
}
+Ftab <- function(x, y = NULL)
+{
+ if (is.null(y)) {
+ if (is.list(x)) {
+ y <- x[[2]]
+ x <- x[[1]]
+ if (length(x) != length(y))
+ stop("'x' and 'y' not of same lenght")
+ } else { # 'x' is a matrix
+ y <- x[2, , drop = TRUE]
+ x <- x[1, , drop = TRUE]
+ }
+ } else {
+ x <- as.vector(x)
+ y <- as.vector(y)
+ if (length(x) != length(y))
+ stop("'x' and 'y' not of same lenght")
+ }
+ out <- matrix(0, 4, 4)
+ k <- c(136, 40, 72, 24)
+ for (i in 1:4) {
+ a <- x == k[i]
+ for (j in 1:4) {
+ b <- y == k[j]
+ out[i, j] <- sum(a & b)
+ }
+ }
+ dimnames(out)[1:2] <- list(c("a", "c", "g", "t"))
+ out
+}
+
GC.content <- function(x) sum(base.freq(x)[2:3])
seg.sites <- function(x)
--- /dev/null
+## MPR.R (2010-08-10)
+
+## Most Parsimonious Reconstruction
+
+## Copyright 2010 Emmanuel Paradis
+
+## This file is part of the R-package `ape'.
+## See the file ../COPYING for licensing issues.
+
+MPR <- function(x, phy, outgroup)
+{
+ if (is.rooted(phy))
+ stop("the tree must be unrooted")
+ if (!is.binary.tree(phy))
+ stop("the tree must be fully dichotomous")
+ if (length(outgroup) > 1L)
+ stop("outgroup must be a single tip")
+ if (is.character(outgroup))
+ outgroup <- which(phy$tip.label == outgroup)
+
+ if (!is.null(names(x))) {
+ if (all(names(x) %in% phy$tip.label))
+ x <- x[phy$tip.label]
+ else warning("the names of 'x' and the tip labels of the tree do not match: the former were ignored in the analysis.")
+ }
+
+ n <- length(phy$tip.label)
+ if (is.null(phy$node.label))
+ phy$node.label <- n + 1:(phy$Nnode)
+
+ phy <- drop.tip(root(phy, outgroup), outgroup)
+ n <- n - 1L
+ m <- phy$Nnode
+ phy <- reorder(phy, "pruningwise")
+
+ root.state <- x[outgroup]
+ I <- as.integer(x[-outgroup])
+ I[n + 1:m] <- NA
+ I <- cbind(I, I) # interval map
+
+ med <- function(x) {
+ i <- length(x)/2
+ sort(x)[c(i, i + 1L)]
+ }
+
+ ## 1st pass
+ s <- seq(from = 1, by = 2, length.out = m)
+ anc <- phy$edge[s, 1]
+ des <- matrix(phy$edge[, 2], ncol = 2, byrow = TRUE)
+ for (i in 1:m) I[anc[i], ] <- med(I[des[i, ], ])
+
+ ## 2nd pass
+ out <- matrix(NA, m, 2)
+ colnames(out) <- c("lower", "upper")
+ ## do the most basal node before looping
+ Iw <- as.vector(I[des[m, ], ]) # interval maps of the descendants
+ out[anc[m] - n, ] <- range(med(c(root.state, root.state, Iw)))
+ for (i in (m - 1):1) {
+ j <- anc[i]
+ Iw <- as.vector(I[des[i, ], ]) # interval maps of the descendants
+ k <- which(phy$edge[, 2] == j) # find the ancestor
+ tmp <- out[phy$edge[k, 1] - n, ]
+ out[j - n, 1] <- min(med(c(tmp[1], tmp[1], Iw)))
+ out[j - n, 2] <- max(med(c(tmp[2], tmp[2], Iw)))
+ }
+ rownames(out) <- phy$node.label
+ out
+}
scaled = TRUE, kappa = 1, corStruct = NULL, ip = 0.1)
{
if (!inherits(phy, "phylo"))
- stop('object "phy" is not of class "phylo".')
+ stop('object "phy" is not of class "phylo".')
if (is.null(phy$edge.length))
stop("tree has no branch lengths")
type <- match.arg(type, c("continuous", "discrete"))
if (!is.null(names(x))) {
if(all(names(x) %in% phy$tip.label))
x <- x[phy$tip.label]
- else warning('the names of argument "x" and the tip labels of the tree
-did not match: the former were ignored in the analysis.')
+ else warning("the names of 'x' and the tip labels of the tree do not match: the former were ignored in the analysis.")
}
obj <- list()
if (kappa != 1) phy$edge.length <- phy$edge.length^kappa
Q[] <- c(p, 0)[rate]
diag(Q) <- -rowSums(Q)
for (i in seq(from = 1, by = 2, length.out = nb.node)) {
- j <- i + 1
+ j <- i + 1L
anc <- phy$edge[i, 1]
des1 <- phy$edge[i, 2]
des2 <- phy$edge[j, 2]
print.ace <- function(x, digits = 4, ...)
{
- cat("\n Ancestral Character Reconstruction\n\n")
+ cat("\n Ancestral Character Estimation\n\n")
cat("Call: ")
print(x$call)
cat("\n")
- cat(" Log-likelihood:", x$loglik, "\n\n")
+ if (!is.null(x$loglik))
+ cat(" Log-likelihood:", x$loglik, "\n\n")
ratemat <- x$index.matrix
if (is.null(ratemat)) { # to be improved
class(x) <- NULL
-## collapse.singles.R (2008-06-19)
+## collapse.singles.R (2010-07-23)
## Collapse "Single" Nodes
xmat <- xmat[xmat[, 1] != i, ] # drop
## changed by EP for the new coding of "phylo" (2006-10-05):
## xmat[xmat < i] <- xmat[xmat < i] + 1 ## adjust indices
- xmat[xmat > i] <- xmat[xmat > i] - 1 ## adjust indices
+ xmat[xmat > i] <- xmat[xmat > i] - 1L ## adjust indices # changed '1' by '1L' (2010-07-23)
## END
elen[prev.node] <- elen[prev.node] + elen[next.node]
## added by Elizabeth Purdom (2008-06-19):
## <FIXME>
## maybe need to refine below in case of non-Brownian corStruct
if (!missing(corStruct)) phy <- attr(corStruct, "tree")
- dfP <- sum(phy$edge.length)*N / sum(diag(R))
+ dfP <- sum(phy$edge.length)*N / sum(diag(vcv(phy))) # need the variances
## </FIXME>
## compute QIC:
dist.topo <- function(x, y, method = "PH85")
{
if (method == "score" && (is.null(x$edge.length) || is.null(y$edge.length)))
- stop("trees must have branch lengths for Billera et al.'s distance.")
+ stop("trees must have branch lengths for branch score distance.")
nx <- length(x$tip.label)
x <- unroot(x)
y <- unroot(y)
storage.mode(phy$Nnode) <- "integer"
ans <- attr(.Call("prop_part", c(list(phy), boot.tree),
B + 1, FALSE, PACKAGE = "ape"), "number") - 1
- if (trees) ans <- list(BP = ans, trees = boot.tree)
+ if (trees) {
+ class(boot.tree) <- "multiPhylo"
+ ans <- list(BP = ans, trees = boot.tree)
+ }
ans
}
-## pic.R (2009-05-10)
+## pic.R (2010-08-18)
## Phylogenetically Independent Contrasts
-## Copyright 2002-2009 Emmanuel Paradis
+## Copyright 2002-2010 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
##
##}
contr <- ans[[7]]
+ lbls <-
+ if (is.null(phy$node.label)) as.character(1:nb.node + nb.tip)
+ else phy$node.label
if (var.contrasts) {
contr <- cbind(contr, ans[[8]])
- dimnames(contr) <- list(1:nb.node + nb.tip, c("contrasts", "variance"))
- } else names(contr) <- 1:nb.node + nb.tip
+ dimnames(contr) <- list(lbls, c("contrasts", "variance"))
+ } else names(contr) <- lbls
contr
}
-## plot.phylo.R (2010-03-19)
+## plot.phylo.R (2010-08-12)
## Plot Phylogenies
if (direction == "leftwards") x.lim[2] <- x.lim[2] + x$root.edge
if (direction == "downwards") y.lim[2] <- y.lim[2] + x$root.edge
}
- asp <- if (type %in% c("fan", "radial")) 1 else NA # fix by Klaus Schliep (2008-03-28)
+ asp <- if (type %in% c("fan", "radial", "unrooted")) 1 else NA # fixes by Klaus Schliep (2008-03-28 and 2010-08-12)
plot(0, type = "n", xlim = x.lim, ylim = y.lim, ann = FALSE, axes = FALSE, asp = asp, ...)
if (is.null(adj))
adj <- if (phyloORclado && direction == "leftwards") 1 else 0
-## rTrait.R (2010-05-06)
+## rTrait.R (2010-07-26)
## Trait Evolution
stop("at least one branch length negative")
if (is.character(model)) {
- switch(model, "ER" = {
+ switch(toupper(model), "ER" = {
if (length(rate) != 1)
stop("`rate' must have one element")
Q <- matrix(rate, k, k)
}
rTraitCont <-
- function(phy, model = "BM", sigma = 0.1, alpha = 1,
- theta = 0, ancestor = FALSE, root.value = 0)
+ function(phy, model = "BM", sigma = 0.1, alpha = 1, theta = 0,
+ ancestor = FALSE, root.value = 0, linear = TRUE)
{
if (is.null(phy$edge.length))
stop("tree has no branch length")
environment(model) <- environment()
for (i in N:1) x[des[i]] <- model(x[anc[i]], el[i])
} else {
- model <- pmatch(model, c("BM", "OU"))
+ model <- pmatch(toupper(model), c("BM", "OU"))
if (length(sigma) == 1) sigma <- rep(sigma, N)
else if (length(sigma) != N)
stop("'sigma' must have one or Nedge(phy) elements")
if (length(theta) == 1) theta <- rep(theta, N)
else if (length(theta) != N)
stop("'theta' must have one or Nedge(phy) elements")
+ if (!linear) model <- model + 1L
}
.C("rTraitCont", as.integer(model), as.integer(N), as.integer(anc - 1L), as.integer(des - 1L), el, sigma, alpha, theta, x, DUP = FALSE, NAOK = TRUE, PACKAGE = "ape")
}
-## read.GenBank.R (2007-06-27)
+## read.GenBank.R (2010-07-22)
## Read DNA Sequences from GenBank via Internet
-## Copyright 2002-2007 Emmanuel Paradis
+## Copyright 2002-2010 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
-read.GenBank <- function(access.nb, seq.names = access.nb,
- species.names = TRUE, as.character = FALSE)
+read.GenBank <-
+ function(access.nb, seq.names = access.nb, species.names = TRUE,
+ gene.names = FALSE, as.character = FALSE)
{
N <- length(access.nb)
## If there are more than 400 sequences, we need to break down the
}
FI <- grep("^ {0,}ORIGIN", X) + 1
LA <- which(X == "//") - 1
- obj <- list()
- length(obj) <- N
+ obj <- vector("list", N)
for (i in 1:N) {
## remove all spaces and digits
tmp <- gsub("[[:digit:] ]", "", X[FI[i]:LA[i]])
tmp <- character(N)
sp <- grep("ORGANISM", X)
for (i in 1:N)
- tmp[i] <- unlist(strsplit(X[sp[i]], " +ORGANISM +"))[2]
+ tmp[i] <- unlist(strsplit(X[sp[i]], " +ORGANISM +"))[2]
attr(obj, "species") <- gsub(" ", "_", tmp)
}
+ if (gene.names) {
+ tmp <- character(N)
+ sp <- grep(" +gene +<", X)
+ for (i in 1:N)
+ tmp[i] <- unlist(strsplit(X[sp[i + 1L]], " +/gene=\""))[2]
+ attr(obj, "gene") <- gsub("\"$", "", tmp)
+ }
obj
}
\alias{cbind.DNAbin}
\alias{as.matrix.DNAbin}
\alias{c.DNAbin}
+\alias{as.list.DNAbin}
\alias{labels.DNAbin}
\title{Manipulate DNA Sequences in Bit-Level Format}
\description{
\method{[}{DNAbin}(x, i, j, drop = FALSE)
\method{as.matrix}{DNAbin}(x, \dots)
\method{c}{DNAbin}(\dots, recursive = FALSE)
+\method{as.list}{DNAbin}(x, \dots)
\method{labels}{DNAbin}(object, \dots)
}
\arguments{
\code{as.matrix} may be used to convert DNA sequences (of the same
length) stored in a list into a matrix while keeping the names and the
- class.
+ class. \code{as.list} does the reverse operation.
}
\value{
an object of class \code{"DNAbin"} in the case of \code{rbind},
--- /dev/null
+\name{MPR}
+\alias{MPR}
+\title{Most Parsimonious Reconstruction}
+\description{
+ This function does ancestral character reconstruction by parsimony as
+ described in Hanazawa et al. (1995) and modified by Narushima and
+ Hanazawa (1997).
+}
+\usage{
+MPR(x, phy, outgroup)
+}
+\arguments{
+ \item{x}{a vector of integers.}
+ \item{phy}{an object of class \code{"phylo"}; the tree must be
+ unrooted and fully dichotomous.}
+ \item{outgroup}{an integer or a character string giving the tip of
+ \code{phy} used as outgroup.}
+}
+\details{
+ Hanazawa et al. (1995) and Narushima and Hanazawa (1997) used Farris's
+ (1970) and Swofford and Maddison's (1987) framework to reconstruct
+ ancestral states using parsimony. The character is assumed to take
+ integer values. The algorithm finds the sets of values for each node
+ as intervals with lower and upper values.
+
+ It is recommended to root the tree with the outgroup before the
+ analysis, so plotting the values with \code{\link{nodelabels}} is
+ simple.
+}
+\value{
+ a matrix of integers with two columns named ``lower'' and ``upper''
+ giving the lower and upper values of the reconstructed sets for each
+ node.
+}
+\references{
+ Farris, J. M. (1970) Methods for computing Wagner trees.
+ \emph{Systematic Zoology}, \bold{19}, 83--92.
+
+ Hanazawa, M., Narushima, H. and Minaka, N. (1995) Generating most
+ parsimonious reconstructions on a tree: a generalization of the
+ Farris--Swofford--Maddison method. \emph{Discrete Applied
+ Mathematics}, \bold{56}, 245--265.
+
+ Narushima, H. and Hanazawa, M. (1997) A more efficient algorithm for
+ MPR problems in phylogeny. \emph{Discrete Applied Mathematics},
+ \bold{80}, 231--238.
+
+ Swofford, D. L. and Maddison, W. P. (1987) Reconstructing ancestral
+ character states under Wagner parsimony. \emph{Mathematical
+ Biosciences}, \bold{87}, 199--229.
+}\author{Emmanuel Paradis}
+\seealso{
+ \code{\link{ace}}, \code{\link{root}}, \code{\link{nodelabels}}
+}
+\examples{
+## the example in Narushima and Hanazawa (1997):
+tr <- read.tree(text = "(((i,j)c,(k,l)b)a,(h,g)e,f)d;")
+x <- c(1, 3, 0, 6, 5, 2, 4)
+names(x) <- letters[6:12]
+(o <- MPR(x, tr, "f"))
+plot(tr)
+nodelabels(paste("[", o[, 1], ",", o[, 2], "]", sep = ""))
+tiplabels(rev(x), adj = -2)
+## some random data:
+x <- rpois(30, 1)
+tr <- rtree(30, rooted = FALSE)
+MPR(x, tr, "t1")
+}
+\keyword{models}
\alias{getMRCA}
\alias{plotCophylo2}
\alias{plotPhyloCoor}
+\alias{integrateTrapeze}
+\alias{CDF.birth.death}
\title{Internal Ape Functions}
\description{
Internal \pkg{ape} functions.
\name{base.freq}
\alias{base.freq}
+\alias{Ftab}
\title{Base frequencies from DNA Sequences}
+\description{
+ \code{base.freq} computes the frequencies (absolute or relative) of
+ the four DNA bases (adenine, cytosine, guanine, and thymidine) from a
+ sample of sequences.
+
+ \code{Ftab} computes the contingency table with the absolute
+ frequencies of the DNA bases from a pair of sequences.
+}
\usage{
base.freq(x, freq = FALSE)
+Ftab(x, y = NULL)
}
\arguments{
\item{x}{a vector, a matrix, or a list which contains the DNA
sequences.}
+ \item{y}{a vector with a single DNA sequence.}
\item{freq}{a logical specifying whether to return the proportions
(the default) or the absolute frequencies (counts).}
}
-\description{
- This function computes the relative frequencies (i.e. proportions) of
- the four DNA bases (adenine, cytosine, guanine, and thymidine) from a
- sample of sequences.
-}
\details{
The base frequencies are computed over all sequences in the
sample. All missing or unknown sites are discarded from the
computations.
+
+ For \code{Ftab}, if the argument \code{y} is given then both \code{x}
+ and \code{y} are coerced as vectors and must be of equal length. If
+ \code{y} is not given, \code{x} must be a matrix or a list and only
+ the two first sequences are used.
}
\value{
- A numeric vector storing the relative frequencies with names
- \code{c("a", "c", "g", "t")}.
+ A numeric vector with names \code{c("a", "c", "g", "t")}, or a four by
+ four matrix with similar dimnames.
}
\author{Emmanuel Paradis}
\seealso{
\code{\link{GC.content}}, \code{\link{seg.sites}},
\code{\link{nuc.div}}, \code{\link{DNAbin}}
}
+\examples{
+data(woodmouse)
+base.freq(woodmouse)
+base.freq(woodmouse, TRUE)
+Ftab(woodmouse)
+Ftab(woodmouse[1, ], woodmouse[2, ]) # same than above
+Ftab(woodmouse[14:15, ]) # between the last two
+}
\keyword{univar}
+\keyword{manip}
transitons and transversions.}
\item{``logdet''}{The Log-Det distance, developed by Lockhart et
- al. (1994), is related to BH87. However, this distance is symmetric.}
+ al. (1994), is related to BH87. However, this distance is
+ symmetric. Formulae from Gu and Li (1996) are used.
+ \code{dist.logdet} in \pkg{phangorn} uses a different
+ implementation that gives substantially different distances for
+ low-diverging sequences.}
\item{``paralin''}{Lake (1994) developed the paralinear distance which
can be viewed as another variant of the Barry--Hartigan distance.}
sequences of unequal base compositions. \emph{Proceedings of the
National Academy of Sciences USA}, \bold{92}, 11317--11321.
+ Gu, X. and Li, W.-H. (1996) Bias-corrected paralinear and LogDet
+ distances and tests of molecular clocks and phylogenies under
+ nonstationary nucleotide frequencies. \emph{Molecular Biology and
+ Evolution}, \bold{13}, 1375--1383.
+
Jukes, T. H. and Cantor, C. R. (1969) Evolution of protein
molecules. in \emph{Mammalian Protein Metabolism}, ed. Munro, H. N.,
pp. 21--132, New York: Academic Press.
similar bipartitions (or splits) in both trees.
}
\note{
- The geodesic distance of Billera et al. (2001) has been disabled.
+ The geodesic distance of Billera et al. (2001) has been disabled: see
+ the package \pkg{distory} on CRAN.
}
\references{
Billera, L. J., Holmes, S. P. and Vogtmann, K. (2001) Geometry of the
parenthesis = 3)
layout(matrix(c(1, 2), 2))
plot(tr)
-tr$tip.label <- mixedFontLabel(genus, species, geo, sep = c(" ", " - "),
+tr$tip.label <- mixedFontLabel(genus, species, geo, sep = c(" ", " | "),
italic = 1:2, bold = 3)
plot(tr)
layout(1)
\alias{rTraitCont}
\title{Continuous Character Simulation}
\usage{
-rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1,
- theta = 0, ancestor = FALSE, root.value = 0)
+rTraitCont(phy, model = "BM", sigma = 0.1, alpha = 1, theta = 0,
+ ancestor = FALSE, root.value = 0, linear = TRUE)
}
\arguments{
\item{phy}{an object of class \code{"phylo"}.}
values at the nodes as well (by default, only the values at the tips
are returned).}
\item{root.value}{a numeric giving the value at the root.}
+ \item{linear}{a logical indicating which parameterisation of the OU
+ model to use (see details).}
}
\description{
This function simulates the evolution of a continuous character along a
\item{\code{"OU"}:}{an Ornstein-Uhlenbeck model is used. The above
indexing rule is used for the three parameters \code{sigma},
\code{alpha}, and \code{theta}. This may be more interesting for the
- last one to model varying phenotypic optima. Be careful that large
- values of \code{alpha} may give unrealistic output.}
+ last one to model varying phenotypic optima.
+
+ By default the following formula is used:
+
+ \deqn{x_{t''} = x_{t'} - \alpha l (x_{t'} - \theta) + \sigma
+ l \epsilon}{x(t'') = x(t') - alpha l (x(t') - theta) + sigma
+ l epsilon}
+
+ where \eqn{l (= t'' - t')} is the branch length, and \eqn{\epsilon
+ \sim N(0, 1)}{\epsilon ~ N(0, 1)}. If \eqn{\alpha > 1}{alpha > 1},
+ this may lead to chaotic oscillations. Thus an alternative
+ parameterisation is used if \code{linear = FALSE}:
+
+ \deqn{x_{t''} = x_{t'} - (1 - exp(-\alpha l)) * (x_{t'} - \theta) +
+ \sigma l \epsilon}{x(t'') = x(t') - (1 - exp(-alpha l)) * (x(t') -
+ theta) + sigma l epsilon}}
\item{A function:}{it must be of the form \code{foo(x, l)} where
\code{x} is the trait of the ancestor and \code{l} is the branch
length. It must return the value of the descendant. The arguments
\code{sigma}, \code{alpha}, and \code{theta} are ignored.}
}}
-\note{
- Currently, the OU model is a bit difficult to tune. Hopefully, this
- may be improved in the future.
-}
\value{
A numeric vector with names taken from the tip labels of
\code{phy}. If \code{ancestor = TRUE}, the node labels are used if
\alias{read.GenBank}
\title{Read DNA Sequences from GenBank via Internet}
\usage{
-read.GenBank(access.nb, seq.names = access.nb,
- species.names = TRUE, as.character = FALSE)
+read.GenBank(access.nb, seq.names = access.nb, species.names = TRUE,
+ gene.names = FALSE, as.character = FALSE)
}
\arguments{
\item{access.nb}{a vector of mode character giving the accession numbers.}
accession numbers are used.}
\item{species.names}{a logical indicating whether to attribute the
species names to the returned object.}
+ \item{gene.names}{a logical indicating whether to attribute the
+ gene names to the returned object. It is \code{FALSE} by default
+ because this will work correctly only when reading sequences with a
+ single gene.}
\item{as.character}{a logical controlling whether to return the
sequences as an object of class \code{"DNAbin"} (the default).}
}
If \code{species.names = TRUE}, the returned list has an attribute
\code{"species"} containing the names of the species taken from the
field ``ORGANISM'' in GenBank.
+
+ If \code{gene.names = TRUE}, the returned list has an attribute
+ \code{"gene"} containing the names of the gene. This will not work
+ correctly if reading a sequence with multiple genes (e.g., a
+ mitochondrial genome).
}
\seealso{
\code{\link{read.dna}}, \code{\link{write.dna}},
--- /dev/null
+\name{rlineage}
+\alias{rlineage}
+\alias{rbdtree}
+\alias{drop.fossil}
+\title{Tree Simulation Under the Time-Dependent Birth--Death Models}
+\description{
+ These two functions simulate phylogenies under any time-dependent
+ birth--death model. \code{lineage} generates a complete tree including
+ the species that go extinct; \code{rbdtree} generates a tree with only
+ the species until present; \code{drop.fossil} is a utility function to
+ remove the extinct species.
+}
+\usage{
+rlineage(birth, death, Tmax = 50, BIRTH = NULL,
+ DEATH = NULL, eps = 1e-6)
+rbdtree(birth, death, Tmax = 50, BIRTH = NULL,
+ DEATH = NULL, eps = 1e-6)
+drop.fossil(phy, tol = 0)
+}
+\arguments{
+ \item{birth, death}{a numeric value or a (vectorized) function
+ specifying how speciation and extinction through time.}
+ \item{Tmax}{a numeric value giving the length of the simulation.}
+ \item{BIRTH, DEATH}{a (vectorized) function which is the primitive
+ of \code{birth} or \code{death}. This can be used to speed-up the
+ computation. By default, numerical integration is done.}
+ \item{eps}{a numeric value giving the time resolution of the
+ simulation; this may be increased (e.g., 0.001) to shorten
+ computation times.}
+ \item{phy}{an object of class \code{"phylo"}.}
+ \item{tol}{a numeric value giving the tolerance to consider a species
+ as extinct.}
+}
+\details{
+ Both functions use continuous-time algorithms described in the
+ references. The models are time-dependent birth--death models as
+ described in Kendall (1948). Speciation (birth) and extinction (death)
+ rates may be constant or vary through time according to an R function
+ specified by the user. In the latter case, \code{BIRTH} and/or
+ \code{DEATH} may be used of the primitives of \code{birth} and
+ \code{death} are known. In these functions time is the formal argument
+ and must be named \code{t}.
+}
+\value{
+ An object of class \code{"phylo"}.
+}
+\references{
+ Kendall, D. G. (1948) On the generalized ``birth-and-death''
+ process. \emph{Annals of Mathematical Statistics}, \bold{19}, 1--15.
+
+ Paradis, E. (2010) Time-dependent speciation and extinction from
+ phylogenies: a least squares approach. (under revision)
+ %\emph{Evolution}, \bold{59}, 1--12.
+}
+\author{Emmanuel Paradis}
+\seealso{
+ \code{\link{yule}}, \code{\link{yule.time}}, \code{\link{birthdeath}},
+ \code{\link{rtree}}, \code{\link{stree}}
+}
+\examples{
+plot(rlineage(0.1, 0)) # Yule process with lambda = 0.1
+plot(rlineage(0.1, 0.05)) # simple birth-death process
+b <- function(t) 1/(1 + exp(0.1*t - 2)) # logistic
+layout(matrix(1:2, 1))
+plot(rlineage(b, 0.01))
+plot(rbdtree(b, 0.01))
+}
+\keyword{datagen}
the log-likelihood function.
}
\value{
- An object of class "yule" (see \code{\link{yule}}).
+ An object of class \code{"yule"} (see \code{\link{yule}}).
}
\author{Emmanuel Paradis}
\seealso{
-/* dist_dna.c 2010-07-14 */
+/* dist_dna.c 2010-09-16 */
/* Copyright 2005-2010 Emmanuel Paradis
#define COMPUTE_DIST_LogDet\
for (k = 0; k < 16; k++) Ftab[k] = ((double) Ntab[k]/L);\
- d[target] = (-log(detFourByFour(Ftab))/4 - LN4)/4;\
+ d[target] = -log(detFourByFour(Ftab))/4 - LN4;\
if (*variance) {\
/* For the inversion, we first make U an identity matrix */\
for (k = 1; k < 15; k++) U[k] = 0;\
-/* rTrait.c 2010-06-23 */
+/* rTrait.c 2010-07-26 */
/* Copyright 2010 Emmanuel Paradis */
break;
case 2 : for (i = *Nedge - 1; i >= 0; i--) {
GetRNGstate();
- x[edge2[i]] = x[edge1[i]] + sqrt(el[i])*(sigma[i]*norm_rand() - alpha[i]*(x[edge1[i]] - theta[i]));
+ x[edge2[i]] = x[edge1[i]] - alpha[i]*el[i]*(x[edge1[i]] - theta[i]) + sqrt(el[i])*sigma[i]*norm_rand();
+ PutRNGstate();
+ }
+ break;
+ case 3 : for (i = *Nedge - 1; i >= 0; i--) {
+ GetRNGstate();
+ x[edge2[i]] = x[edge1[i]] - (1 - exp(alpha[i]*el[i]))*(x[edge1[i]] - theta[i]) + sqrt(el[i])*sigma[i]*norm_rand();
PutRNGstate();
}
break;