3 ## Ancestral Character Estimation
5 ## Copyright 2005-2008 Emmanuel Paradis and Ben Bolker
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
11 model = if (type == "continuous") "BM" else "ER",
12 scaled = TRUE, kappa = 1, corStruct = NULL, ip = 0.1)
14 if (class(phy) != "phylo")
15 stop('object "phy" is not of class "phylo".')
16 if (is.null(phy$edge.length))
17 stop("tree has no branch lengths")
18 type <- match.arg(type, c("continuous", "discrete"))
19 nb.tip <- length(phy$tip.label)
21 if (nb.node != nb.tip - 1)
22 stop('"phy" is not rooted AND fully dichotomous.')
23 if (length(x) != nb.tip)
24 stop("length of phenotypic and of phylogenetic data do not match.")
25 if (!is.null(names(x))) {
26 if(all(names(x) %in% phy$tip.label))
28 else warning('the names of argument "x" and the tip labels of the tree
29 did not match: the former were ignored in the analysis.')
32 if (kappa != 1) phy$edge.length <- phy$edge.length^kappa
33 if (type == "continuous") {
34 if (method == "pic") {
36 stop('the "pic" method can be used only with model = "BM".')
37 ## See pic.R for some annotations.
38 phy <- reorder(phy, "pruningwise")
39 phenotype <- numeric(nb.tip + nb.node)
40 phenotype[1:nb.tip] <- if (is.null(names(x))) x else x[phy$tip.label]
41 contr <- var.con <- numeric(nb.node)
42 ans <- .C("pic", as.integer(nb.tip), as.integer(nb.node),
43 as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]),
44 as.double(phy$edge.length), as.double(phenotype),
45 as.double(contr), as.double(var.con),
46 as.integer(CI), as.integer(scaled),
48 obj$ace <- ans[[6]][-(1:nb.tip)]
49 names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node)
52 CI95 <- matrix(NA, nb.node, 2)
53 CI95[, 1] <- obj$ace + se * qnorm(0.025)
54 CI95[, 2] <- obj$ace - se * qnorm(0.025)
60 tip <- phy$edge[, 2] <= nb.tip
61 dev.BM <- function(p) {
62 x1 <- p[-1][phy$edge[, 1] - nb.tip]
63 x2 <- numeric(length(x1))
64 x2[tip] <- x[phy$edge[tip, 2]]
65 x2[!tip] <- p[-1][phy$edge[!tip, 2] - nb.tip]
66 -2 * (-sum((x1 - x2)^2/phy$edge.length)/(2*p[1]) -
69 out <- nlm(function(p) dev.BM(p),
70 p = c(1, rep(mean(x), nb.node)), hessian = TRUE)
71 obj$loglik <- -out$minimum / 2
72 obj$ace <- out$estimate[-1]
73 names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node)
74 se <- sqrt(diag(solve(out$hessian)))
75 obj$sigma2 <- c(out$estimate[1], se[1])
78 CI95 <- matrix(NA, nb.node, 2)
79 CI95[, 1] <- obj$ace + se * qt(0.025, nb.node)
80 CI95[, 2] <- obj$ace - se * qt(0.025, nb.node)
85 if (method == "GLS") {
86 if (is.null(corStruct))
87 stop('you must give a correlation structure if method = "GLS".')
88 if (class(corStruct)[1] == "corMartins")
89 M <- corStruct[1] * dist.nodes(phy)
90 if (class(corStruct)[1] == "corGrafen")
91 phy <- compute.brlen(attr(corStruct, "tree"),
93 power = exp(corStruct[1]))
94 if (class(corStruct)[1] %in% c("corBrownian", "corGrafen")) {
95 dis <- dist.nodes(attr(corStruct, "tree"))
96 MRCA <- mrca(attr(corStruct, "tree"), full = TRUE)
97 M <- dis[as.character(nb.tip + 1), MRCA]
98 dim(M) <- rep(sqrt(length(M)), 2)
100 varAY <- M[-(1:nb.tip), 1:nb.tip]
101 varA <- M[-(1:nb.tip), -(1:nb.tip)]
102 V <- corMatrix(Initialize(corStruct, data.frame(x)),
105 o <- gls(x ~ 1, correlation = Initialize(corStruct, data.frame(x)))
107 obj$ace <- drop(varAY %*% invV %*% (x - GM) + GM)
108 names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node)
110 CI95 <- matrix(NA, nb.node, 2)
111 se <- sqrt((varA - varAY %*% invV %*% t(varAY))[cbind(1:nb.node, 1:nb.node)])
112 CI95[, 1] <- obj$ace + se * qnorm(0.025)
113 CI95[, 2] <- obj$ace - se * qnorm(0.025)
117 } else { # type == "discrete"
119 stop("only ML estimation is possible for discrete characters.")
120 if (!is.factor(x)) x <- factor(x)
124 if (is.character(model)) {
125 rate <- matrix(NA, nl, nl)
126 if (model == "ER") np <- rate[] <- 1
127 if (model == "ARD") {
129 rate[col(rate) != row(rate)] <- 1:np
131 if (model == "SYM") {
132 np <- nl * (nl - 1)/2
133 rate[col(rate) < row(rate)] <- 1:np
135 rate[col(rate) < row(rate)] <- 1:np
138 if (ncol(model) != nrow(model))
139 stop("the matrix given as `model' is not square")
140 if (ncol(model) != nl)
141 stop("the matrix `model' must have as many rows
142 as the number of categories in `x'")
147 index.matrix[cbind(1:nl, 1:nl)] <- NA
148 rate[cbind(1:nl, 1:nl)] <- 0
149 rate[rate == 0] <- np + 1 # to avoid 0's since we will use this an numeric indexing
151 liks <- matrix(0, nb.tip + nb.node, nl)
152 for (i in 1:nb.tip) liks[i, x[i]] <- 1
153 phy <- reorder(phy, "pruningwise")
155 Q <- matrix(0, nl, nl)
156 dev <- function(p, output.liks = FALSE) {
158 diag(Q) <- -rowSums(Q)
159 for (i in seq(from = 1, by = 2, length.out = nb.node)) {
161 anc <- phy$edge[i, 1]
162 des1 <- phy$edge[i, 2]
163 des2 <- phy$edge[j, 2]
164 tmp <- eigen(Q * phy$edge.length[i], symmetric = FALSE)
165 P1 <- tmp$vectors %*% diag(exp(tmp$values)) %*% solve(tmp$vectors)
166 tmp <- eigen(Q * phy$edge.length[j], symmetric = FALSE)
167 P2 <- tmp$vectors %*% diag(exp(tmp$values)) %*% solve(tmp$vectors)
168 liks[anc, ] <- P1 %*% liks[des1, ] * P2 %*% liks[des2, ]
170 if (output.liks) return(liks[-(1:nb.tip), ])
171 - 2 * log(sum(liks[nb.tip + 1, ]))
173 out <- nlm(function(p) dev(p), p = rep(ip, length.out = np),
175 obj$loglik <- -out$minimum / 2
176 obj$rates <- out$estimate
177 if (any(out$gradient == 0))
178 warning("The likelihood gradient seems flat in at least one dimension (gradient null):\ncannot compute the standard-errors of the transition rates.\n")
179 else obj$se <- sqrt(diag(solve(out$hessian)))
180 obj$index.matrix <- index.matrix
182 lik.anc <- dev(obj$rates, TRUE)
183 lik.anc <- lik.anc / rowSums(lik.anc)
184 colnames(lik.anc) <- lvls
185 obj$lik.anc <- lik.anc
188 obj$call <- match.call()
193 logLik.ace <- function(object, ...) object$loglik
195 deviance.ace <- function(object, ...) -2*object$loglik
197 AIC.ace <- function(object, ..., k = 2)
199 if (is.null(object$loglik)) return(NULL)
200 ## Trivial test of "type"; may need to be improved
201 ## if other models are included in ace(type = "c")
202 np <- if (!is.null(object$sigma2)) 1 else length(object$rates)
203 -2*object$loglik + np*k
207 anova.ace <- function(object, ...)
209 X <- c(list(object), list(...))
210 df <- sapply(lapply(X, "[[", "rates"), length)
211 ll <- sapply(X, "[[", "loglik")
212 ## check if models are in correct order?
213 dev <- c(NA, 2*diff(ll))
214 ddf <- c(NA, diff(df))
215 table <- data.frame(ll, df, ddf, dev,
216 pchisq(dev, ddf, lower.tail = FALSE))
217 dimnames(table) <- list(1:length(X), c("Log lik.", "Df",
218 "Df change", "Deviance",
220 structure(table, heading = "Likelihood Ratio Test Table",
221 class = c("anova", "data.frame"))