X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Face.R;h=5227e1cae84ca37882e379954a2e37b418f0c02b;hb=b0d1251527d8dd48ca1703b1cfaf217f413eda0e;hp=9ea659001688562e42f6e6f557b22720ad0830b2;hpb=06c3113db74a7cfa54c15a6f18163cd9b2c1f6db;p=ape.git diff --git a/R/ace.R b/R/ace.R index 9ea6590..5227e1c 100644 --- a/R/ace.R +++ b/R/ace.R @@ -1,39 +1,86 @@ -## ace.R (2010-04-23) +## ace.R (2013-03-18) ## Ancestral Character Estimation -## Copyright 2005-2010 Emmanuel Paradis and Ben Bolker +## Copyright 2005-2013 Emmanuel Paradis and Ben Bolker ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE, - model = if (type == "continuous") "BM" else "ER", - scaled = TRUE, kappa = 1, corStruct = NULL, ip = 0.1) +.getSEs <- function(out) +{ + h <- out$hessian + if (any(diag(h) == 0)) { + warning("The likelihood gradient seems flat in at least one dimension (gradient null):\ncannot compute the standard-errors of the transition rates.\n") + se <- rep(NaN, nrow(h)) + } else { + se <- sqrt(diag(solve(h))) + } + se +} + +ace <- + function(x, phy, type = "continuous", + method = if (type == "continuous") "REML" else "ML", + CI = TRUE, model = if (type == "continuous") "BM" else "ER", + scaled = TRUE, kappa = 1, corStruct = NULL, ip = 0.1, + use.expm = FALSE) { if (!inherits(phy, "phylo")) - stop('object "phy" is not of class "phylo".') + stop('object "phy" is not of class "phylo"') if (is.null(phy$edge.length)) stop("tree has no branch lengths") type <- match.arg(type, c("continuous", "discrete")) nb.tip <- length(phy$tip.label) nb.node <- phy$Nnode if (nb.node != nb.tip - 1) - stop('"phy" is not rooted AND fully dichotomous.') + stop('"phy" is not rooted AND fully dichotomous.') if (length(x) != nb.tip) - stop("length of phenotypic and of phylogenetic data do not match.") + stop("length of phenotypic and of phylogenetic data do not match.") if (!is.null(names(x))) { if(all(names(x) %in% phy$tip.label)) x <- x[phy$tip.label] - else warning('the names of argument "x" and the tip labels of the tree -did not match: the former were ignored in the analysis.') + else warning("the names of 'x' and the tip labels of the tree do not match: the former were ignored in the analysis.") } obj <- list() if (kappa != 1) phy$edge.length <- phy$edge.length^kappa if (type == "continuous") { - if (method == "pic") { + switch(method, "REML" = { + minusLogLik <- function(sig2) { + if (sig2 < 0) return(1e100) + V <- sig2 * vcv(phy) + ## next three lines borrowed from dmvnorm() in 'mvtnorm' + distval <- stats::mahalanobis(x, center = mu, cov = V) + logdet <- sum(log(eigen(V, symmetric = TRUE, only.values = TRUE)$values)) + (nb.tip * log(2 * pi) + logdet + distval)/2 + } + mu <- rep(ace(x, phy, method="pic")$ace[1], nb.tip) + out <- nlm(minusLogLik, 1, hessian = TRUE) + sigma2 <- out$estimate + se_sgi2 <- sqrt(1/out$hessian) + tip <- phy$edge[, 2] <= nb.tip + minus.REML.BM <- function(p) { + x1 <- p[phy$edge[, 1] - nb.tip] + x2 <- numeric(length(x1)) + x2[tip] <- x[phy$edge[tip, 2]] + x2[!tip] <- p[phy$edge[!tip, 2] - nb.tip] + -(-sum((x1 - x2)^2/phy$edge.length)/(2 * sigma2) - + nb.node * log(sigma2)) + } + out <- nlm(function(p) minus.REML.BM(p), + p = rep(mu[1], nb.node), hessian = TRUE) + obj$resloglik <- -out$minimum + obj$ace <- out$estimate + names(obj$ace) <- nb.tip + 1:nb.node + obj$sigma2 <- c(sigma2, se_sgi2) + if (CI) { + se <- .getSEs(out) + tmp <- se * qt(0.025, nb.node) + obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) + } + }, "pic" = { if (model != "BM") - stop('the "pic" method can be used only with model = "BM".') + stop('the "pic" method can be used only with model = "BM".') ## See pic.R for some annotations. phy <- reorder(phy, "pruningwise") phenotype <- numeric(nb.tip + nb.node) @@ -46,16 +93,13 @@ did not match: the former were ignored in the analysis.') as.integer(CI), as.integer(scaled), PACKAGE = "ape") obj$ace <- ans[[6]][-(1:nb.tip)] - names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) + names(obj$ace) <- nb.tip + 1:nb.node if (CI) { se <- sqrt(ans[[8]]) - CI95 <- matrix(NA, nb.node, 2) - CI95[, 1] <- obj$ace + se * qnorm(0.025) - CI95[, 2] <- obj$ace - se * qnorm(0.025) - obj$CI95 <- CI95 + tmp <- se * qnorm(0.025) + obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) } - } - if (method == "ML") { + }, "ML" = { if (model == "BM") { tip <- phy$edge[, 2] <= nb.tip dev.BM <- function(p) { @@ -72,26 +116,22 @@ did not match: the former were ignored in the analysis.') obj$loglik <- -out$minimum / 2 obj$ace <- out$estimate[-1] names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) - se <- sqrt(diag(solve(out$hessian))) + se <- .getSEs(out) obj$sigma2 <- c(out$estimate[1], se[1]) - se <- se[-1] if (CI) { - CI95 <- matrix(NA, nb.node, 2) - CI95[, 1] <- obj$ace + se * qt(0.025, nb.node) - CI95[, 2] <- obj$ace - se * qt(0.025, nb.node) - obj$CI95 <- CI95 + tmp <- se[-1] * qt(0.025, nb.node) + obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) } } - } - if (method == "GLS") { + }, "GLS" = { if (is.null(corStruct)) - stop('you must give a correlation structure if method = "GLS".') + stop('you must give a correlation structure if method = "GLS".') if (class(corStruct)[1] == "corMartins") - M <- corStruct[1] * dist.nodes(phy) + M <- corStruct[1] * dist.nodes(phy) if (class(corStruct)[1] == "corGrafen") - phy <- compute.brlen(attr(corStruct, "tree"), - method = "Grafen", - power = exp(corStruct[1])) + phy <- compute.brlen(attr(corStruct, "tree"), + method = "Grafen", + power = exp(corStruct[1])) if (class(corStruct)[1] %in% c("corBrownian", "corGrafen")) { dis <- dist.nodes(attr(corStruct, "tree")) MRCA <- mrca(attr(corStruct, "tree"), full = TRUE) @@ -108,16 +148,14 @@ did not match: the former were ignored in the analysis.') obj$ace <- drop(varAY %*% invV %*% (x - GM) + GM) names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node) if (CI) { - CI95 <- matrix(NA, nb.node, 2) se <- sqrt((varA - varAY %*% invV %*% t(varAY))[cbind(1:nb.node, 1:nb.node)]) - CI95[, 1] <- obj$ace + se * qnorm(0.025) - CI95[, 2] <- obj$ace - se * qnorm(0.025) - obj$CI95 <- CI95 + tmp <- se * qnorm(0.025) + obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp) } - } + }) } else { # type == "discrete" if (method != "ML") - stop("only ML estimation is possible for discrete characters.") + stop("only ML estimation is possible for discrete characters.") if (!is.factor(x)) x <- factor(x) nl <- nlevels(x) lvls <- levels(x) @@ -138,10 +176,9 @@ did not match: the former were ignored in the analysis.') } } else { if (ncol(model) != nrow(model)) - stop("the matrix given as `model' is not square") + stop("the matrix given as 'model' is not square") if (ncol(model) != nl) - stop("the matrix `model' must have as many rows -as the number of categories in `x'") + stop("the matrix 'model' must have as many rows as the number of categories in 'x'") rate <- model np <- max(rate) } @@ -156,6 +193,12 @@ as the number of categories in `x'") liks[cbind(TIPS, x)] <- 1 phy <- reorder(phy, "pruningwise") + ## E <- if (use.expm) expm::expm else ape::matexpo + E <- if (use.expm) { + library(expm) + get("expm", "package:expm") + } else ape::matexpo + Q <- matrix(0, nl, nl) dev <- function(p, output.liks = FALSE) { if (any(is.nan(p)) || any(is.infinite(p))) return(1e50) @@ -164,18 +207,19 @@ as the number of categories in `x'") Q[] <- c(p, 0)[rate] diag(Q) <- -rowSums(Q) for (i in seq(from = 1, by = 2, length.out = nb.node)) { - j <- i + 1 + j <- i + 1L anc <- phy$edge[i, 1] des1 <- phy$edge[i, 2] des2 <- phy$edge[j, 2] - v.l <- matexpo(Q * phy$edge.length[i]) %*% liks[des1, ] - v.r <- matexpo(Q * phy$edge.length[j]) %*% liks[des2, ] + v.l <- E(Q * phy$edge.length[i]) %*% liks[des1, ] + v.r <- E(Q * phy$edge.length[j]) %*% liks[des2, ] v <- v.l * v.r comp[anc] <- sum(v) liks[anc, ] <- v/comp[anc] } if (output.liks) return(liks[-TIPS, ]) - -2 * sum(log(comp[-TIPS])) + dev <- -2 * sum(log(comp[-TIPS])) + if (is.na(dev)) Inf else dev } out <- nlminb(rep(ip, length.out = np), function(p) dev(p), lower = rep(0, np), upper = rep(1e50, np)) @@ -183,12 +227,10 @@ as the number of categories in `x'") obj$rates <- out$par oldwarn <- options("warn") options(warn = -1) - h <- nlm(function(p) dev(p), p = obj$rates, iterlim = 1, - stepmax = 0, hessian = TRUE)$hessian + out.nlm <- nlm(function(p) dev(p), p = obj$rates, iterlim = 1, + stepmax = 0, hessian = TRUE) options(oldwarn) - if (any(h == 0)) - warning("The likelihood gradient seems flat in at least one dimension (gradient null):\ncannot compute the standard-errors of the transition rates.\n") - else obj$se <- sqrt(diag(solve(h))) + obj$se <- .getSEs(out.nlm) obj$index.matrix <- index.matrix if (CI) { obj$lik.anc <- dev(obj$rates, TRUE) @@ -233,21 +275,30 @@ anova.ace <- function(object, ...) print.ace <- function(x, digits = 4, ...) { - cat("\n Ancestral Character Reconstruction\n\n") + cat("\n Ancestral Character Estimation\n\n") cat("Call: ") print(x$call) cat("\n") - cat(" Log-likelihood:", x$loglik, "\n\n") - cat("Rate index matrix:\n") + if (!is.null(x$loglik)) + cat(" Log-likelihood:", x$loglik, "\n\n") + if (!is.null(x$resloglik)) + cat(" Residual log-likelihood:", x$resloglik, "\n\n") ratemat <- x$index.matrix - dimnames(ratemat)[1:2] <- dimnames(x$lik.anc)[2] - print(ratemat, na.print = ".") - cat("\n") - npar <- length(x$rates) - estim <- data.frame(1:npar, round(x$rates, digits), round(x$se, digits)) - cat("Parameter estimates:\n") - names(estim) <- c("rate index", "estimate", "std-err") - print(estim, row.names = FALSE) - cat("\nScaled likelihoods at the root (type 'x$lik.anc' to get them for all nodes):\n") - print(x$lik.anc[1, ]) + if (is.null(ratemat)) { # to be improved + class(x) <- NULL + x$resloglik <- x$loglik <- x$call <- NULL + print(x) + } else { + dimnames(ratemat)[1:2] <- dimnames(x$lik.anc)[2] + cat("Rate index matrix:\n") + print(ratemat, na.print = ".") + cat("\n") + npar <- length(x$rates) + estim <- data.frame(1:npar, round(x$rates, digits), round(x$se, digits)) + cat("Parameter estimates:\n") + names(estim) <- c("rate index", "estimate", "std-err") + print(estim, row.names = FALSE) + cat("\nScaled likelihoods at the root (type '...$lik.anc' to get them for all nodes):\n") + print(x$lik.anc[1, ]) + } }