From ce466967e4824b3f6aec6f75798da08fe8c10666 Mon Sep 17 00:00:00 2001
From: paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Date: Mon, 18 Jul 2011 04:05:06 +0000
Subject: [PATCH] new method = "REML" in ace() with continuous traits

git-svn-id: https://svn.mpl.ird.fr/ape/dev/ape@166 6e262413-ae40-0410-9e79-b911bd7a66b7
---
 DESCRIPTION |  2 +-
 NEWS        |  4 +++
 R/ace.R     | 93 +++++++++++++++++++++++++++++++++--------------------
 man/ace.Rd  | 25 +++++++++-----
 4 files changed, 81 insertions(+), 43 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2089fab..de29863 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ape
 Version: 2.7-3
-Date: 2011-07-15
+Date: 2011-07-18
 Title: Analyses of Phylogenetics and Evolution
 Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Klaus Schliep, Korbinian Strimmer, Damien de Vienne
 Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
diff --git a/NEWS b/NEWS
index cfd28ab..92a114a 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@ NEW FEATURES
       "two-sided" by default. Previously, this test was one-tailed
       with no possibility to change.
 
+    o ace() can now do REML estimation with continuous characters,
+      giving better estimates of the variance of the Brownian motion
+      process.
+
 
 BUG FIXES
 
diff --git a/R/ace.R b/R/ace.R
index b67549f..b51b58e 100644
--- a/R/ace.R
+++ b/R/ace.R
@@ -1,8 +1,8 @@
-## ace.R (2010-12-08)
+## ace.R (2011-07-18)
 
 ##   Ancestral Character Estimation
 
-## Copyright 2005-2010 Emmanuel Paradis and Ben Bolker
+## Copyright 2005-2011 Emmanuel Paradis and Ben Bolker
 
 ## This file is part of the R-package `ape'.
 ## See the file ../COPYING for licensing issues.
@@ -19,9 +19,9 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
     nb.tip <- length(phy$tip.label)
     nb.node <- phy$Nnode
     if (nb.node != nb.tip - 1)
-      stop('"phy" is not rooted AND fully dichotomous.')
+        stop('"phy" is not rooted AND fully dichotomous.')
     if (length(x) != nb.tip)
-      stop("length of phenotypic and of phylogenetic data do not match.")
+        stop("length of phenotypic and of phylogenetic data do not match.")
     if (!is.null(names(x))) {
         if(all(names(x) %in% phy$tip.label))
           x <- x[phy$tip.label]
@@ -30,9 +30,42 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
     obj <- list()
     if (kappa != 1) phy$edge.length <- phy$edge.length^kappa
     if (type == "continuous") {
-        if (method == "pic") {
+        switch(method, "REML" = {
+            minusLogLik <- function(sig2) {
+                if (sig2 < 0) return(1e100)
+                V <- sig2 * vcv(phy)
+                ## next three lines borrowed from dmvnorm() in 'mvtnorm'
+                distval <- mahalanobis(x, center = mu, cov = V)
+                logdet <- sum(log(eigen(V, symmetric = TRUE, only.values = TRUE)$values))
+                (nb.tip * log(2 * pi) + logdet + distval)/2
+            }
+            mu <- rep(ace(x, phy, method="pic")$ace[1], nb.tip)
+            out <- nlm(minusLogLik, 1, hessian = TRUE)
+            sigma2 <- out$estimate
+            se_sgi2 <- sqrt(1/out$hessian)
+            tip <- phy$edge[, 2] <= nb.tip
+            minus.REML.BM <- function(p) {
+                x1 <- p[phy$edge[, 1] - nb.tip]
+                x2 <- numeric(length(x1))
+                x2[tip] <- x[phy$edge[tip, 2]]
+                x2[!tip] <- p[phy$edge[!tip, 2] - nb.tip]
+                -(-sum((x1 - x2)^2/phy$edge.length)/(2 * sigma2) -
+                  nb.node * log(sigma2))
+            }
+            out <- nlm(function(p) minus.REML.BM(p),
+                       p = rep(mu[1], nb.node), hessian = TRUE)
+            obj$resloglik <- -out$minimum
+            obj$ace <- out$estimate
+            names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node)
+            obj$sigma2 <- c(sigma2, se_sgi2)
+            if (CI) {
+                se <- sqrt(diag(solve(out$hessian)))
+                tmp <- se * qt(0.025, nb.node)
+                obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp)
+            }
+        }, "pic" = {
             if (model != "BM")
-              stop('the "pic" method can be used only with model = "BM".')
+                stop('the "pic" method can be used only with model = "BM".')
             ## See pic.R for some annotations.
             phy <- reorder(phy, "pruningwise")
             phenotype <- numeric(nb.tip + nb.node)
@@ -48,13 +81,10 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
             names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node)
             if (CI) {
                 se <- sqrt(ans[[8]])
-                CI95 <- matrix(NA, nb.node, 2)
-                CI95[, 1] <- obj$ace + se * qnorm(0.025)
-                CI95[, 2] <- obj$ace - se * qnorm(0.025)
-                obj$CI95 <- CI95
+                tmp <- se * qnorm(0.025)
+                obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp)
             }
-        }
-        if (method == "ML") {
+        }, "ML" = {
             if (model == "BM") {
                 tip <- phy$edge[, 2] <= nb.tip
                 dev.BM <- function(p) {
@@ -73,24 +103,20 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
                 names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node)
                 se <- sqrt(diag(solve(out$hessian)))
                 obj$sigma2 <- c(out$estimate[1], se[1])
-                se <- se[-1]
                 if (CI) {
-                    CI95 <- matrix(NA, nb.node, 2)
-                    CI95[, 1] <- obj$ace + se * qt(0.025, nb.node)
-                    CI95[, 2] <- obj$ace - se * qt(0.025, nb.node)
-                    obj$CI95 <- CI95
+                    tmp <- se[-1] * qt(0.025, nb.node)
+                    obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp)
                 }
             }
-        }
-        if (method == "GLS") {
+        }, "GLS" = {
             if (is.null(corStruct))
-              stop('you must give a correlation structure if method = "GLS".')
+                stop('you must give a correlation structure if method = "GLS".')
             if (class(corStruct)[1] == "corMartins")
-              M <- corStruct[1] * dist.nodes(phy)
+                M <- corStruct[1] * dist.nodes(phy)
             if (class(corStruct)[1] == "corGrafen")
-              phy <- compute.brlen(attr(corStruct, "tree"),
-                                   method = "Grafen",
-                                   power = exp(corStruct[1]))
+                phy <- compute.brlen(attr(corStruct, "tree"),
+                                     method = "Grafen",
+                                     power = exp(corStruct[1]))
             if (class(corStruct)[1] %in% c("corBrownian", "corGrafen")) {
                 dis <- dist.nodes(attr(corStruct, "tree"))
                 MRCA <- mrca(attr(corStruct, "tree"), full = TRUE)
@@ -107,16 +133,14 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
             obj$ace <- drop(varAY %*% invV %*% (x - GM) + GM)
             names(obj$ace) <- (nb.tip + 1):(nb.tip + nb.node)
             if (CI) {
-                CI95 <- matrix(NA, nb.node, 2)
                 se <- sqrt((varA - varAY %*% invV %*% t(varAY))[cbind(1:nb.node, 1:nb.node)])
-                CI95[, 1] <- obj$ace + se * qnorm(0.025)
-                CI95[, 2] <- obj$ace - se * qnorm(0.025)
-                obj$CI95 <- CI95
+                tmp <- se * qnorm(0.025)
+                obj$CI95 <- cbind(obj$ace + tmp, obj$ace - tmp)
             }
-        }
+        })
     } else { # type == "discrete"
         if (method != "ML")
-          stop("only ML estimation is possible for discrete characters.")
+            stop("only ML estimation is possible for discrete characters.")
         if (!is.factor(x)) x <- factor(x)
         nl <- nlevels(x)
         lvls <- levels(x)
@@ -137,10 +161,9 @@ ace <- function(x, phy, type = "continuous", method = "ML", CI = TRUE,
             }
         } else {
             if (ncol(model) != nrow(model))
-              stop("the matrix given as `model' is not square")
+              stop("the matrix given as 'model' is not square")
             if (ncol(model) != nl)
-              stop("the matrix `model' must have as many rows
-as the number of categories in `x'")
+              stop("the matrix 'model' must have as many rows as the number of categories in 'x'")
             rate <- model
             np <- max(rate)
         }
@@ -239,10 +262,12 @@ print.ace <- function(x, digits = 4, ...)
     cat("\n")
     if (!is.null(x$loglik))
         cat("    Log-likelihood:", x$loglik, "\n\n")
+    if (!is.null(x$resloglik))
+        cat("    Residual log-likelihood:", x$resloglik, "\n\n")
     ratemat <- x$index.matrix
     if (is.null(ratemat)) { # to be improved
         class(x) <- NULL
-        x$loglik <- x$call <- NULL
+        x$resloglik <- x$loglik <- x$call <- NULL
         print(x)
     } else {
         dimnames(ratemat)[1:2] <- dimnames(x$lik.anc)[2]
diff --git a/man/ace.Rd b/man/ace.Rd
index 126a216..1fc5d01 100644
--- a/man/ace.Rd
+++ b/man/ace.Rd
@@ -23,8 +23,8 @@ ace(x, phy, type = "continuous", method = "ML", CI = TRUE,
   \item{type}{the variable type; either \code{"continuous"} or
     \code{"discrete"} (or an abbreviation of these).}
   \item{method}{a character specifying the method used for
-    estimation. Three choices are possible: \code{"ML"}, \code{"pic"},
-    or \code{"GLS"}.}
+    estimation. Four choices are possible: \code{"ML"}, \code{"REML"},
+    \code{"pic"}, or \code{"GLS"}.}
   \item{CI}{a logical specifying whether to return the 95\% confidence
     intervals of the ancestral state estimates (for continuous
     characters) or the likelihood of the different states (for discrete
@@ -75,13 +75,22 @@ ace(x, phy, type = "continuous", method = "ML", CI = TRUE,
   \code{corStruct}.
 
   In the default setting (i.e., \code{method = "ML"} and \code{model =
-  "BM"}) the maximum likelihood estimation is done simultaneously on
-  the ancestral values and the variance of the Brownian motion process;
+  "BM"}) the maximum likelihood estimation is done simultaneously on the
+  ancestral values and the variance of the Brownian motion process;
   these estimates are then used to compute the confidence intervals in
-  the standard way (see the package \pkg{geiger} for a different
-  implementation). If \code{method = "pic"} or \code{"GLS"}, the
-  confidence intervals are computed using the expected variances under
-  the model, so they depend only on the tree.
+  the standard way. The REML method first estimates the ancestral value
+  at the root (aka, the phylogenetic mean), then the variance of the
+  Brownian motion process is estimated by optimizing the residual
+  log-likelihood. The ancestral values are finally inferred from the
+  likelihood function giving these two parameters. If \code{method =
+  "pic"} or \code{"GLS"}, the confidence intervals are computed using
+  the expected variances under the model, so they depend only on the
+  tree.
+
+  It could be shown that, with a continous character, REML results in
+  unbiased estimates of the variance of the Brownian motion process
+  while ML gives a downward bias. Therefore, the former is recommanded
+  over the latter, even though it is not the default.
 
   For discrete characters (\code{type = "discrete"}), only maximum
   likelihood estimation is available (Pagel 1994). The model is
-- 
2.39.5