man/ace.Rd

   1 \name{ace}
   2 \alias{ace}
   3 \alias{print.ace}
   4 \alias{logLik.ace}
   5 \alias{deviance.ace}
   6 \alias{AIC.ace}
   7 \alias{anova.ace}
   8 \title{Ancestral Character Estimation}
   9 \description{
  10   This function estimates ancestral character states, and the associated
  11   uncertainty, for continuous and discrete characters.
  12
  13   \code{logLik}, \code{deviance}, and \code{AIC} are generic functions
  14   used to extract the log-likelihood, the deviance, or the Akaike
  15   information criterion of a fitted object. If no such values are
  16   available, \code{NULL} is returned.
  17
  18   \code{anova} is another generic function which is used to compare
  19   nested models: the significance of the additional parameter(s) is
  20   tested with likelihood ratio tests. You must ensure that the models
  21   are effectively nested (if they are not, the results will be
  22   meaningless). It is better to list the models from the smallest to the
  23   largest.
  24 }
  25 \usage{
  26 ace(x, phy, type = "continuous", method = if (type == "continuous")
  27    "REML" else "ML", CI = TRUE,
  28     model = if (type == "continuous") "BM" else "ER",
  29     scaled = TRUE, kappa = 1, corStruct = NULL, ip = 0.1,
  30     use.expm = FALSE)
  31 \method{print}{ace}(x, digits = 4, ...)
  32 \method{logLik}{ace}(object, ...)
  33 \method{deviance}{ace}(object, ...)
  34 \method{AIC}{ace}(object, ..., k = 2)
  35 \method{anova}{ace}(object, ...)
  36 }
  37 \arguments{
  38   \item{x}{a vector or a factor; an object of class \code{"ace"} in the
  39     case of \code{print}.}
  40   \item{phy}{an object of class \code{"phylo"}.}
  41   \item{type}{the variable type; either \code{"continuous"} or
  42     \code{"discrete"} (or an abbreviation of these).}
  43   \item{method}{a character specifying the method used for
  44     estimation. Four choices are possible: \code{"ML"}, \code{"REML"},
  45     \code{"pic"}, or \code{"GLS"}.}
  46   \item{CI}{a logical specifying whether to return the 95\% confidence
  47     intervals of the ancestral state estimates (for continuous
  48     characters) or the likelihood of the different states (for discrete
  49     ones).}
  50   \item{model}{a character specifying the model (ignored if \code{method
  51       = "GLS"}), or a numeric matrix if \code{type = "discrete"} (see
  52     details).}
  53   \item{scaled}{a logical specifying whether to scale the contrast
  54     estimate (used only if \code{method = "pic"}).}
  55   \item{kappa}{a positive value giving the exponent transformation of
  56     the branch lengths (see details).}
  57   \item{corStruct}{if \code{method = "GLS"}, specifies the correlation
  58     structure to be used (this also gives the assumed model).}
  59   \item{ip}{the initial value(s) used for the ML estimation procedure
  60     when \code{type == "discrete"} (possibly recycled).}
  61   \item{use.expm}{a logical specifying whether to use the package
  62     \pkg{expm} to compute the matrix exponential (relevant only if
  63     \code{type = "d"}). The default is to use the function
  64     \code{matexpo} from \pkg{ape} (see details).}
  65   \item{digits}{the number of digits to be printed.}
  66   \item{object}{an object of class \code{"ace"}.}
  67   \item{k}{a numeric value giving the penalty per estimated parameter;
  68     the default is \code{k = 2} which is the classical Akaike
  69     information criterion.}
  70   \item{\dots}{further arguments passed to or from other methods.}
  71 }
  72 \details{
  73   If \code{type = "continuous"}, the default model is Brownian motion
  74   where characters evolve randomly following a random walk. This model
  75   can be fitted by residual maximum likelihood (the default), maximum
  76   likelihood (Felsenstein 1973, Schluter et al. 1997), least squares
  77   (\code{method = "pic"}, Felsenstein 1985), or generalized least
  78   squares (\code{method = "GLS"}, Martins and Hansen 1997, Cunningham et
  79   al. 1998). In the last case, the specification of \code{phy} and
  80   \code{model} are actually ignored: it is instead given through a
  81   correlation structure with the option \code{corStruct}.
  82
  83   In the setting \code{method = "ML"} and \code{model = "BM"} (this used
  84   to be the default until \pkg{ape} 3.0-7) the maximum likelihood
  85   estimation is done simultaneously on the ancestral values and the
  86   variance of the Brownian motion process; these estimates are then used
  87   to compute the confidence intervals in the standard way. The REML
  88   method first estimates the ancestral value at the root (aka, the
  89   phylogenetic mean), then the variance of the Brownian motion process
  90   is estimated by optimizing the residual log-likelihood. The ancestral
  91   values are finally inferred from the likelihood function giving these
  92   two parameters. If \code{method = "pic"} or \code{"GLS"}, the
  93   confidence intervals are computed using the expected variances under
  94   the model, so they depend only on the tree.
  95
  96   It could be shown that, with a continous character, REML results in
  97   unbiased estimates of the variance of the Brownian motion process
  98   while ML gives a downward bias. Therefore the former is recommanded.
  99
 100   For discrete characters (\code{type = "discrete"}), only maximum
 101   likelihood estimation is available (Pagel 1994). The model is
 102   specified through a numeric matrix with integer values taken as
 103   indices of the parameters. The numbers of rows and of columns of this
 104   matrix must be equal, and are taken to give the number of states of
 105   the character. For instance, \code{matrix(c(0, 1, 1, 0), 2)} will
 106   represent a model with two character states and equal rates of
 107   transition, \code{matrix(c(0, 1, 2, 0), 2)} a model with unequal
 108   rates, \code{matrix(c(0, 1, 1, 1, 0, 1, 1, 1, 0), 3)} a model with
 109   three states and equal rates of transition (the diagonal is always
 110   ignored). There are short-cuts to specify these models: \code{"ER"} is
 111   an equal-rates model (e.g., the first and third examples above),
 112   \code{"ARD"} is an all-rates-different model (the second example), and
 113   \code{"SYM"} is a symmetrical model (e.g., \code{matrix(c(0, 1, 2, 1,
 114   0, 3, 2, 3, 0), 3)}). If a short-cut is used, the number of states is
 115   determined from the data.
 116
 117   With discrete characters it is necessary to compute the exponential of
 118   the rate matrix. By default (and the only possible choice until
 119   \pkg{ape} 3.0-7) the function \code{\link{matexpo}} in \pkg{ape} is
 120   used. If \code{use.expm = TRUE}, the function
 121   \code{\link[expm]{expm}}, in the package of the same name, is
 122   used. \code{matexpo} is faster but quite inaccurate for large and/or
 123   asymmetric matrices. In case of doubt, use the latter.
 124 }
 125 \value{
 126   an object of class \code{"ace"} with the following elements:
 127
 128   \item{ace}{if \code{type = "continuous"}, the estimates of the
 129     ancestral character values.}
 130   \item{CI95}{if \code{type = "continuous"}, the estimated 95\%
 131     confidence intervals.}
 132   \item{sigma2}{if \code{type = "continuous"}, \code{model = "BM"}, and
 133     \code{method = "ML"}, the maximum likelihood estimate of the
 134     Brownian parameter.}
 135   \item{rates}{if \code{type = "discrete"}, the maximum likelihood
 136     estimates of the transition rates.}
 137   \item{se}{if \code{type = "discrete"}, the standard-errors of
 138     estimated rates.}
 139   \item{index.matrix}{if \code{type = "discrete"}, gives the indices of
 140     the \code{rates} in the rate matrix.}
 141   \item{loglik}{if \code{method = "ML"}, the maximum log-likelihood.}
 142   \item{lik.anc}{if \code{type = "discrete"}, the scaled likelihoods of
 143     each ancestral state.}
 144   \item{call}{the function call.}
 145 }
 146 \references{
 147   Cunningham, C. W., Omland, K. E. and Oakley, T. H. (1998)
 148   Reconstructing ancestral character states: a critical
 149   reappraisal. \emph{Trends in Ecology & Evolution}, \bold{13},
 150   361--366.
 151
 152   Felsenstein, J. (1973) Maximum likelihood estimation
 153   of evolutionary trees from continuous characters. \emph{American Journal of Human Genetics}, \bold{25}, 471--492.
 154
 155   Felsenstein, J. (1985) Phylogenies and the comparative
 156   method. \emph{American Naturalist}, \bold{125}, 1--15.
 157
 158   Martins, E. P. and Hansen, T. F. (1997) Phylogenies and the
 159   comparative method: a general approach to incorporating phylogenetic
 160   information into the analysis of interspecific data. \emph{American
 161     Naturalist}, \bold{149}, 646--667.
 162
 163   Pagel, M. (1994) Detecting correlated evolution on phylogenies: a
 164   general method for the comparative analysis of discrete
 165   characters. \emph{Proceedings of the Royal Society of London. Series
 166     B. Biological Sciences}, \bold{255}, 37--45.
 167
 168   Schluter, D., Price, T., Mooers, A. O. and Ludwig, D. (1997)
 169   Likelihood of ancestor states in adaptive radiation. \emph{Evolution},
 170   \bold{51}, 1699--1711.
 171 }
 172 \author{Emmanuel Paradis, Ben Bolker}
 173 \seealso{
 174   \code{\link{corBrownian}}, \code{\link{corGrafen}},
 175   \code{\link{corMartins}}, \code{\link{compar.ou}},
 176   \code{\link[stats]{anova}}
 177 }
 178 \examples{
 179 ### Just some random data...
 180 data(bird.orders)
 181 x <- rnorm(23)
 182 ### Compare the three methods for continuous characters:
 183 ace(x, bird.orders)
 184 ace(x, bird.orders, method = "pic")
 185 ace(x, bird.orders, method = "GLS",
 186     corStruct = corBrownian(1, bird.orders))
 187 ### For discrete characters:
 188 x <- factor(c(rep(0, 5), rep(1, 18)))
 189 ans <- ace(x, bird.orders, type = "d")
 190 #### Showing the likelihoods on each node:
 191 plot(bird.orders, type = "c", FALSE, label.offset = 1)
 192 co <- c("blue", "yellow")
 193 tiplabels(pch = 22, bg = co[as.numeric(x)], cex = 2, adj = 1)
 194 nodelabels(thermo = ans$lik.anc, piecol = co, cex = 0.75)
 195 ### An example of the use of the argument `ip':
 196 tr <- character(4)
 197 tr[1] <- "((((t10:5.03,t2:5.03):2.74,(t9:4.17,"
 198 tr[2] <- "t5:4.17):3.60):2.80,(t3:4.05,t7:"
 199 tr[3] <- "4.05):6.53):2.32,((t6:4.38,t1:4.38):"
 200 tr[4] <- "2.18,(t8:2.17,t4:2.17):4.39):6.33);"
 201 tr <- read.tree(text = paste(tr, collapse = ""))
 202 y <- c(rep(1, 6), rep(2, 4))
 203 ### The default `ip = 0.1' makes ace fails:
 204 ace(y, tr, type = "d")
 205 ace(y, tr, type = "d", ip = 0.01)
 206 ### Surprisingly, using an initial value farther to the
 207 ### MLE than the default one works:
 208 ace(y, tr, type = "d", ip = 0.3)
 209 }
 210 \keyword{models}