improvements to compar.gee + bug fix in extract.clade

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Wed, 21 Jul 2010 08:43:53 +0000 (08:43 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Wed, 21 Jul 2010 08:43:53 +0000 (08:43 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Wed, 21 Jul 2010 08:43:53 +0000 (08:43 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Wed, 21 Jul 2010 08:43:53 +0000 (08:43 +0000)
diff --git a/ChangeLog b/ChangeLog

index df3b32bc4fd721ecdac92e35d283d1c2b09e506b..f92c51c79ab6a4b03291edf8ebd528884cd2884d 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -12,6 +12,16 @@ NEW FEATURES
        control the aspect of thermometers: horiz = TRUE, width = NULL,
        and height = NULL.
  
+    o compar.gee() has been improved with the new option 'corStruct' as an
+      alternative to 'phy' to specify the correlation structure, and
+      calculation of the QIC (Pan 2001, Biometrics). The display of the
+      results have also been improved.
+
+
+BUG FIXES
+
+    o extract.clade() sometimes shuffled the tip labels.
+
  
  
                 CHANGES IN APE VERSION 2.5-3
diff --git a/R/compar.gee.R b/R/compar.gee.R

index 488379c4b00d52775651515b4c3cba090790ba30..caead0776b8dc6161a14034805071bdc89fbcc45 100644 (file)
--- a/R/compar.gee.R
+++ b/R/compar.gee.R
@@ -1,31 +1,51 @@
-## compar.gee.R (2008-02-21)
+## compar.gee.R (2010-07-20)
  
  ##   Comparative Analysis with GEEs
  
-## Copyright 2002-2008 Emmanuel Paradis
+## Copyright 2002-2010 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
  
-compar.gee <- function(formula, data = NULL, family = "gaussian", phy,
-                       scale.fix = FALSE, scale.value = 1)
+compar.gee <-
+    function(formula, data = NULL, family = gaussian, phy,
+             corStruct, scale.fix = FALSE, scale.value = 1)
  {
      require(gee, quietly = TRUE)
-    if (is.null(data)) data <- parent.frame() else {
-        if(!any(is.na(match(rownames(data), phy$tip.label))))
-          data <- data[phy$tip.label, ]
-        else warning("the rownames of the data.frame and the tip labels of the tree
-do not match: the former were ignored in the analysis.")
+
+    if (!missing(corStruct)) {
+        if (!missing(phy))
+            warning("the phylogeny was ignored because you gave a 'corStruct' object")
+        R <- vcv(corStruct, corr = TRUE)
+    } else {
+        R <- vcv(phy, corr = TRUE)
+    }
+
+    if (is.null(data)) data <- parent.frame()
+    else {
+        nmsR <- rownames(R)
+        if (!identical(rownames(data), nmsR)) {
+            if (!any(is.na(match(rownames(data), nmsR))))
+                data <- data[nmsR, ]
+            else {
+                msg <- if (missing(corStruct))
+                    "the tip labels of the tree" else "those of the correlation structure"
+                msg <- paste("the rownames of the data.frame and", msg,
+                             "do not match: the former were ignored in the analysis")
+                warning(msg)
+            }
+        }
      }
+
      effect.assign <- attr(model.matrix(formula, data = data), "assign")
+
      for (i in all.vars(formula)) {
          if (any(is.na(eval(parse(text = i), envir = data))))
-          stop("the present method cannot (yet) be used directly with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.")
+          stop("the present method cannot be used with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.")
      }
-    if (is.null(phy$edge.length)) stop("the tree has no branch lengths.")
-    R <- vcv.phylo(phy, cor = TRUE)
+
      id <- rep(1, dim(R)[1])
-    geemod <- do.call("gee", list(formula, id, data = data, family = family, R = R,
+    geemod <<- geemod <- do.call("gee", list(formula, id, data = data, family = family, R = R,
                                    corstr = "fixed", scale.fix = scale.fix,
                                    scale.value = scale.value))
      W <- geemod$naive.variance
@@ -34,10 +54,30 @@ do not match: the former were ignored in the analysis.")
      if (fname == "binomial")
          W <- summary(glm(formula, family = quasibinomial, data = data))$cov.scaled
      N <- geemod$nobs
-    dfP <- sum(phy$edge.length)*N / sum(diag(vcv.phylo(phy)))
-    obj <- list(call = geemod$call,
+    ## <FIXME>
+    ## maybe need to refine below in case of non-Brownian corStruct
+    if (!missing(corStruct)) phy <- attr(corStruct, "tree")
+    dfP <- sum(phy$edge.length)*N / sum(diag(R))
+    ## </FIXME>
+
+    ## compute QIC:
+    Y <- geemod$y
+    MU <- geemod$fitted.values
+    Qlik <- switch(fname,
+                   "gaussian" = -sum((Y - MU)^2)/2,
+                   "binomial" = sum(Y*log(MU/(1 - MU)) + log(1 - MU)),
+                   "poisson" = sum(Y*log(MU) - MU),
+                   "Gamma" = sum(Y/MU + log(MU)),
+                   "inverse.gaussian" = sum(-Y/(2*MU^2) + 1/MU))
+    Ai <- do.call("gee", list(formula, id, data = data, family = family,
+                              corstr = "independence", scale.fix = scale.fix,
+                              scale.value = scale.value))$naive.variance
+    QIC <- -2*Qlik + 2*sum(diag(solve(Ai) %*% W))
+
+    obj <- list(call = match.call(),
                  effect.assign = effect.assign,
                  nobs = N,
+                QIC = QIC,
                  coefficients = geemod$coefficients,
                  residuals = geemod$residuals,
                  family = geemod$family$family,
@@ -66,13 +106,13 @@ print.compar.gee <- function(x, ...)
      } else coef[, 4] <- 2 * (1 -  pt(abs(coef[, 3]), df))
      residu <- quantile(as.vector(x$residuals))
      names(residu) <- c("Min", "1Q", "Median", "3Q", "Max")
-    cat("\nCall:\n")
-    cat("  formula: ")
-    print(x$call$formula)
-    cat("\nNumber of observations: ", x$nobs, "\n")
-    cat("\nModel:\n")
-    cat(" Link:                     ", x$link, "\n")
+    cat("Call: ")
+    print(x$call)
+    cat("Number of observations: ", x$nobs, "\n")
+    cat("Model:\n")
+    cat("                      Link:", x$link, "\n")
      cat(" Variance to Mean Relation:", x$family, "\n")
+    cat("\nQIC:", x$QIC, "\n")
      cat("\nSummary of Residuals:\n")
      print(residu)
      if (any(nas))
@@ -107,7 +147,7 @@ drop1.compar.gee <- function(object, scope, quiet = FALSE, ...)
        warning("there is at least one interaction term in your model:
  you should be careful when interpreting the significance of the main effects.")
      class(ans) <- "anova"
-    attr(ans, "heading") <- c("Single term deletions\n\nModel:\n",
-                              as.character(as.expression(fm)))
+    attr(ans, "heading") <- paste("Single term deletions\n\n  Model:",
+                                  as.character(as.expression(fm)), "\n")
      ans
  }
diff --git a/R/drop.tip.R b/R/drop.tip.R

index e59b59b957a06ebf2a350b5d68d10741e3e3fbc4..c5eb07c45c728d381830ff3761c165c4f9e329be 100644 (file)
--- a/R/drop.tip.R
+++ b/R/drop.tip.R
@@ -1,4 +1,4 @@
-## drop.tip.R (2010-02-11)
+## drop.tip.R (2010-07-21)
  
  ##   Remove Tips in a Phylogenetic Tree
  
@@ -54,7 +54,7 @@ extract.clade <- function(phy, node, root.edge = 0, interactive = FALSE)
      if (wbl) phy$edge.length <- phy$edge.length[keep]
      TIPS <- phy$edge[, 2] <= Ntip
      tip <- phy$edge[TIPS, 2]
-    phy$tip.label <- phy$tip.label[tip]
+    phy$tip.label <- phy$tip.label[sort(tip)] # <- added sort to avoid shuffling of tip labels (2010-07-21)
      ## keep the ordering so no need to reorder tip.label:
      phy$edge[TIPS, 2] <- order(tip)
      if (!is.null(phy$node.label))
diff --git a/man/compar.gee.Rd b/man/compar.gee.Rd

index eec81f1b3ffe6a6dfd03577b7bd1756163ff5214..170b8e0d17cb2b8ac0e248b180212ee027e7dab1 100644 (file)
--- a/man/compar.gee.Rd
+++ b/man/compar.gee.Rd
@@ -3,8 +3,15 @@
  \alias{print.compar.gee}
  \alias{drop1.compar.gee}
  \title{Comparative Analysis with GEEs}
+\description{
+  \code{compar.gee} performs the comparative analysis using generalized
+  estimating equations as described by Paradis and Claude (2002).
+
+  \code{drop1} tests single effects of a fitted model output from
+  \code{compar.gee}.
+}
  \usage{
-compar.gee(formula, data = NULL, family = "gaussian", phy,
+compar.gee(formula, data = NULL, family = "gaussian", phy, corStruct,
            scale.fix = FALSE, scale.value = 1)
  \method{drop1}{compar.gee}(object, scope, quiet = FALSE, ...)
  }
@@ -13,11 +20,13 @@ compar.gee(formula, data = NULL, family = "gaussian", phy,
    \item{data}{the name of the data frame where the variables in
      \code{formula} are to be found; by default, the variables are looked
      for in the global environment.}
-  \item{family}{a character string specifying the distribution assumed
-    for the response; by default a Gaussian distribution (with link
-    identity) is assumed (see \code{?family} for details on specifying
-    the distribution, and on changing the link function).}
-  \item{phy}{an object of class \code{"phylo"}.}
+  \item{family}{a function specifying the distribution assumed for the
+    response; by default a Gaussian distribution (with link identity) is
+    assumed (see \code{?family} for details on specifying the
+    distribution, and on changing the link function).}
+  \item{phy}{an object of class \code{"phylo"} (ignored if
+    \code{corStruct} is used).}
+  \item{corStruct}{a (phylogenetic) correlation structure.}
    \item{scale.fix}{logical, indicates whether the scale parameter should
      be fixed (TRUE) or estimated (FALSE, the default).}
    \item{scale.value}{if \code{scale.fix = TRUE}, gives the value for the
@@ -29,13 +38,6 @@ compar.gee(formula, data = NULL, family = "gaussian", phy,
      about eventual ``marginality principle violation''.}
    \item{\dots}{further arguments to be passed to \code{drop1}.}
  }
-\description{
-  \code{compar.gee} performs the comparative analysis using generalized
-  estimating equations as described by Paradis and Claude (2002).
-
-  \code{drop1} tests single effects of a fitted model output from
-  \code{compar.gee}.
-}
  \details{
    If a data frame is specified for the argument \code{data}, then its
    rownames are matched to the tip labels of \code{phy}. The user must be
@@ -48,6 +50,14 @@ compar.gee(formula, data = NULL, family = "gaussian", phy,
    If \code{data = NULL}, then it is assumed that the variables are in
    the same order than the tip labels of \code{phy}.
  }
+\note{
+  The calculation of the phylogenetic degrees of freedom is likely to be
+  approximative for non-Brownian correlation structures (this will be
+  refined soon).
+
+  The calculation of the quasilikelihood information criterion (QIC)
+  needs to be tested.
+}
  \value{
    \code{compar.gee} returns an object of class \code{"compar.gee"} with
    the following components:
@@ -55,6 +65,8 @@ compar.gee(formula, data = NULL, family = "gaussian", phy,
    \item{effect.assign}{a vector of integers assigning the coefficients
      to the effects (used by \code{drop1}).}
    \item{nobs}{the number of observations.}
+  \item{QIC}{the quasilikelihood information criterion as defined by Pan
+    (2001).}
    \item{coefficients}{the estimated coefficients (or regression parameters).}
    \item{residuals}{the regression residuals.}
    \item{family}{a character string, the distribution assumed for the response.}
@@ -67,6 +79,9 @@ compar.gee(formula, data = NULL, family = "gaussian", phy,
    \code{drop1} returns an object of class \code{"\link[stats]{anova}"}.
  }
  \references{
+  Pan, W. (2001) Akaike's information criterion in generalized
+  estimating equations. \emph{Biometrics}, \bold{57}, 120--125.
+
    Paradis, E. and Claude J. (2002) Analysis of comparative data using
    generalized estimating equations. \emph{Journal of theoretical
      Biology}, \bold{218}, 175--185.
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Wed, 21 Jul 2010 08:43:53 +0000 (08:43 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Wed, 21 Jul 2010 08:43:53 +0000 (08:43 +0000)
ChangeLog		patch \| blob \| history
R/compar.gee.R		patch \| blob \| history
R/drop.tip.R		patch \| blob \| history
man/compar.gee.Rd		patch \| blob \| history