X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Fcompar.gee.R;h=1df21079ca726daed70b8db034ea6e42eb8d42ce;hb=f5c4abe6ac31486e821d82788bf66b5db2be51d1;hp=9c52460198409835b7aea9042664b3132d2048b7;hpb=cc052f3adebcde0dbf4531719f84dfc349373f2c;p=ape.git diff --git a/R/compar.gee.R b/R/compar.gee.R index 9c52460..1df2107 100644 --- a/R/compar.gee.R +++ b/R/compar.gee.R @@ -1,46 +1,86 @@ -## compar.gee.R (2008-02-21) +## compar.gee.R (2011-06-14) ## Comparative Analysis with GEEs -## Copyright 2002-2008 Emmanuel Paradis +## Copyright 2002-2010 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -compar.gee <- function(formula, data = NULL, family = "gaussian", phy, - scale.fix = FALSE, scale.value = 1) +compar.gee <- + function(formula, data = NULL, family = gaussian, phy, + corStruct, scale.fix = FALSE, scale.value = 1) { require(gee, quietly = TRUE) - if (is.null(data)) data <- parent.frame() else { - if(!any(is.na(match(rownames(data), phy$tip.label)))) - data <- data[phy$tip.label, ] - else warning("the rownames of the data.frame and the tip labels of the tree -do not match: the former were ignored in the analysis.") + + if (!missing(corStruct)) { + if (!missing(phy)) + warning("the phylogeny was ignored because you gave a 'corStruct' object") + R <- vcv(corStruct, corr = TRUE) + } else { + R <- vcv(phy, corr = TRUE) + } + + if (is.null(data)) data <- parent.frame() + else { + nmsR <- rownames(R) + if (!identical(rownames(data), nmsR)) { + if (!any(is.na(match(rownames(data), nmsR)))) + data <- data[nmsR, ] + else { + msg <- if (missing(corStruct)) + "the tip labels of the tree" else "those of the correlation structure" + msg <- paste("the rownames of the data.frame and", msg, + "do not match: the former were ignored in the analysis") + warning(msg) + } + } } + effect.assign <- attr(model.matrix(formula, data = data), "assign") + for (i in all.vars(formula)) { if (any(is.na(eval(parse(text = i), envir = data)))) - stop("the present method cannot (yet) be used directly with missing data: you may consider removing the species with missing data from your tree with the function `drop.tip'.") + stop("the present method cannot be used with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.") } - if (is.null(phy$edge.length)) - stop("the tree has no branch lengths.") - R <- vcv.phylo(phy, cor = TRUE) + id <- rep(1, dim(R)[1]) geemod <- do.call("gee", list(formula, id, data = data, family = family, R = R, corstr = "fixed", scale.fix = scale.fix, scale.value = scale.value)) W <- geemod$naive.variance fname <- - if is.function(family) deparse(substitute(family)) else family + if (is.function(family)) deparse(substitute(family)) else family if (fname == "binomial") - W <- summary(glm(formula, family = quasibinomial, data = data))$cov.scaled + W <- summary(glm(formula, family = quasibinomial, data = data))$cov.scaled N <- geemod$nobs - dfP <- sum(phy$edge.length)*N / sum(diag(vcv.phylo(phy))) - obj <- list(call = geemod$call, + ## + ## maybe need to refine below in case of non-Brownian corStruct + if (!missing(corStruct)) phy <- attr(corStruct, "tree") + dfP <- sum(phy$edge.length)*N / sum(diag(vcv(phy))) # need the variances + ## + + ## compute QIC: + Y <- geemod$y + MU <- geemod$fitted.values + Qlik <- switch(fname, + "gaussian" = -sum((Y - MU)^2)/2, + "binomial" = sum(Y*log(MU/(1 - MU)) + log(1 - MU)), + "poisson" = sum(Y*log(MU) - MU), + "Gamma" = sum(Y/MU + log(MU)), + "inverse.gaussian" = sum(-Y/(2*MU^2) + 1/MU)) + Ai <- do.call("gee", list(formula, id, data = data, family = family, + corstr = "independence", scale.fix = scale.fix, + scale.value = scale.value))$naive.variance + QIC <- -2*Qlik + 2*sum(diag(solve(Ai) %*% W)) + + obj <- list(call = match.call(), effect.assign = effect.assign, nobs = N, + QIC = QIC, coefficients = geemod$coefficients, residuals = geemod$residuals, + fitted.values = MU, family = geemod$family$family, link = geemod$family$link, scale = geemod$scale, @@ -67,13 +107,13 @@ print.compar.gee <- function(x, ...) } else coef[, 4] <- 2 * (1 - pt(abs(coef[, 3]), df)) residu <- quantile(as.vector(x$residuals)) names(residu) <- c("Min", "1Q", "Median", "3Q", "Max") - cat("\nCall:\n") - cat(" formula: ") - print(x$call$formula) - cat("\nNumber of observations: ", x$nobs, "\n") - cat("\nModel:\n") - cat(" Link: ", x$link, "\n") + cat("Call: ") + print(x$call) + cat("Number of observations: ", x$nobs, "\n") + cat("Model:\n") + cat(" Link:", x$link, "\n") cat(" Variance to Mean Relation:", x$family, "\n") + cat("\nQIC:", x$QIC, "\n") cat("\nSummary of Residuals:\n") print(residu) if (any(nas)) @@ -108,7 +148,17 @@ drop1.compar.gee <- function(object, scope, quiet = FALSE, ...) warning("there is at least one interaction term in your model: you should be careful when interpreting the significance of the main effects.") class(ans) <- "anova" - attr(ans, "heading") <- c("Single term deletions\n\nModel:\n", - as.character(as.expression(fm))) + attr(ans, "heading") <- paste("Single term deletions\n\n Model:", + as.character(as.expression(fm)), "\n") ans } + +predict.compar.gee <- + function(object, type = c("link", "response"), ...) +{ + type <- match.arg(type) + pred <- object$fitted.values + if (type == "link") return(pred) + f <- match.fun(object$family) + f(link = object$link)$linkinv(pred) +}