## MoranI.R (2008-01-14) ## Moran's I Autocorrelation Index ## Copyright 2004 Julien Dutheil, 2007-2008 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. ## code cleaned-up by EP (Dec. 2007) Moran.I <- function(x, weight, scaled = FALSE, na.rm = FALSE, alternative = "two.sided") { if(dim(weight)[1] != dim(weight)[2]) stop("'weight' must be a square matrix") n <- length(x) if(dim(weight)[1] != n) stop("'weight' must have as many rows as observations in 'x'") ## Expected mean: ei <- -1/(n - 1) nas <- is.na(x) if (any(nas)) { if (na.rm) { x <- x[!nas] n <- length(x) weight <- weight[!nas, !nas] } else { warning("'x' has missing values: maybe you wanted to set na.rm=TRUE?") return(list(observed = NA, expected = ei, sd = NA, p.value = NA)) } } ## normaling the weights: ## Note that we normalize after possibly removing the ## missing data. ROWSUM <- rowSums(weight) ## the following is useful if an observation has no "neighbour": ROWSUM[ROWSUM == 0] <- 1 weight <- weight/ROWSUM # ROWSUM is properly recycled s <- sum(weight) m <- mean(x) y <- x - m # centre the x's cv <- sum(weight * y %o% y) v <- sum(y^2) obs <- (n/s) * (cv/v) ## Scaling: if (scaled) { i.max <- (n/s) * (sd(rowSums(weight) * y)/sqrt(v/(n - 1))) obs <- obs/i.max } ## Expected sd: S1 <- 0.5 * sum((weight + t(weight))^2) S2 <- sum((apply(weight, 1, sum) + apply(weight, 2, sum))^2) ## the above is the same than: ##S2 <- 0 ##for (i in 1:n) ## S2 <- S2 + (sum(weight[i, ]) + sum(weight[, i]))^2 s.sq <- s^2 k <- (sum(y^4)/n) / (v/n)^2 sdi <- sqrt((n*((n^2 - 3*n + 3)*S1 - n*S2 + 3*s.sq) - k*(n*(n - 1)*S1 - 2*n*S2 + 6*s.sq))/ ((n - 1)*(n - 2)*(n - 3)*s.sq) - 1/((n - 1)^2)) alternative <- match.arg(alternative, c("two.sided", "less", "greater")) pv <- pnorm(obs, mean = ei, sd = sdi) if (alternative == "two.sided") pv <- if (obs <= ei) 2*pv else 2*(1 - pv) if (alternative == "greater") pv <- 1 - pv list(observed = obs, expected = ei, sd = sdi, p.value = pv) } weight.taxo <- function(x) { d <- outer(x, x, "==") diag(d) <- 0 # implicitly converts 'd' into numeric d } weight.taxo2 <- function(x, y) { d <- outer(x, x, "==") & outer(y, y, "!=") diag(d) <- 0 d } correlogram.formula <- function(formula, data = NULL, use = "all.obs") { err <- 'formula must be of the form "y1+...+yn ~ x1/x2/../xn"' use <- match.arg(use, c("all.obs", "complete.obs", "pairwise.complete.obs")) if (formula[[1]] != "~") stop(err) lhs <- formula[[2]] y.nms <- if (length(lhs) > 1) unlist(strsplit(as.character(as.expression(lhs)), " \\+ ")) else as.character(as.expression(lhs)) rhs <- formula[[3]] gr.nms <- if (length(rhs) > 1) rev(unlist(strsplit(as.character(as.expression(rhs)), "/"))) else as.character(as.expression(rhs)) if (is.null(data)) { ## we 'get' the variables in the .GlobalEnv: y <- as.data.frame(sapply(y.nms, get)) gr <- as.data.frame(sapply(gr.nms, get)) } else { y <- data[y.nms] gr <- data[gr.nms] } if (use == "all.obs") { na.fail(y) na.fail(gr) } if (use == "complete.obs") { sel <- complete.cases(y, gr) y <- y[sel] gr <- gr[sel] } na.rm <- use == "pairwise.complete.obs" foo <- function(x, gr, na.rm) { res <- data.frame(obs = NA, p.values = NA, labels = colnames(gr)) for (i in 1:length(gr)) { sel <- if (na.rm) !is.na(x) & !is.na(gr[, i]) else TRUE xx <- x[sel] g <- gr[sel, i] w <- if (i > 1) weight.taxo2(g, gr[sel, i - 1]) else weight.taxo(g) o <- Moran.I(xx, w, scaled = TRUE) res[i, 1] <- o$observed res[i, 2] <- o$p.value } ## We need to specify the two classes; if we specify ## only "correlogram", 'res' is coerced as a list ## (data frames are of class "data.frame" and mode "list") structure(res, class = c("correlogram", "data.frame")) } if (length(y) == 1) foo(y[[1]], gr, na.rm) else structure(lapply(y, foo, gr = gr, na.rm = na.rm), names = y.nms, class = "correlogramList") } plot.correlogram <- function(x, legend = TRUE, test.level = 0.05, col = c("grey", "red"), type = "b", xlab = "", ylab = "Moran's I", pch = 21, cex = 2, ...) { BG <- col[(x$p.values < test.level) + 1] if (pch > 20 && pch < 26) { bg <- col col <- CO <- "black" } else { CO <- BG BG <- bg <- NULL } plot(1:length(x$obs), x$obs, type = type, xaxt = "n", xlab = xlab, ylab = ylab, col = CO, bg = BG, pch = pch, cex = cex, ...) axis(1, at = 1:length(x$obs), labels = x$labels) if (legend) legend("top", legend = paste(c("P >=", "P <"), test.level), pch = pch, col = col, pt.bg = bg, pt.cex = cex, horiz = TRUE) } plot.correlogramList <- function(x, lattice = TRUE, legend = TRUE, test.level = 0.05, col = c("grey", "red"), xlab = "", ylab = "Moran's I", type = "b", pch = 21, cex = 2, ...) { n <- length(x) obs <- unlist(lapply(x, "[[", "obs")) pval <- unlist(lapply(x, "[[", "p.values")) gr <- factor(unlist(lapply(x, "[[", "labels")), ordered = TRUE, levels = x[[1]]$labels) vars <- gl(n, nlevels(gr), labels = names(x)) BG <- col[(pval < test.level) + 1] if (lattice) { ## trellis.par.set(list(plot.symbol=list(pch=19))) lattice::xyplot(obs ~ gr | vars, xlab = xlab, ylab = ylab, panel = function(x, y) { lattice::panel.lines(x, y, lty = 2) lattice::panel.points(x, y, cex = cex, pch = 19, col = BG) ##lattice::panel.abline(h = 0, lty = 3) }) } else { if (pch > 20 && pch < 26) { bg <- col CO <- rep("black", length(obs)) col <- "black" } else { CO <- BG BG <- bg <- NULL } plot(as.numeric(gr), obs, type = "n", xlab = xlab, ylab = ylab, xaxt = "n") for (i in 1:n) { sel <- as.numeric(vars) == i lines(as.numeric(gr[sel]), obs[sel], type = type, lty = i, col = CO[sel], bg = BG[sel], pch = pch, cex = cex, ...) } axis(1, at = 1:length(x[[i]]$obs), labels = x[[i]]$labels) if (legend) { legend("topright", legend = names(x), lty = 1:n, bty = "n") legend("top", legend = paste(c("P >=", "P <"), test.level), pch = pch, col = col, pt.bg = bg, pt.cex = cex, horiz = TRUE) } } }