-## dist.gene.R (2002-08-28)
+## dist.gene.R (2009-04-01)
## Pairwise Distances from Genetic Data
-## Copyright 2002 Emmanuel Paradis
+## Copyright 2002-2009 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
-dist.gene.pairwise <- function(x, variance = FALSE)
+dist.gene <-
+ function(x, method = "pairwise", pairwise.deletion = FALSE,
+ variance = FALSE)
{
- if (is.data.frame(x)) x <- as.matrix(x)
- L <- ncol(x)
- n <- nrow(x)
- D <- matrix(NA, n, n)
- diag(D) <- 0
+ if (!is.data.frame(x) && !is.matrix(x))
+ stop("'x' should be a matrix or a data.frame")
+ method <- match.arg(method, c("pairwise", "percentage"))
+
+ if (!pairwise.deletion) {
+ ## delete the columns with at least one NA:
+ del <- apply(x, 2, function(xx) any(is.na(xx)))
+ x <- x[, !del]
+ }
+ n <- dim(x)
+ L <- n[2]
+ n <- n[1]
+ D <- double(n*(n - 1)/2)
+ if (pairwise.deletion) L <- D
+ k <- 1
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
- D[i, j] <- D[j, i] <- L - sum(x[i, ] == x[j, ])
+ y <- x[i, ] != x[j, ]
+ if (pairwise.deletion) L[k] <- sum(!is.na(y))
+ D[k] <- sum(y, na.rm = TRUE)
+ k <- k + 1
}
}
- if (!is.null(rownames(x))) rownames(D) <- colnames(D) <- rownames(x)
- if (variance) {
- var.D <- D * (L - D) / L
- return(list(distances = D, variance = var.D))
- }
- else return(D)
-}
+ ## L is either a single integer value if pairwise.deletion = FALSE,
+ ## or a vector of integers if pairwise.deletion = TRUE
+
+ if (method == "percentage") D <- D/L
+
+ attr(D, "Size") <- n
+ attr(D, "Labels") <- dimnames(x)[[1]]
+ attr(D, "Diag") <- attr(D, "Upper") <- FALSE
+ attr(D, "call") <- match.call()
+ attr(D, "method") <- method
+ class(D) <- "dist"
-dist.gene.percentage <- function(x, variance = FALSE)
-{
- L <- ncol(x)
- D <- dist.gene.pairwise(x) / L
if (variance) {
- var.D <- D * (1 - D) / L
- return(list(pairwise.distances = D, variance = var.D))
+ y <- if (method == "pairwise") L else 1
+ attr(D, "variance") <- D*(y - D)/L
}
- else return(D)
-}
-
-dist.gene <- function(x, method = "pairwise", variance = FALSE)
-{
- if (method == "pairwise")
- return(dist.gene.pairwise(x, variance = variance))
- if (method == "percentage")
- return(dist.gene.percentage(x, variance = variance))
+ D
}