X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2Fdist.gene.R;h=bee883c9b3e266e9bdda1c8bb4b23526812f31ed;hb=a8d86ed3981c64b6d20316d54d3ed4d02c2b887e;hp=b0c432eef9fb1cbb8a91958dd646eeded6536ff4;hpb=c827059eeafc8cbe41c812b26979543ab287803e;p=ape.git diff --git a/R/dist.gene.R b/R/dist.gene.R index b0c432e..bee883c 100644 --- a/R/dist.gene.R +++ b/R/dist.gene.R @@ -1,47 +1,54 @@ -## dist.gene.R (2002-08-28) +## dist.gene.R (2009-04-01) ## Pairwise Distances from Genetic Data -## Copyright 2002 Emmanuel Paradis +## Copyright 2002-2009 Emmanuel Paradis ## This file is part of the R-package `ape'. ## See the file ../COPYING for licensing issues. -dist.gene.pairwise <- function(x, variance = FALSE) +dist.gene <- + function(x, method = "pairwise", pairwise.deletion = FALSE, + variance = FALSE) { - if (is.data.frame(x)) x <- as.matrix(x) - L <- ncol(x) - n <- nrow(x) - D <- matrix(NA, n, n) - diag(D) <- 0 + if (!is.data.frame(x) && !is.matrix(x)) + stop("'x' should be a matrix or a data.frame") + method <- match.arg(method, c("pairwise", "percentage")) + + if (!pairwise.deletion) { + ## delete the columns with at least one NA: + del <- apply(x, 2, function(xx) any(is.na(xx))) + x <- x[, !del] + } + n <- dim(x) + L <- n[2] + n <- n[1] + D <- double(n*(n - 1)/2) + if (pairwise.deletion) L <- D + k <- 1 for (i in 1:(n - 1)) { for (j in (i + 1):n) { - D[i, j] <- D[j, i] <- L - sum(x[i, ] == x[j, ]) + y <- x[i, ] != x[j, ] + if (pairwise.deletion) L[k] <- sum(!is.na(y)) + D[k] <- sum(y, na.rm = TRUE) + k <- k + 1 } } - if (!is.null(rownames(x))) rownames(D) <- colnames(D) <- rownames(x) - if (variance) { - var.D <- D * (L - D) / L - return(list(distances = D, variance = var.D)) - } - else return(D) -} + ## L is either a single integer value if pairwise.deletion = FALSE, + ## or a vector of integers if pairwise.deletion = TRUE + + if (method == "percentage") D <- D/L + + attr(D, "Size") <- n + attr(D, "Labels") <- dimnames(x)[[1]] + attr(D, "Diag") <- attr(D, "Upper") <- FALSE + attr(D, "call") <- match.call() + attr(D, "method") <- method + class(D) <- "dist" -dist.gene.percentage <- function(x, variance = FALSE) -{ - L <- ncol(x) - D <- dist.gene.pairwise(x) / L if (variance) { - var.D <- D * (1 - D) / L - return(list(pairwise.distances = D, variance = var.D)) + y <- if (method == "pairwise") L else 1 + attr(D, "variance") <- D*(y - D)/L } - else return(D) -} - -dist.gene <- function(x, method = "pairwise", variance = FALSE) -{ - if (method == "pairwise") - return(dist.gene.pairwise(x, variance = variance)) - if (method == "percentage") - return(dist.gene.percentage(x, variance = variance)) + D }