+ CHANGES IN APE VERSION 2.2-2
+
+
+NEW FEATURES
+
+ o dist.gene() has been substantially improved and gains an option
+ 'pairwise.deletion'.
+
+
+
CHANGES IN APE VERSION 2.2-1
Package: ape
-Version: 2.2-1
-Date: 2008-07-11
+Version: 2.2-2
+Date: 2008-07-18
Title: Analyses of Phylogenetics and Evolution
Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong,
Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel,
-## dist.gene.R (2002-08-28)
+## dist.gene.R (2008-07-18)
## Pairwise Distances from Genetic Data
-## Copyright 2002 Emmanuel Paradis
+## Copyright 2002-2008 Emmanuel Paradis
## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.
-dist.gene.pairwise <- function(x, variance = FALSE)
+dist.gene <-
+ function(x, method = "pairwise", pairwise.deletion = FALSE,
+ variance = FALSE)
{
- if (is.data.frame(x)) x <- as.matrix(x)
- L <- ncol(x)
- n <- nrow(x)
- D <- matrix(NA, n, n)
- diag(D) <- 0
+ if (!is.data.frame(x) || !is.matrix(x))
+ stop("'x' should be a matrix or a data.frame")
+ method <- match.arg(method, c("pairwise", "percentage"))
+
+ if (!pairwise.deletion) {
+ ## delete the columns with at least one NA:
+ del <- apply(x, 2, function(xx) any(is.na(xx)))
+ x <- x[, -del]
+ }
+ n <- dim(x)
+ L <- n[2]
+ n <- n[1]
+ D <- double(n*(n - 1)/2)
+ if (pairwise.deletion) L <- D
+ k <- 1
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
- D[i, j] <- D[j, i] <- L - sum(x[i, ] == x[j, ])
+ y <- x[i, ] != x[j, ]
+ if (pairwise.deletion) L[k] <- sum(!is.na(y))
+ D[k] <- sum(y, na.rm = TRUE)
+ k <- k + 1
}
}
- if (!is.null(rownames(x))) rownames(D) <- colnames(D) <- rownames(x)
- if (variance) {
- var.D <- D * (L - D) / L
- return(list(distances = D, variance = var.D))
- }
- else return(D)
-}
+ ## L is either a single integer value if pairwise.deletion = FALSE,
+ ## or a vector of integers if pairwise.deletion = TRUE
+
+ if (method == "percentage") D <- D/L
+
+ attr(D, "Size") <- n
+ attr(D, "Labels") <- dimnames(x)[[1]]
+ attr(D, "Diag") <- attr(d, "Upper") <- FALSE
+ attr(D, "call") <- match.call()
+ attr(D, "method") <- method
+ class(D) <- "dist"
-dist.gene.percentage <- function(x, variance = FALSE)
-{
- L <- ncol(x)
- D <- dist.gene.pairwise(x) / L
if (variance) {
- var.D <- D * (1 - D) / L
- return(list(pairwise.distances = D, variance = var.D))
+ y <- if (method == "pairwise") L else 1
+ attr(D, "variance") <- D*(y - D)/L
}
- else return(D)
-}
-
-dist.gene <- function(x, method = "pairwise", variance = FALSE)
-{
- if (method == "pairwise")
- return(dist.gene.pairwise(x, variance = variance))
- if (method == "percentage")
- return(dist.gene.percentage(x, variance = variance))
+ D
}
-## mlphylo.R (2008-06-18)
+## mlphylo.R (2008-07-15)
## Estimating Phylogenies by Maximum Likelihood
loglik <- 0
if (!quiet) cat("Fitting in progress... ")
- res <<- res <- .C("mlphylo_DNAmodel", as.integer(nb.tip), as.integer(S),
+ res <- .C("mlphylo_DNAmodel", as.integer(nb.tip), as.integer(S),
as.raw(Y$SEQ), as.double(Y$ANC), as.double(Y$w),
as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]),
as.double(phy$edge.length), as.integer(npart),
as.double(alpha), as.integer(Y$nalpha),
as.integer(Y$ncat), as.double(invar), as.integer(Y$ninvar),
as.double(BF), as.integer(search.tree), as.integer(fixed),
- as.double(loglik), NAOK = TRUE, PACKAGE = "mlphylo")
+ as.double(loglik), NAOK = TRUE, PACKAGE = "ape")
if (!quiet) cat("DONE!\n")
phy$edge.length = res[[8]]
attr(phy, "loglik") <- res[[23]]
\name{dist.gene}
\alias{dist.gene}
-\alias{dist.gene.pairwise}
-\alias{dist.gene.percentage}
\title{Pairwise Distances from Genetic Data}
\usage{
-dist.gene(x, method = "pairwise", variance = FALSE)
-dist.gene.pairwise(x, variance = FALSE)
-dist.gene.percentage(x, variance = FALSE)
+dist.gene(x, method = "pairwise", pairwise.deletion = FALSE,
+ variance = FALSE)
}
\arguments{
\item{x}{a matrix or a data frame.}
\item{method}{a character string specifying the method used to compute
- the distances; only two choices are available: \code{"pairwise"},
- and \code{"percentage"}.}
+ the distances; two choices are available: \code{"pairwise"} and
+ \code{"percentage"}, or any unambiguous abbreviation of these.}
+ \item{pairwise.deletion}{a logical indicating whether to delete the
+ columns with missing data on a pairwise basis. The default is to
+ delete the columns with at least one missing observation.}
\item{variance}{a logical, indicates whether the variance of the
- distances should be returned (default to FALSE).}
+ distances should be returned (default to \code{FALSE}).}
}
\description{
- These functions compute a matrix of distances between pairs of
+ This function computes a matrix of distances between pairs of
individuals from a matrix or a data frame of genetic data.
}
\details{
This function is meant to be very general and accepts different kinds
- of data (alleles, haplotypes, DNA sequences, and so on). The rows of
+ of data (alleles, haplotypes, SNP, DNA sequences, \dots). The rows of
the data matrix represent the individuals, and the columns the loci.
In the case of the pairwise method, the distance \eqn{d} between two
For more elaborate distances with DNA sequences, see the function
\code{dist.dna}.
}
+\note{
+ Missing data (\code{NA}) are coded and treated in R's usual way.
+}
\value{
- either a numeric matrix with possibly the names of the individuals (as
- given by the rownames of the argument \code{x}) as colnames and rownames
- (if \code{variance = FALSE}, the default), or a list of two matrices names
- \code{distances} and \code{variance}, respectively (if \code{variance =
- TRUE}).
+ an object of class \link[stats]{"dist"}. If \code{variance = TRUE} an
+ attribute called \code{"variance"} is given to the returned object.
}
\author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
\seealso{
- \code{\link{dist.dna}}, \code{\link{cophenetic.phylo}}
+ \code{\link{dist.dna}}, \code{\link{cophenetic.phylo}},
+ \code{\link[stats]{dist}}
}
\keyword{manip}