+++ /dev/null
-## heterozygosity.R (2002-08-28)
-
-## Heterozygosity at a Locus Using Gene Frequencies
-
-## Copyright 2002 Emmanuel Paradis
-
-## This file is part of the R-package `ape'.
-## See the file ../COPYING for licensing issues.
-
-heterozygosity <- function(x, variance = FALSE)
-{
- if (!is.factor(x)) {
- if (is.numeric(x)) {
- n <- sum(x)
- k <- length(x)
- freq <- x/n
- }
- else x <- factor(x)
- }
- if (is.factor(x)) { # ne pas remplacer par `else'...
- n <- length(x)
- k <- nlevels(x)
- freq <- table(x)/n
- }
- sp2 <- sum(freq^2)
- H <- n * (1 - sp2) / (n - 1)
- if (variance) {
- sp3 <- sum(freq^3)
- var.H <- 2 * (2 * (n - 2) * (sp3 - sp2^2) + sp2 - sp2^2) / (n * (n - 1))
- return(c(H, var.H))
- }
- else return(H)
-}
-
-H <- function(x, variance = FALSE) heterozygosity(x, variance)
+++ /dev/null
-## theta.R (2002-08-28)
-
-## Population Parameter THETA
-
-## theta.h: using homozigosity
-## theta.k: using expected number of alleles
-## theta.s: using segregating sites in DNA sequences
-
-## Copyright 2002 Emmanuel Paradis
-
-## This file is part of the R-package `ape'.
-## See the file ../COPYING for licensing issues.
-
-theta.h <- function(x, standard.error = FALSE)
-{
- HE <- H(x, variance = TRUE)
- sdH <- HE[2]
- HE <- HE[1]
- f <- function(th) HE - th * (1 + (2 * (1 + th)) / ((2 + th) * (3 + th)))
- th <- uniroot(f, interval = c(0, 1))$root
- if (standard.error) {
- SE <- (2 + th)^2 * (2 + th)^3 * sdH /
- HE^2 * (1 + th) * ((2 + th) * (3 + th) * (4 + th) + 10 * (2 + th) + 4)
- return(c(th, SE))
- }
- else return(th)
-}
-
-theta.k <- function(x, n = NULL, k = NULL)
-{
- if (is.null(n)) {
- if (!is.factor(x)) {
- if (is.numeric(x)) {
- n <- sum(x)
- k <- length(x)
- }
- else x <- factor(x)
- }
- if (is.factor(x)) { # ne pas remplacer par `else'...
- n <- length(x)
- k <- nlevels(x)
- }
- }
- f <- function(th) th * sum(1 / (th + (0:(n - 1)))) - k
- th <- uniroot(f, interval = c(1e-8, 100))$root
- return(th)
-}
-
-theta.s <- function(s, n, variance = FALSE)
-{
- a1 <- sum(1 / (1:(n - 1)))
- th <- s / a1
- if (variance) {
- a2 <- sum(1 / (1:(n - 1))^2)
- var.th <- (a1^2 * s + a2 * s^2) / (a1^2 * (a1^2 + a2))
- return(c(th, var.th))
- }
- else return(th)
-}
+++ /dev/null
-\name{heterozygosity}
-\alias{heterozygosity}
-\alias{H}
-\title{Heterozygosity at a Locus Using Gene Frequencies}
-\usage{
-heterozygosity(x, variance = FALSE)
-H(x, variance = FALSE)
-}
-\arguments{
- \item{x}{a vector or a factor.}
- \item{variance}{a logical indicating whether the variance of the
- estimated heterozygosity should be returned (\code{TRUE}), the
- default being \code{FALSE}.}
-}
-\description{
- This function computes the mean heterozygosity from gene frequencies,
- and returns optionally the associated variance.
-}
-\value{
- a numeric vector of length one with the estimated mean heterozygosity
- (the default), or of length two if the variance is returned
- \code{variance = TRUE}.
-}
-\details{
- The argument \code{x} can be either a factor or a vector. If it is a
- factor, then it is taken to give the individual alleles in the
- population. If it is a numeric vector, then its values are taken to be
- the numbers of each allele in the population. If it is a non-numeric
- vector, it is a coerced as a factor.
-
- The mean heterozygosity is estimated with:
-
- \deqn{\hat{H} = \frac{n}{n-1} \left(1 - \sum_{i=1}^k p_i^2 \right)}{%
- H = n(1 - SUM (FROM i=1 TO k) p_i^2)/(n - 1)}
-
- where \eqn{n} is the number of genes in the sample, \eqn{k} is the
- number of alleles, and \eqn{p_i} is the observed (relative) frequency
- of the allele \eqn{i}.
-}
-\references{
- Nei, M. (1987) \emph{Molecular evolutionary genetics}. New York:
- Columbia University Press.
-}
-\author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
-\seealso{
- \code{\link{theta.s}}
-}
-\keyword{manip}
-\keyword{univar}
+++ /dev/null
-\name{nuc.div}
-\alias{nuc.div}
-\title{Nucleotide Diversity}
-\description{
- This function computes the nucleotide diversity from a sample of DNA
- sequences.
-}
-\usage{
-nuc.div(x, variance = FALSE, pairwise.deletion = FALSE)
-}
-\arguments{
- \item{x}{a matrix or a list which contains the DNA sequences.}
- \item{variance}{a logical indicating whether to compute the variance
- of the estimated nucleotide diversity.}
- \item{pairwise.deletion}{a logical indicating whether to delete the
- sites with missing data in a pairwise way. The default is to delete
- the sites with at least one missing data for all sequences.}
-}
-\details{
- The nucleotide diversity is the sum of the number of differences
- between pairs of sequences divided by the number of comparisons
- (i.e. n(n - 1)/2, where n is the number of sequences).
-
- The variance of the estimated diversity uses formula (10.9) from Nei
- (1987). This applies only if all sequences are of the same lengths,
- and cannot be used if \code{pairwise.deletion = TRUE}. A bootstrap
- estimate may be in order if you insist on using the latter option.
-}
-\value{
- A numeric vector with one or two values (if \code{variance = TRUE}).
-}
-\references{
- Nei, M. (1987) \emph{Molecular evolutionary genetics}. New York:
- Columbia University Press.
-}
-\author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
-\seealso{
- \code{\link{base.freq}}, \code{\link{GC.content}},
- \code{\link{theta.s}}, \code{\link{seg.sites}}
-}
-\examples{
-data(woodmouse)
-nuc.div(woodmouse)
-nuc.div(woodmouse, TRUE)
-nuc.div(woodmouse, FALSE, TRUE)
-}
-\keyword{manip}
-\keyword{univar}
+++ /dev/null
-\name{theta.h}
-\alias{theta.h}
-\title{Population Parameter THETA using Homozygosity}
-\usage{
-theta.h(x, standard.error = FALSE)
-}
-\arguments{
- \item{x}{a vector or a factor.}
- \item{standard.error}{a logical indicating whether the standard error
- of the estimated theta should be returned (\code{TRUE}), the default
- being \code{FALSE}.}
-}
-\description{
- This function computes the population parameter THETA using the
- homozygosity (or mean heterozygosity) from gene frequencies.
-}
-\value{
- a numeric vector of length one with the estimated theta (the default),
- or of length two if the standard error is returned
- (\code{standard.error = TRUE}).
-}
-\details{
- The argument \code{x} can be either a factor or a vector. If it is a
- factor, then it is taken to give the individual alleles in the
- population. If it is a numeric vector, then its values are taken to be
- the numbers of each allele in the population. If it is a non-numeric
- vector, it is a coerced as a factor.
-
- The standard error is computed with an approximation due to
- Chakraborty and Weiss (1991).
-}
-\references{
- Zouros, E. (1979) Mutation rates, population sizes and amounts of
- electrophoretic variation at enzyme loci in natural
- populations. \emph{Genetics}, \bold{92}, 623--646.
-
- Chakraborty, R. and Weiss, K. M. (1991) Genetic variation of the
- mitochondrial DNA genome in American Indians is at mutation-drift
- equilibrium. \emph{American Journal of Human Genetics}, \bold{86}, 497--506.
-}
-\author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
-\seealso{
- \code{\link{heterozygosity}}, \code{\link{theta.s}}, \code{\link{theta.k}}
-}
-\keyword{manip}
-\keyword{univar}
+++ /dev/null
-\name{theta.k}
-\alias{theta.k}
-\title{Population Parameter THETA using Expected Number of Alleles}
-\usage{
-theta.k(x, n = NULL, k = NULL)
-}
-\arguments{
- \item{x}{a vector or a factor.}
- \item{n}{a numeric giving the sample size.}
- \item{k}{a numeric giving the number of alleles.}
-}
-\description{
- This function computes the population parameter THETA using the
- expected number of alleles.
-}
-\value{
- a numeric vector of length one with the estimated theta.
-}
-\details{
- This function can be used in two ways: either with a vector giving the
- individual genotypes from which the sample size and number of alleles
- are derived (\code{theta.k(x)}), or giving directly these two
- quantities (\code{theta.k(n, k)}).
-
- The argument \code{x} can be either a factor or a vector. If it is a
- factor, then it is taken to give the individual alleles in the
- population. If it is a numeric vector, then its values are taken to be
- the numbers of each allele in the population. If it is a non-numeric
- vector, it is a coerced as a factor.
-
- Both arguments \code{n} and \code{k} must be single numeric values.
-}
-\note{
- For the moment, no standard-error or confidence interval is computed.
-}
-\references{
- Ewens, W. J. (1972) The sampling theory of selectively neutral
- alleles. \emph{Theoretical Population Biology}, \bold{3}, 87--112.
-}
-\author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
-\seealso{
- \code{\link{theta.h}}, \code{\link{theta.s}}
-}
-\keyword{manip}
-\keyword{univar}
+++ /dev/null
-\name{theta.s}
-\alias{theta.s}
-\title{Population Parameter THETA using Segregating Sites
- in DNA Sequences}
-\usage{
-theta.s(s, n, variance = FALSE)
-}
-\arguments{
- \item{s}{a numeric giving the number of segregating sites.}
- \item{n}{a numeric giving the number of sequences.}
- \item{variance}{a logical indicating whether the variance of the
- estimated THETA should be returned (\code{TRUE}), the default being
- \code{FALSE}.}
-}
-\description{
- This function computes the population parameter THETA using the
- number of segregating sites \code{s} in a sample of \code{n} DNA sequences.
-}
-\value{
- a numeric vector of length one with the estimated theta (the default),
- or of length two if the standard error is returned
- (\code{variance = TRUE}).
-}
-\note{
- The number of segregating sites needs to be computed beforehand, for
- instance with the function \code{seg.sites} (see example below).
-}
-\references{
- Watterson, G. (1975) On the number of segragating sites in genetical
- models without recombination. \emph{Theoretical Population Biology},
- \bold{7}, 256--276.
-
- Tajima, F. (1989) Statistical method for testing the neutral mutation
- hypothesis by DNA polymorphism. \emph{Genetics}, \bold{123}, 585--595.
-}
-\author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
-\seealso{
- \code{\link{theta.h}}, \code{\link{theta.k}}, \code{\link{seg.sites}},
- \code{\link{nuc.div}}
-}
-\examples{
-data(woodmouse)
-s <- length(seg.sites(woodmouse))
-n <- nrow(woodmouse)
-theta.s(s, n)
-theta.s(s, n, variance = TRUE)
-}
-\keyword{manip}
-\keyword{univar}