new pic.ortho() and varCompPhylip() + fix in dist.nodes()

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Mon, 15 Nov 2010 13:56:37 +0000 (13:56 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Mon, 15 Nov 2010 13:56:37 +0000 (13:56 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Mon, 15 Nov 2010 13:56:37 +0000 (13:56 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Mon, 15 Nov 2010 13:56:37 +0000 (13:56 +0000)
diff --git a/ChangeLog b/ChangeLog

index 69c19c3a7f200c10591a523c3c074b532b64437d..94d9816f25afd2f3ca0a76b58d2918b99f01788e 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,6 +3,10 @@
  
  NEW FEATURES
  
+    o Two new functions, pic.ortho and varCompPhylip, implements the
+      orthonormal contrasts of Felsenstein (2008, Am Nat, 171:713). The
+      second function requires Phylip to be installed on the computer.
+
      o bd.ext() has a new option conditional = TRUE to use probabilities
        conditioned on no extinction for the taxonomic data.
  
@@ -11,6 +15,9 @@ BUG FIXES
  
      o write.tree() failed to output correctly tree names.
  
+    o dist.nodes() returned duplicated column(s) with unrooted and/or
+      multichotomous trees.
+
  
  
                 CHANGES IN APE VERSION 2.6-1
diff --git a/DESCRIPTION b/DESCRIPTION

index 01bf1ad59b8b99084e9509456f7ac86e49ed4d43..f007b94050f0ed70c9f0a925f38547f99e6a1259 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: ape
  Version: 2.6-2
-Date: 2010-11-04
+Date: 2010-11-15
  Title: Analyses of Phylogenetics and Evolution
  Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Klaus Schliep, Korbinian Strimmer, Damien de Vienne
  Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
diff --git a/R/cophenetic.phylo.R b/R/cophenetic.phylo.R

index a40cce092e292bc0a54f6bb4451d5f173a951779..bdcbfcf75e808d34abfc548da5352a07615e0be2 100644 (file)
--- a/R/cophenetic.phylo.R
+++ b/R/cophenetic.phylo.R
@@ -1,8 +1,8 @@
-## cophenetic.phylo.R (2007-01-23)
+## cophenetic.phylo.R (2010-11-15)
  
  ##   Pairwise Distances from a Phylogenetic Tree
  
-## Copyright 2006-2007 Emmanuel Paradis
+## Copyright 2006-2010 Emmanuel Paradis
  
  ## This file is part of the R-package `ape'.
  ## See the file ../COPYING for licensing issues.
@@ -10,10 +10,15 @@
  dist.nodes <- function(x)
  {
      if (is.null(x$edge.length))
-      stop("your tree has no branch lengths")
+        stop("your tree has no branch lengths")
+
+    trim <- FALSE
+    if (!is.binary.tree(x) || !is.rooted(x)) {
+        trim <- TRUE
+        x <- makeNodeLabel(x)
+        x <- multi2di(x, random = FALSE)
+    }
  
-    if (!is.binary.tree(x) || !is.rooted(x))
-      x <- multi2di(x, random = FALSE)
      n <- length(x$tip.label)
      n.node <- x$Nnode
      N <- n + n.node
@@ -57,6 +62,11 @@ dist.nodes <- function(x)
              res[des1, nod] <- res[nod, des1] <- res[anc, des1] + l
          }
      }
+    if (trim) {
+        i <- which(x$node.label == "") + n
+        res <- res[-i, -i]
+        N <- dim(res)[1]
+    }
      dimnames(res)[1:2] <- list(1:N)
      res
  }
diff --git a/R/pic.R b/R/pic.R

index 140743987e9757bd605470f08be45cca1190194d..122b60d3c7fd679acd091b6a694303d965ce0bc2 100644 (file)
--- a/R/pic.R
+++ b/R/pic.R
@@ -1,4 +1,4 @@
-## pic.R (2010-08-18)
+## pic.R (2010-11-15)
  
  ##   Phylogenetically Independent Contrasts
  
@@ -12,7 +12,7 @@ pic <- function(x, phy, scaled = TRUE, var.contrasts = FALSE)
      if (!inherits(phy, "phylo"))
        stop("object 'phy' is not of class \"phylo\"")
      if (is.null(phy$edge.length))
-      stop("your tree has no branch lengths: you may consider setting them equal to one, or using the function `compute.brlen'.")
+      stop("your tree has no branch lengths")
      nb.tip <- length(phy$tip.label)
      nb.node <- phy$Nnode
      if (nb.node != nb.tip - 1)
@@ -20,7 +20,7 @@ pic <- function(x, phy, scaled = TRUE, var.contrasts = FALSE)
      if (length(x) != nb.tip)
        stop("length of phenotypic and of phylogenetic data do not match")
      if (any(is.na(x)))
-      stop("the present method cannot (yet) be used directly with missing data: you may consider removing the species with missing data from your tree with the function `drop.tip'.")
+      stop("missing data in 'x': you may consider removing the species with missing data from your tree with the function 'drop.tip'.")
  
      phy <- reorder(phy, "pruningwise")
      phenotype <- numeric(nb.tip + nb.node)
@@ -32,18 +32,17 @@ pic <- function(x, phy, scaled = TRUE, var.contrasts = FALSE)
            phenotype[1:nb.tip] <- x[phy$tip.label]
          else {
              phenotype[1:nb.tip] <- x
-            warning('the names of argument "x" and the tip labels of the tree did not match: the former were ignored in the analysis.')
+            warning("the names of argument 'x' and the tip labels of the tree did not match: the former were ignored in the analysis.")
          }
      }
      ## No need to copy the branch lengths: they are rescaled
      ## in the C code, so it's important to leave the default
      ## `DUP = TRUE' of .C.
-    contr <- var.con <- numeric(nb.node)
  
      ans <- .C("pic", as.integer(nb.tip), as.integer(nb.node),
                as.integer(phy$edge[, 1]), as.integer(phy$edge[, 2]),
                as.double(phy$edge.length), as.double(phenotype),
-              as.double(contr), as.double(var.con),
+              double(nb.node), double(nb.node),
                as.integer(var.contrasts), as.integer(scaled),
                PACKAGE = "ape")
  
@@ -73,3 +72,114 @@ pic <- function(x, phy, scaled = TRUE, var.contrasts = FALSE)
      } else names(contr) <- lbls
      contr
  }
+
+pic.ortho <- function(x, phy, var.contrasts = FALSE, intra = FALSE)
+{
+    n <- length(x)
+    m <- n - 1L # number of nodes
+    phy <- reorder(phy, "pruningwise")
+    xx <- unlist(lapply(x, mean)) # 'x' in Felsenstein's paper
+    xx <- c(xx, numeric(m))
+    delta.v <- numeric(n + m)
+    s <- 1/unlist(lapply(x, length))
+    s <- c(s, numeric(m))
+    contrast <- var.cont <- numeric(m)
+
+    i <- 1L
+    while (i < m + n) {
+        d1 <- phy$edge[i, 2]
+        d2 <- phy$edge[i + 1L, 2]
+        a <- phy$edge[i, 1]
+        tmp1 <- 1/(phy$edge.length[i] + delta.v[d1])
+        tmp2 <- 1/(phy$edge.length[i + 1L] + delta.v[d2])
+        xx[a] <- (tmp1 * xx[d1] + tmp2 * xx[d2])/(tmp1 + tmp2)
+        delta.v[a] <- 1/(tmp1 + tmp2)
+        f1 <- tmp1/(tmp1 + tmp2)
+        f2 <- tmp2/(tmp1 + tmp2)
+        s[a] <- f1*f1 * s[d1] + f2*f2 * s[d2]
+        tmp <- 1/(s[d1] + s[d2])
+        contrast[a - n] <- (xx[d1] - xx[d2]) * sqrt(tmp)
+        var.cont[a - n] <- (1/tmp1 + 1/tmp2) * tmp
+        i <- i + 2L
+    }
+
+    lbls <-
+        if (is.null(phy$node.label)) as.character(1:m + n)
+        else phy$node.label
+
+    if (var.contrasts) {
+        contrast <- cbind(contrast, var.cont)
+        dimnames(contrast) <- list(lbls, c("contrasts", "variance"))
+    } else names(contrast) <- lbls
+
+    if (intra) {
+        intraspe.ctr <- function(x) {
+            k <- length(x) - 1L
+            if (!k) return(NULL)
+            ctr <- numeric(k)
+            ctr[1L] <- x[1L] - x[2L]
+            if (k > 1)
+                for (i in 2:k)
+                    ctr[i] <- x[i + 1L] - mean(x[1:i])
+            sqrt((1:k)/(1:k + 1)) * ctr
+        }
+        tmp <- lapply(x, intraspe.ctr)
+        names(tmp) <- phy$tip.label
+        attr(contrast, "intra") <- tmp
+    }
+
+    contrast
+}
+
+varCompPhylip <- function(x, phy, exec = NULL)
+{
+    n <- Ntip(phy)
+    if (is.vector(x)) x <- as.list(x)
+    if (is.matrix(x) || is.data.frame(x)) {
+        tmpx <- vector("list", n)
+        for (i in 1:n) tmpx[[i]] <- x[i, , drop = FALSE]
+        names(tmpx) <- rownames(x)
+        x <- tmpx
+    }
+    p <- if (is.vector(x[[1]])) 1L else ncol(x[[1]])
+    if (!is.null(names(x))) x <- x[phy$tip.label]
+
+    phy <- makeLabel(phy, len = 10)
+    lbs <- phy$tip.label
+
+    ni <- sapply(x, function(xx) if (is.vector(xx)) 1L else nrow(xx))
+
+    pfx <- tempdir()
+    write.tree(phy, file = paste(pfx, "intree", sep = "/"))
+    infile <- paste(pfx, "infile", sep = "/")
+    file.create(infile)
+    cat(n, " ", p, "\n", sep = "", file = infile, append = TRUE)
+    for (i in 1:n) {
+        cat(lbs[i], file = infile, append = TRUE)
+        ## can surely be better but OK for the moment:
+        cat(paste(rep(" ", 11 - nchar(lbs[i])), collapse = ""),
+            file = infile, append = TRUE)
+        cat(ni[i], "\n", sep = "", file = infile, append = TRUE)
+        if (ni[i] == 1) {
+            cat(x[[i]], sep = " ", file = infile, append = TRUE)
+            cat("\n", file = infile, append = TRUE)
+        } else write(t(x[[i]]), file = infile, ncolumns = p, append = TRUE)
+    }
+
+    if (is.null(exec))
+        exec <-
+            if (.Platform$OS.type == "unix") "phylip contrast"
+            else "contrast"
+
+    odir <- setwd(pfx)
+    on.exit(setwd(odir))
+    if (file.exists("outfile")) unlink("outfile")
+    system("phylip contrast", intern = TRUE, input = c("W", "A", "Y"))
+    varA <- scan("outfile", skip = 7, nlines = p, quiet = TRUE)
+    varE <- scan("outfile", skip = 11 + p, nlines = p, quiet = TRUE)
+    if (p > 1) {
+        varA <- matrix(varA, p, p, byrow = TRUE)
+        varE <- matrix(varE, p, p, byrow = TRUE)
+    }
+    list(varA = varA, varE = varE)
+}
diff --git a/man/pic.Rd b/man/pic.Rd

index 0c9de0ba5ce040f424a512dc1f1625f664942eb5..f0d57da5fbe569308109a6dbec2e15ab22722fbe 100644 (file)
--- a/man/pic.Rd
+++ b/man/pic.Rd
@@ -8,9 +8,9 @@ pic(x, phy, scaled = TRUE, var.contrasts = FALSE)
    \item{x}{a numeric vector.}
    \item{phy}{an object of class \code{"phylo"}.}
    \item{scaled}{logical, indicates whether the contrasts should be
-    scaled with their expected variance (default to \code{TRUE}).}
+    scaled with their expected variances (default to \code{TRUE}).}
    \item{var.contrasts}{logical, indicates whether the expected
-    variance of the contrasts should be returned (default to \code{FALSE}).}
+    variances of the contrasts should be returned (default to \code{FALSE}).}
  }
  \description{
    Compute the phylogenetically independent contrasts using the method
@@ -31,7 +31,9 @@ pic(x, phy, scaled = TRUE, var.contrasts = FALSE)
    either a vector of phylogenetically independent contrasts (if
    \code{var.contrasts = FALSE}), or a two-column matrix with the
    phylogenetically independent contrasts in the first column and their
-  expected variance in the second column (if \code{var.contrasts = TRUE}).
+  expected variance in the second column (if \code{var.contrasts =
+  TRUE}). If the tree has node labels, these are used as labels of the
+  returned object.
  }
  \references{
    Felsenstein, J. (1985) Phylogenies and the comparative method.
@@ -39,7 +41,9 @@ pic(x, phy, scaled = TRUE, var.contrasts = FALSE)
  }
  \author{Emmanuel Paradis}
  \seealso{
-  \code{\link{read.tree}}, \code{\link{compar.gee}}, \code{\link{compar.lynch}}
+  \code{\link{read.tree}}, \code{\link{compar.gee}},
+  \code{\link{compar.lynch}}, \code{\link{pic.ortho}},
+  \code{\link{varCompPhylip}}
  }
  \examples{
  ### The example in Phylip 3.5c (originally from Lynch 1991)
diff --git a/man/pic.ortho.Rd b/man/pic.ortho.Rd

new file mode 100644 (file)

index 0000000..c55e032
--- /dev/null
+++ b/man/pic.ortho.Rd
@@ -0,0 +1,59 @@
+\name{pic.ortho}
+\alias{pic.ortho}
+\title{Phylogenetically Independent Orthonormal Contrasts}
+\description{
+  This function computes the orthonormal contrasts using the method
+  described by Felsenstein (2008). Only a single trait can be analyzed;
+  there can be several observations per species.
+}
+\usage{
+pic.ortho(x, phy, var.contrasts = FALSE, intra = FALSE)
+}
+\arguments{
+  \item{x}{a numeric vector or a list of numeric vectors.}
+  \item{phy}{an object of class \code{"phylo"}.}
+  \item{var.contrasts}{logical, indicates whether the expected
+    variances of the contrasts should be returned (default to
+    \code{FALSE}).}
+  \item{intra}{logical, whether to return the intraspecific contrasts.}
+}
+\details{
+  The data \code{x} can be in two forms: a vector if there is a single
+  observation for each species, or a list whose elements are vectors
+  containing the individual observations for each species. These vectors
+  may be of different lengths.
+
+  If \code{x} has names, its values are matched to the tip labels of
+  \code{phy}, otherwise its values are taken to be in the same order
+  than the tip labels of \code{phy}.
+}
+\value{
+  either a vector of contrasts, or a two-column matrix with the
+  contrasts in the first column and their expected variances in the
+  second column (if \code{var.contrasts = TRUE}). If the tree has node
+  labels, these are used as labels of the returned object.
+
+  If \code{intra = TRUE}, the attribute \code{"intra"}, a list of
+  vectors with the intraspecific contrasts or \code{NULL} for the
+  species with a one observation, is attached to the returned object.
+}
+\references{
+  Felsenstein, J. (2008) Comparative methods with sampling error and
+  within-species variation: Contrasts revisited and revised.
+  \emph{American Naturalist}, \bold{171}, 713--725.
+}
+\author{Emmanuel Paradis}
+\seealso{
+  \code{\link{pic}}, \code{\link{varCompPhylip}}
+}
+\examples{
+tr <- rcoal(30)
+### a single observation per species:
+x <- rTraitCont(tr)
+pic.ortho(x, tr)
+pic.ortho(x, tr, TRUE)
+### different number of observations per species:
+x <- lapply(sample(1:5, 30, TRUE), rnorm)
+pic.ortho(x, tr, intra = TRUE)
+}
+\keyword{regression}
diff --git a/man/varCompPhylip.Rd b/man/varCompPhylip.Rd

new file mode 100644 (file)

index 0000000..77a5136
--- /dev/null
+++ b/man/varCompPhylip.Rd
@@ -0,0 +1,68 @@
+\name{varCompPhylip}
+\alias{varCompPhylip}
+\title{Variance Components with Orthonormal Contrasts}
+\description{
+  This function calls Phylip's contrast program and returns the
+  phylogenetic and phenotypic variance-covariance components for one or
+  several traits. There can be several observations per species.
+}
+\usage{
+varCompPhylip(x, phy, exec = NULL)
+}
+\arguments{
+  \item{x}{a numeric vector, a matrix (or data frame), or a list.}
+  \item{phy}{an object of class \code{"phylo"}.}
+  \item{exec}{a character string giving the name of the executable
+    contrast program (see details).}
+}
+\details{
+  The data \code{x} can be in several forms: (i) a numeric vector if
+  there is single trait and one observation per species; (ii) a
+  matrix or data frame if there are several traits (as columns) and a
+  single observation of each trait for each species; (iii) a list of
+  vectors if there is a single trait and several observations per
+  species; (iv) a list of matrices or data frames: same than (ii) but
+  with several traits and the rows are individuals.
+
+  If \code{x} has names, its values are matched to the tip labels of
+  \code{phy}, otherwise its values are taken to be in the same order
+  than the tip labels of \code{phy}.
+
+  Phylip (version 3.68 or higher) must be accessible on your computer. If
+  you have a Unix-like operating system, the executable name is assumed
+  to be \code{"phylip contrast"} (as in Debian); otherwise it is set
+  to \code{"contrast"}. If this doesn't suit your system, use the
+  option \code{exec} accordingly. If the executable is not in the path, you
+  may need to specify it, e.g., \code{exec = "C:/Program Files/Phylip/contrast"}.
+}
+\value{
+  a list with elements \code{varA} and \code{varE} with the phylogenetic
+  (additive) and phenotypic (environmental) variance-covariance
+  matrices. If a single trait is analyzed, these contains its variances.
+}
+\references{
+  Felsenstein, J. (2004) Phylip (Phylogeny Inference Package) version
+  3.68. Department of Genetics, University of Washington, Seattle, USA.
+  \url{http://evolution.genetics.washington.edu/phylip/phylip.html}.
+
+  Felsenstein, J. (2008) Comparative methods with sampling error and
+  within-species variation: Contrasts revisited and revised.
+  \emph{American Naturalist}, \bold{171}, 713--725.
+}
+\author{Emmanuel Paradis}
+\seealso{
+  \code{\link{pic}}, \code{\link{pic.ortho}}, \code{\link{compar.lynch}}
+}
+\examples{
+\dontrun{
+tr <- rcoal(30)
+### Five traits, one observation per species:
+x <- replicate(5, rTraitCont(tr, sigma = 1))
+varCompPhylip(x, tr) # varE is small
+x <- replicate(5, rnorm(30))
+varCompPhylip(x, tr) # varE is large
+### Five traits, ten observations per species:
+x <- replicate(30, replicate(5, rnorm(10)), simplify = FALSE)
+varCompPhylip(x, tr)
+}}
+\keyword{regression}
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Mon, 15 Nov 2010 13:56:37 +0000 (13:56 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Mon, 15 Nov 2010 13:56:37 +0000 (13:56 +0000)
ChangeLog		patch \| blob \| history
DESCRIPTION		patch \| blob \| history
R/cophenetic.phylo.R		patch \| blob \| history
R/pic.R		patch \| blob \| history
man/pic.Rd		patch \| blob \| history
man/pic.ortho.Rd	[new file with mode: 0644]	patch \| blob
man/varCompPhylip.Rd	[new file with mode: 0644]	patch \| blob