new code from Pierre Legendre

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Fri, 22 Jan 2010 14:56:16 +0000 (14:56 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Fri, 22 Jan 2010 14:56:16 +0000 (14:56 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Fri, 22 Jan 2010 14:56:16 +0000 (14:56 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Fri, 22 Jan 2010 14:56:16 +0000 (14:56 +0000)
diff --git a/ChangeLog b/ChangeLog

index 0809491f4a2177b13b2a74e2711042de2e33df87..3d55ac2e92a2c68f6d1a42692b7467d39eb3d740 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,6 +3,11 @@
  
  NEW FEATURES
  
+    o The new function parafit by Pierre Legendre tests for the
+      coevolution between hosts and parasites. It has a companion
+      function, pcoa, that does principal coordinate decomposition. The
+      latter has a biplot method.
+
      o The new functions rTraitCont and rTraitDisc simulate continuous and
        discrete traits under a wide range of evolutionary models.
  
diff --git a/DESCRIPTION b/DESCRIPTION

index 1eec4260a9b155a0c61b41117e589edb117bd8fd..e21013f2c656ff31866cd616b65f5579fab60120 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
  Package: ape
  Version: 2.5
-Date: 2010-01-14
+Date: 2010-01-21
  Title: Analyses of Phylogenetics and Evolution
  Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong, Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel, Gangolf Jobb, Christoph Heibl, Daniel Lawson, Vincent Lefort, Pierre Legendre, Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer, Damien de Vienne
  Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
diff --git a/R/biplot.pcoa.R b/R/biplot.pcoa.R

new file mode 100644 (file)

index 0000000..9ba8809
--- /dev/null
+++ b/R/biplot.pcoa.R
@@ -0,0 +1,49 @@
+'biplot.pcoa' <- 
+    function(x, Y=NULL, plot.axes = c(1,2), dir.axis1=1, dir.axis2=1, rn=NULL, ...)
+# x = output object from function pcoa.R
+# Y = optional sites-by-variables data table
+# plot.axes = the two axes to be plotted
+# rn = an optional vector, length n, of object labels
+# dir.axis.1 = -1 to revert axis 1 for the projection of points and variables
+# dir.axis.2 = -1 to revert axis 2 for the projection of points and variables
+#
+# Author: Pierre Legendre, January 2009
+       {
+       k <- ncol(x$vectors)
+       if(k < 2) stop("There is a single eigenvalue. No plot can be produced.")
+       if(k < plot.axes[1]) stop("Axis",plot.axes[1],"does not exist.")
+       if(k < plot.axes[2]) stop("Axis",plot.axes[2],"does not exist.")
+
+       if(!is.null(rn)) rownames(x$vectors) <- rn
+       labels = colnames(x$vectors[,plot.axes])
+       diag.dir <- diag(c(dir.axis1,dir.axis2))
+       x$vectors[,plot.axes] <- x$vectors[,plot.axes] %*% diag.dir
+
+       if(is.null(Y)) {
+               limits <- apply(x$vectors[,plot.axes], 2, range) 
+               ran.x <- limits[2,1] - limits[1,1]
+               ran.y <- limits[2,2] - limits[1,2]
+               xlim <- c((limits[1,1]-ran.x/10), (limits[2,1]+ran.x/5)) 
+               ylim <- c((limits[1,2]-ran.y/10), (limits[2,2]+ran.y/10))
+
+               par(mai = c(1.0, 1.0, 1.0, 0.5))
+               plot(x$vectors[,plot.axes], xlab=labels[1], ylab=labels[2], xlim=xlim, ylim=ylim, asp=1)
+               text(x$vectors[,plot.axes], labels=rownames(x$vectors), pos=4, cex=1, offset=0.5)
+               title(main = "PCoA ordination", line=2.5)
+       
+               } else {
+               # Find positions of variables in biplot:
+               # construct U from covariance matrix between Y and standardized point vectors
+               # (equivalent to PCA scaling 1, since PCoA preserves distances among objects)
+               n <- nrow(Y)
+               points.stand <- scale(x$vectors[,plot.axes])
+               S <- cov(Y, points.stand)
+               U <- S %*% diag((x$values$Eigenvalues[plot.axes]/(n-1))^(-0.5))
+               colnames(U) <- colnames(x$vectors[,plot.axes])
+
+               par(mai = c(1, 0.5, 1.4, 0))
+               biplot(x$vectors[,plot.axes], U, xlab=labels[1], ylab=labels[2])
+               title(main = c("PCoA biplot","Response variables projected","as in PCA with scaling 1"), line=4)
+       }
+    invisible()
+}
diff --git a/R/parafit.R b/R/parafit.R

new file mode 100644 (file)

index 0000000..8c80019
--- /dev/null
+++ b/R/parafit.R
@@ -0,0 +1,160 @@
+'parafit' <-
+       function(host.D, para.D, HP, nperm=999, test.links=FALSE, seed=NULL, correction="none", silent=FALSE)
+#
+# Test of host-parasite coevolution
+# host.D = host distance or patristic matrix (class dist or matrix)
+# para.D = parasite distance or patristic matrix (class dist or matrix)
+# HP = host-parasite link matrix (n.host, n.para)
+#
+#      Pierre Legendre, May 2009
+{
+epsilon <- sqrt(.Machine$double.eps)
+if(is.null(seed)) {
+       runif(1)
+       seed <- .Random.seed[trunc(runif(1,1,626))]
+       }
+
+host.D <- as.matrix(host.D)
+host.pc <- pcoa(host.D, correction=correction)
+if(host.pc$correction[2] == 1) {
+       if(min(host.pc$values[,2]) < -epsilon) stop('Host D matrix has negative eigenvalues. Rerun with correction="lingoes" or correction="cailliez"')
+       sum.host.values.sq <- sum(host.pc$values[,1]^2)
+       host.vectors <- host.pc$vectors
+       } else {
+       sum.host.values.sq <- sum(host.pc$values[,2]^2)
+       host.vectors <- host.pc$vectors.cor
+       }
+n.host <- nrow(host.D)
+
+para.D <- as.matrix(para.D)
+para.pc <- pcoa(para.D, correction=correction)
+if(para.pc$correction[2] == 1) {
+       if(min(para.pc$values[,2]) < -epsilon) stop('Parasite D matrix has negative eigenvalues. Rerun with correction="lingoes" or correction="cailliez"')
+       sum.para.values.sq <- sum(para.pc$values[,1]^2)
+       para.vectors <- para.pc$vectors
+       } else {
+       sum.para.values.sq <- sum(para.pc$values[,2]^2)
+       para.vectors <- para.pc$vectors.cor
+       }
+n.para <- nrow(para.D)
+
+if(!silent) cat("n.hosts =", n.host, ", n.parasites =", n.para,'\n')
+
+a <- system.time({
+tracemax <- max(sum.host.values.sq, sum.para.values.sq)
+
+if(n.host == n.para) {
+       if(!silent) cat("The function cannot check if matrix HP has been entered in the right way.",'\n')
+       if(!silent) cat("It will assume that the rows of HP are the hosts.",'\n')
+       } else {
+       temp <- dim(HP)
+       if(temp[1] == n.host) {
+               if(temp[2] != n.para) stop("Matrices host.D, para.D and HP not comformable")
+               } else if(temp[2] == n.host) {
+                       if(temp[1] != n.para) stop("Matrices host.D, para.D and HP not comformable")
+                       HP <- t(HP)
+                       if(!silent) cat("Matrix HP has been transposed for comformity with host.D and para.D.",'\n')
+               } else {
+               stop("Matrices host.D, para.D and HP not comformable")
+               }
+       }
+p.per.h <- apply(HP, 1, sum)
+h.per.p <- apply(HP, 2, sum)
+#
+# Compute and test the global statistics
+mat.4 <- t(host.vectors) %*% HP %*% para.vectors
+global <- sum(mat.4^2)
+if(nperm > 0) {
+       set.seed(seed)
+       nGT <- 1
+       global.perm <- NA
+       for(i in 1:nperm) {
+               HP.perm <- apply(HP, 2, sample)
+               mat.4.perm <- t(host.vectors) %*% HP.perm %*% para.vectors
+               global.perm <- c(global.perm, sum(mat.4.perm^2))
+               if(global.perm[i+1] >= global) nGT <- nGT+1
+               }
+       global.perm <- global.perm[-1]
+       p.global <- nGT/(nperm+1)
+       } else { p.global <- NA }
+
+#
+# Test individual H-P links
+if(test.links) {
+       # 1. Create the list of H-P pairs
+       list.hp <- which( t(cbind(HP,rep(0,n.host))) > 0)
+       HP.list <- cbind((list.hp %/% (n.para+1))+1, list.hp %% (n.para+1))
+       colnames(HP.list) <- c("Host","Parasite")
+       n.links <- length(list.hp)
+
+       stat1 <- NA
+       stat2 <- NA
+       p.stat1 <- NA
+       p.stat2 <- NA
+       for(k in 1:n.links) {
+               #
+               # 2. Compute reference values of link statistics
+               HP.k <- HP
+               HP.k[HP.list[k,1], HP.list[k,2]] <- 0
+               mat.4.k <- t(host.vectors) %*% HP.k %*% para.vectors
+               trace.k <- sum(mat.4.k^2)
+               stat1 <- c(stat1, (global-trace.k))
+               den <- tracemax-global
+               if(den > epsilon) { 
+                       stat2 <- c(stat2, stat1[k+1]/den) 
+                       } else { 
+                       stat2 <- c(stat2, NA) 
+                       }
+               #
+               # 3. Test link statistics by permutations
+               if(nperm > 0) {
+                       set.seed(seed)
+                       nGT1 <- 1
+                       nGT2 <- 1
+                       nperm2 <- nperm
+                       #
+                       for(i in 1:nperm) {
+                               HP.k.perm <- apply(HP.k, 2, sample)
+                               mat.4.k.perm <- t(host.vectors) %*% HP.k.perm %*% para.vectors
+                               trace.k.perm <- sum(mat.4.k.perm^2)
+                               stat1.perm <- global.perm[i]-trace.k.perm
+                               if(stat1.perm >= stat1[k+1]) nGT1 <- nGT1+1
+                               #
+                               if(!is.na(stat2[k+1])) {
+                                       den <- tracemax-global.perm[i]
+                                       if(den > epsilon) {
+                                               stat2.perm <- stat1.perm/den 
+                                               if(stat2.perm >= stat2[k+1]) nGT2 <- nGT2+1
+                                               } else {
+                                               nperm2 <- nperm2-1
+                                               # if(!silent) cat("In permutation #",i,"den < epsilon",'\n')
+                                               }
+                                       }
+                               }
+                       p.stat1 <- c(p.stat1, nGT1/(nperm+1))
+                       if(!is.na(stat2[k+1])) {
+                               p.stat2 <- c(p.stat2, nGT2/(nperm2+1))
+                               } else {
+                               p.stat2 <- NA
+                               }
+                       } else { 
+                       p.stat1 <- NA
+                       p.stat2 <- NA
+                       }
+               }
+       #
+       link.table <- cbind(HP.list, stat1[-1], p.stat1[-1], stat2[-1], p.stat2[-1])
+       colnames(link.table) = c("Host","Parasite","F1.stat","p.F1","F2.stat","p.F2")
+       out <-list(ParaFitGlobal=global, p.global=p.global, link.table=link.table, para.per.host=p.per.h, host.per.para=h.per.p, nperm=nperm)
+       } else {
+       if(!silent) cat("Rerun the program with option 'test.links=TRUE' to test the individual H-P links",'\n')
+       out <-list(ParaFitGlobal=global, p.global=p.global, para.per.host=p.per.h, host.per.para=h.per.p, nperm=nperm)
+       }
+#
+})
+a[3] <- sprintf("%2f",a[3])
+if(!silent) cat("Computation time =",a[3]," sec",'\n')
+#
+class(out) <- "parafit"
+out
+}
diff --git a/R/pcoa.R b/R/pcoa.R

new file mode 100644 (file)

index 0000000..335fd15
--- /dev/null
+++ b/R/pcoa.R
@@ -0,0 +1,170 @@
+pcoa <- function(D, correction="none", rn=NULL)
+#
+# Principal coordinate analysis (PCoA) of a square distance matrix D
+# with correction for negative eigenvalues.
+# 
+# References:
+# Gower, J. C. 1966. Some distance properties of latent root and vector methods
+#    used in multivariate analysis. Biometrika. 53: 325-338.
+# Gower, J. C. and P. Legendre. 1986. Metric and Euclidean properties of 
+#    dissimilarity coefficients. J. Classif. 3: 5-48.
+# Legendre, P. and L. Legendre. 1998. Numerical ecology, 2nd English edition. 
+#    Elsevier Science BV, Amsterdam. [PCoA: Section 9.2]
+#
+#      Pierre Legendre, October 2007
+{
+centre <- function(D,n)
+# Centre a square matrix D by matrix algebra
+# mat.cen = (I - 11'/n) D (I - 11'/n)
+{      One <- matrix(1,n,n)
+       mat <- diag(n) - One/n
+       mat.cen <- mat %*% D %*% mat
+}
+
+bstick.def <- function (n, tot.var = 1, ...)   # 'bstick.default' from vegan
+{
+    res <- rev(cumsum(tot.var/n:1)/n)
+    names(res) <- paste("Stick", seq(len = n), sep = "")
+    return(res)
+}
+
+# ===== The PCoA function begins here =====
+
+# Preliminary actions
+       D <- as.matrix(D)
+       n <- nrow(D)
+       epsilon <- sqrt(.Machine$double.eps)
+       if(length(rn)!=0) {
+               names <- rn
+               } else {
+               names <- rownames(D)
+               }
+       CORRECTIONS <- c("none","lingoes","cailliez")
+       correct <- pmatch(correction, CORRECTIONS)
+       if(is.na(correct)) stop("Invalid correction method")
+       # cat("Correction method =",correct,'\n')
+
+# Gower centring of matrix D
+# delta1 = (I - 11'/n) [-0.5 d^2] (I - 11'/n)
+       delta1 <- centre((-0.5*D^2),n)
+       trace <- sum(diag(delta1))
+
+# Eigenvalue decomposition
+       D.eig <- eigen(delta1)
+       
+# Negative eigenvalues?
+       min.eig <- min(D.eig$values)
+       zero.eig <- which(abs(D.eig$values) < epsilon)
+       D.eig$values[zero.eig] <- 0
+
+# No negative eigenvalue
+       if(min.eig > -epsilon) {   # Curly 1
+               correct <- 1
+               eig <- D.eig$values
+               k <- length(which(eig > epsilon))
+               rel.eig <- eig[1:k]/trace
+               cum.eig <- cumsum(rel.eig) 
+               vectors <- sweep(D.eig$vectors[,1:k], 2, sqrt(eig[1:k]), FUN="*")
+               bs <- bstick.def(k)
+               cum.bs <- cumsum(bs)
+
+               res <- data.frame(eig[1:k], rel.eig, bs, cum.eig, cum.bs)
+               colnames(res) <- c("Eigenvalues","Relative_eig","Broken_stick","Cumul_eig","Cumul_br_stick")
+               rownames(res) <- 1:nrow(res)
+
+               rownames(vectors) <- names
+               colnames(vectors) <- colnames(vectors, do.NULL = FALSE, prefix = "Axis.")
+               note <- paste("There were no negative eigenvalues. No correction was applied")
+               out <- (list(correction=c(correction,correct), note=note, values=res, vectors=vectors, trace=trace))
+
+# Negative eigenvalues present
+       } else {   # Curly 1
+               k <- n 
+               eig <- D.eig$values
+               rel.eig <- eig/trace
+               rel.eig.cor <- (eig - min.eig)/(trace - (n-1)*min.eig) # Eq. 9.27 for a single dimension
+               rel.eig.cor = c(rel.eig.cor[1:(zero.eig[1]-1)], rel.eig.cor[(zero.eig[1]+1):n], 0)
+               cum.eig.cor <- cumsum(rel.eig.cor) 
+               k2 <- length(which(eig > epsilon))
+               k3 <- length(which(rel.eig.cor > epsilon))
+               vectors <- sweep(D.eig$vectors[,1:k2], 2, sqrt(eig[1:k2]), FUN="*")
+               # Only the eigenvectors with positive eigenvalues are shown
+
+# Negative eigenvalues: three ways of handling the situation
+       if((correct==2) | (correct==3)) {   # Curly 2
+
+               if(correct == 2) {   # Curly 3
+                       # Lingoes correction: compute c1, then the corrected D
+                       c1 <- -min.eig
+                       note <- paste("Lingoes correction applied to negative eigenvalues: D' = -0.5*D^2 -",c1,", except diagonal elements")
+                       D <- -0.5*(D^2 + 2*c1)
+
+                       # Cailliez correction: compute c2, then the corrected D
+                       } else if(correct == 3) {
+                               delta2 <- centre((-0.5*D),n)
+                               upper <- cbind(matrix(0,n,n), 2*delta1)
+                               lower <- cbind(-diag(n), -4*delta2)
+                               sp.matrix <- rbind(upper, lower)
+                               c2 <- max(Re(eigen(sp.matrix, symmetric=FALSE, only.values=TRUE)$values))
+                               note <- paste("Cailliez correction applied to negative eigenvalues: D' = -0.5*(D +",c2,")^2, except diagonal elements")
+                               D <- -0.5*(D + c2)^2
+                       }   # End curly 3
+
+       diag(D) <- 0
+       mat.cor <- centre(D,n)
+       toto.cor <- eigen(mat.cor)
+       trace.cor <- sum(diag(mat.cor))
+
+       # Negative eigenvalues present?
+       min.eig.cor <- min(toto.cor$values)
+       zero.eig.cor <- which((toto.cor$values < epsilon) & (toto.cor$values > -epsilon))
+       toto.cor$values[zero.eig.cor] <- 0
+
+       # No negative eigenvalue after correction: result OK
+       if(min.eig.cor > -epsilon) {   # Curly 4
+               eig.cor <- toto.cor$values
+               rel.eig.cor <- eig.cor[1:k]/trace.cor
+               cum.eig.cor <- cumsum(rel.eig.cor) 
+               k2 <- length(which(eig.cor > epsilon))
+               vectors.cor <- sweep(toto.cor$vectors[,1:k2], 2, sqrt(eig.cor[1:k2]), FUN="*")
+               # bs <- broken.stick(k2)[,2]
+               bs <- bstick.def(k2)
+               bs <- c(bs, rep(0,(k-k2)))
+               cum.bs <- cumsum(bs)
+
+               # Negative eigenvalues still present after correction: incorrect result
+               } else { 
+               if(correct == 2) cat("Problem! Negative eigenvalues are still present after Lingoes",'\n') 
+               if(correct == 3) cat("Problem! Negative eigenvalues are still present after Cailliez",'\n') 
+               rel.eig.cor <- cum.eig.cor <- bs <- cum.bs <- rep(NA,n)
+               vectors.cor <- matrix(NA,n,2)
+               }   # End curly 4
+
+       res <- data.frame(eig[1:k], eig.cor[1:k], rel.eig.cor, bs, cum.eig.cor, cum.bs)
+       colnames(res) <- c("Eigenvalues", "Corr_eig", "Rel_corr_eig", "Broken_stick", "Cum_corr_eig", "Cum_br_stick")
+       rownames(res) <- 1:nrow(res)
+
+       rownames(vectors) <- names
+       colnames(vectors) <- colnames(vectors, do.NULL = FALSE, prefix = "Axis.")
+       out <- (list(correction=c(correction,correct), note=note, values=res, vectors=vectors, trace=trace, vectors.cor=vectors.cor, trace.cor=trace.cor))
+
+       } else {   # Curly 2
+                       
+       note <- "No correction was applied to the negative eigenvalues"
+       bs <- bstick.def(k3)
+       bs <- c(bs, rep(0,(k-k3)))
+       cum.bs <- cumsum(bs)
+
+       res <- data.frame(eig[1:k], rel.eig, rel.eig.cor, bs, cum.eig.cor, cum.bs)
+       colnames(res) <- c("Eigenvalues","Relative_eig","Rel_corr_eig","Broken_stick","Cum_corr_eig","Cumul_br_stick")
+       rownames(res) <- 1:nrow(res)
+
+       rownames(vectors) <- names
+       colnames(vectors) <- colnames(vectors, do.NULL = FALSE, prefix = "Axis.")
+       out <- (list(correction=c(correction,correct), note=note, values=res, vectors=vectors, trace=trace))
+
+                       }   # End curly 2: three ways of handling the situation
+               }       # End curly 1
+       class(out) <- "pcoa"
+       out
+}      # End of PCoA
diff --git a/R/print.parafit.R b/R/print.parafit.R

new file mode 100644 (file)

index 0000000..f23c009
--- /dev/null
+++ b/R/print.parafit.R
@@ -0,0 +1,18 @@
+'print.parafit' <-
+    function(x, ...)
+{
+       cat("\nTest of host-parasite coevolution",'\n','\n')
+
+       cat("Global test:  ParaFitGlobal =",x$ParaFitGlobal,", p-value =", x$p.global, "(", x$nperm,"permutations)",'\n','\n')
+
+       n.links <- nrow(x$link.table)
+       cat("There are",n.links,"host-parasite links in matrix HP",'\n','\n')
+       cat("Test of individual host-parasite links", "(", x$nperm, "permutations)",'\n','\n')
+       print(x$link.table)
+
+       cat('\n',"Number of parasites per host",'\n')
+       print(x$para.per.host)
+       cat('\n',"Number of hosts per parasite",'\n')
+       print(x$host.per.para)
+    invisible(x)
+}
diff --git a/data/HP.links.rda b/data/HP.links.rda

new file mode 100644 (file)

index 0000000..c05999d

Binary files /dev/null and b/data/HP.links.rda differ
diff --git a/data/gopher.D.rda b/data/gopher.D.rda

new file mode 100644 (file)

index 0000000..5d8f617

Binary files /dev/null and b/data/gopher.D.rda differ
diff --git a/data/lice.D.rda b/data/lice.D.rda

new file mode 100644 (file)

index 0000000..6a66396

Binary files /dev/null and b/data/lice.D.rda differ
diff --git a/man/makeLabel.Rd b/man/makeLabel.Rd

index 758769a06e6bdcaa81f0b167f28082438e5456fc..939e4de5d753f771bfaff4c987fcd59c1a7ab21b 100644 (file)
--- a/man/makeLabel.Rd
+++ b/man/makeLabel.Rd
@@ -59,8 +59,8 @@ makeLabel(x, ...)
  }
  \author{Emmanuel Paradis \email{Emmanuel.Paradis@mpl.ird.fr}}
  \seealso{
-  \code{\link[base]{make.unique}}, \code{\link[base]{make.names}},
-  \code{\link[base]{abbreviate}}
+  \code{\link{makeNodeLabel}}, \code{\link[base]{make.unique}},
+  \code{\link[base]{make.names}}, code{\link[base]{abbreviate}}
  }
  \examples{
  x <- rep("a", 3)
diff --git a/man/parafit.Rd b/man/parafit.Rd

new file mode 100644 (file)

index 0000000..0d74006
--- /dev/null
+++ b/man/parafit.Rd
@@ -0,0 +1,77 @@
+\name{parafit}
+\alias{parafit}
+\alias{print.parafit}
+\alias{gopher.D}
+\alias{lice.D}
+\alias{HP.links}
+\title{ Test of host-parasite coevolution }
+\description{
+Function \code{\link{parafit}} tests the hypothesis of coevolution between a clade of hosts and a clade of parasites. The null hypothesis (H0) of the global test is that the evolution of the two groups, as revealed by the two phylogenetic trees and the set of host-parasite association links, has been independent. Tests of individual host-parasite links are also available as an option.
+
+The method, which is described in detail in Legendre et al. (2002), requires some estimates of the phylogenetic trees or phylogenetic distances, and also a description of the host-parasite associations (H-P links) observed in nature.
+}
+\usage{
+parafit(host.D, para.D ,HP ,nperm=999, test.links=FALSE, seed=NULL, 
+correction="none", silent=FALSE)
+}
+
+\arguments{
+  \item{host.D }{ A matrix of phylogenetic or patristic distances among the hosts. A matrix of patristic distances exactly represents the information in a phylogenetic tree. }
+  \item{para.D }{ A matrix of phylogenetic or patristic distances among the parasites. A matrix of patristic distances exactly represents the information in a phylogenetic tree. }
+  \item{HP }{ A rectangular matrix with hosts as rows and parasites as columns. The matrix contains 1's when a host-parasite link has been observed in nature between the host in the row and the parasite in the column, and 0's otherwise. }
+  \item{nperm}{ Number of permutations for the tests. If \code{nperm =
+   0}, permutation tests will not be computed. The default value is \code{nperm = 999}. For large data files, the permutation test is rather slow since the permutation procedure is not compiled. } 
+  \item{test.links }{ \code{test.links = TRUE} will test the significance of individual host-parasite links. Default: \code{test.links = FALSE}. }
+  \item{seed }{ \code{seed = NULL} (default): a seed is chosen at random by the function. That seed is used as the starting point for all tests of significance, i.e. the global H-P test and the tests of individual H-P links if they are requested. Users can select a seed of their choice by giving any integer value to \code{seed}, for example \code{seed = -123456}. Running the function again with the same seed value will produce the exact same test results. }
+  \item{correction}{ Correction methods for negative eigenvalues (details below): \code{correction="lingoes"} and \code{correction="cailliez"}. Default value: \code{"none"}.  }
+  \item{silent}{ Informative messages and the time to compute the tests will not be written to the R console if silent=TRUE. Useful when the function is called by a numerical simulation function. }
+}
+
+\details{
+Two types of test are produced by the program: a global test of coevolution and, optionally, a test on the individual host-parasite (H-P) link.
+
+The function computes principal coordinates for the host and the parasite distance matrices. The principal coordinates (all of them) act as a complete representation of either the phylogenetic distance matrix or the phylogenetic tree. 
+
+Phylogenetic distance matrices are normally Euclidean. Patristic distance matrices are additive, thus they are metric and Euclidean. Euclidean matrices are fully represented by real-valued principal coordinate axes. For non-Euclidean matrices, negative eigenvalues are produced; complex principal coordinate axes are associated with the negative eigenvalues. So, the program rejects matrices that are not Euclidean and stops.
+
+Negative eigenvalues can be corrected for by one of two methods: the Lingoes or the Caillez correction. It is up to the user to decide which correction method should be applied. This is done by selecting the option \code{correction="lingoes"} or \code{correction="cailliez"}. Details on these correction methods are given in the help file of the \code{pcoa} function.
+
+The principle of the global test is the following (H0: independent evolution of the hosts and parasites): (1) Compute matrix D = C t(A) B. Note: D is a fourth-corner matrix (sensu Legendre et al. 1997), where A is the H-P link matrix, B is the matrix of principal coordinates computed from the host.D matrix, and C is the matrix of principal coordinates computed from the para.D matrix. (2) Compute the statistic ParaFitGlobal, the sum of squares of all values in matrix D. (3) Permute at random, separately, each row of matrix A, obtaining matrix A.perm. Compute D.perm = C %*% t(A.perm) %*% B, and from it, compute a permuted value ParaFitGlobal.perm for the statistic. Save this value in a vector trace.perm for the tests of individual links (below). (4) Repeat step 4 a large number of times. (5) Add the reference value of ParaFitGlobal to the distribution of ParaFitGlobal.perm values. Calculate the permutational probability associated to ParaFitGlobal.
+
+The test of each individual H-P link is carried out as follows (H0: this particular link is random): (1) Remove one link (k) from matrix A. (2) Compute matrix D = C t(A) B. (3a) Compute trace(k), the sum of squares of all values in matrix D. (3b) Compute the statistic ParaFitLink1 = (trace - trace(k)) where trace is the ParaFitGlobal statistic. (3c) Compute the statistic ParaFitLink2 = (trace - trace(k)) / (tracemax - trace) where tracemax is the maximum value that can be taken by trace. (4) Permute at random, separately, each row of matrix A, obtaining A.perm. Use the same sequences of permutations as were used in the test of ParaFitGlobal. Using the values of trace and trace.perm saved during the global test, compute the permuted values of the two statistics, ParaFit1.perm and ParaFit2.perm. (5) Repeat step 4 a large number of times. (6) Add the reference value of ParaFit1 to the distribution of ParaFit1.perm values; add the reference value of ParaFit2 to the distribution of ParaFit2.perm values. Calculate the permutational probabilities associated to ParaFit1 and ParaFit2.
+
+The \code{print.parafit} function prints out the results of the global test and, optionally, the results of the tests of the individual host-parasite links.
+}
+
+\value{ 
+
+  \item{ParaFitGlobal }{The statistic of the global H-P test. }
+  \item{p.global }{The permutational p-value associated with the ParaFitGlobal statistic. }  
+  \item{link.table }{The results of the tests of individual H-P links, including the ParaFitLink1 and ParaFitLink2 statistics and the p-values obtained from their respective permutational tests. }  
+  \item{para.per.host }{Number of parasites per host. }
+  \item{host.per.para }{Number of hosts per parasite. }
+  \item{nperm }{Number of permutations for the tests. }
+  }
+
+\author{ Pierre Legendre, Universite de Montreal }
+
+\references{
+Hafner, M. S, P. D. Sudman, F. X. Villablanca, T. A. Spradling, J. W. Demastes and S. A. Nadler. 1994. Disparate rates of molecular evolution in cospeciating hosts and parasites. \emph{Science}, \bold{265}, 1087--1090.
+
+Legendre, P., Y. Desdevises and E. Bazin. 2002. A statistical test for host-parasite coevolution. \emph{Systematic Biology}, \bold{51}, 217--234.
+}
+
+\seealso{\code{\link{pcoa}} }
+
+\examples{
+## Gopher and lice data from Hafner et al. (1994)
+
+data(gopher.D)
+data(lice.D)
+data(HP.links)
+
+res <- parafit(gopher.D, lice.D, HP.links, nperm=99, test.links=TRUE)
+# res     # or else: print(res)
+}
+
+\keyword{ multivariate }
diff --git a/man/pcoa.Rd b/man/pcoa.Rd

new file mode 100644 (file)

index 0000000..8e2137b
--- /dev/null
+++ b/man/pcoa.Rd
@@ -0,0 +1,119 @@
+\name{pcoa}
+\alias{pcoa}
+\alias{biplot.pcoa}
+\title{ Principal Coordinate Analysis }
+\description{
+Function \code{\link{pcoa}} computes principal coordinate decomposition (also called classical scaling) of a distance matrix D (Gower 1966). It implements two correction methods for negative eigenvalues.
+}
+\usage{
+pcoa(D, correction="none", rn=NULL)
+
+\method{biplot}{pcoa}(x, Y=NULL, plot.axes = c(1,2), dir.axis1=1, dir.axis2=1, rn=NULL, ...)
+}
+
+\arguments{
+  \item{D}{ A distance matrix of class \code{dist} or \code{matrix}. }
+  \item{correction}{ Correction methods for negative eigenvalues (details below): \code{"lingoes"} and \code{"cailliez"}. Default value: \code{"none"}.  }
+  \item{rn}{ An optional vector of row names, of length n, for the n objects. }
+  \item{x}{ Output object from \code{\link{pcoa}}. }
+  \item{Y}{ Any rectangular data table containing explanatory variables to be projected onto the ordination plot. That table may contain, for example, the community composition data used to compute D, or any transformation of these data; see examples. }
+  \item{plot.axes}{ The two PCoA axes to plot. }
+  \item{dir.axis1}{ = -1 to revert axis 1 for the projection of points and variables. Default value: +1. }
+  \item{dir.axis2}{ = -1 to revert axis 2 for the projection of points and variables. Default value: +1. }
+  \item{...}{ Other graphical arguments passed to function. }
+}
+
+\details{
+This function implements two methods for correcting for negative values in principal coordinate analysis (PCoA). Negative eigenvalues can be produced in PCoA when decomposing distance matrices produced by coefficients that are not Euclidean (Gower and Legendre 1986, Legendre and Legendre 1998).
+
+Another function in this library, \code{pcoa.all}, was developed to provide principal coordinate decomposition for the function \code{PCNM} of this library. It computes the eigenvectors corresponding to all eigenvalues, positive and negative, using matrix algebra.
+
+In \code{pcoa}, when negative eigenvalues are present in the decomposition results, the distance matrix D can be modified using either the Lingoes or the Cailliez procedure to produce results without negative eigenvalues.
+
+In the Lingoes (1971) procedure, a constant c1, equal to twice absolute value of the largest negative value of the original principal coordinate analysis, is added to each original squared distance in the distance matrix, except the diagonal values. A newe principal coordinate analysis, performed on the modified distances, has at most (n-2) positive eigenvalues, at least 2 null eigenvalues, and no negative eigenvalue.
+
+In the Cailliez (1983) procedure, a constant c2 is added to the original distances in the distance matrix, except the diagonal values. The calculation of c2 is described in Legendre and Legendre (1998). A new principal coordinate analysis, performed on the modified distances, has at most (n-2) positive eigenvalues, at least 2 null eigenvalues, and no negative eigenvalue.
+
+In all cases, only the eigenvectors corresponding to positive eigenvalues are shown in the output list. The eigenvectors are scaled to the square root of the corresponding eigenvalues. Gower (1966) has shown that eigenvectors scaled in that way preserve the original distance (in the D matrix) among the objects. These eigenvectors can be used to plot ordination graphs of the objects.
+
+We recommend not to use PCoA to produce ordinations from the chord, chi-square, abundance profile, or Hellinger distances. It is easier to first transform the community composition data using the following transformations,available in the \code{decostand} function of the \code{vegan} package, and then to carry out a principal component analysis (PCA) on the transformed data:
+
+\describe{
+\item{ }{Chord transformation: decostand(spiders,"normalize") }
+\item{ }{Transformation to relative abundance profiles: decostand(spiders,"total") }
+\item{ }{Hellinger transformation: decostand(spiders,"hellinger") }
+\item{ }{Chi-square transformation: decostand(spiders,"chi.square") }
+}
+
+The ordination results will be identical and the calculations shorter. This two-step ordination method, called transformation-based PCA (tb-PCA), was described by Legendre and Gallagher (2001).
+
+The \code{biplot.pcoa} function produces plots for any pair of principal coordinates. The original variables can be projected onto the ordination plot.
+}
+
+\value{
+
+  \item{correction }{The values of parameter \code{correction} and variable 'correct' in the function. }
+  \item{note }{A note describing the type of correction done, if any. }
+  \item{values }{The eigenvalues and related information: }
+  \item{Eigenvalues}{All eigenvalues (positive, null, negative). }
+  \item{Relative_eig}{Relative eigenvalues. }
+  \item{Corr_eig}{Corrected eigenvalues (Lingoes correction); Legendre and Legendre (1998, p. 438, eq. 9.27). }
+  \item{Rel_corr_eig}{Relative eigenvalues after Lingoes or Cailliez correction. }
+  \item{Broken_stick}{Expected fractions of variance under the broken stick model. }
+  \item{Cumul_eig}{Cumulative relative eigenvalues. }
+  \item{Cum_corr_eig}{Cumulative corrected relative eigenvalues. }
+  \item{Cumul_br_stick}{Cumulative broken stick fractions. }
+  \item{vectors}{The principal coordinates with positive eigenvalues. }
+  \item{trace}{The trace of the distance matrix. This is also the sum of all eigenvalues, positive and negative. }
+  \item{vectors.cor }{The principal coordinates with positive eigenvalues from the distance matrix corrected using the method specified by parameter \code{correction}. }
+  \item{trace.cor }{The trace of the corrected distance matrix. This is also the sum of its eigenvalues. }
+}
+
+\references{
+Cailliez, F. 1983. The analytical solution of the additive constant problem. \emph{Psychometrika}, \bold{48}, 305--308.
+
+Gower, J. C. 1966. Some distance properties of latent root and vector methods used in multivariate analysis. \emph{Biometrika}, \bold{53}, 325--338.
+
+Gower, J. C. and P. Legendre. 1986. Metric and Euclidean properties of
+dissimilarity coefficients. \emph{Journal of Classification}, \bold{3}, 5--48.
+
+Legendre, P. and E. D. Gallagher. 2001. Ecologically meaningful transformations for ordination of species data. \emph{Oecologia}, \bold{129}, 271--280.
+
+Legendre, P. and L. Legendre. 1998. \emph{Numerical ecology, 2nd English
+  edition.} Amsterdam: Elsevier Science BV.
+
+Lingoes, J. C. 1971. Some boundary conditions for a monotone analysis of symmetric matrices. \emph{Psychometrika}, \bold{36}, 195--203.
+}
+
+\author{ Pierre Legendre, Universite de Montreal }
+
+\examples{
+# Oribatid mite data from Borcard and Legendre (1994)
+
+if (require(vegan)) {
+data(mite)  # Community composition data, 70 peat cores, 35 species
+
+# Select rows 1:30. Species 35 is absent from these rows. Transform to log
+mite.log <- log(mite[1:30,-35]+1)  # Equivalent: log1p(mite[1:30,-35])
+
+# Principal coordinate analysis and simple ordination plot
+mite.D <- vegdist(mite.log, "bray")
+res <- pcoa(mite.D)
+res$values
+biplot(res)
+
+# Project unstandardized and standardized species on the PCoA ordination plot
+mite.log.st = apply(mite.log, 2, scale, center=TRUE, scale=TRUE)
+
+par(mfrow=c(1,2))
+biplot(res, mite.log)
+biplot(res, mite.log.st)
+
+# Reverse the ordination axes in the  plot
+par(mfrow=c(1,2))
+biplot(res, mite.log, dir.axis1=-1, dir.axis2=-1)
+biplot(res, mite.log.st, dir.axis1=-1, dir.axis2=-1)
+}
+}
+
+\keyword{ multivariate }
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Fri, 22 Jan 2010 14:56:16 +0000 (14:56 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Fri, 22 Jan 2010 14:56:16 +0000 (14:56 +0000)
ChangeLog		patch \| blob \| history
DESCRIPTION		patch \| blob \| history
R/biplot.pcoa.R	[new file with mode: 0644]	patch \| blob
R/parafit.R	[new file with mode: 0644]	patch \| blob
R/pcoa.R	[new file with mode: 0644]	patch \| blob
R/print.parafit.R	[new file with mode: 0644]	patch \| blob
data/HP.links.rda	[new file with mode: 0644]	patch \| blob
data/gopher.D.rda	[new file with mode: 0644]	patch \| blob
data/lice.D.rda	[new file with mode: 0644]	patch \| blob
man/makeLabel.Rd		patch \| blob \| history
man/parafit.Rd	[new file with mode: 0644]	patch \| blob
man/pcoa.Rd	[new file with mode: 0644]	patch \| blob