added the contribution of PL on CDAM

author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Fri, 20 Feb 2009 10:28:02 +0000 (10:28 +0000)

committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>

Fri, 20 Feb 2009 10:28:02 +0000 (10:28 +0000)
author paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Fri, 20 Feb 2009 10:28:02 +0000 (10:28 +0000)
committer paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
Fri, 20 Feb 2009 10:28:02 +0000 (10:28 +0000)
diff --git a/ChangeLog b/ChangeLog

index d3e56a54ec36272bd5b0e5e61f9e504d21153152..2d14a5ba3a3332a55185379fde7d3eaf19533bc7 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,12 @@
-               CHANGES IN APE VERSION 2.2-5
+               CHANGES IN APE VERSION 2.3
  
  
  NEW FEATURES
  
+    o The new functions CADM.global and CADM.post, contributed by
+      Pierre Legendre, test the congruence among several distance
+      matrices.
+
      o The new function yule.time fits a user-defined time-dependent
        Yule model by maximum likelihood.
  
diff --git a/DESCRIPTION b/DESCRIPTION

index d168fa62cf38f04f6cac39bf3f21364264ff6b43..e013e3f4742e1a9987b44e0d489931341564ec72 100644 (file)
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,12 +1,12 @@
  Package: ape
-Version: 2.2-5
+Version: 2.3
  Date: 2009-02-20
  Title: Analyses of Phylogenetics and Evolution
  Author: Emmanuel Paradis, Ben Bolker, Julien Claude, Hoa Sien Cuong,
    Richard Desper, Benoit Durand, Julien Dutheil, Olivier Gascuel,
-  Gangolf Jobb, Christoph Heibl, Vincent Lefort, Jim Lemon,
-  Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein, Korbinian Strimmer,
-  Damien de Vienne
+  Gangolf Jobb, Christoph Heibl, Vincent Lefort, Pierre Legendre,
+  Jim Lemon, Yvonnick Noel, Johan Nylander, Rainer Opgen-Rhein,
+  Korbinian Strimmer, Damien de Vienne
  Maintainer: Emmanuel Paradis <Emmanuel.Paradis@ird.fr>
  Depends: R (>= 2.6.0)
  Suggests: gee
diff --git a/R/CDAM.global.R b/R/CDAM.global.R

new file mode 100644 (file)

index 0000000..184bffa
--- /dev/null
+++ b/R/CDAM.global.R
@@ -0,0 +1,216 @@
+`CADM.global` <-
+       function(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, silent=FALSE)
+{
+### Function to test the overall significance of the congruence among 
+### a group of distance matrices using Kendall's coefficient of concordance W.
+###
+### copyleft - Pierre Legendre, December 2008
+###
+### Reference -
+### Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance 
+### matrices: single malt Scotch whiskies revisited. Australian and New Zealand 
+### Journal of Statistics 46: 615-629.
+###
+### Parameters of the function --
+###
+### Dmat = A text file listing the distance matrices one after the other, with
+###        or without blank lines.
+###        Each matrix is in the form of a square distance matrix with 0's 
+###        on the diagonal.
+###
+### nmat = number of distance matrices in file Dmat.
+###
+### n = number of objects in each distance matrix. All matrices have same n.
+###
+### nperm = number of permutations for the tests.
+###
+### make.sym = TRUE: turn asymmetric matrices into symmetric matrices by 
+###            averaging the two triangular portions.
+###          = FALSE: analyse asymmetric matrices as they are.
+###
+### weights = a vector of positive weights for the distance matrices. 
+###           Example: weights = c(1,2,3)
+###         = NULL (default): all matrices have same weight in calculation of W.
+###
+### silent = TRUE: informative messages will not be printed, except stopping 
+###          messages. Option useful for simulation work.
+###        = FALSE: informative messages will be printed.
+###
+################################################################################
+       
+       if(nmat < 2) 
+               stop("Analysis requested for a single D matrix: CADM is useless")
+       
+       a <- system.time({
+
+    ## Check the input file
+    if(ncol(Dmat) != n) 
+       stop("Error in the value of 'n' or in the D matrices themselves")
+    nmat2 <- nrow(Dmat)/n
+    if(nmat2 < nmat)  # OK if 'nmat' < number of matrices in the input file
+       stop("Number of input D matrices = ",nmat2,"; this value is < nmat")
+
+    nd <- n*(n-1)/2
+    if(is.null(weights)) {
+       w <- rep(1,nmat)
+       } else {
+       if(length(weights) != nmat) 
+               stop("Incorrect number of values in vector 'weights'")
+       if(length(which(weights < 0)) > 0) 
+               stop("Negative weights are not permitted")
+       w <- weights*nmat/sum(weights)
+       if(!silent) cat("Normalized weights =",w,'\n')
+       }
+    
+    ## Are asymmetric D matrices present?
+    asy <- rep(FALSE, nmat)
+       asymm <- FALSE
+    end <- 0
+    for(k in 1:nmat) {
+        begin <- end+1
+        end <- end+n
+        D.temp <- Dmat[begin:end,]
+        if(sum(abs(diag(as.matrix(D.temp)))) > 0) 
+               stop("Diagonal not 0: matrix #",k," is not a distance matrix")
+        vec1 <- as.vector(as.dist(D.temp))
+        vec2 <- as.vector(as.dist(t(D.temp)))
+        if(sum(abs((vec1-vec2))) > 0) {
+               if(!silent) cat("Matrix #",k," is asymmetric",'\n')
+               asy[k] <- TRUE
+               asymm <- TRUE
+               }
+        }
+    D1 <- as.list(1:nmat)
+    if(asymm) {
+       if(make.sym) {
+               if(!silent) cat("\nAsymmetric matrices were transformed to be symmetric",'\n')
+               } else {
+               nd <- nd*2
+               if(!silent) cat("\nAnalysis carried out on asymmetric matrices",'\n')
+               D2 <- as.list(1:nmat)
+               }
+       } else {
+       if(!silent) cat("Analysis of symmetric matrices",'\n')
+       }
+    Y <- rep(NA,nd)
+    
+    ## String out the distance matrices (vec) and assemble them as columns into matrix 'Y'
+    ## Construct also matrices of ranked distances D1[[k]] and D2[[k]] for permutation test
+    end <- 0
+    for(k in 1:nmat) {
+        begin <- end+1
+        end <- end+n
+        D.temp <- as.matrix(Dmat[begin:end,])
+        vec <- as.vector(as.dist(D.temp))
+        if(asymm) {
+               if(!make.sym) {
+                       ## Analysis carried out on asymmetric matrices: 
+                       ## The ranks are computed on the whole matrix except the diagonal values.
+                       ## The two halves are stored as symmetric matrices in D1[[k]] and D2[[k]]
+                       vec <- c(vec, as.vector(as.dist(t(D.temp))))
+                       diag(D.temp) <- NA
+                       D.temp2 <- rank(D.temp)
+                       diag(D.temp2) <- 0
+                       # cat("nrow =",nrow(D.temp2)," ncol =",ncol(D.temp2),'\n')
+                               # cat("Matrix ",k," min =",min(D.temp2)," max =",max(D.temp2),'\n')
+                               # cat("Matrix ",k," max values #",which(D.temp2 == max(D.temp2)),'\n')
+                       D1[[k]] <- as.matrix(as.dist(D.temp2))
+                       D2[[k]] <- as.matrix(as.dist(t(D.temp2)))
+                       } else {
+                       ## Asymmetric matrices transformed to be symmetric, stored in D1[[k]]
+                       vec <- (vec + as.vector(as.dist(t(D.temp)))) / 2
+                               D.temp2 <- (D.temp + t(D.temp)) / 2
+                               D.temp2 <- as.dist(D.temp2)
+                       D.temp2[] <- rank(D.temp2)
+                               D.temp2 <- as.matrix(D.temp2)
+                       D1[[k]] <- D.temp2
+                       }
+               } else {
+               ## Symmetric matrices are stored in D1[[k]]
+               D.temp2 <- as.dist(D.temp)
+               D.temp2[] <- rank(D.temp2)
+               D1[[k]] <- as.matrix(D.temp2)
+               }
+        Y <- cbind(Y, vec)
+        }
+    Y <- as.matrix(Y[,-1])
+    colnames(Y) <- colnames(Y,do.NULL = FALSE, prefix = "Dmat.")
+    
+    ## Begin calculations for global test
+    
+    ## Compute the reference values of the statistics: W and Chi2
+       ## Transform the distances to ranks, by column
+       Rmat <- apply(Y,2,rank)
+
+       ## Correction factors for tied ranks (eq. 3.3)
+       t.ranks <- apply(Rmat, 2, function(x) summary(as.factor(x)))
+       TT <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x))))
+       # if(!silent) cat("TT = ",TT,'\n')
+       
+       ## Compute the S = Sum-of-Squares of the row-marginal sums of ranks (eq. 1a)
+       ## The ranks are weighted during the sum by the vector of matrix weights 'w'
+       ## Eq. 1b cannot be used with weights; see formula for W below
+       sumRanks <- as.vector(Rmat%*%w)
+       S <- (nd-1)*var(sumRanks)
+       
+       ## Compute Kendall's W (eq. 2a)
+       ## Eq. 2b cannot be used with weights 
+       ## because the sum of all ranks is not equal to m*n*(n+1)/2 in that case
+       W <- (12*S)/(((nmat^2)*((nd^3)-nd))-(nmat*TT))
+               
+       ## Calculate Friedman's Chi-square (Kendall W paper, 2005, eq. 3.4)
+       Chi2 <- nmat*(nd-1)*W
+
+       ## Test the Chi2 statistic by permutation
+       counter <- 1
+       for(j in 1:nperm) {   # Each matrix is permuted independently
+                             # There is no need to permute the last matrix
+               Rmat.perm <- rep(NA,nd)
+               ##
+               if(asymm & !make.sym) {
+                       ## For asymmetric matrices: permute the values within each triangular 
+                       ## portion, stored as square matrices in D1[[]] and D2[[]]
+                       for(k in 1:(nmat-1)) {
+                               order <- sample(n)
+                               vec <- as.vector(as.dist(D1[[k]][order,order]))
+                           vec <- c(vec, as.vector(as.dist(D2[[k]][order,order])))
+                               Rmat.perm <- cbind(Rmat.perm, vec)
+                               }
+                               vec <- as.vector(as.dist(D1[[nmat]]))
+                           vec <- c(vec, as.vector(as.dist(D2[[nmat]])))
+                               Rmat.perm <- cbind(Rmat.perm, vec)
+                       } else {
+                       for(k in 1:(nmat-1)) {
+                               order <- sample(n)
+                               vec <- as.vector(as.dist(D1[[k]][order,order]))
+                               Rmat.perm <- cbind(Rmat.perm, vec)
+                               }
+                               vec <- as.vector(as.dist(D1[[nmat]]))
+                               Rmat.perm <- cbind(Rmat.perm, vec)
+                       }
+               # Remove the first column of Rmat.perm containing NA
+               # The test is based on the comparison of S and S.perm instead of the comparison of 
+               # Chi2 and Chi2.perm: it is faster that way. 
+               # S, W, and Chi2 are equivalent statistics for permutation tests.
+               Rmat.perm <- as.matrix(Rmat.perm[,-1])
+               S.perm <- (nd-1)*var(as.vector(Rmat.perm%*%w))
+               if(S.perm >= S) counter <- counter+1
+       }
+       prob.perm.gr <- counter/(nperm+1)
+
+       table <- rbind(W, Chi2, prob.perm.gr)
+       colnames(table) <- "Statistics"
+       rownames(table) <- c("W", "Chi2", "Prob.perm")
+       })
+       a[3] <- sprintf("%2f",a[3])
+       if(!silent) cat("\nTime to compute global test =",a[3]," sec",'\n')
+#
+       # if(asymm & !make.sym) { out <- list(congruence_analysis=table, D1=D1, D2=D2)
+       # } else {
+          out <- list(congruence_analysis=table)
+       #    }
+#
+       out$nperm <- nperm
+       class(out) <- "CADM.global"
+       out
+}
diff --git a/R/CDAM.post.R b/R/CDAM.post.R

new file mode 100644 (file)

index 0000000..00b3d76
--- /dev/null
+++ b/R/CDAM.post.R
@@ -0,0 +1,276 @@
+`CADM.post` <-
+       function(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, mult="holm", mantel=FALSE, silent=FALSE)
+{
+### Function to carry out a posteriori tests of the contribution of individual 
+### matrices to the congruence of a group of distance matrices.
+###
+### copyleft - Pierre Legendre, December 2008
+###
+### Reference -
+### Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance 
+### matrices: single malt Scotch whiskies revisited. Australian and New Zealand 
+### Journal of Statistics 46: 615-629.
+###
+### Parameters of the function --
+###
+### Dmat = A text file listing the distance matrices one after the other, with
+###        or without blank lines.
+###        Each matrix is in the form of a square distance matrix with 0's 
+###        on the diagonal.
+###
+### nmat = number of distance matrices in file Dmat.
+###
+### n = number of objects in each distance matrix. All matrices have same n.
+###
+### nperm = number of permutations for the tests.
+###
+### make.sym = TRUE: turn asymmetric matrices into symmetric matrices by 
+###            averaging the two triangular portions.
+###          = FALSE: analyse asymmetric matrices as they are.
+###
+### weights = a vector of positive weights for the distance matrices. 
+###           Example: weights = c(1,2,3)
+###         = NULL (default): all matrices have same weight in calculation of W.
+###
+### mult = method for correcting P-values due to multiple testing. The methods 
+###        are "holm" (default), "sidak", and "bonferroni". The Bonferroni 
+###        correction is overly conservative; it is not recommended. It is 
+###        included to allow comparisons with the other methods.
+###
+### mantel = TRUE: Mantel statistics are computed from ranked distances,
+###          as well as permutational P-values.
+###        = FALSE (default): Mantel statistics and tests are not computed.
+###
+### silent = TRUE: informative messages will not be printed, except stopping 
+###          messages. Option useful for simulation work.
+###        = FALSE: informative messages will be printed.
+###
+################################################################################
+       
+       mult <- match.arg(mult, c("sidak", "holm", "bonferroni"))
+       if(nmat < 2) 
+               stop("Analysis requested for a single D matrix: CADM is useless")
+       
+       a <- system.time({
+
+    ## Check the input file
+    if(ncol(Dmat) != n) 
+       stop("Error in the value of 'n' or in the D matrices themselves")
+    nmat2 <- nrow(Dmat)/n
+    if(nmat2 < nmat)  # OK if 'nmat' < number of matrices in the input file
+       stop("Number of input D matrices = ",nmat2,"; this value is < nmat")
+
+    nd <- n*(n-1)/2
+    if(is.null(weights)) {
+       w <- rep(1,nmat)
+       } else {
+       if(length(weights) != nmat) 
+               stop("Incorrect number of values in vector 'weights'")
+       if(length(which(weights < 0)) > 0) 
+               stop("Negative weights are not permitted")
+       w <- weights*nmat/sum(weights)
+       if(!silent) cat("Normalized weights =",w,'\n')
+       }
+    
+    ## Are asymmetric D matrices present?
+    asy <- rep(FALSE, nmat)
+       asymm <- FALSE
+    end <- 0
+    for(k in 1:nmat) {
+        begin <- end+1
+        end <- end+n
+        D.temp <- Dmat[begin:end,]
+        if(sum(abs(diag(as.matrix(D.temp)))) > 0) 
+               stop("Diagonal not 0: matrix #",k," is not a distance matrix")
+        vec1 <- as.vector(as.dist(D.temp))
+        vec2 <- as.vector(as.dist(t(D.temp)))
+        if(sum(abs((vec1-vec2))) > 0) {
+               if(!silent) cat("Matrix #",k," is asymmetric",'\n')
+               asy[k] <- TRUE
+               asymm <- TRUE
+               }
+        }
+    D1 <- as.list(1:nmat)
+    if(asymm) {
+       if(make.sym) {
+               if(!silent) cat("\nAsymmetric matrices were transformed to be symmetric",'\n')
+               } else {
+               nd <- nd*2
+               if(!silent) cat("\nAnalysis carried out on asymmetric matrices",'\n')
+               D2 <- as.list(1:nmat)
+               }
+       } else {
+       if(!silent) cat("Analysis of symmetric matrices",'\n')
+       }
+    Y <- rep(NA,nd)
+    
+    ## String out the distance matrices (vec) and assemble them as columns into matrix 'Y'
+    ## Construct also matrices of ranked distances D1[[k]] and D2[[k]] for permutation test
+    end <- 0
+    for(k in 1:nmat) {
+        begin <- end+1
+        end <- end+n
+        D.temp <- as.matrix(Dmat[begin:end,])
+        vec <- as.vector(as.dist(D.temp))
+        if(asymm) {
+               if(!make.sym) {
+                       ## Analysis carried out on asymmetric matrices: 
+                       ## The ranks are computed on the whole matrix except the diagonal values.
+                       ## The two halves are stored as symmetric matrices in D1[[k]] and D2[[k]]
+                       vec <- c(vec, as.vector(as.dist(t(D.temp))))
+                       diag(D.temp) <- NA
+                       D.temp2 <- rank(D.temp)
+                       diag(D.temp2) <- 0
+                       # cat("nrow =",nrow(D.temp2)," ncol =",ncol(D.temp2),'\n')
+                               # cat("Matrix ",k," min =",min(D.temp2)," max =",max(D.temp2),'\n')
+                               # cat("Matrix ",k," max values #",which(D.temp2 == max(D.temp2)),'\n')
+                       D1[[k]] <- as.matrix(as.dist(D.temp2))
+                       D2[[k]] <- as.matrix(as.dist(t(D.temp2)))
+                       } else {
+                       ## Asymmetric matrices transformed to be symmetric, stored in D1[[k]]
+                       vec <- (vec + as.vector(as.dist(t(D.temp)))) / 2
+                               D.temp2 <- (D.temp + t(D.temp)) / 2
+                               D.temp2 <- as.dist(D.temp2)
+                       D.temp2[] <- rank(D.temp2)
+                               D.temp2 <- as.matrix(D.temp2)
+                       D1[[k]] <- D.temp2
+                       }
+               } else {
+               ## Symmetric matrices are stored in D1[[k]]
+               D.temp2 <- as.dist(D.temp)
+               D.temp2[] <- rank(D.temp2)
+               D1[[k]] <- as.matrix(D.temp2)
+               }
+        Y <- cbind(Y, vec)
+        }
+    Y <- as.matrix(Y[,-1])
+    colnames(Y) <- colnames(Y,do.NULL = FALSE, prefix = "Dmat.")
+    
+    ## Begin calculations: compute reference value of S
+
+               ## Transform the distances to ranks, by column
+               Rmat <- apply(Y,2,rank)
+
+               ## Compute the S = Sum-of-Squares of the row-marginal sums of ranks (eq. 1a)
+               ## The ranks are weighted during the sum by the vector of matrix weights 'w'
+               sumRanks <- as.vector(Rmat%*%w)
+               S <- (nd-1)*var(sumRanks)
+
+    ## Begin a posteriori tests of individual matrices
+
+    ## Statistics displayed for each matrix: "Mantel.mean" and "W.per.matrix"
+       ## Calculate the mean of the Mantel correlations on ranks for each matrix
+       Mantel.cor <- cor(Rmat)
+       diag(Mantel.cor) <- 0
+       spear.mean <- as.vector(Mantel.cor%*%w)/(nmat-1)
+       ## Calculate Kendall's W for each variable
+       ## W.var <- ((nmat-1)*spear.mean+1)/nmat
+
+       ## P-value for each matrix: test of S, permuting values in matrix[[k]] only
+       ## as in program CADM.f (2004)
+       ## Initialize the counters
+       counter <- rep(1,nmat)
+
+       ## Test each matrix 'k' in turn
+       for(k in 1:nmat) {
+               ## Create a new Rmat table where the permuted column has been removed
+               Rmat.mod <- Rmat[,-k]
+                               
+               ## Permutation loop: string out permuted matrix 'k' only
+               for(j in 1:nperm) {
+                       order <- sample(n)
+                       if(asymm & !make.sym) {
+                               ## For asymmetric matrices: permute the values within each triangular 
+                               ## portion, stored as square matrices in D1[[]] and D2[[]]
+                               vec <- as.vector(as.dist(D1[[k]][order,order]))
+                           vec <- c(vec, as.vector(as.dist(D2[[k]][order,order])))
+                               } else {
+                               vec <- as.vector(as.dist(D1[[k]][order,order]))
+                               }
+                       Rmat.perm <- cbind(Rmat.mod, vec)
+                       S.perm <- (nd-1)*var(as.vector(Rmat.perm%*%w))
+                       if(S.perm >= S) counter[k] <- counter[k]+1
+               }
+       }
+
+       ## Calculate P-values
+       counter <- counter/(nperm+1)
+       
+       ## Correction to P-values for multiple testing
+               if(mult == "sidak") {
+                       vec.corr = NA
+                       for(i in 1:p) vec.corr = c(vec.corr, (1-(1-counter[i])^p))
+                       vec.corr <- vec.corr[-1]
+                       }
+               if(mult == "holm") vec.corr <- p.adjust(counter, method="holm")
+               if(mult == "bonferroni") vec.corr <- p.adjust(counter, method="bonferroni")
+
+       ## Create a data frame containing the results
+               # table <- rbind(spear.mean, W.var, counter, vec.corr)
+               # rownames(table) <- c("Mantel.mean", "W.per.matrix", "Prob", "Corrected prob")
+               table <- rbind(spear.mean, counter, vec.corr)
+               rownames(table) <- c("Mantel.mean", "Prob", "Corrected.prob")
+               colnames(table) <- colnames(table,do.NULL = FALSE, prefix = "Dmat.")
+       
+       ## Mantel tests
+       if(mantel) {
+               diag(Mantel.cor) <- 1
+               rownames(Mantel.cor) <- colnames(table)
+               colnames(Mantel.cor) <- colnames(table)
+               Mantel.prob <- matrix(1,nmat,nmat)
+               rownames(Mantel.prob) <- colnames(table)
+               colnames(Mantel.prob) <- colnames(table)
+               
+               for(j in 1:nperm) {   # Each matrix is permuted independently
+                                 # There is no need to permute the last matrix
+                       Rmat.perm <- rep(NA,nd)
+                       ##
+                       if(asymm & !make.sym) {
+                               ## For asymmetric matrices: permute the values within each triangular 
+                               ## portion, stored as square matrices in D1[[]] and D2[[]]
+                               for(k in 1:(nmat-1)) {
+                                       order <- sample(n)
+                                       vec <- as.vector(as.dist(D1[[k]][order,order]))
+                               vec <- c(vec, as.vector(as.dist(D2[[k]][order,order])))
+                                       Rmat.perm <- cbind(Rmat.perm, vec)
+                                       }
+                                       vec <- as.vector(as.dist(D1[[nmat]]))
+                               vec <- c(vec, as.vector(as.dist(D2[[nmat]])))
+                                       Rmat.perm <- cbind(Rmat.perm, vec)
+                               } else {
+                               for(k in 1:(nmat-1)) {
+                                       order <- sample(n)
+                                       vec <- as.vector(as.dist(D1[[k]][order,order]))
+                                       Rmat.perm <- cbind(Rmat.perm, vec)
+                                       }
+                                       vec <- as.vector(as.dist(D1[[nmat]]))
+                                       Rmat.perm <- cbind(Rmat.perm, vec)
+                               }
+                       # Remove the first column of Rmat.perm containing NA
+                       Rmat.perm <- as.matrix(Rmat.perm[,-1])
+                       # Compute Mantel correlations on ranks under permutation
+                       Mantel.cor.perm <- cor(Rmat.perm)
+                       for(j2 in 1:(nmat-1)) { # Compute prob in the upper tail
+                               for(j1 in (j2+1):nmat) {
+                                       if(Mantel.cor.perm[j1,j2] >= Mantel.cor[j1,j2]) Mantel.prob[j1,j2] <- Mantel.prob[j1,j2]+1
+                                       }
+                               }
+                       }
+               Mantel.prob <- as.matrix(as.dist(Mantel.prob/(nperm+1)))
+               diag(Mantel.prob) <- 1
+               }
+       
+       })
+       a[3] <- sprintf("%2f",a[3])
+       if(!silent) cat("Time to compute a posteriori tests (per matrix) =",a[3]," sec",'\n')
+
+       out <- list(A_posteriori_tests=table, Correction.type=mult)
+
+       if(mantel) {
+               out$Mantel.cor  <- Mantel.cor
+               out$Mantel.prob <- Mantel.prob
+               }
+       out$nperm <- nperm
+       class(out) <- "CADM.post"
+       out
+}
diff --git a/data/mat3.RData b/data/mat3.RData

new file mode 100644 (file)

index 0000000..e41691b

Binary files /dev/null and b/data/mat3.RData differ
diff --git a/data/mat5M3ID.RData b/data/mat5M3ID.RData

new file mode 100644 (file)

index 0000000..62b55ee

Binary files /dev/null and b/data/mat5M3ID.RData differ
diff --git a/data/mat5Mrand.RData b/data/mat5Mrand.RData

new file mode 100644 (file)

index 0000000..b47aa25

Binary files /dev/null and b/data/mat5Mrand.RData differ
diff --git a/man/CADM.global.Rd b/man/CADM.global.Rd

new file mode 100644 (file)

index 0000000..efd5729
--- /dev/null
+++ b/man/CADM.global.Rd
@@ -0,0 +1,121 @@
+\name{CADM.global}
+\alias{CADM.global}
+\alias{CADM.post}
+\title{ Congruence among distance matrices }
+\description{
+Function \code{\link{CADM.global}} compute and test the coefficient of concordance among several distance matrices through a permutation test.
+
+Function \code{\link{CADM.post}} carries out a posteriori permutation tests of the contributions of individual distance matrices to the overall concordance of the group.
+
+Use in phylogenetic analysis: to identify congruence among distance matrices (D) representing different genes or different types of data. Congruent D matrices correspond to data tables that can be used together in a combined phylogenetic or other type of multivariate analysis.
+}
+\usage{
+CADM.global(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, silent=FALSE)
+CADM.post  (Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, mult="holm", mantel=FALSE, silent=FALSE)
+}
+
+\arguments{
+  \item{Dmat}{ A text file listing the distance matrices one after the other, with or without blank lines in-between. Each matrix is in the form of a square distance matrix with 0's on the diagonal. }
+  \item{nmat}{ Number of distance matrices in file Dmat. }
+  \item{n}{ Number of objects in each distance matrix. All matrices must have the same number of objects. }
+  \item{nperm}{ Number of permutations for the tests of significance. }
+  \item{make.sym}{ TRUE: turn asymmetric matrices into symmetric matrices by averaging the two triangular portions. FALSE: analyse asymmetric matrices as they are. }
+  \item{weights}{ A vector of positive weights for the distance matrices. Example: weights = c(1,2,3). NULL (default): all matrices have same weight in the calculation of W. }
+  \item{mult}{ Method for correcting P-values in multiple testing. The methods are "holm" (default), "sidak", and "bonferroni". The Bonferroni correction is overly conservative; it is not recommended. It is included to allow comparisons with the other methods. }
+  \item{mantel}{ TRUE: Mantel statistics will be computed from ranked distances, as well as permutational P-values. FALSE (default): Mantel statistics and tests will not be computed. }
+  \item{silent}{ TRUE: informative messages will not be printed, but stopping messages will. Option useful for simulation work. FALSE: informative messages will be printed. }
+}
+\details{
+\code{Dmat} must contain two or more distance matrices, listed one after the other, all of the same size, and corresponding to the same objects in the same order. Raw data tables can be transformed into distance matrices before comparison with other such distance matrices, or with data that have been obtained as distance matrices, e.g. serological or DNA hybridization data. The distances will be transformed to ranks before computation of the coefficient of concordance and other statistics.
+
+\code{CADM.global} tests the global null hypothesis that all matrices are incongruent. If the global null is rejected, function \code{CADM.post} can be used to identify the concordant (H0 rejected) and discordant matrices (H0 not rejected) in the group. If a distance matrix has a negative value for the \code{Mantel.mean} statistic, that matrix clearly does not belong to the group. Remove that matrix (if there are more than one, remove first the matrix that has the most strongly negative value for \code{Mantel.mean}) and run the analysis again.
+
+The corrections used for multiple testing are applied to the list of P-values (P) produced in the a posteriori tests; they take into account the number of tests (k) carried out simulatenously (number of matrices, parameter \code{nmat}).
+
+The Holm correction is computed after ordering the P-values in a list with the smallest value to the left. Compute adjusted P-values as:
+
+\eqn{P_corr = (k-i+1)*P}
+
+where i is the position in the ordered list. Final step: from left to right, if an adjusted P_corr in the ordered list is smaller than the one occurring at its left, make the smallest one equal to the largest one.
+
+The Sidak correction is:
+
+\eqn{P_corr = 1 - (1 - P)^k}
+
+The Bonferonni correction is:
+
+\eqn{P_corr = k*P}
+}
+
+\value{
+
+\code{CADM.global} produces a small table containing the W, Chi2, and Prob.perm statistics described in the following list. 
+\code{CADM.post} produces a table stored in element $A_posteriori_tests, containing Mantel.mean, Prob, and Corrected.prob statistics in rows; the columns correspond to the k distance matrices under study, labeled Dmat.1 to Dmat.k.
+If parameter \code{mantel} is TRUE, tables of Mantel statistics and P-values are computed among the matrices.
+
+  \item{W }{Kendall's coefficient of concordance, W (Kendall and Babington Smith 1939). }
+  \item{Chi2 }{Friedman's chi-square statistic (Friedman 1937) used in the permutation test of W. }
+  \item{Prob.perm }{Permutational probability. }
+
+  \item{Mantel.mean }{Mean of the Mantel correlations, computed on rank-transformed distances, between the distance matrix under test and all the other matrices in the study. }
+  \item{Prob }{Permutational probabilities, uncorrected. }
+  \item{Corrected prob }{Permutational probabilities corrected using the method selected in parameter \code{mult}. }
+
+  \item{Mantel.cor }{Matrix of Mantel correlations, computed on rank-transformed distances, among the distance matrices. }
+  \item{Mantel.prob }{One-tailed P-values associated with the Mantel correlations of the previous table. The probabilities are computed in the right-hand tail. H0 is tested against the alternative one-tailed hypothesis that the Mantel correlation under test is positive. No correction is made for multiple testing. }
+}
+
+\references{ 
+Campbell, V., P. Legendre and F.-J. Lapointe. 2009. Assessing congruence among ultrametric distance matrices. Journal of Classification (In press).
+
+Campbell, V., P. Legendre and F.-J. Lapointe. Performance of the congruence test among distance matrices in phylogenetic analysis. (Submitted MS).
+
+Friedman, M. 1937. The use of ranks to avoid the assumption of normality implicit in the analysis of variance. Journal of the American Statistical Association 32: 675-701.
+
+Kendall, M. G. and B. Babington Smith. 1939. The problem of m rankings. Annals of Mathematical Statistics 10: 275-287.
+
+Lapointe, F.-J., J. A. W. Kirsch and J. M. Hutcheon. 1999. Total evidence, consensus, and bat phylogeny: a distance-based approach. Molecular Phylogenetics and Evolution 11: 55-66.
+
+Legendre, P. 2008. Coefficient of concordance. In: Encyclopedia of Research Design. SAGE Publications (in press).
+
+Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance matrices: single malt Scotch whiskies revisited. Australian and New Zealand Journal of Statistics 46: 615-629. 
+
+Legendre, P. et F.-J. Lapointe. 2005. Congruence entre matrices de distance. P. 178-181 in: Makarenkov, V., G. Cucumel et F.-J. Lapointe [eds] Comptes rendus des 12emes Rencontres de la Societe Francophone de Classification, Montreal, 30 mai - 1er juin 2005.
+
+Siegel, S. and N. J. Castellan, Jr. 1988. Nonparametric statistics for the behavioral sciences. 2nd edition. McGraw-Hill, New York.
+}
+\seealso{ \code{\link{kendall.W}}, \code{\link[ape:ape-package]{ape}} }
+\author{ Pierre Legendre, Universite de Montreal }
+
+\examples{
+
+# Examples 1 and 2: 5 genetic distance matrices computed from simulated DNA 
+# sequences representing 50 taxa having evolved along additive trees with 
+# identical evolutionary parameters (GTR+ Gamma + I). Distance matrices were 
+# computed from the DNA sequence matrices using a p distance corrected with the 
+# same parameters as those used to simulate the DNA sequences. See Campbell et 
+# al. (submitted) for details. 
+
+# First example: five independent additive trees. Data provided by V. Campbell.
+
+data(mat5Mrand)
+res.global <- CADM.global(mat5Mrand, 5, 50)
+
+# Second example: three partly similar trees, two independent trees. 
+# Data provided by V. Campbell.
+
+data(mat5M3ID)
+res.global <- CADM.global(mat5M3ID, 5, 50)
+res.post   <- CADM.post(mat5M3ID, 5, 50, mantel=TRUE)
+
+# Third example: three matrices respectively representing Serological 
+# (asymmetric), DNA hybridization (asymmetric) and Anatomical (symmetric) 
+# distances among 9 families. Data from Lapointe et al. (1999).
+
+data(mat3)
+res.global <- CADM.global(mat3, 3, 9, nperm=999)
+res.post   <- CADM.post(mat3, 3, 9, nperm=999, mantel=TRUE)
+}
+
+\keyword{ multivariate }
+\keyword{ nonparametric }
author	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Fri, 20 Feb 2009 10:28:02 +0000 (10:28 +0000)
committer	paradis <paradis@6e262413-ae40-0410-9e79-b911bd7a66b7>
	Fri, 20 Feb 2009 10:28:02 +0000 (10:28 +0000)
ChangeLog		patch \| blob \| history
DESCRIPTION		patch \| blob \| history
R/CDAM.global.R	[new file with mode: 0644]	patch \| blob
R/CDAM.post.R	[new file with mode: 0644]	patch \| blob
data/mat3.RData	[new file with mode: 0644]	patch \| blob
data/mat5M3ID.RData	[new file with mode: 0644]	patch \| blob
data/mat5Mrand.RData	[new file with mode: 0644]	patch \| blob
man/CADM.global.Rd	[new file with mode: 0644]	patch \| blob