+++ /dev/null
-`CADM.global` <-
- function(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, silent=FALSE)
-{
-### Function to test the overall significance of the congruence among
-### a group of distance matrices using Kendall's coefficient of concordance W.
-###
-### copyleft - Pierre Legendre, December 2008
-###
-### Reference -
-### Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance
-### matrices: single malt Scotch whiskies revisited. Australian and New Zealand
-### Journal of Statistics 46: 615-629.
-###
-### Parameters of the function --
-###
-### Dmat = A text file listing the distance matrices one after the other, with
-### or without blank lines.
-### Each matrix is in the form of a square distance matrix with 0's
-### on the diagonal.
-###
-### nmat = number of distance matrices in file Dmat.
-###
-### n = number of objects in each distance matrix. All matrices have same n.
-###
-### nperm = number of permutations for the tests.
-###
-### make.sym = TRUE: turn asymmetric matrices into symmetric matrices by
-### averaging the two triangular portions.
-### = FALSE: analyse asymmetric matrices as they are.
-###
-### weights = a vector of positive weights for the distance matrices.
-### Example: weights = c(1,2,3)
-### = NULL (default): all matrices have same weight in calculation of W.
-###
-### silent = TRUE: informative messages will not be printed, except stopping
-### messages. Option useful for simulation work.
-### = FALSE: informative messages will be printed.
-###
-################################################################################
-
- if(nmat < 2)
- stop("Analysis requested for a single D matrix: CADM is useless")
-
- a <- system.time({
-
- ## Check the input file
- if(ncol(Dmat) != n)
- stop("Error in the value of 'n' or in the D matrices themselves")
- nmat2 <- nrow(Dmat)/n
- if(nmat2 < nmat) # OK if 'nmat' < number of matrices in the input file
- stop("Number of input D matrices = ",nmat2,"; this value is < nmat")
-
- nd <- n*(n-1)/2
- if(is.null(weights)) {
- w <- rep(1,nmat)
- } else {
- if(length(weights) != nmat)
- stop("Incorrect number of values in vector 'weights'")
- if(length(which(weights < 0)) > 0)
- stop("Negative weights are not permitted")
- w <- weights*nmat/sum(weights)
- if(!silent) cat("Normalized weights =",w,'\n')
- }
-
- ## Are asymmetric D matrices present?
- asy <- rep(FALSE, nmat)
- asymm <- FALSE
- end <- 0
- for(k in 1:nmat) {
- begin <- end+1
- end <- end+n
- D.temp <- Dmat[begin:end,]
- if(sum(abs(diag(as.matrix(D.temp)))) > 0)
- stop("Diagonal not 0: matrix #",k," is not a distance matrix")
- vec1 <- as.vector(as.dist(D.temp))
- vec2 <- as.vector(as.dist(t(D.temp)))
- if(sum(abs((vec1-vec2))) > 0) {
- if(!silent) cat("Matrix #",k," is asymmetric",'\n')
- asy[k] <- TRUE
- asymm <- TRUE
- }
- }
- D1 <- as.list(1:nmat)
- if(asymm) {
- if(make.sym) {
- if(!silent) cat("\nAsymmetric matrices were transformed to be symmetric",'\n')
- } else {
- nd <- nd*2
- if(!silent) cat("\nAnalysis carried out on asymmetric matrices",'\n')
- D2 <- as.list(1:nmat)
- }
- } else {
- if(!silent) cat("Analysis of symmetric matrices",'\n')
- }
- Y <- rep(NA,nd)
-
- ## String out the distance matrices (vec) and assemble them as columns into matrix 'Y'
- ## Construct also matrices of ranked distances D1[[k]] and D2[[k]] for permutation test
- end <- 0
- for(k in 1:nmat) {
- begin <- end+1
- end <- end+n
- D.temp <- as.matrix(Dmat[begin:end,])
- vec <- as.vector(as.dist(D.temp))
- if(asymm) {
- if(!make.sym) {
- ## Analysis carried out on asymmetric matrices:
- ## The ranks are computed on the whole matrix except the diagonal values.
- ## The two halves are stored as symmetric matrices in D1[[k]] and D2[[k]]
- vec <- c(vec, as.vector(as.dist(t(D.temp))))
- diag(D.temp) <- NA
- D.temp2 <- rank(D.temp)
- diag(D.temp2) <- 0
- # cat("nrow =",nrow(D.temp2)," ncol =",ncol(D.temp2),'\n')
- # cat("Matrix ",k," min =",min(D.temp2)," max =",max(D.temp2),'\n')
- # cat("Matrix ",k," max values #",which(D.temp2 == max(D.temp2)),'\n')
- D1[[k]] <- as.matrix(as.dist(D.temp2))
- D2[[k]] <- as.matrix(as.dist(t(D.temp2)))
- } else {
- ## Asymmetric matrices transformed to be symmetric, stored in D1[[k]]
- vec <- (vec + as.vector(as.dist(t(D.temp)))) / 2
- D.temp2 <- (D.temp + t(D.temp)) / 2
- D.temp2 <- as.dist(D.temp2)
- D.temp2[] <- rank(D.temp2)
- D.temp2 <- as.matrix(D.temp2)
- D1[[k]] <- D.temp2
- }
- } else {
- ## Symmetric matrices are stored in D1[[k]]
- D.temp2 <- as.dist(D.temp)
- D.temp2[] <- rank(D.temp2)
- D1[[k]] <- as.matrix(D.temp2)
- }
- Y <- cbind(Y, vec)
- }
- Y <- as.matrix(Y[,-1])
- colnames(Y) <- colnames(Y,do.NULL = FALSE, prefix = "Dmat.")
-
- ## Begin calculations for global test
-
- ## Compute the reference values of the statistics: W and Chi2
- ## Transform the distances to ranks, by column
- Rmat <- apply(Y,2,rank)
-
- ## Correction factors for tied ranks (eq. 3.3)
- t.ranks <- apply(Rmat, 2, function(x) summary(as.factor(x), maxsum=nd))
- TT <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x))))
- # if(!silent) cat("TT = ",TT,'\n')
-
- ## Compute the S = Sum-of-Squares of the row-marginal sums of ranks (eq. 1a)
- ## The ranks are weighted during the sum by the vector of matrix weights 'w'
- ## Eq. 1b cannot be used with weights; see formula for W below
- sumRanks <- as.vector(Rmat%*%w)
- S <- (nd-1)*var(sumRanks)
-
- ## Compute Kendall's W (eq. 2a)
- ## Eq. 2b cannot be used with weights
- ## because the sum of all ranks is not equal to m*n*(n+1)/2 in that case
- W <- (12*S)/(((nmat^2)*((nd^3)-nd))-(nmat*TT))
-
- ## Calculate Friedman's Chi-square (Kendall W paper, 2005, eq. 3.4)
- Chi2 <- nmat*(nd-1)*W
-
- ## Test the Chi2 statistic by permutation
- counter <- 1
- for(j in 1:nperm) { # Each matrix is permuted independently
- # There is no need to permute the last matrix
- Rmat.perm <- rep(NA,nd)
- ##
- if(asymm & !make.sym) {
- ## For asymmetric matrices: permute the values within each triangular
- ## portion, stored as square matrices in D1[[]] and D2[[]]
- for(k in 1:(nmat-1)) {
- order <- sample(n)
- vec <- as.vector(as.dist(D1[[k]][order,order]))
- vec <- c(vec, as.vector(as.dist(D2[[k]][order,order])))
- Rmat.perm <- cbind(Rmat.perm, vec)
- }
- vec <- as.vector(as.dist(D1[[nmat]]))
- vec <- c(vec, as.vector(as.dist(D2[[nmat]])))
- Rmat.perm <- cbind(Rmat.perm, vec)
- } else {
- for(k in 1:(nmat-1)) {
- order <- sample(n)
- vec <- as.vector(as.dist(D1[[k]][order,order]))
- Rmat.perm <- cbind(Rmat.perm, vec)
- }
- vec <- as.vector(as.dist(D1[[nmat]]))
- Rmat.perm <- cbind(Rmat.perm, vec)
- }
- # Remove the first column of Rmat.perm containing NA
- # The test is based on the comparison of S and S.perm instead of the comparison of
- # Chi2 and Chi2.perm: it is faster that way.
- # S, W, and Chi2 are equivalent statistics for permutation tests.
- Rmat.perm <- as.matrix(Rmat.perm[,-1])
- S.perm <- (nd-1)*var(as.vector(Rmat.perm%*%w))
- if(S.perm >= S) counter <- counter+1
- }
- prob.perm.gr <- counter/(nperm+1)
-
- table <- rbind(W, Chi2, prob.perm.gr)
- colnames(table) <- "Statistics"
- rownames(table) <- c("W", "Chi2", "Prob.perm")
- })
- a[3] <- sprintf("%2f",a[3])
- if(!silent) cat("\nTime to compute global test =",a[3]," sec",'\n')
-#
- # if(asymm & !make.sym) { out <- list(congruence_analysis=table, D1=D1, D2=D2)
- # } else {
- out <- list(congruence_analysis=table)
- # }
-#
- out$nperm <- nperm
- class(out) <- "CADM.global"
- out
-}