2 function(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, silent=FALSE)
4 ### Function to test the overall significance of the congruence among
5 ### a group of distance matrices using Kendall's coefficient of concordance W.
7 ### copyleft - Pierre Legendre, December 2008
10 ### Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance
11 ### matrices: single malt Scotch whiskies revisited. Australian and New Zealand
12 ### Journal of Statistics 46: 615-629.
14 ### Parameters of the function --
16 ### Dmat = A text file listing the distance matrices one after the other, with
17 ### or without blank lines.
18 ### Each matrix is in the form of a square distance matrix with 0's
21 ### nmat = number of distance matrices in file Dmat.
23 ### n = number of objects in each distance matrix. All matrices have same n.
25 ### nperm = number of permutations for the tests.
27 ### make.sym = TRUE: turn asymmetric matrices into symmetric matrices by
28 ### averaging the two triangular portions.
29 ### = FALSE: analyse asymmetric matrices as they are.
31 ### weights = a vector of positive weights for the distance matrices.
32 ### Example: weights = c(1,2,3)
33 ### = NULL (default): all matrices have same weight in calculation of W.
35 ### silent = TRUE: informative messages will not be printed, except stopping
36 ### messages. Option useful for simulation work.
37 ### = FALSE: informative messages will be printed.
39 ################################################################################
42 stop("Analysis requested for a single D matrix: CADM is useless")
46 ## Check the input file
48 stop("Error in the value of 'n' or in the D matrices themselves")
50 if(nmat2 < nmat) # OK if 'nmat' < number of matrices in the input file
51 stop("Number of input D matrices = ",nmat2,"; this value is < nmat")
54 if(is.null(weights)) {
57 if(length(weights) != nmat)
58 stop("Incorrect number of values in vector 'weights'")
59 if(length(which(weights < 0)) > 0)
60 stop("Negative weights are not permitted")
61 w <- weights*nmat/sum(weights)
62 if(!silent) cat("Normalized weights =",w,'\n')
65 ## Are asymmetric D matrices present?
66 asy <- rep(FALSE, nmat)
72 D.temp <- Dmat[begin:end,]
73 if(sum(abs(diag(as.matrix(D.temp)))) > 0)
74 stop("Diagonal not 0: matrix #",k," is not a distance matrix")
75 vec1 <- as.vector(as.dist(D.temp))
76 vec2 <- as.vector(as.dist(t(D.temp)))
77 if(sum(abs((vec1-vec2))) > 0) {
78 if(!silent) cat("Matrix #",k," is asymmetric",'\n')
86 if(!silent) cat("\nAsymmetric matrices were transformed to be symmetric",'\n')
89 if(!silent) cat("\nAnalysis carried out on asymmetric matrices",'\n')
93 if(!silent) cat("Analysis of symmetric matrices",'\n')
97 ## String out the distance matrices (vec) and assemble them as columns into matrix 'Y'
98 ## Construct also matrices of ranked distances D1[[k]] and D2[[k]] for permutation test
103 D.temp <- as.matrix(Dmat[begin:end,])
104 vec <- as.vector(as.dist(D.temp))
107 ## Analysis carried out on asymmetric matrices:
108 ## The ranks are computed on the whole matrix except the diagonal values.
109 ## The two halves are stored as symmetric matrices in D1[[k]] and D2[[k]]
110 vec <- c(vec, as.vector(as.dist(t(D.temp))))
112 D.temp2 <- rank(D.temp)
114 # cat("nrow =",nrow(D.temp2)," ncol =",ncol(D.temp2),'\n')
115 # cat("Matrix ",k," min =",min(D.temp2)," max =",max(D.temp2),'\n')
116 # cat("Matrix ",k," max values #",which(D.temp2 == max(D.temp2)),'\n')
117 D1[[k]] <- as.matrix(as.dist(D.temp2))
118 D2[[k]] <- as.matrix(as.dist(t(D.temp2)))
120 ## Asymmetric matrices transformed to be symmetric, stored in D1[[k]]
121 vec <- (vec + as.vector(as.dist(t(D.temp)))) / 2
122 D.temp2 <- (D.temp + t(D.temp)) / 2
123 D.temp2 <- as.dist(D.temp2)
124 D.temp2[] <- rank(D.temp2)
125 D.temp2 <- as.matrix(D.temp2)
129 ## Symmetric matrices are stored in D1[[k]]
130 D.temp2 <- as.dist(D.temp)
131 D.temp2[] <- rank(D.temp2)
132 D1[[k]] <- as.matrix(D.temp2)
136 Y <- as.matrix(Y[,-1])
137 colnames(Y) <- colnames(Y,do.NULL = FALSE, prefix = "Dmat.")
139 ## Begin calculations for global test
141 ## Compute the reference values of the statistics: W and Chi2
142 ## Transform the distances to ranks, by column
143 Rmat <- apply(Y,2,rank)
145 ## Correction factors for tied ranks (eq. 3.3)
146 t.ranks <- apply(Rmat, 2, function(x) summary(as.factor(x)))
147 TT <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x))))
148 # if(!silent) cat("TT = ",TT,'\n')
150 ## Compute the S = Sum-of-Squares of the row-marginal sums of ranks (eq. 1a)
151 ## The ranks are weighted during the sum by the vector of matrix weights 'w'
152 ## Eq. 1b cannot be used with weights; see formula for W below
153 sumRanks <- as.vector(Rmat%*%w)
154 S <- (nd-1)*var(sumRanks)
156 ## Compute Kendall's W (eq. 2a)
157 ## Eq. 2b cannot be used with weights
158 ## because the sum of all ranks is not equal to m*n*(n+1)/2 in that case
159 W <- (12*S)/(((nmat^2)*((nd^3)-nd))-(nmat*TT))
161 ## Calculate Friedman's Chi-square (Kendall W paper, 2005, eq. 3.4)
162 Chi2 <- nmat*(nd-1)*W
164 ## Test the Chi2 statistic by permutation
166 for(j in 1:nperm) { # Each matrix is permuted independently
167 # There is no need to permute the last matrix
168 Rmat.perm <- rep(NA,nd)
170 if(asymm & !make.sym) {
171 ## For asymmetric matrices: permute the values within each triangular
172 ## portion, stored as square matrices in D1[[]] and D2[[]]
173 for(k in 1:(nmat-1)) {
175 vec <- as.vector(as.dist(D1[[k]][order,order]))
176 vec <- c(vec, as.vector(as.dist(D2[[k]][order,order])))
177 Rmat.perm <- cbind(Rmat.perm, vec)
179 vec <- as.vector(as.dist(D1[[nmat]]))
180 vec <- c(vec, as.vector(as.dist(D2[[nmat]])))
181 Rmat.perm <- cbind(Rmat.perm, vec)
183 for(k in 1:(nmat-1)) {
185 vec <- as.vector(as.dist(D1[[k]][order,order]))
186 Rmat.perm <- cbind(Rmat.perm, vec)
188 vec <- as.vector(as.dist(D1[[nmat]]))
189 Rmat.perm <- cbind(Rmat.perm, vec)
191 # Remove the first column of Rmat.perm containing NA
192 # The test is based on the comparison of S and S.perm instead of the comparison of
193 # Chi2 and Chi2.perm: it is faster that way.
194 # S, W, and Chi2 are equivalent statistics for permutation tests.
195 Rmat.perm <- as.matrix(Rmat.perm[,-1])
196 S.perm <- (nd-1)*var(as.vector(Rmat.perm%*%w))
197 if(S.perm >= S) counter <- counter+1
199 prob.perm.gr <- counter/(nperm+1)
201 table <- rbind(W, Chi2, prob.perm.gr)
202 colnames(table) <- "Statistics"
203 rownames(table) <- c("W", "Chi2", "Prob.perm")
205 a[3] <- sprintf("%2f",a[3])
206 if(!silent) cat("\nTime to compute global test =",a[3]," sec",'\n')
208 # if(asymm & !make.sym) { out <- list(congruence_analysis=table, D1=D1, D2=D2)
210 out <- list(congruence_analysis=table)
214 class(out) <- "CADM.global"