X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=R%2FCADM.global.R;fp=R%2FCADM.global.R;h=5644ca5ad3af09ebfb47dd79f1f950a9bbec1ab6;hb=0eea4e45fea5cbcd63d5b257578579d52970d68c;hp=0000000000000000000000000000000000000000;hpb=dfe58641d0e6fe53612710cd92401306273609f4;p=ape.git diff --git a/R/CADM.global.R b/R/CADM.global.R new file mode 100644 index 0000000..5644ca5 --- /dev/null +++ b/R/CADM.global.R @@ -0,0 +1,216 @@ +`CADM.global` <- + function(Dmat, nmat, n, nperm=99, make.sym=TRUE, weights=NULL, silent=FALSE) +{ +### Function to test the overall significance of the congruence among +### a group of distance matrices using Kendall's coefficient of concordance W. +### +### copyleft - Pierre Legendre, December 2008 +### +### Reference - +### Legendre, P. and F.-J. Lapointe. 2004. Assessing congruence among distance +### matrices: single malt Scotch whiskies revisited. Australian and New Zealand +### Journal of Statistics 46: 615-629. +### +### Parameters of the function -- +### +### Dmat = A text file listing the distance matrices one after the other, with +### or without blank lines. +### Each matrix is in the form of a square distance matrix with 0's +### on the diagonal. +### +### nmat = number of distance matrices in file Dmat. +### +### n = number of objects in each distance matrix. All matrices have same n. +### +### nperm = number of permutations for the tests. +### +### make.sym = TRUE: turn asymmetric matrices into symmetric matrices by +### averaging the two triangular portions. +### = FALSE: analyse asymmetric matrices as they are. +### +### weights = a vector of positive weights for the distance matrices. +### Example: weights = c(1,2,3) +### = NULL (default): all matrices have same weight in calculation of W. +### +### silent = TRUE: informative messages will not be printed, except stopping +### messages. Option useful for simulation work. +### = FALSE: informative messages will be printed. +### +################################################################################ + + if(nmat < 2) + stop("Analysis requested for a single D matrix: CADM is useless") + + a <- system.time({ + + ## Check the input file + if(ncol(Dmat) != n) + stop("Error in the value of 'n' or in the D matrices themselves") + nmat2 <- nrow(Dmat)/n + if(nmat2 < nmat) # OK if 'nmat' < number of matrices in the input file + stop("Number of input D matrices = ",nmat2,"; this value is < nmat") + + nd <- n*(n-1)/2 + if(is.null(weights)) { + w <- rep(1,nmat) + } else { + if(length(weights) != nmat) + stop("Incorrect number of values in vector 'weights'") + if(length(which(weights < 0)) > 0) + stop("Negative weights are not permitted") + w <- weights*nmat/sum(weights) + if(!silent) cat("Normalized weights =",w,'\n') + } + + ## Are asymmetric D matrices present? + asy <- rep(FALSE, nmat) + asymm <- FALSE + end <- 0 + for(k in 1:nmat) { + begin <- end+1 + end <- end+n + D.temp <- Dmat[begin:end,] + if(sum(abs(diag(as.matrix(D.temp)))) > 0) + stop("Diagonal not 0: matrix #",k," is not a distance matrix") + vec1 <- as.vector(as.dist(D.temp)) + vec2 <- as.vector(as.dist(t(D.temp))) + if(sum(abs((vec1-vec2))) > 0) { + if(!silent) cat("Matrix #",k," is asymmetric",'\n') + asy[k] <- TRUE + asymm <- TRUE + } + } + D1 <- as.list(1:nmat) + if(asymm) { + if(make.sym) { + if(!silent) cat("\nAsymmetric matrices were transformed to be symmetric",'\n') + } else { + nd <- nd*2 + if(!silent) cat("\nAnalysis carried out on asymmetric matrices",'\n') + D2 <- as.list(1:nmat) + } + } else { + if(!silent) cat("Analysis of symmetric matrices",'\n') + } + Y <- rep(NA,nd) + + ## String out the distance matrices (vec) and assemble them as columns into matrix 'Y' + ## Construct also matrices of ranked distances D1[[k]] and D2[[k]] for permutation test + end <- 0 + for(k in 1:nmat) { + begin <- end+1 + end <- end+n + D.temp <- as.matrix(Dmat[begin:end,]) + vec <- as.vector(as.dist(D.temp)) + if(asymm) { + if(!make.sym) { + ## Analysis carried out on asymmetric matrices: + ## The ranks are computed on the whole matrix except the diagonal values. + ## The two halves are stored as symmetric matrices in D1[[k]] and D2[[k]] + vec <- c(vec, as.vector(as.dist(t(D.temp)))) + diag(D.temp) <- NA + D.temp2 <- rank(D.temp) + diag(D.temp2) <- 0 + # cat("nrow =",nrow(D.temp2)," ncol =",ncol(D.temp2),'\n') + # cat("Matrix ",k," min =",min(D.temp2)," max =",max(D.temp2),'\n') + # cat("Matrix ",k," max values #",which(D.temp2 == max(D.temp2)),'\n') + D1[[k]] <- as.matrix(as.dist(D.temp2)) + D2[[k]] <- as.matrix(as.dist(t(D.temp2))) + } else { + ## Asymmetric matrices transformed to be symmetric, stored in D1[[k]] + vec <- (vec + as.vector(as.dist(t(D.temp)))) / 2 + D.temp2 <- (D.temp + t(D.temp)) / 2 + D.temp2 <- as.dist(D.temp2) + D.temp2[] <- rank(D.temp2) + D.temp2 <- as.matrix(D.temp2) + D1[[k]] <- D.temp2 + } + } else { + ## Symmetric matrices are stored in D1[[k]] + D.temp2 <- as.dist(D.temp) + D.temp2[] <- rank(D.temp2) + D1[[k]] <- as.matrix(D.temp2) + } + Y <- cbind(Y, vec) + } + Y <- as.matrix(Y[,-1]) + colnames(Y) <- colnames(Y,do.NULL = FALSE, prefix = "Dmat.") + + ## Begin calculations for global test + + ## Compute the reference values of the statistics: W and Chi2 + ## Transform the distances to ranks, by column + Rmat <- apply(Y,2,rank) + + ## Correction factors for tied ranks (eq. 3.3) + t.ranks <- apply(Rmat, 2, function(x) summary(as.factor(x), maxsum=nd)) + TT <- sum(unlist(lapply(t.ranks, function(x) sum((x^3)-x)))) + # if(!silent) cat("TT = ",TT,'\n') + + ## Compute the S = Sum-of-Squares of the row-marginal sums of ranks (eq. 1a) + ## The ranks are weighted during the sum by the vector of matrix weights 'w' + ## Eq. 1b cannot be used with weights; see formula for W below + sumRanks <- as.vector(Rmat%*%w) + S <- (nd-1)*var(sumRanks) + + ## Compute Kendall's W (eq. 2a) + ## Eq. 2b cannot be used with weights + ## because the sum of all ranks is not equal to m*n*(n+1)/2 in that case + W <- (12*S)/(((nmat^2)*((nd^3)-nd))-(nmat*TT)) + + ## Calculate Friedman's Chi-square (Kendall W paper, 2005, eq. 3.4) + Chi2 <- nmat*(nd-1)*W + + ## Test the Chi2 statistic by permutation + counter <- 1 + for(j in 1:nperm) { # Each matrix is permuted independently + # There is no need to permute the last matrix + Rmat.perm <- rep(NA,nd) + ## + if(asymm & !make.sym) { + ## For asymmetric matrices: permute the values within each triangular + ## portion, stored as square matrices in D1[[]] and D2[[]] + for(k in 1:(nmat-1)) { + order <- sample(n) + vec <- as.vector(as.dist(D1[[k]][order,order])) + vec <- c(vec, as.vector(as.dist(D2[[k]][order,order]))) + Rmat.perm <- cbind(Rmat.perm, vec) + } + vec <- as.vector(as.dist(D1[[nmat]])) + vec <- c(vec, as.vector(as.dist(D2[[nmat]]))) + Rmat.perm <- cbind(Rmat.perm, vec) + } else { + for(k in 1:(nmat-1)) { + order <- sample(n) + vec <- as.vector(as.dist(D1[[k]][order,order])) + Rmat.perm <- cbind(Rmat.perm, vec) + } + vec <- as.vector(as.dist(D1[[nmat]])) + Rmat.perm <- cbind(Rmat.perm, vec) + } + # Remove the first column of Rmat.perm containing NA + # The test is based on the comparison of S and S.perm instead of the comparison of + # Chi2 and Chi2.perm: it is faster that way. + # S, W, and Chi2 are equivalent statistics for permutation tests. + Rmat.perm <- as.matrix(Rmat.perm[,-1]) + S.perm <- (nd-1)*var(as.vector(Rmat.perm%*%w)) + if(S.perm >= S) counter <- counter+1 + } + prob.perm.gr <- counter/(nperm+1) + + table <- rbind(W, Chi2, prob.perm.gr) + colnames(table) <- "Statistics" + rownames(table) <- c("W", "Chi2", "Prob.perm") + }) + a[3] <- sprintf("%2f",a[3]) + if(!silent) cat("\nTime to compute global test =",a[3]," sec",'\n') +# + # if(asymm & !make.sym) { out <- list(congruence_analysis=table, D1=D1, D2=D2) + # } else { + out <- list(congruence_analysis=table) + # } +# + out$nperm <- nperm + class(out) <- "CADM.global" + out +}