]> git.donarmstrong.com Git - ape.git/blob - R/parafit.R
few corrections and fixes
[ape.git] / R / parafit.R
1 'parafit' <-
2         function(host.D, para.D, HP, nperm=999, test.links=FALSE, seed=NULL, correction="none", silent=FALSE)
3 #
4 # Test of host-parasite coevolution
5 # host.D = host distance or patristic matrix (class dist or matrix)
6 # para.D = parasite distance or patristic matrix (class dist or matrix)
7 # HP = host-parasite link matrix (n.host, n.para)
8 #
9 #      Pierre Legendre, May 2009
10 {
11 epsilon <- sqrt(.Machine$double.eps)
12 if(is.null(seed)) {
13         runif(1)
14         seed <- .Random.seed[trunc(runif(1,1,626))]
15         }
16
17 host.D <- as.matrix(host.D)
18 host.pc <- pcoa(host.D, correction=correction)
19 if(host.pc$correction[2] == 1) {
20         if(min(host.pc$values[,2]) < -epsilon) stop('Host D matrix has negative eigenvalues. Rerun with correction="lingoes" or correction="cailliez"')
21         sum.host.values.sq <- sum(host.pc$values[,1]^2)
22         host.vectors <- host.pc$vectors
23         } else {
24         sum.host.values.sq <- sum(host.pc$values[,2]^2)
25         host.vectors <- host.pc$vectors.cor
26         }
27 n.host <- nrow(host.D)
28
29 para.D <- as.matrix(para.D)
30 para.pc <- pcoa(para.D, correction=correction)
31 if(para.pc$correction[2] == 1) {
32         if(min(para.pc$values[,2]) < -epsilon) stop('Parasite D matrix has negative eigenvalues. Rerun with correction="lingoes" or correction="cailliez"')
33         sum.para.values.sq <- sum(para.pc$values[,1]^2)
34         para.vectors <- para.pc$vectors
35         } else {
36         sum.para.values.sq <- sum(para.pc$values[,2]^2)
37         para.vectors <- para.pc$vectors.cor
38         }
39 n.para <- nrow(para.D)
40
41 if(!silent) cat("n.hosts =", n.host, ", n.parasites =", n.para,'\n')
42
43 a <- system.time({
44 tracemax <- max(sum.host.values.sq, sum.para.values.sq)
45
46 if(n.host == n.para) {
47         if(!silent) cat("The function cannot check if matrix HP has been entered in the right way.",'\n')
48         if(!silent) cat("It will assume that the rows of HP are the hosts.",'\n')
49         } else {
50         temp <- dim(HP)
51         if(temp[1] == n.host) {
52                 if(temp[2] != n.para) stop("Matrices host.D, para.D and HP not comformable")
53                 } else if(temp[2] == n.host) {
54                         if(temp[1] != n.para) stop("Matrices host.D, para.D and HP not comformable")
55                         HP <- t(HP)
56                         if(!silent) cat("Matrix HP has been transposed for comformity with host.D and para.D.",'\n')
57                 } else {
58                 stop("Matrices host.D, para.D and HP not comformable")
59                 }
60         }
61 p.per.h <- apply(HP, 1, sum)
62 h.per.p <- apply(HP, 2, sum)
63 #
64 # Compute and test the global statistics
65 mat.4 <- t(host.vectors) %*% HP %*% para.vectors
66 global <- sum(mat.4^2)
67 if(nperm > 0) {
68         set.seed(seed)
69         nGT <- 1
70         global.perm <- NA
71         for(i in 1:nperm) {
72                 HP.perm <- apply(HP, 2, sample)
73                 mat.4.perm <- t(host.vectors) %*% HP.perm %*% para.vectors
74                 global.perm <- c(global.perm, sum(mat.4.perm^2))
75                 if(global.perm[i+1] >= global) nGT <- nGT+1
76                 }
77         global.perm <- global.perm[-1]
78         p.global <- nGT/(nperm+1)
79         } else { p.global <- NA }
80
81 #
82 # Test individual H-P links
83 if(test.links) {
84         # 1. Create the list of H-P pairs
85         list.hp <- which( t(cbind(HP,rep(0,n.host))) > 0)
86         HP.list <- cbind((list.hp %/% (n.para+1))+1, list.hp %% (n.para+1))
87         colnames(HP.list) <- c("Host","Parasite")
88         n.links <- length(list.hp)
89
90         stat1 <- NA
91         stat2 <- NA
92         p.stat1 <- NA
93         p.stat2 <- NA
94         for(k in 1:n.links) {
95                 #
96                 # 2. Compute reference values of link statistics
97                 HP.k <- HP
98                 HP.k[HP.list[k,1], HP.list[k,2]] <- 0
99                 mat.4.k <- t(host.vectors) %*% HP.k %*% para.vectors
100                 trace.k <- sum(mat.4.k^2)
101                 stat1 <- c(stat1, (global-trace.k))
102                 den <- tracemax-global
103                 if(den > epsilon) { 
104                         stat2 <- c(stat2, stat1[k+1]/den) 
105                         } else { 
106                         stat2 <- c(stat2, NA) 
107                         }
108                 #
109                 # 3. Test link statistics by permutations
110                 if(nperm > 0) {
111                         set.seed(seed)
112                         nGT1 <- 1
113                         nGT2 <- 1
114                         nperm2 <- nperm
115                         #
116                         for(i in 1:nperm) {
117                                 HP.k.perm <- apply(HP.k, 2, sample)
118                                 mat.4.k.perm <- t(host.vectors) %*% HP.k.perm %*% para.vectors
119                                 trace.k.perm <- sum(mat.4.k.perm^2)
120                                 stat1.perm <- global.perm[i]-trace.k.perm
121                                 if(stat1.perm >= stat1[k+1]) nGT1 <- nGT1+1
122                                 #
123                                 if(!is.na(stat2[k+1])) {
124                                         den <- tracemax-global.perm[i]
125                                         if(den > epsilon) {
126                                                 stat2.perm <- stat1.perm/den 
127                                                 if(stat2.perm >= stat2[k+1]) nGT2 <- nGT2+1
128                                                 } else {
129                                                 nperm2 <- nperm2-1
130                                                 # if(!silent) cat("In permutation #",i,"den < epsilon",'\n')
131                                                 }
132                                         }
133                                 }
134                         p.stat1 <- c(p.stat1, nGT1/(nperm+1))
135                         if(!is.na(stat2[k+1])) {
136                                 p.stat2 <- c(p.stat2, nGT2/(nperm2+1))
137                                 } else {
138                                 p.stat2 <- NA
139                                 }
140                         } else { 
141                         p.stat1 <- NA
142                         p.stat2 <- NA
143                         }
144                 }
145         #
146         link.table <- cbind(HP.list, stat1[-1], p.stat1[-1], stat2[-1], p.stat2[-1])
147         colnames(link.table) = c("Host","Parasite","F1.stat","p.F1","F2.stat","p.F2")
148         out <-list(ParaFitGlobal=global, p.global=p.global, link.table=link.table, para.per.host=p.per.h, host.per.para=h.per.p, nperm=nperm)
149         } else {
150         if(!silent) cat("Rerun the program with option 'test.links=TRUE' to test the individual H-P links",'\n')
151         out <-list(ParaFitGlobal=global, p.global=p.global, para.per.host=p.per.h, host.per.para=h.per.p, nperm=nperm)
152         }
153 #
154 })
155 a[3] <- sprintf("%2f",a[3])
156 if(!silent) cat("Computation time =",a[3]," sec",'\n')
157 #
158 class(out) <- "parafit"
159 out
160 }