1 ## chronopl.R (2008-03-26)
3 ## Molecular Dating With Penalized Likelihood
5 ## Copyright 2005-2008 Emmanuel Paradis
7 ## This file is part of the R-package `ape'.
8 ## See the file ../COPYING for licensing issues.
10 chronopl <- function(phy, lambda, age.min = 1, age.max = NULL,
11 node = "root", S = 1, tol = 1e-8,
12 CV = FALSE, eval.max = 500, iter.max = 500, ...)
14 n <- length(phy$tip.label)
16 if (length(node) == 1 && node == "root") node <- ROOT
18 stop("node numbers should be greater than the number of tips")
19 zerobl <- which(phy$edge.length <= 0)
21 if (any(phy$edge[zerobl, 2] <= n))
22 stop("at least one terminal branch is of length zero:
23 you should remove it to have a meaningful estimation.")
25 warning("at least one internal branch is of length zero:
26 it was collapsed and some nodes have been deleted.")
27 if (length(node) == 1 && node == ROOT)
31 if (is.null(phy$node.label)) {
33 phy$node.label <- paste("node", 1:phy$Nnode)
35 node.lab <- phy$node.label[node - n]
37 node <- match(node.lab, phy$node.label) + n
38 if (tmp) phy$node.label <- NULL
52 ## `basal' contains the indices of the basal edges
53 ## (ie, linked to the root):
54 basal <- which(e[, 1] == ROOT)
55 Nbasal <- length(basal)
57 ## `ind' contains in its 1st column the index of all nonbasal
58 ## edges, and in its second column the index of the edges
59 ## where these edges come from (ie, this matrix contains pairs
60 ## of contiguous edges), eg:
68 ind <- matrix(0L, N - Nbasal, 2)
69 ind[, 1] <- EDGES[-basal]
70 ind[, 2] <- match(e[EDGES[-basal], 1], e[, 2])
74 ##ini.time <- node.depth(phy)[-TIPS] - 1
75 ini.time <- node.depth(phy) - 1
77 ## first, rescale all times with respect to
78 ## the age of the 1st element of `node':
79 ratio <- age.min[1]/ini.time[node[1]]
80 ini.time <- ini.time*ratio
82 if (length(node) > 1) {
83 ini.time[node] <- age.min
84 real.edge.length <- ini.time[e[, 1]] - ini.time[e[, 2]]
85 while (any(real.edge.length <= 0)) {
87 if (real.edge.length[i] <= 0) {
88 if (e[i, 1] %in% node) {
90 ini.time[e[, 2]] - 2*real.edge.length[i]
93 if (e[i, 2] %in% node) {
95 ini.time[e[, 1]] + 2*real.edge.length[i]
99 ini.time[e[, 2]] - real.edge.length[i]
101 ini.time[e[, 1]] + real.edge.length[i]
104 real.edge.length <- ini.time[e[, 1]] - ini.time[e[, 2]]
108 ## `unknown.ages' will contain the index of the nodes of unknown age:
109 unknown.ages <- n + 1:m
111 ## define the bounds for the node ages:
112 lower <- rep(tol, length(unknown.ages))
113 upper <- rep(1/tol, length(unknown.ages))
115 if (!is.null(age.max)) { # are some nodes known within some intervals?
116 lower[node - n] <- age.min
117 upper[node - n] <- age.max
118 interv <- which(age.min != age.max)
119 node <- node[-interv]
120 if (length(node)) age[node] <- age.min[-interv]
121 } else age[node] <- age.min
124 unknown.ages <- unknown.ages[n - node]
125 lower <- lower[n - node]
126 upper <- upper[n - node]
129 ## `known.ages' contains the index of all nodes (internal and
130 ## terminal) of known age:
131 known.ages <- c(TIPS, node)
133 ## concatenate the bounds for the rates:
134 lower <- c(rep(tol, N), lower)
135 upper <- c(rep(1 - tol, N), upper)
137 minusploglik.gr <- function(rate, node.time) {
138 grad <- numeric(N + length(unknown.ages))
139 age[unknown.ages] <- node.time
140 real.edge.length <- age[e[, 1]] - age[e[, 2]]
141 if (any(real.edge.length < 0)) {
145 ## gradient for the rates:
146 ## the parametric part can be calculated without a loop:
147 grad[EDGES] <- real.edge.length - el/rate
148 if (Nbasal == 2) { # the simpler formulae if there's a basal dichotomy
150 grad[basal[1]] + lambda*(rate[basal[1]] - rate[basal[2]])
152 grad[basal[2]] + lambda*(rate[basal[2]] - rate[basal[1]])
153 } else { # the general case
155 grad[basal[i]] <- grad[basal[i]] +
156 lambda*(2*rate[basal[i]]*(1 - 1/Nbasal) -
157 2*sum(rate[basal[-i]])/Nbasal)/(Nbasal - 1)
161 ii <- c(which(e[, 2] == e[i, 1]), which(e[, 1] == e[i, 2]))
162 if (!length(ii)) next
163 grad[i] <- grad[i] + lambda*(2*length(ii)*rate[i] - 2*sum(rate[ii]))
166 ## gradient for the 'node times'
167 for (i in 1:length(unknown.ages)) {
168 nd <- unknown.ages[i]
169 ii <- which(e[, 1] == nd)
171 sum(rate[ii] - el[ii]/real.edge.length[ii])#, na.rm = TRUE)
173 ii <- which(e[, 2] == nd)
174 grad[i + N] <- grad[i + N] -
175 rate[ii] + el[ii]/real.edge.length[ii]
181 minusploglik <- function(rate, node.time) {
182 age[unknown.ages] <- node.time
183 real.edge.length <- age[e[, 1]] - age[e[, 2]]
184 if (any(real.edge.length < 0)) return(1e50)
185 B <- rate*real.edge.length
186 loglik <- sum(-B + el*log(B) - lfactorial(el))
187 -(loglik - lambda*(sum((rate[ind[, 1]] - rate[ind[, 2]])^2)
191 out <- nlminb(c(ini.rate, ini.time[unknown.ages]),
192 function(p) minusploglik(p[EDGES], p[-EDGES]),
193 function(p) minusploglik.gr(p[EDGES], p[-EDGES]),
194 control = list(eval.max = eval.max, iter.max = iter.max, ...),
195 lower = lower, upper = upper)
197 attr(phy, "ploglik") <- -out$objective
198 attr(phy, "rates") <- out$par[EDGES]
199 attr(phy, "message") <- out$message
200 age[unknown.ages] <- out$par[-EDGES]
202 phy$edge.length <- age[e[, 1]] - age[e[, 2]]
203 if (CV) attr(phy, "D2") <-
204 chronopl.cv(ophy, lambda, age.min, age.max, node,
205 n, S, tol, eval.max, iter.max, ...)
209 chronopl.cv <- function(ophy, lambda, age.min, age.max, nodes,
210 n, S, tol, eval.max, iter.max, ...)
211 ### ophy: the original phylogeny
212 ### n: number of tips
213 ### Note that we assume here that the order of the nodes
214 ### in node.label are not modified by the drop.tip operation
216 cat("Doing cross-validation\n")
217 BT <- branching.times(ophy)
222 cat(" ", i, sep = "")
223 tr <- drop.tip(ophy, i)
224 j <- which(ophy$edge[, 2] == i)
225 if (ophy$edge[j, 1] %in% nodes) {
226 k <- which(nodes == ophy$edge[j, 1])
228 agemin <- age.min[-k]
229 agemax <- age.max[-k]
232 chr <- chronopl(tr, lambda, age.min, age.max, node,
233 S, tol, FALSE, eval.max, iter.max, ...)
234 D2[i] <- sum((tmp - branching.times(chr))^2 / tmp)