1 ## mcmc.popsize.R (2010-11-16)
3 ## Run reversible jump MCMC to sample demographic histories
5 ## Copyright 2004-2010 Rainer Opgen-Rhein and Korbinian Strimmer
7 ## Portions of this function are adapted from rjMCMC code by
8 ## Karl W Broman (see http://www.biostat.wisc.edu/~kbroman/)
10 ## This file is part of the R-package `ape'.
11 ## See the file ../COPYING for licensing issues.
18 thinning=1, burn.in=0, progress.bar=TRUE,
19 method.prior.changepoints=c("hierarchical", "fixed.lambda"),
21 lambda=0.5, # "fixed.lambda" method.prior.changepoints
22 gamma.shape=0.5, gamma.scale=2, # gamma distribution from which lambda is drawn (for "hierarchical" method)
23 method.prior.heights=c("skyline", "constant", "custom"),
28 method.prior.changepoints <- match.arg(method.prior.changepoints)
29 method.prior.heights <- match.arg(method.prior.heights)
32 #Calculate skylineplot, coalescent intervals and estimated population sizes
34 if(attr(tree, "class")=="phylo")
35 {ci <- coalescent.intervals(tree)
37 else if (attr(tree, "class")=="coalescentIntervals")
41 stop("tree must be an object of class phylo or coalescentIntervals")
43 #consider possibility of more than one lineage
44 ci$lineages<-ci$lineages[sk1$interval.length>0]
45 ci$interval.length<-ci$interval.length[sk1$interval.length>0]
46 data<-sk1$time<-sk1$time[sk1$interval.length>0]
47 sk1$population.size<-sk1$population.size[sk1$interval.length>0]
48 sk1$interval.length<-sk1$interval.length[sk1$interval.length>0]
50 # constant prior for heights
52 if (method.prior.heights=="constant"){
53 prior.height.mean<-function(position){
54 return(mean(sk1$population.size))
56 prior.height.var<-function(position){
57 return((mean(sk1$population.size))^2)
61 # skyline plot prior for heights
63 if (method.prior.heights=="skyline"){
67 prior.change.times<-abs((0:numb.interv)*max(TIME)/numb.interv)
68 prior.height.mean.all<-prior.height.var.all<-vector(length=numb.interv)
69 for(p.int in 1:(numb.interv))
73 sample.pop<-sk1$population.size[sk1$time>=prior.change.times[left]&sk1$time<=prior.change.times[right]]
74 while(length(sample.pop)<10){
75 if(left>1){left<-left-1}
76 if(right<length(prior.change.times)){right<-right+1}
77 sample.pop<-sk1$population.size[sk1$time>=prior.change.times[left]&sk1$time<=prior.change.times[right]]
79 prior.height.mean.all[p.int]<-sum(sample.pop)/length(sample.pop)
80 prior.height.var.all[p.int]<-sum((sample.pop-prior.height.mean.all[p.int])^2)/(length(sample.pop)-1)
83 prior.height.mean<-function(position)
85 j<-sum(prior.change.times<=position)
86 if(j>=length(prior.height.mean.all)){j<-length(prior.height.mean.all)}
87 prior.mean<-prior.height.mean.all[j]
91 prior.height.var<-function(position)
93 j<-sum(prior.change.times<=position)
94 if(j>=length(prior.height.var.all)){j<-length(prior.height.var.all)}
95 prior.var<-prior.height.var.all[j]
100 if(method.prior.heights=="custom"){
101 if(missing(prior.height.mean)||missing(prior.height.var)){
102 stop("custom priors not specified")}
106 prior<-vector(length=4)
109 # set initial position of markov chain and likelihood
111 h<-c(rep(mean(sk1$population.size), 2))
113 b.lin<-choose(ci$lineages, 2)
117 count.it<-floor((nstep-burn.in)/thinning)
118 save.pos <- save.h <- vector("list",count.it)
119 save.loglik <- 1:count.it
120 save.steptype <- 1:count.it
121 save.accept <- 1:count.it
123 # calculate jump probabilities for given lambda of the prior
124 if(method.prior.changepoints=="fixed.lambda")
127 jump.prob <- matrix(ncol=4,nrow=prior[4]+1)
128 p <- dpois(0:prior[4],prior[1])/ppois(prior[4]+1,prior[1])
129 bk <- c(p[-1]/p[-length(p)],0)
131 dk <- c(0,p[-length(p)]/p[-1])
136 bk[is.na(bk)]<-0 # added
137 dk[is.na(dk)]<-0 # added
141 jump.prob[1,1] <- 1-bk[1]-dk[1]
142 jump.prob[-1,1] <- jump.prob[-1,2] <-
143 (1-jump.prob[-1,3]-jump.prob[-1,4])/2
147 # calculate starting loglik
148 curloglik <- loglik(data,pos,h,b.lin,sk1,ci)
153 if(progress.bar==TRUE)
155 X11(width=3, height=0.7)
156 par(mar=c(0.5,0.5,2,0.5))
157 plot(x=c(0,0),y=c(0,1), type="l", xlim=c(0,1), ylim=c(0,1),
158 main="rjMCMC in progress", ylab="", xlab="", xaxs="i", yaxs="i", xaxt="n", yaxt="n")
163 for(i in (1:nstep + 1))
166 if(progress.bar==TRUE)
172 polygon(c(zt,zt,z,z), c(1,0,0,1), col="black")
176 # calculate jump probabilities without given lamda
177 if(method.prior.changepoints=="hierarchical"){
178 prior[1]<-rgamma(1,shape=gamma.shape,scale=gamma.scale)
179 jump.prob <- matrix(ncol=4,nrow=prior[4]+1)
180 p <- dpois(0:prior[4],prior[1])/ppois(prior[4]+1,prior[1])
181 bk <- c(p[-1]/p[-length(p)],0)
183 dk <- c(0,p[-length(p)]/p[-1])
188 bk[is.na(bk)]<-0 # added
189 dk[is.na(dk)]<-0 # added
193 jump.prob[1,1] <- 1-bk[1]-dk[1]
194 jump.prob[-1,1] <- jump.prob[-1,2] <-
195 (1-jump.prob[-1,3]-jump.prob[-1,4])/2
198 # determine what type of jump to make
199 wh <- sample(1:4,1,prob=jump.prob[length(h)-1,])
201 if (i %% thinning == 0& i>burn.in) {save.steptype[[count.i]] <- wh}
204 step <- ht.move(data,pos,h,curloglik,prior, b.lin, sk1, ci, prior.height.mean, prior.height.var)
206 curloglik <- step[[2]]
207 if(i%%thinning==0 & i>burn.in){
208 save.pos[[count.i]]<-pos
210 save.loglik[[count.i]]<-step[[2]]
211 save.accept[[count.i]]<-step[[3]]
215 step <- pos.move(data,pos,h,curloglik, b.lin,sk1,ci)
217 curloglik <- step[[2]]
218 if(i%%thinning==0 & i>burn.in){
219 save.pos[[count.i]]<-pos
221 save.loglik[[count.i]]<-step[[2]]
222 save.accept[[count.i]]<-step[[3]]
226 step <- birth.step(data,pos,h,curloglik,prior,jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var)
229 curloglik <- step[[3]]
230 if(i%%thinning==0 & i>burn.in){
231 save.pos[[count.i]]<-pos
233 save.loglik[[count.i]]<-step[[3]]
234 save.accept[[count.i]]<-step[[4]]
238 step <- death.step(data,pos,h,curloglik,prior,jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var)
241 curloglik <- step[[3]]
242 if(i%%thinning==0 & i>burn.in){
243 save.pos[[count.i]]<-pos
245 save.loglik[[count.i]]<-step[[3]]
246 save.accept[[count.i]]<-step[[4]]
249 if (i %% thinning == 0& i>burn.in) {count.i<-count.i+1}
252 if(progress.bar==TRUE) dev.off()
254 list(pos=save.pos,h=save.h,loglik=save.loglik,
255 steptype=save.steptype,accept=save.accept)
261 function(data,pos,h,curloglik,prior, b.lin,sk1,ci, prior.height.mean, prior.height.var)
264 j <- sample(1:length(h),1)
266 prior.mean<-prior.height.mean(pos[j])
267 prior.var<-prior.height.var(pos[j])
269 prior[3]<-prior.mean/prior.var
270 prior[2]<-(prior.mean^2)/prior.var
273 newh[j] <- h[j]*exp(runif(1,-0.5,0.5))
275 newloglik <- loglik(data,pos,newh,b.lin,sk1,ci)
276 lr <- newloglik - curloglik
278 ratio <- exp(lr + prior[2]*(log(newh[j])-log(h[j])) - prior[3]*(newh[j]-h[j]))
280 if(runif(1,0,1) < ratio)
281 return(list(newh,newloglik,1))
283 return(list(h,curloglik,0))
287 function(data,pos,h,curloglik, b.lin,sk1,ci)
290 if(length(pos)==3) j <- 2
291 else j <- sample(2:(length(pos)-1),1)
295 newpos[j] <- runif(1,left,right)
297 newloglik <- loglik(data,newpos,h, b.lin,sk1,ci)
298 lr <- newloglik - curloglik
300 ratio <- exp(lr) * (right-newpos[j])*(newpos[j]-left)/
301 (right-pos[j])/(pos[j]-left)
303 if(runif(1,0,1) < ratio)
304 return(list(newpos,newloglik,1))
306 return(list(pos,curloglik,0))
310 function(data,pos,h,curloglik,prior,jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var)
313 newpos <- runif(1,0,pos[length(pos)])
314 j <- sum(pos < newpos)
319 prior.mean<-prior.height.mean(pos[j])
320 prior.var<-prior.height.var(pos[j])
321 prior[3]<-prior.mean/prior.var
322 prior[2]<-(prior.mean^2)/prior.var
324 u <- runif(1,-0.5,0.5)
325 oldh<-(((newpos-left)/(right-left))*(h[j+1]-h[j])+h[j])
326 newheight<-oldh*(1+u)
329 # recall that prior = (lambda, alpha, beta, maxk)
333 prior.logratio <- log(prior[1]) - log(k+1) + log((2*k+3)*(2*k+2)) - 2*log(L) +
334 log(newpos-left) + log(right-newpos) - log(right-left) +
335 prior[2]*log(prior[3]) - lgamma(prior[2]) +
336 (prior[2]-1) * log(newheight) +
339 proposal.ratio <- jump.prob[k+2,4]*L/jump.prob[k+1,3]/(k+1)
340 jacobian <- (((newpos-left)/(right-left))*(h[j+1]-h[j]))+h[j]
342 # form new parameters
343 newpos <- sort(c(pos,newpos))
344 newh <- c(h[1:j], newheight, h[(j+1):length(h)])
346 newloglik <- loglik(data,newpos,newh, b.lin,sk1,ci)
348 lr <- newloglik - curloglik
350 ratio <- exp(lr + prior.logratio) * proposal.ratio * jacobian
352 if(runif(1,0,1) < ratio)
353 return(list(newpos,newh,newloglik,1))
355 return(list(pos,h,curloglik,0))
359 function(data,pos,h,curloglik,prior,jump.prob, b.lin,sk1,ci, prior.height.mean, prior.height.var)
363 if(length(pos)==3) j <- 2
364 else j <- sample(2:(length(pos)-1),1)
369 prior.mean<-prior.height.mean(pos[j])
370 prior.var<-prior.height.var(pos[j])
371 prior[3]<-prior.mean/prior.var
372 prior[2]<-(prior.mean^2)/prior.var
377 newheight <- (((pos[j]-left)/(right-left))*(h.right-h.left)+h.left)
380 # recall that prior = (lambda, alpha, beta, maxk)
384 prior.logratio <- log(k+1) - log(prior[1]) - log(2*(k+1)*(2*k+3)) + 2*log(L) -
385 log(pos[j]-left) - log(right-pos[j]) + log(right-left) -
386 prior[2]*log(prior[3]) + lgamma(prior[2]) -
387 (prior[2]-1) * log(newheight) -
389 proposal.ratio <- (k+1)*jump.prob[k+1,3]/jump.prob[k+2,4]/L
390 jacobian <- ((pos[j]-left)/(right-left))*(h[j+1]-h[j-1])+h[j-1]
392 # form new parameters
397 newloglik <- loglik(data,newpos,newh, b.lin,sk1,ci)
399 lr <- newloglik - curloglik
401 ratio <- exp(lr + prior.logratio) * proposal.ratio * (jacobian^(-1))
403 if(runif(1,0,1) < ratio)
404 return(list(newpos,newh,newloglik,1))
406 return(list(pos,h,curloglik,0))
411 # calculate the log likelihood for a set of data
413 function(time=sk1$time, pos=c(0,max(sk1$time)), h=mean(sk1$population.size),b=b.lin,sk1,ci){
418 h1<-c(h, h[length(h)])
419 pos1<-c(pos, pos[length(pos)])
420 while(i<length(time)){
421 left.pos<-sum(data.time[i+1]>=pos)
422 right.pos<-left.pos+1
423 h.mix<-(((data.time[i+1]-pos[left.pos])/(pos[right.pos]-pos[left.pos]))*(h[right.pos]-h[left.pos]))+h[left.pos]
424 leftside<-leftside+log(b[i]/h.mix)
433 jumps<-sort(c(time1, pos))
435 while(time.count<=length(jumps)){
436 left.pos<-sum(jumps[time.count]>=pos)
437 right.pos<-left.pos+1
438 h.jumps[time.count]<-(((jumps[time.count]-pos[left.pos])/(pos[right.pos]-pos[left.pos]))*(h[right.pos]-h[left.pos]))+h[left.pos]
439 if(is.na(h.jumps[time.count])){h.jumps[time.count]<-h[left.pos]}
440 time.count<-time.count+1
443 # Vektor for lineages
445 lineages.jumps<-jumps
446 while(i<=length(jumps)){
447 lineages.jumps[i]<-sum(jumps[i]>=time)
448 if(lineages.jumps[i]==0){lineages.jumps[i]<-1}
451 lineage<-ci$lineages[lineages.jumps]
452 b1<-choose(lineage, 2)
455 a<-(h.jumps[-1]-h.jumps[-length(h.jumps)])/(jumps[-1]-jumps[-length(jumps)])
456 c<-h.jumps[-1]-jumps[-1]*a
457 area<-(1/a)*log(a*jumps[-1]+c)-(1/a)*log(a*jumps[-length(jumps)]+c)
458 stepfunction<-(jumps[-1]-jumps[-length(jumps)])/h.jumps[-1]
459 area[is.na(area)]<-stepfunction[is.na(area)]
461 rightside<-sum(area*b1[-1])
463 loglik<-leftside-rightside