1 ## mcmc.popsize.R (2004-12-02)
3 ## Run reversible jump MCMC to sample demographic histories
5 ## Copyright 2004 Rainer Opgen-Rhein and Korbinian Strimmer
7 ## Portions of this function are adapted from rjMCMC code by
8 ## Karl Broman (see http://www.biostat.jhsph.edu/~kbroman/)
10 ## This file is part of the R-package `ape'.
11 ## See the file ../COPYING for licensing issues.
18 thinning=1, burn.in=0, progress.bar=TRUE,
19 method.prior.changepoints=c("hierarchical", "fixed.lambda"),
21 lambda=0.5, # "fixed.lambda" method.prior.changepoints
22 gamma.shape=0.5, gamma.scale=2, # gamma distribution from which lambda is drawn (for "hierarchical" method)
23 method.prior.heights=c("skyline", "constant", "custom"),
28 method.prior.changepoints <- match.arg(method.prior.changepoints)
29 method.prior.heights <- match.arg(method.prior.heights)
32 #Calculate skylineplot, coalescent intervals and estimated population sizes
34 if(attr(tree, "class")=="phylo")
35 {ci <- coalescent.intervals(tree)
37 else if (attr(tree, "class")=="coalescentIntervals")
41 stop("tree must be an object of class phylo or coalescentIntervals")
43 #consider possibility of more than one lineage
44 ci$lineages<-ci$lineages[sk1$interval.length>0]
45 ci$interval.length<-ci$interval.length[sk1$interval.length>0]
46 data<-sk1$time<-sk1$time[sk1$interval.length>0]
47 sk1$population.size<-sk1$population.size[sk1$interval.length>0]
48 sk1$interval.length<-sk1$interval.length[sk1$interval.length>0]
50 # constant prior for heights
52 if (method.prior.heights=="constant"){
53 prior.height.mean<-function(position){
54 return(mean(sk1$population.size))
56 prior.height.var<-function(position){
57 return((mean(sk1$population.size))^2)
61 # skyline plot prior for heights
63 if (method.prior.heights=="skyline"){
67 prior.change.times<-abs((0:numb.interv)*max(TIME)/numb.interv)
68 prior.height.mean.all<-prior.height.var.all<-vector(length=numb.interv)
69 for(p.int in 1:(numb.interv))
73 sample.pop<-sk1$population.size[sk1$time>=prior.change.times[left]&sk1$time<=prior.change.times[right]]
74 while(length(sample.pop)<10){
75 if(left>1){left<-left-1}
76 if(right<length(prior.change.times)){right<-right+1}
77 sample.pop<-sk1$population.size[sk1$time>=prior.change.times[left]&sk1$time<=prior.change.times[right]]
79 prior.height.mean.all[p.int]<-sum(sample.pop)/length(sample.pop)
80 prior.height.var.all[p.int]<-sum((sample.pop-prior.height.mean.all[p.int])^2)/(length(sample.pop)-1)
83 prior.height.mean<-function(position)
85 j<-sum(prior.change.times<=position)
86 if(j>=length(prior.height.mean.all)){j<-length(prior.height.mean.all)}
87 prior.mean<-prior.height.mean.all[j]
91 prior.height.var<-function(position)
93 j<-sum(prior.change.times<=position)
94 if(j>=length(prior.height.var.all)){j<-length(prior.height.var.all)}
95 prior.var<-prior.height.var.all[j]
100 if(method.prior.heights=="custom"){
101 if(missing(prior.height.mean)||missing(prior.height.var)){
102 stop("custom priors not specified")}
106 prior<-vector(length=4)
109 # set initial position of markov chain and likelihood
111 h<-c(rep(mean(sk1$population.size), 2))
113 b.lin<-choose(ci$lineages, 2)
117 count.it<-floor((nstep-burn.in)/thinning)
118 save.pos <- save.h <- vector("list",count.it)
119 save.loglik <- 1:count.it
120 save.steptype <- 1:count.it
121 save.accept <- 1:count.it
123 # calculate jump probabilities for given lambda of the prior
124 if(method.prior.changepoints=="fixed.lambda")
127 jump.prob <- matrix(ncol=4,nrow=prior[4]+1)
128 p <- dpois(0:prior[4],prior[1])/ppois(prior[4]+1,prior[1])
129 bk <- c(p[-1]/p[-length(p)],0)
131 dk <- c(0,p[-length(p)]/p[-1])
136 bk[is.na(bk)]<-0 # added
137 dk[is.na(dk)]<-0 # added
141 jump.prob[1,1] <- 1-bk[1]-dk[1]
142 jump.prob[-1,1] <- jump.prob[-1,2] <-
143 (1-jump.prob[-1,3]-jump.prob[-1,4])/2
147 # calculate starting loglik
148 curloglik <- loglik(data,pos,h,b.lin,sk1,ci)
153 if(progress.bar==TRUE)
155 X11(width=3, height=0.7)
156 par(mar=c(0.5,0.5,2,0.5))
157 plot(x=c(0,0),y=c(0,1), type="l", xlim=c(0,1), ylim=c(0,1),
158 main="rjMCMC in progress", ylab="", xlab="", xaxs="i", yaxs="i", xaxt="n", yaxt="n")
163 for(i in (1:nstep + 1)) {
169 polygon(c(zt,zt,z,z), c(1,0,0,1), col="black")
173 # calculate jump probabilities without given lamda
174 if(method.prior.changepoints=="hierarchical"){
175 prior[1]<-rgamma(1,shape=gamma.shape,scale=gamma.scale)
176 jump.prob <- matrix(ncol=4,nrow=prior[4]+1)
177 p <- dpois(0:prior[4],prior[1])/ppois(prior[4]+1,prior[1])
178 bk <- c(p[-1]/p[-length(p)],0)
180 dk <- c(0,p[-length(p)]/p[-1])
185 bk[is.na(bk)]<-0 # added
186 dk[is.na(dk)]<-0 # added
190 jump.prob[1,1] <- 1-bk[1]-dk[1]
191 jump.prob[-1,1] <- jump.prob[-1,2] <-
192 (1-jump.prob[-1,3]-jump.prob[-1,4])/2
195 # determine what type of jump to make
196 wh <- sample(1:4,1,prob=jump.prob[length(h)-1,])
198 if (i %% thinning == 0& i>burn.in) {save.steptype[[count.i]] <- wh}
201 step <- ht.move(data,pos,h,curloglik,prior, b.lin, sk1, ci, prior.height.mean, prior.height.var)
203 curloglik <- step[[2]]
204 if(i%%thinning==0 & i>burn.in){
205 save.pos[[count.i]]<-pos
207 save.loglik[[count.i]]<-step[[2]]
208 save.accept[[count.i]]<-step[[3]]
212 step <- pos.move(data,pos,h,curloglik, b.lin,sk1,ci)
214 curloglik <- step[[2]]
215 if(i%%thinning==0 & i>burn.in){
216 save.pos[[count.i]]<-pos
218 save.loglik[[count.i]]<-step[[2]]
219 save.accept[[count.i]]<-step[[3]]
223 step <- birth.step(data,pos,h,curloglik,prior,jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var)
226 curloglik <- step[[3]]
227 if(i%%thinning==0 & i>burn.in){
228 save.pos[[count.i]]<-pos
230 save.loglik[[count.i]]<-step[[3]]
231 save.accept[[count.i]]<-step[[4]]
235 step <- death.step(data,pos,h,curloglik,prior,jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var)
238 curloglik <- step[[3]]
239 if(i%%thinning==0 & i>burn.in){
240 save.pos[[count.i]]<-pos
242 save.loglik[[count.i]]<-step[[3]]
243 save.accept[[count.i]]<-step[[4]]
246 if (i %% thinning == 0& i>burn.in) {count.i<-count.i+1}
250 list(pos=save.pos,h=save.h,loglik=save.loglik,
251 steptype=save.steptype,accept=save.accept)
257 function(data,pos,h,curloglik,prior, b.lin,sk1,ci, prior.height.mean, prior.height.var)
260 j <- sample(1:length(h),1)
262 prior.mean<-prior.height.mean(pos[j])
263 prior.var<-prior.height.var(pos[j])
265 prior[3]<-prior.mean/prior.var
266 prior[2]<-(prior.mean^2)/prior.var
269 newh[j] <- h[j]*exp(runif(1,-0.5,0.5))
271 newloglik <- loglik(data,pos,newh,b.lin,sk1,ci)
272 lr <- newloglik - curloglik
274 ratio <- exp(lr + prior[2]*(log(newh[j])-log(h[j])) - prior[3]*(newh[j]-h[j]))
276 if(runif(1,0,1) < ratio)
277 return(list(newh,newloglik,1))
279 return(list(h,curloglik,0))
283 function(data,pos,h,curloglik, b.lin,sk1,ci)
286 if(length(pos)==3) j <- 2
287 else j <- sample(2:(length(pos)-1),1)
291 newpos[j] <- runif(1,left,right)
293 newloglik <- loglik(data,newpos,h, b.lin,sk1,ci)
294 lr <- newloglik - curloglik
296 ratio <- exp(lr) * (right-newpos[j])*(newpos[j]-left)/
297 (right-pos[j])/(pos[j]-left)
299 if(runif(1,0,1) < ratio)
300 return(list(newpos,newloglik,1))
302 return(list(pos,curloglik,0))
306 function(data,pos,h,curloglik,prior,jump.prob, b.lin, sk1, ci, prior.height.mean, prior.height.var)
309 newpos <- runif(1,0,pos[length(pos)])
310 j <- sum(pos < newpos)
315 prior.mean<-prior.height.mean(pos[j])
316 prior.var<-prior.height.var(pos[j])
317 prior[3]<-prior.mean/prior.var
318 prior[2]<-(prior.mean^2)/prior.var
320 u <- runif(1,-0.5,0.5)
321 oldh<-(((newpos-left)/(right-left))*(h[j+1]-h[j])+h[j])
322 newheight<-oldh*(1+u)
325 # recall that prior = (lambda, alpha, beta, maxk)
329 prior.logratio <- log(prior[1]) - log(k+1) + log((2*k+3)*(2*k+2)) - 2*log(L) +
330 log(newpos-left) + log(right-newpos) - log(right-left) +
331 prior[2]*log(prior[3]) - lgamma(prior[2]) +
332 (prior[2]-1) * log(newheight) +
335 proposal.ratio <- jump.prob[k+2,4]*L/jump.prob[k+1,3]/(k+1)
336 jacobian <- (((newpos-left)/(right-left))*(h[j+1]-h[j]))+h[j]
338 # form new parameters
339 newpos <- sort(c(pos,newpos))
340 newh <- c(h[1:j], newheight, h[(j+1):length(h)])
342 newloglik <- loglik(data,newpos,newh, b.lin,sk1,ci)
344 lr <- newloglik - curloglik
346 ratio <- exp(lr + prior.logratio) * proposal.ratio * jacobian
348 if(runif(1,0,1) < ratio)
349 return(list(newpos,newh,newloglik,1))
351 return(list(pos,h,curloglik,0))
355 function(data,pos,h,curloglik,prior,jump.prob, b.lin,sk1,ci, prior.height.mean, prior.height.var)
359 if(length(pos)==3) j <- 2
360 else j <- sample(2:(length(pos)-1),1)
365 prior.mean<-prior.height.mean(pos[j])
366 prior.var<-prior.height.var(pos[j])
367 prior[3]<-prior.mean/prior.var
368 prior[2]<-(prior.mean^2)/prior.var
373 newheight <- (((pos[j]-left)/(right-left))*(h.right-h.left)+h.left)
376 # recall that prior = (lambda, alpha, beta, maxk)
380 prior.logratio <- log(k+1) - log(prior[1]) - log(2*(k+1)*(2*k+3)) + 2*log(L) -
381 log(pos[j]-left) - log(right-pos[j]) + log(right-left) -
382 prior[2]*log(prior[3]) + lgamma(prior[2]) -
383 (prior[2]-1) * log(newheight) -
385 proposal.ratio <- (k+1)*jump.prob[k+1,3]/jump.prob[k+2,4]/L
386 jacobian <- ((pos[j]-left)/(right-left))*(h[j+1]-h[j-1])+h[j-1]
388 # form new parameters
393 newloglik <- loglik(data,newpos,newh, b.lin,sk1,ci)
395 lr <- newloglik - curloglik
397 ratio <- exp(lr + prior.logratio) * proposal.ratio * (jacobian^(-1))
399 if(runif(1,0,1) < ratio)
400 return(list(newpos,newh,newloglik,1))
402 return(list(pos,h,curloglik,0))
407 # calculate the log likelihood for a set of data
409 function(time=sk1$time, pos=c(0,max(sk1$time)), h=mean(sk1$population.size),b=b.lin,sk1,ci){
414 h1<-c(h, h[length(h)])
415 pos1<-c(pos, pos[length(pos)])
416 while(i<length(time)){
417 left.pos<-sum(data.time[i+1]>=pos)
418 right.pos<-left.pos+1
419 h.mix<-(((data.time[i+1]-pos[left.pos])/(pos[right.pos]-pos[left.pos]))*(h[right.pos]-h[left.pos]))+h[left.pos]
420 leftside<-leftside+log(b[i]/h.mix)
429 jumps<-sort(c(time1, pos))
431 while(time.count<=length(jumps)){
432 left.pos<-sum(jumps[time.count]>=pos)
433 right.pos<-left.pos+1
434 h.jumps[time.count]<-(((jumps[time.count]-pos[left.pos])/(pos[right.pos]-pos[left.pos]))*(h[right.pos]-h[left.pos]))+h[left.pos]
435 if(is.na(h.jumps[time.count])){h.jumps[time.count]<-h[left.pos]}
436 time.count<-time.count+1
439 # Vektor for lineages
441 lineages.jumps<-jumps
442 while(i<=length(jumps)){
443 lineages.jumps[i]<-sum(jumps[i]>=time)
444 if(lineages.jumps[i]==0){lineages.jumps[i]<-1}
447 lineage<-ci$lineages[lineages.jumps]
448 b1<-choose(lineage, 2)
451 a<-(h.jumps[-1]-h.jumps[-length(h.jumps)])/(jumps[-1]-jumps[-length(jumps)])
452 c<-h.jumps[-1]-jumps[-1]*a
453 area<-(1/a)*log(a*jumps[-1]+c)-(1/a)*log(a*jumps[-length(jumps)]+c)
454 stepfunction<-(jumps[-1]-jumps[-length(jumps)])/h.jumps[-1]
455 area[is.na(area)]<-stepfunction[is.na(area)]
457 rightside<-sum(area*b1[-1])
459 loglik<-leftside-rightside