Treatment: 4 diet groups
Response: Coagulation time
The Data:
| group1 | group2 | group3 | group4 |
|---|---|---|---|
| 62 | 63 | 68 | 56 |
| 60 | 67 | 66 | 62 |
| 63 | 71 | 71 | 60 |
| 59 | 64 | 67 | 61 |
|    | 65 | 68 | 63 |
|    | 66 | 68 | 64 |
|    |    |    | 63 |
|    |    |    | 59 |
Model:
\[\large{ \begin{eqnarray} y_{ij} &\sim& N(\theta_j,\sigma^2) \\ \theta_j &\sim& N(\mu,\tau^2) \\ (\mu,\text{log}\sigma,\tau) &\sim& \text{uniform prior} \end{eqnarray} }\]
Notation:
\(\large{y_{ij},i=1,\ldots,n_j,\; j=1,\ldots,J}\): coagulation time for the \(\large{i}\)-th animal in the \(\large{j}\)-th group.
\(\large{\theta_j,j=1,\ldots,J}\): mean for the \(\large{j}\)-th treatment group.
Gibbs sampler
Metropolis algorithm
Software: JAGS, Stan
\(\large{\theta_j}:\) choose overdispersed starting points by simply drawing random values from group \(\large{j}\).
\(\large{\mu}:\) take the average of the \(\large{\theta_j}\).
\(\large{\tau, \sigma}:\) no starting values needed; they can be drawn in the first steps of the Gibbs sampler.
\(\large{\theta_j | \mu,\sigma,\tau,y \sim N(\hat{\theta_j},V_{\theta_j})}\), where \(\large{\hat{\theta_j}=\frac{\frac{\mu}{\tau^2}+\frac{n_j}{\sigma^2}\bar{y}_{.j}}{\frac{1}{\tau^2}+\frac{n_j}{\sigma^2}}}\), \(\large{V_{\theta_j}=\frac{1}{\frac{1}{\tau^2}+\frac{n_j}{\sigma^2}}}\)
\(\large{\mu|\theta,\sigma,\tau,y \sim N(\hat{\mu},\tau^2/J)}\), where \(\large{\hat{\mu}=\frac{1}{J}\sum_{j=1}^J \theta_j}\)
\(\large{\sigma^2|\theta,\mu,\tau,y \sim \text{Inv-}\chi^2(n,\hat{\sigma}^2)}\), where \(\large{\hat{\sigma}^2=\frac{1}{n}\sum_{j=1}^{J}\sum_{i=1}^{n_j} (y_{ij}-\theta_j)^2}\)
\(\large{\tau^2|\theta,\mu,\sigma,y \sim \text{Inv-}\chi^2(J-1,\hat{\tau}^2)}\), where \(\large{\hat{\tau}^2=\frac{1}{J-1}\sum_{j=1}^J (\theta_j-\mu)^2}\)
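All of the code below assumes the data are available in the workspace as a vector y, a group indicator group (with observations sorted by group, which the vectorized code relies on), and a data frame dat holding those two columns; a minimal construction from the table above:

#Enter the data from the table above, sorted by group
y=c(62,60,63,59,
    63,67,71,64,65,66,
    68,66,71,67,68,68,
    56,62,60,61,63,64,63,59)
group=rep(1:4,c(4,6,6,8))
dat=data.frame(y,group)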
library(geoR) #provides rinvchisq() for (scaled) inverse chi-squared random numbers
gibbs=function(N,nthin,ndiscard,nchains){
  ## N: number of total iterations per chain
  ## nthin: thinning rate--thin the sequences by keeping every nthin-th simulation draw
  ## ndiscard: number of iterations to discard at the beginning (burn-in)
  ## nchains: number of Markov chains
  arr=array(NA,dim = c(N,nparams,nchains),
            dimnames = list(c(1:N),
                            c(paste('theta',1:4,sep = ""),"mu","sigma","tau"),
                            paste('chain',1:nchains,sep = "")))
  ## arr: stores the sampled values
  #Starting points
  theta=aggregate(y~group,data = dat,FUN = sample,nchains,replace=T)[,2]
  mu=colMeans(theta)
  #Gibbs sampling
  for(i in 1:N){
    #sigma
    sigma2_hat=colSums((y-matrix(rep(theta,times=rep(as.vector(table(group)),nchains)),
                                 ncol = nchains))^2)/n
    sigma2=rinvchisq(nchains,df=n,scale = sigma2_hat)
    #tau (sweep() subtracts each chain's mu from that chain's column of theta)
    tau2_hat=colSums(sweep(theta,2,mu)^2)/(J-1)
    tau2=rinvchisq(nchains,df=J-1,scale = tau2_hat)
    #theta
    theta_hat=aggregate(y~group,data = dat,
                        FUN = function(y){
                          num=mu/tau2+sum(y)/sigma2
                          den=1/tau2+length(y)/sigma2
                          return(num/den)
                        })[,2]
    V=aggregate(y~group,data = dat,
                FUN = function(y){
                  return(1/(1/tau2+length(y)/sigma2))
                })[,2]
    theta=rnorm(length(theta_hat[,1]),mean = theta_hat[,1],sd=sqrt(V[,1]))
    for(ch in 2:nchains){
      theta=cbind(theta,
                  rnorm(length(theta_hat[,ch]),mean = theta_hat[,ch],sd=sqrt(V[,ch])))
    }
    # theta=matrix(rnorm(J*nchains,mean = theta_hat,sd=sqrt(V)),ncol = nchains)
    #mu
    mu_hat=colMeans(theta)
    mu=rnorm(nchains,mean = mu_hat,sd=sqrt(tau2/J))
    #record the i-th iteration
    arr[i,,]=rbind(theta,mu,sqrt(sigma2),sqrt(tau2))
  }
  #discard the burn-in, then thin the retained draws
  arr1=arr[(ndiscard+1):N,,]
  arr2=arr[seq(ndiscard+1,N,by=nthin),,]
  return(list(arr=arr,arr_discard=arr1,arr_thin=arr2))
}
set.seed(222)
n=length(y);J=4;nparams=7
N=1500;nthin=1;nchains=5
samples_g=gibbs(N,nthin,ndiscard=N/2,nchains)
apply(samples_g$arr_thin,2,FUN = mean)
## theta1 theta2 theta3 theta4 mu sigma tau
## 61.226810 65.928245 67.817807 61.124255 64.147440 2.489103 8.753039
apply(samples_g$arr_thin,2,FUN = sd)
## theta1 theta2 theta3 theta4 mu sigma tau
## 1.7697192 1.3031052 1.4384974 1.1344045 6.3882468 0.8963711 9.0296164
apply(samples_g$arr_thin,2,FUN = quantile)
## theta1 theta2 theta3 theta4 mu sigma
## 0% 56.61572 61.26747 29.90920 48.87796 6.088071 1.486691
## 25% 60.31920 65.26394 67.14252 60.52523 62.025483 2.176331
## 50% 61.21053 65.92861 67.85173 61.11924 64.062131 2.415885
## 75% 62.06461 66.58551 68.49813 61.68879 66.113474 2.719351
## 100% 124.02273 114.74153 115.07477 102.41647 186.983664 48.232329
## tau
## 0% 0.7070118
## 25% 4.0747481
## 50% 6.3479288
## 75% 10.1655766
## 100% 225.4439367
for(i in 1:nparams){
  plot(density(as.vector(samples_g$arr_thin[,i,])),main = dimnames(samples_g$arr_thin)[[2]][i])
}
arr_thin_g=samples_g$arr_thin
for(i in 1:nparams){
  plot(seq(N/2+1,N,nthin),arr_thin_g[,i,1],type = 'l',ylab=dimnames(arr_thin_g)[[2]][i],xlab = "iteration")
  for(chain in 2:nchains){
    lines(seq(N/2+1,N,nthin),arr_thin_g[,i,chain],type='l',col=chain)
  }
}
arr1_g=samples_g$arr_discard
#stack the chains into one matrix; the factor below splits each chain into two halves
mat=arr1_g[,,1]
for(i in 2:dim(arr1_g)[3]){
  mat=rbind(mat,arr1_g[,,i])
}
mat=as.data.frame(mat)
chain=factor(rep(1:(2*nchains),each=dim(arr1_g)[1]/2))
Rhat=apply(mat,2,FUN = function(y){
  #between- and within-chain mean squares from a one-way ANOVA
  B=anova(aov(y~chain))$`Mean Sq`[1]
  W=anova(aov(y~chain))$`Mean Sq`[2]
  nsim=dim(arr1_g)[1]/2
  var_hat=(nsim-1)*W/nsim+B/nsim
  R_hat=sqrt(var_hat/W)
  return(R_hat)
})
Rhat
## theta1 theta2 theta3 theta4 mu sigma tau
## 1.0021045 1.0004658 1.0017665 1.0001103 0.9995815 1.0007163 1.0030033
n_eff=apply(mat,2,FUN = function(y){
  B=anova(aov(y~chain))$`Mean Sq`[1]
  W=anova(aov(y~chain))$`Mean Sq`[2]
  nsim=dim(arr1_g)[1]/2
  m=nchains*2 #number of half-chains
  var_hat=(nsim-1)*W/nsim+B/nsim
  neff=min(m*nsim*var_hat/B,m*nsim)
  return(neff)
})
n_eff
## theta1 theta2 theta3 theta4 mu sigma tau
## 1459.573 2781.563 1617.868 3464.217 3750.000 2442.674 1158.706
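For reference, the calculations above implement the split-chain diagnostics of Gelman et al.: each retained chain is split in half, and with \(\large{m}\) half-chains of length \(\large{n_{sim}}\) and \(\large{B}\), \(\large{W}\) the between- and within-chain mean squares from the one-way ANOVA,
\[\large{\widehat{\text{var}}^{+}=\frac{n_{sim}-1}{n_{sim}}W+\frac{1}{n_{sim}}B, \quad \hat{R}=\sqrt{\widehat{\text{var}}^{+}/W}, \quad n_{eff}=\text{min}\left(\frac{m\,n_{sim}\,\widehat{\text{var}}^{+}}{B},\; m\,n_{sim}\right)}\]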
\(\large{\theta_j}:\) choose overdispersed starting points by simply drawing random values from group \(\large{j}\).
\(\large{\mu}:\) take the average of the \(\large{\theta_j}\).
\(\large{\sigma}:\) \(\large{\sigma^2=\frac{1}{J}\sum_{j=1}^J \frac{1}{n_j-1}\sum_{i=1}^{n_j} (y_{ij}-\bar{y}_{.j})^2}\), the average within-group sample variance.
\(\large{\tau}:\) \(\large{\tau^2=\frac{1}{J-1}\sum_{j=1}^J (\theta_j-\bar{\theta})^2}\)
Work more efficiently in a lower-dimensional space: \(\large{(\mu,\sigma,\tau)}\)
Taking advantage of the conjugacy: compute \(\large{p(\mu,\text{log}\sigma,\text{log}\tau|y)}\)
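Concretely, evaluating the identity \(\large{p(\mu,\text{log}\sigma,\text{log}\tau|y)=p(\theta,\mu,\text{log}\sigma,\text{log}\tau|y)/p(\theta|\mu,\sigma,\tau,y)}\) at \(\large{\theta=\hat{\theta}}\) gives, up to a constant,
\[\large{p(\mu,\text{log}\sigma,\text{log}\tau|y)\propto \tau\prod_{j=1}^J N(\hat{\theta}_j|\mu,\tau^2)\prod_{j=1}^J\prod_{i=1}^{n_j} N(y_{ij}|\hat{\theta}_j,\sigma^2)\prod_{j=1}^J V_{\theta_j}^{1/2}}\]
where the leading \(\large{\tau}\) is the Jacobian from the uniform prior on \(\large{\tau}\) to the \(\large{\text{log}\tau}\) scale; this is the density that the function L() in the code below evaluates on the log scale.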
Steps:
Use the Metropolis algorithm to move through the marginal posterior distribution of \(\large{(\mu,\text{log}\sigma,\text{log}\tau)}\);
Draw \(\large{\theta}\) from its normal conditional posterior distribution.
Efficient jumping rules: \(\large{J(\zeta^\star|\zeta^{t-1})=N(\zeta^\star|\zeta^{t-1},c^2\Sigma)}\), where \(\large{\zeta=(\mu,\text{log}\sigma,\text{log}\tau)}\), \(\large{c=2.4/\sqrt{d}}\) with \(\large{d=\text{number of parameters}=3}\), and \(\large{\Sigma}\) equals that of a normal approximation, i.e.,
\[\large{\Sigma=\large[ -\frac{d^2 \text{log}p(\zeta|y)}{d \zeta^2} \large]_{\zeta=\hat{\zeta}}^{-1}}\]
where, as in the Gibbs sampler, \(\large{\hat{\theta_j}=\frac{\frac{\mu}{\tau^2}+\frac{n_j}{\sigma^2}\bar{y}_{.j}}{\frac{1}{\tau^2}+\frac{n_j}{\sigma^2}}}\) and \(\large{V_{\theta_j}=\frac{1}{\frac{1}{\tau^2}+\frac{n_j}{\sigma^2}}}\). The second derivatives are approximated numerically by central differences:
\[\large{\frac{d^2 L(\zeta)}{d\zeta_i d\zeta_j} \approx [L(\zeta+\delta_i e_i+\delta_j e_j)-L(\zeta-\delta_i e_i+\delta_j e_j)-L(\zeta+\delta_i e_i-\delta_j e_j)+L(\zeta-\delta_i e_i-\delta_j e_j)]/ (4\delta_i \delta_j)}\]
\(\large{\hat{\zeta}}\) is the mode of the marginal posterior distribution; use the EM algorithm to find it:
Starting points: the crude estimates
E-step: \[\large{E_{old}((\theta_j-\mu)^2)=(\hat{\theta}_j-\mu)^2+V_{\theta_j}}\] \[\large{E_{old}((y_{ij}-\theta_j)^2)=(y_{ij}-\hat{\theta}_j)^2+V_{\theta_j}}\]
M-step: \[\large{ \begin{eqnarray} \mu^{new} &=& \frac{1}{J}\sum_{j=1}^J \hat{\theta}_j \\ \sigma^{new} &=& \large(\frac{1}{n}\sum_{j=1}^{J}\sum_{i=1}^{n_j} ((y_{ij}-\hat{\theta}_j)^2+V_{\theta_j}) \large)^{1/2} \\ \tau^{new} &=& \large(\frac{1}{J-1}\sum_{j=1}^{J} ((\hat{\theta}_j-\mu^{new})^2+V_{\theta_j})\large)^{1/2} \end{eqnarray} }\]
Each proposal is accepted with probability \(\large{\text{min}(r,1)}\), where
\[\large{r=\frac{p(\mu^\star,\text{log}\sigma^\star,\text{log}\tau^\star |y)}{p(\mu^{t-1},\text{log}\sigma^{t-1},\text{log}\tau^{t-1} |y)}}\]
(computed on the log scale in the code below).
library(MASS) #provides mvrnorm() for the multivariate normal proposals
##Use the EM algorithm to find the marginal posterior mode
#crude estimates as starting points
theta_new=tapply(y,group,mean);mu_new=mean(theta_new)
sigma2_new=mean(tapply(y,group,var));tau2_new=var(theta_new)
theta_old=0;mu_old=0;sigma2_old=0;tau2_old=0
niter=0
while( (abs(mu_old-mu_new)>1e-04 | abs(sigma2_old-sigma2_new)>1e-06 | abs(tau2_old-tau2_new)>1e-04 ) & niter<10000){
  theta_old=theta_new;mu_old=mu_new;sigma2_old=sigma2_new;tau2_old=tau2_new
  #E-step: conditional posterior mean and variance of theta
  theta_hat_m=aggregate(y~group,data = dat,
                        FUN = function(y){
                          num=mu_old/tau2_old+sum(y)/sigma2_old
                          den=1/tau2_old+length(y)/sigma2_old
                          return(num/den)
                        })[,2]
  V_m=aggregate(y~group,data = dat,
                FUN = function(y){
                  return(1/(1/tau2_old+length(y)/sigma2_old))
                })[,2]
  #M-step: update the hyperparameters
  mu_new=mean(theta_hat_m)
  sigma2_new=(sum((y-rep(theta_hat_m,times=as.vector(table(group))))^2)
              +sum(as.vector(table(group))*V_m))/n
  tau2_new=sum((theta_hat_m-mu_new)^2+V_m)/(J-1)
  niter=niter+1
}
mu_mode=mu_new;sigma2_mode=sigma2_new;tau2_mode=tau2_new
##Approximate the second derivatives of the log marginal posterior distribution numerically
delta=1e-04
#log marginal posterior density of (mu, log_sigma, log_tau), up to a constant
L=function(mu,log_sigma,log_tau){
  sigma2=exp(log_sigma)^2
  tau2=exp(log_tau)^2
  theta_hat=aggregate(y~group,data = dat,
                      FUN = function(y){
                        num=mu/tau2+sum(y)/sigma2
                        den=1/tau2+length(y)/sigma2
                        return(num/den)
                      })[,2]
  V=aggregate(y~group,data = dat,
              FUN = function(y){
                return(1/(1/tau2+length(y)/sigma2))
              })[,2]
  #the trailing '+' at the end of each line keeps the whole sum,
  #including the 0.5*log(V) term, inside one expression
  L=log_tau+sum(dnorm(theta_hat,mu,sqrt(tau2),log=T))+
    sum(dnorm(y,mean = rep(theta_hat,times=as.vector(table(group))),
              sd=sqrt(sigma2),log=T))+
    sum(0.5*log(V))
  return(L)
}
#index 1 = mu; 2 = log_sigma; 3 = log_tau
H11=(L(mu_mode+delta*2,log(sigma2_mode)*0.5,log(tau2_mode)*0.5)
-2*L(mu_mode,log(sigma2_mode)*0.5,log(tau2_mode)*0.5)
+L(mu_mode-delta*2,log(sigma2_mode)*0.5,log(tau2_mode)*0.5) )/(4*delta^2)
H22=(L(mu_mode,log(sigma2_mode)*0.5+delta*2,log(tau2_mode)*0.5)
-2*L(mu_mode,log(sigma2_mode)*0.5,log(tau2_mode)*0.5)
+L(mu_mode,log(sigma2_mode)*0.5-delta*2,log(tau2_mode)*0.5) )/(4*delta^2)
H33=(L(mu_mode,log(sigma2_mode)*0.5,log(tau2_mode)*0.5+delta*2)
-2*L(mu_mode,log(sigma2_mode)*0.5,log(tau2_mode)*0.5)
+L(mu_mode,log(sigma2_mode)*0.5,log(tau2_mode)*0.5-delta*2) )/(4*delta^2)
H12=(L(mu_mode+delta,log(sigma2_mode)*0.5+delta,log(tau2_mode)*0.5)
-L(mu_mode+delta,log(sigma2_mode)*0.5-delta,log(tau2_mode)*0.5)
-L(mu_mode-delta,log(sigma2_mode)*0.5+delta,log(tau2_mode)*0.5)
+L(mu_mode-delta,log(sigma2_mode)*0.5-delta,log(tau2_mode)*0.5) )/(4*delta^2)
H13=(L(mu_mode+delta,log(sigma2_mode)*0.5,log(tau2_mode)*0.5+delta)
-L(mu_mode+delta,log(sigma2_mode)*0.5,log(tau2_mode)*0.5-delta)
-L(mu_mode-delta,log(sigma2_mode)*0.5,log(tau2_mode)*0.5+delta)
+L(mu_mode-delta,log(sigma2_mode)*0.5,log(tau2_mode)*0.5-delta) )/(4*delta^2)
H23=(L(mu_mode,log(sigma2_mode)*0.5+delta,log(tau2_mode)*0.5+delta)
-L(mu_mode,log(sigma2_mode)*0.5+delta,log(tau2_mode)*0.5-delta)
-L(mu_mode,log(sigma2_mode)*0.5-delta,log(tau2_mode)*0.5+delta)
+L(mu_mode,log(sigma2_mode)*0.5-delta,log(tau2_mode)*0.5-delta) )/(4*delta^2)
##The covariance matrix for the normal jumping kernel
H1=matrix(c(H11,H12,H13,H12,H22,H23,H13,H23,H33),ncol = 3) #Hessian at the mode
H=solve(H1)*(-1) #Sigma = inverse of the negative Hessian
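As a quick, optional sanity check, H must be a valid covariance matrix for the jumping kernel, i.e., positive definite:

#all eigenvalues of the jumping covariance should be positive
eigen(H)$values
#the marginal posterior mode (mu, log sigma, log tau) found by EM
c(mu=mu_mode,log_sigma=0.5*log(sigma2_mode),log_tau=0.5*log(tau2_mode))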
metropolis=function(N,nthin,ndiscard,nchains){
  arr=array(NA,dim = c(N,nparams,nchains),
            dimnames = list(c(1:N),
                            c(paste('theta',1:4,sep = ""),"mu","sigma","tau"),
                            paste('chain',1:nchains,sep = "")))
  ##Overdispersed starting points for the Metropolis algorithm
  theta=aggregate(y~group,data = dat,FUN = sample,nchains,replace=T)[,2]
  mu=colMeans(theta)
  sigma2=rep(mean(tapply(y,group,FUN = var)),nchains)
  tau2=apply(theta,2,var)
  ##Metropolis algorithm
  d=3 #number of parameters; jumping scale c = 2.4/sqrt(d)
  for(i in 1:N){
    ##Sample proposals from the jumping distribution N(.|current, c^2*Sigma)
    params_star=mvrnorm(1,mu=c(mu[1],log(sigma2[1])*0.5,log(tau2[1])*0.5),
                        Sigma = (2.4/sqrt(d))^2*H)
    for(ch in 2:nchains){
      params_star=rbind(params_star,
                        mvrnorm(1,mu=c(mu[ch],log(sigma2[ch])*0.5,log(tau2[ch])*0.5),
                                Sigma = (2.4/sqrt(d))^2*H))
    }
    mu_star=params_star[,1]
    log_sigma_star=params_star[,2];sigma2_star=exp(log_sigma_star)^2
    log_tau_star=params_star[,3];tau2_star=exp(log_tau_star)^2
    ##Calculate the acceptance ratio (on the log scale for numerical stability)
    log_num=L(mu_star[1],log(sigma2_star[1])*0.5,log(tau2_star[1])*0.5)
    for(ch in 2:nchains){
      log_num=c(log_num,L(mu_star[ch],log(sigma2_star[ch])*0.5,log(tau2_star[ch])*0.5))
    }
    log_den=L(mu[1],log(sigma2[1])*0.5,log(tau2[1])*0.5)
    for(ch in 2:nchains){
      log_den=c(log_den,L(mu[ch],log(sigma2[ch])*0.5,log(tau2[ch])*0.5))
    }
    #acceptance ratio
    r=exp(log_num-log_den)
    ##Accept or reject each chain's proposal
    #binary random number with success probability min(r,1)
    accept=rbinom(nchains,size = 1,prob = pmin(r,1))
    #new mu
    mu=rowSums(cbind(mu,mu_star)*cbind(1-accept,accept))
    #new sigma
    sigma2=rowSums(cbind(sigma2,sigma2_star)*cbind(1-accept,accept))
    #new tau
    tau2=rowSums(cbind(tau2,tau2_star)*cbind(1-accept,accept))
    ##Sample new theta from its conditional posterior distribution (same as Gibbs)
    theta_hat=aggregate(y~group,data = dat,
                        FUN = function(y){
                          num=mu/tau2+sum(y)/sigma2
                          den=1/tau2+length(y)/sigma2
                          return(num/den)
                        })[,2]
    V=aggregate(y~group,data = dat,
                FUN = function(y){
                  return(1/(1/tau2+length(y)/sigma2))
                })[,2]
    theta=matrix(rnorm(J*nchains,mean = theta_hat,sd=sqrt(V)),ncol = nchains)
    ##record the i-th iteration
    arr[i,,]=rbind(theta,mu,sqrt(sigma2),sqrt(tau2))
  }
  #discard the burn-in, then thin the retained draws
  arr1=arr[(ndiscard+1):N,,]
  arr2=arr[seq(ndiscard+1,N,by=nthin),,]
  return(list(arr=arr,arr_discard=arr1,arr_thin=arr2))
}
set.seed(222)
n=length(y);J=4;nparams=7
N=1500;nthin=1;nchains=5;ndiscard=N/2
samples_m=metropolis(N,nthin,ndiscard,nchains)
apply(samples_m$arr_thin,2,FUN = mean)
## theta1 theta2 theta3 theta4 mu sigma tau
## 61.268668 65.870548 67.757266 61.152226 63.942952 2.247331 5.546194
apply(samples_m$arr_thin,2,FUN = sd)
## theta1 theta2 theta3 theta4 mu sigma tau
## 1.1349269 0.9299713 0.9381882 0.8023437 2.5217698 0.3265883 4.2516998
apply(samples_m$arr_thin,2,FUN = quantile)
## theta1 theta2 theta3 theta4 mu sigma tau
## 0% 56.28354 62.34475 64.05446 58.43413 51.77725 1.425035 0.5496124
## 25% 60.52192 65.25554 67.14452 60.63496 62.53167 2.022264 3.0907738
## 50% 61.24985 65.87636 67.77855 61.13761 63.89958 2.210399 4.2766813
## 75% 62.02316 66.48300 68.38120 61.65718 65.16939 2.441658 6.4380150
## 100% 65.59524 69.56063 71.18273 64.12564 79.06300 4.067672 54.2553655
for(i in 1:nparams){
  plot(density(as.vector(samples_m$arr_thin[,i,])),main = dimnames(samples_m$arr_thin)[[2]][i])
}
arr_thin_m=samples_m$arr_thin
for(i in 1:nparams){
  plot(seq(N/2+1,N,nthin),arr_thin_m[,i,1],type = 'l',ylab=dimnames(arr_thin_m)[[2]][i],xlab = "iteration")
  for(chain in 2:nchains){
    lines(seq(N/2+1,N,nthin),arr_thin_m[,i,chain],type='l',col=chain)
  }
}
arr1_m=samples_m$arr_discard
mat=arr1_m[,,1]
for(i in 2:dim(arr1_m)[3]){
  mat=rbind(mat,arr1_m[,,i])
}
mat=as.data.frame(mat)
chain=factor(rep(1:(2*nchains),each=dim(arr1_m)[1]/2))
Rhat=apply(mat,2,FUN = function(y){
  B=anova(aov(y~chain))$`Mean Sq`[1]
  W=anova(aov(y~chain))$`Mean Sq`[2]
  nsim=dim(arr1_m)[1]/2
  var_hat=(nsim-1)*W/nsim+B/nsim
  R_hat=sqrt(var_hat/W)
  return(R_hat)
})
Rhat
## theta1 theta2 theta3 theta4 mu sigma tau
## 1.0030404 0.9990871 0.9996229 0.9998591 1.1513643 1.0306355 1.0919592
n_eff=apply(mat,2,FUN = function(y){
  B=anova(aov(y~chain))$`Mean Sq`[1]
  W=anova(aov(y~chain))$`Mean Sq`[2]
  nsim=dim(arr1_m)[1]/2
  m=nchains*2
  var_hat=(nsim-1)*W/nsim+B/nsim
  neff=min(m*nsim*var_hat/B,m*nsim)
  return(neff)
})
n_eff
## theta1 theta2 theta3 theta4 mu sigma
## 1148.92594 3750.00000 3750.00000 3750.00000 40.37813 163.72875
## tau
## 61.13442
library(R2jags)
mu_a=min(y); mu_b=max(y); log_sigma_b=2*log(sd(y));tau_b=2*sd(y) #rough guides for the uniform hyperprior ranges below
#Write the model in the JAGS language
cat("model{
##specify the distribution for observations
for(i in 1:n){
y[i]~dnorm(theta[group[i]],1/sigma2)
}
##specify the prior for theta
for(j in 1:J){
theta[j]~dnorm(mu,1/tau2)
}
##specify the prior for hyperparameters
mu~dunif(55,75)
log_sigma~dunif(-10,3)
sigma2<-exp(2*log_sigma)
sigma<-exp(log_sigma)
tau~dunif(0,8)
tau2<-pow(tau,2)
}",file="model.jag")
#Prepare the data
datlist=list('y','group','J','n')
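jags() also accepts the data as a named list, which avoids depending on objects in the global workspace; an equivalent form (given the y, group, J, n defined above):

#datlist=list(y=y,group=group,J=J,n=n) #equivalent named-list form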
#Call JAGS from R
samples_jags=jags(data = datlist,parameters.to.save = c('theta','mu','sigma','tau'),
model.file = 'model.jag',n.chains = nchains,n.iter = N,
n.burnin = ndiscard,n.thin = nthin)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 24
## Unobserved stochastic nodes: 7
## Total graph size: 130
##
## Initializing model
samples_jags$BUGSoutput$summary
## mean sd 2.5% 25% 50% 75%
## deviance 110.809734 3.6656153 106.036715 108.079751 110.105366 112.699181
## mu 63.981084 2.2854255 59.152783 62.576723 64.022952 65.375105
## sigma 2.460028 0.4211305 1.805325 2.162487 2.396482 2.688747
## tau 4.430385 1.5592327 1.894403 3.193914 4.291078 5.522600
## theta[1] 61.292702 1.2217256 58.908916 60.488746 61.280975 62.065920
## theta[2] 65.865279 1.0059236 63.845164 65.207928 65.867914 66.525188
## theta[3] 67.712821 1.0275781 65.672868 67.040757 67.722647 68.387651
## theta[4] 61.174376 0.8821369 59.449596 60.602598 61.169142 61.739311
## 97.5% Rhat n.eff
## deviance 119.973573 1.008294 560
## mu 68.619629 1.001795 2300
## sigma 3.424235 1.008148 490
## tau 7.569826 1.003249 1000
## theta[1] 63.827046 1.003446 970
## theta[2] 67.851504 1.001379 2900
## theta[3] 69.695605 1.001410 2800
## theta[4] 62.959155 1.000878 3800
#Display results
plot(as.mcmc(samples_jags),auto.layout = F )
#densplot(as.mcmc(samples_jags))
#traceplot(as.mcmc(samples_jags))
#Gelman and Rubin's convergence diagnostic
gelman.diag(as.mcmc(samples_jags))
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## deviance 1.01 1.02
## mu 1.00 1.00
## sigma 1.01 1.03
## tau 1.00 1.01
## theta[1] 1.00 1.01
## theta[2] 1.00 1.00
## theta[3] 1.00 1.00
## theta[4] 1.00 1.00
##
## Multivariate psrf
##
## 1.01
gelman.plot(as.mcmc(samples_jags))
library(rstan)
#Write the model in the Stan language
cat("
data{
int<lower=0> n; //number of observations
int<lower=0> J; //number of groups
real y[n]; //observations
int group[n]; //group of the observations
}
parameters{
vector[J] theta;
real<lower=0,upper=3> log_sigma;
real<lower=55,upper=75> mu;
real<lower=0,upper=5> tau;
}
transformed parameters{
real<lower=0> tau2;
real<lower=0> sigma2;
real<lower=0> sigma;
tau2<-pow(tau,2);
sigma2<-exp(2*log_sigma);
sigma<-exp(log_sigma);
}
model{
for(i in 1:n){
y[i]~normal(theta[group[i]],sqrt(sigma2)); //distribution of the observations
}
theta~normal(mu,sqrt(tau2)); //prior for theta
mu~uniform(55,75);
tau~uniform(0,5);
log_sigma~uniform(0,3);
}
",file="model.stan")
#Prepare the data
datlist=list('y','group','J','n')
#Call Stan from R
samples_stan<-stan(file = "model.stan",data = datlist,pars=c('theta','mu','tau','sigma'),
iter = N,chains = nchains,warmup = N/2,thin = nthin,init = 'random',
verbose = FALSE)
##
## SAMPLING FOR MODEL 'model' NOW (CHAIN 1).
##
## Chain 1, Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 1, Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 1, Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 1, Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 1, Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 1, Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 1, Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 1, Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 1, Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 1, Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 1, Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 1, Iteration: 1500 / 1500 [100%] (Sampling)#
## # Elapsed Time: 0.062 seconds (Warm-up)
## # 0.047 seconds (Sampling)
## # 0.109 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model' NOW (CHAIN 2).
##
## Chain 2, Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 2, Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 2, Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 2, Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 2, Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 2, Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 2, Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 2, Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 2, Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 2, Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 2, Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 2, Iteration: 1500 / 1500 [100%] (Sampling)#
## # Elapsed Time: 0.047 seconds (Warm-up)
## # 0.047 seconds (Sampling)
## # 0.094 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model' NOW (CHAIN 3).
##
## Chain 3, Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 3, Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 3, Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 3, Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 3, Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 3, Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 3, Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 3, Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 3, Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 3, Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 3, Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 3, Iteration: 1500 / 1500 [100%] (Sampling)#
## # Elapsed Time: 0.046 seconds (Warm-up)
## # 0.047 seconds (Sampling)
## # 0.093 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model' NOW (CHAIN 4).
##
## Chain 4, Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 4, Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 4, Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 4, Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 4, Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 4, Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 4, Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 4, Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 4, Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 4, Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 4, Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 4, Iteration: 1500 / 1500 [100%] (Sampling)#
## # Elapsed Time: 0.063 seconds (Warm-up)
## # 0.047 seconds (Sampling)
## # 0.11 seconds (Total)
## #
##
## SAMPLING FOR MODEL 'model' NOW (CHAIN 5).
##
## Chain 5, Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 5, Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 5, Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 5, Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 5, Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 5, Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 5, Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 5, Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 5, Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 5, Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 5, Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 5, Iteration: 1500 / 1500 [100%] (Sampling)#
## # Elapsed Time: 0.047 seconds (Warm-up)
## # 0.046 seconds (Sampling)
## # 0.093 seconds (Total)
## #
print(samples_stan)
## Inference for Stan model: model.
## 5 chains, each with iter=1500; warmup=750; thin=1;
## post-warmup draws per chain=750, total post-warmup draws=3750.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## theta[1] 61.40 0.02 1.16 59.10 60.62 61.41 62.18 63.63 2463 1
## theta[2] 65.78 0.02 0.98 63.81 65.14 65.79 66.42 67.73 2711 1
## theta[3] 67.60 0.02 1.00 65.59 66.97 67.63 68.27 69.50 2069 1
## theta[4] 61.22 0.02 0.86 59.59 60.66 61.20 61.78 62.95 2972 1
## mu 64.01 0.04 1.87 60.22 62.80 64.01 65.24 67.64 2192 1
## tau 3.46 0.02 0.90 1.77 2.79 3.48 4.21 4.92 1473 1
## sigma 2.45 0.01 0.42 1.78 2.17 2.40 2.68 3.37 2208 1
## lp__ -39.54 0.06 2.11 -44.53 -40.71 -39.19 -37.98 -36.51 1156 1
##
## Samples were drawn using NUTS(diag_e) at Sat Oct 22 16:22:06 2016.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
stan_plot(samples_stan)
stan_trace(samples_stan)
stan_hist(samples_stan)
stan_dens(samples_stan)
stan_diag(samples_stan)
stan_ac(samples_stan)