# S functions for inference from iterative simulation
# Written 1 July 1991 by Andrew Gelman, Dept. of Statistics, UC Berkeley.
# Function "monitor" added 28 Feb 1994, fixed 20 Jan 1995.
# (df+3)/(df+1) fix on 2 Oct 1995--see Brooks and Gelman (1995) paper.
# Please mail all comments/questions to gelman@stat.berkeley.edu
# Software can be freely used for non-commercial purposes and freely
# distributed. Submitted to statlib on 29 April 1992. Current
# version submitted to statlib on 29 Dec 1995.
# Derivations appear in the articles, "Inference from iterative simulation
# using multiple sequences," by A. Gelman and D. B. Rubin, in "Statistical
# Science" 7, 457-511 (1992), and "Some new ideas on inference from
# iterative simulation using multiple sequences," by S. Brooks and
# A. Gelman, technical report (1995).
# How to Use These Programs
#
# Preparation: The results of m multiple sequences, each of length 2n,
# of iterative simulation (e.g., the Metropolis algorithm or the
# Gibbs sampler) used to simulate a probability distribution.
# The starting points of the simulations should be drawn from a
# distribution that is overdispersed relative to the "target"
# distribution you are trying to simulate.
#
# At each iteration, all scalar parameters or summaries of interest
# should be stored. The result will be an 2n by m matrix for each
# scalar summary.
#
# The program "gpar" (or "gpar.log" or "gpar.logit", see below) can be
# used for each scalar summary, or the results from k scalar summaries
# can be combined into an array of dimensions (2n, m, k) and put into
# the program "monitor".
#
# To run: Use gpar (or gpar.log or gpar.logit) to do output analysis
# on the simulations for each scalar summary of interest. For
# each scalar summary r, gpar produces two results:
#
# a. Estimates of the distribution of r,
# b. The estimated potential scale reduction;
# i.e., the factor by which the estimated t intervals for r
# might decrease if the iterative simulation is continued
# forever. As n increases, the scale reduction approaches 1.
# The following S functions are included:
# gpar (Gibbs-parallel) takes a matrix r of simulations: each of
# m columns is an independent iteratively simulated sequence of length
# 2n. The first n steps of each sequence are discarded.
# Output is a list of three vectors:
#
# post: (2.5%, 50%, 97.5%) quantiles for the target distribution
# based on the Student-t distribution
# quantiles: (2.5%, 25%, 50%, 75%, 97.5%) empirical quantiles of
# the mn simulated values.
# confshrink: 50% and 97.5% quantiles of a rough upper bound on
# how much the confidence interval of "post" will shrink
# if the iterative simulation is continued forever.
#
# If both components of confshrink are not near 1, you should
# probably run the iterative simulation further.
# gpar.log and gpar.logit are versions of gpar to be used for scalar
# summaries that are all-positive (gpar.log) or are constrained to lie
# between 0 and 1 (gpar.logit).
# monitor is a routine for monitoring the convergence of k scalar summaries
# at once: the input is an array of dimensions (2n, m, k).
# Output is a k by 7 matrix, with 2.5%, 25%, 50%, 75%, 97.5% quantiles,
# R-hat, and 97.5% upper bound for R-hat, for eack of the k scalar
# summaries.
# Optional additional input is trans: a vector of length k, each
# element of which is "", "log", or "logit", corresponding to no
# transformation, log transformation, or logit transformation for
# computing R-hat (the transformations have no effect on the quantiles).
#############################################################################
# SUBROUTINES
cov _ function (a,b) {
m _ length(a)
((mean ((a-mean(a))*(b-mean(b)))) * m)/(m-1)}
logit _ function (x) {log(x/(1-x))}
invlogit _ function (x) {1/(1+exp(-x))}
col.means _ function(mat) {
ones _ matrix(1, nrow = 1, ncol = nrow(mat))
ones %*% mat/nrow(mat)}
col.vars _ function(mat) {
means _ col.means(mat)
col.means(mat * mat) - means * means}
# Chi-squared degrees of freedom estimated by method of moments
#
# (Assume A has a gamma sampling distribution and varA is an unbiased
# estimate of the sampling variance of A.)
chisqdf _ function (A, varA) {2*(A^2/varA)}
#############################################################################
# MAIN PROGRAM
gpar _ function (r) {
alpha _ .05 # 95% intervals
m _ ncol(r)
x _ r [(nrow(r)/2+1):nrow(r),] # second half of simulated sequences
n _ nrow(x)
# We compute the following statistics:
#
# xdot: vector of sequence means
# s2: vector of sequence sample variances (dividing by n-1)
# W = mean(s2): within MS
# B = n*var(xdot): between MS.
# muhat = mean(xdot): grand mean; unbiased under strong stationarity
# varW = var(s2)/m: estimated sampling var of W
# varB = B^2 * 2/(m+1): estimated sampling var of B
# covWB = (n/m)*(cov(s2,xdot^2) - 2*muhat*cov(s^2,xdot)):
# estimated sampling cov(W,B)
# sig2hat = ((n-1)/n))*W + (1/n)*B: estimate of sig2; unbiased under
# strong stationarity
# quantiles: emipirical quantiles from last half of simulated sequences
xdot _ as.vector(col.means(x))
s2 _ as.vector(col.vars(x))
W _ mean(s2)
B _ n*var(xdot)
muhat _ mean(xdot)
varW _ var(s2)/m
varB _ B^2 * 2/(m-1)
covWB _ (n/m)*(cov(s2,xdot^2) - 2*muhat*cov(s2,xdot))
sig2hat _ ((n-1)*W + B)/n
quantiles _ quantile (as.vector(x), probs=c(.025,.25,.5,.75,.975))
if (W > 1.e-8) { # non-degenerate case
# Posterior interval post.range combines all uncertainties
# in a t interval with center muhat, scale sqrt(postvar),
# and postvar.df degrees of freedom.
#
# postvar = sig2hat + B/(mn): variance for the posterior interval
# The B/(mn) term is there because of the
# sampling variance of muhat.
# varpostvar: estimated sampling variance of postvar
postvar _ sig2hat + B/(m*n)
varpostvar _
(((n-1)^2)*varW + (1+1/m)^2*varB + 2*(n-1)*(1+1/m)*covWB)/n^2
post.df _ chisqdf (postvar, varpostvar)
post.range _ muhat + sqrt(postvar) * qt(1-alpha/2, post.df)*c(-1,0,1)
# Estimated potential scale reduction (that would be achieved by
# continuing simulations forever) has two components: an estimate and
# an approx. 97.5% upper bound.
#
# confshrink = sqrt(postvar/W),
# multiplied by sqrt(df/(df-2)) as an adjustment for the
### CHANGED TO sqrt((df+3)/(df+1))
# width of the t-interval with df degrees of freedom.
#
# postvar/W = (n-1)/n + (1+1/m)(1/n)(B/W); we approximate the sampling dist.
# of (B/W) by an F distribution, with degrees of freedom estimated
# from the approximate chi-squared sampling dists for B and W. (The
# F approximation assumes that the sampling dists of B and W are independent;
# if they are positively correlated, the approximation is conservative.)
varlo.df _ chisqdf (W, varW)
confshrink.range _ sqrt (c(postvar/W,
(n-1)/n + (1+1/m)*(1/n)*(B/W) * qf(.975, m-1, varlo.df)) *
(post.df+3)/(post.df+1))
list(post=post.range, quantiles=quantiles, confshrink=confshrink.range)
}
else { # degenerate case: all entries in "data matrix" are identical
list (post=muhat*c(1,1,1), quantiles=quantiles, confshrink=c(1,1))
}
}
##############################################################################
gpar.log _ function (r) {
gp _ gpar(log(r))
list (post=exp(gp$post), quantiles=exp(gp$quantiles),
confshrink=gp$confshrink)}
gpar.logit _ function (r) {
gp _ gpar(logit(r))
list (post=invlogit(gp$post), quantiles=invlogit(gp$quantiles),
confshrink=gp$confshrink)}
##############################################################################
monitor _ function (a, trans=rep("",
ifelse (length(dim(a))<3, 1, dim(a)[length(dim(a))]))) {
# a is a (2n) x m x k matrix: m sequences of length 2n, k variables measured
# trans is a vector of length k: "" if no transformation, or "log" or "logit"
output _ NULL
nparams _ ifelse (length(dim(a))<3, 1, dim(a)[length(dim(a))])
if (length(dim(a))==2) a _ array (a, c(dim(a),1))
for (i in 1:nparams){
if (trans[i]=="log") gp _ gpar.log(a[,,i])
else if (trans[i]=="logit") gp _ gpar.logit(a[,,i])
else gp _ gpar(a[,,i])
output _ rbind (output, c(gp$quantiles, gp$confshrink))
}
dimnames(output) _ list(dimnames(a)[[3]],c("2.5%","25%","50%","75%",
"97.5%","Rhat","Rupper"))
output
}