# 第 9 章 贝叶斯回归

## 9.2 贝叶斯线性回归

### 9.2.1 模型形式

$y_i = \beta_0 + \beta_1 x_{i1} + \cdots + \beta_p x_{ip} + \epsilon_i,$ 其中 $$\epsilon_i \sim N(0, \sigma^2)$$。为了估计这个回归模型，我们希望得到 $$\beta_0,\beta_1, \cdots, \beta_p$$ 以及 $$\sigma^2$$ 的联合概率分布。根据高斯假设下的最小二乘估计，我们有 $$\beta|\sigma^2 \sim N\left[(x'x)^{-1}x'y,\ (x'x)^{-1}\sigma^2\right], \qquad \sigma^2 \sim \chi^2(n-p)。$$

\begin{align*} p\left(\beta, \sigma^{2} | y, x\right) & \propto p\left(y | \beta, \sigma^{2}, x\right) p\left(\beta, \sigma^{2}\right) \\ &=p\left(y | \beta, \sigma^{2}, x\right) p\left(\beta | \sigma^{2}\right) p\left(\sigma^{2}\right) \end{align*} 其中 $$p(y|\beta,\sigma^2,x)$$ 是模型的似然函数，$$p(\beta,\sigma^2) = p(\beta|\sigma^2)p(\sigma^2)$$ 是先验信息，$$p(\beta,\sigma^2|y,x)$$ 是综合数据和先验之后得到的后验分布。

### 9.2.4 后验分布

###----------------------------------------------
### DGP: Data generating process
###----------------------------------------------

n <- 1000
p <- 3

epsilon <- rnorm(n, 0, 0.1)
beta <- matrix(c(-3, 1, 3, 5))
X <- cbind(1, matrix(runif(n*p), n))
y <- X%*%beta + epsilon

###----------------------------------------------
### the log posterior function
###----------------------------------------------

library(mvtnorm)

LogPosterior <- function(beta, sigma2, y, X)
{
p <- length(beta)

## The log likelihood function
LogLikelihood <- sum(dnorm(x = y,
mean = X%*%beta,
sd = sqrt(sigma2),
log = TRUE))
## The priors
LogPrior4beta <- dmvnorm(
x = t(beta),
mean = matrix(0, 1, p),
sigma = diag(p), log = TRUE)

LogPrior4sigma2 <- dchisq(x = sigma2, df = 10,
log = TRUE)

LogPrior <- LogPrior4beta + LogPrior4sigma2

## The log posterior
LogPosterior <- LogLikelihood + LogPrior

## Return
return(LogPosterior)
}

### 9.2.5 后验推断

###----------------------------------------------
### The Metropolis algorithm with Gibbs
###----------------------------------------------

nIter <- 1000
p <- ncol(X)

## Reserve space
beta <- matrix(NA, nIter, p)
sigma2 <- matrix(NA, nIter, 1)
acc <- matrix(NA, nIter, 2)

## Initial values
beta[1, ] <- c(-2, 3, 4, 1)
sigma2[1, ] <- 0.5

for(i in 2:nIter)
{
beta.current <- beta[i-1, ]
sigma2.current <- sigma2[i-1,]

## The Metropolis Algorithm For beta
beta.prop <- rmvnorm(
n = 1,
mean = matrix(beta.current, 1),
sigma = diag(p)) # FV

logPosterior.beta.prop <- LogPosterior(
beta = t(beta.prop),
sigma2 = sigma2.current,
y = y, X = X)

logPosterior.beta.current <- LogPosterior(
beta = beta.current,
sigma2 = sigma2.current,
y = y, X = X)

logratio <- (logPosterior.beta.prop -
logPosterior.beta.current)

acc.prob.beta <- min(exp(logratio), 1)

if(!is.na(acc.prob.beta) &
runif(1) < acc.prob.beta) # accept the proposal
{
beta.current <- beta.prop
}
acc[i, 1] <- acc.prob.beta
beta[i, ] <- beta.current

## The Metropolis Algorithm For sigma2
sigma2.prop <- rnorm(1, sigma2.current, 1) # FV
logPosterior.sigma2.prop <- LogPosterior(
beta = matrix(beta.current),
sigma2 = sigma2.prop,
y = y, X = X)

logPosterior.sigma2.current <- LogPosterior(
beta = matrix(beta.current),
sigma2 = sigma2.current,
y = y, X = X)

logratio <- logPosterior.sigma2.prop -
logPosterior.sigma2.current

acc.prob.sigma2 <- min(exp(logratio), 1)

if(!is.na(acc.prob.sigma2)&
runif(1, 0, 1) < acc.prob.sigma2) # accept the proposal
{
sigma2.current <- sigma2.prop
}

## Update the matrix
acc[i, 2] <- acc.prob.sigma2
sigma2[i, ] <- sigma2.current
}
## Summarize beta and sigma2
apply(beta, 2, mean)
#> [1] -3.1237  0.9976  3.1001  5.1548
apply(beta, 2, sd)
#> [1] 0.08883 0.19232 0.21274 0.40125