import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
= np.linspace(-1, 1, 50).reshape(-1, 1) x
= 5*x + 4
y = (np.abs(x.flatten())*np.random.randn(len(x))).reshape(-1,1)
noise = y + noise y
plt.scatter(x, y)5*x + 4, 'k') plt.plot(x,
from scipy.stats import multivariate_normal
from matplotlib import cm
= np.array([[ 1 , 0], [0, 1]])
cov = multivariate_normal(mean=[0,0], cov=cov)
var = np.mgrid[-1:1:.01, -1:1:.01]
x_grid, y_grid = np.dstack((x_grid, y_grid))
pos = var.pdf(pos)
z
plt.contourf(x_grid, y_grid, z)'equal')
plt.gca().set_aspect(r"$\theta_0$")
plt.xlabel(r"$\theta_1$")
plt.ylabel(r"Prior distribution of $\theta = f(\mu, \Sigma)$")
plt.title( plt.colorbar()
\[ \prod_{i=1}^{n} \frac{1}{\sqrt{2 \pi \sigma^{2}}} e^{-\frac{(y_{i}-\hat{y}_{i})^{2}}{2 \sigma^{2}}} \]
Sample from prior
= 20
n_samples for n in range(n_samples):
= var.rvs()
theta_0_s, theta_1_s *x + theta_0_s, color='k',alpha=0.2)
plt.plot(x, theta_1_s plt.scatter(x, y)
Likelihood of theta
def likelihood(theta_0, theta_1, x, y, sigma):
= 0
s = np.hstack((np.ones_like(x), x))
x_plus_1
for i in range(len(x)):
= x_plus_1[i, :]@np.array([theta_0, theta_1])
y_i_hat += (y[i,:]-y_i_hat)**2
s
return np.exp(-s/(2*sigma*sigma))/np.sqrt(2*np.pi*sigma*sigma)
-1, 1, x, y, 4) likelihood(
array([1.00683395e-22])
= np.mgrid[0:8:.1, 0:8:.1]
x_grid_2, y_grid_2
= np.zeros_like(x_grid_2)
li for i in range(x_grid_2.shape[0]):
for j in range(x_grid_2.shape[1]):
= likelihood(x_grid_2[i, j], y_grid_2[i, j], x, y, 4)
li[i, j]
plt.contourf(x_grid_2, y_grid_2, li)'equal')
plt.gca().set_aspect(r"$\theta_0$")
plt.xlabel(r"$\theta_1$")
plt.ylabel(
plt.colorbar()4, 5, s=200, marker='*', color='r')
plt.scatter(r"Likelihood as a function of ($\theta_0, \theta_1$)") plt.title(
Text(0.5, 1.0, 'Likelihood as a function of ($\\theta_0, \\theta_1$)')
Likelihood of \(\sigma^2\)
= np.hstack((np.ones_like(x), x))
x_plus_1
= np.linalg.inv(x_plus_1.T@x_plus_1)@(x_plus_1.T@y)
theta_mle = np.linalg.norm(y - x_plus_1@theta_mle)**2
sigma_2_mle = np.sqrt(sigma_2_mle)
sigma_mle sigma_mle
4.128685902124939
Posterior
\[ \begin{aligned} p(\boldsymbol{\theta} | \mathcal{X}, \mathcal{Y}) &=\mathcal{N}\left(\boldsymbol{\theta} | \boldsymbol{m}_{N}, \boldsymbol{S}_{N}\right) \\ \boldsymbol{S}_{N} &=\left(\boldsymbol{S}_{0}^{-1}+\sigma^{-2} \boldsymbol{\Phi}^{\top} \boldsymbol{\Phi}\right)^{-1} \\ \boldsymbol{m}_{N} &=\boldsymbol{S}_{N}\left(\boldsymbol{S}_{0}^{-1} \boldsymbol{m}_{0}+\sigma^{-2} \boldsymbol{\Phi}^{\top} \boldsymbol{y}\right) \end{aligned} \]
= np.array([[ 1 , 0], [0, 1]])
S0 = np.array([0, 0])
M0
= np.linalg.inv(np.linalg.inv(S0) + (sigma_mle**-2)*x_plus_1.T@x_plus_1)
SN = SN@(np.linalg.inv(S0)@M0 + (sigma_mle**-2)*(x_plus_1.T@y).squeeze()) MN
MN, SN
(array([2.97803341, 2.54277597]), array([[2.54243881e-01, 2.97285330e-17],
[2.97285330e-17, 4.95625685e-01]]))
from scipy.stats import multivariate_normal
from matplotlib import cm
= np.array([[ 1 , 0], [0, 1]])
cov = multivariate_normal(mean=MN, cov=SN)
var_pos = np.mgrid[0:8:.1, 0:8:.1]
x_grid, y_grid = np.dstack((x_grid, y_grid))
pos = var_pos.pdf(pos)
z
plt.contourf(x_grid, y_grid, z)'equal')
plt.gca().set_aspect(r"$\theta_0$")
plt.xlabel(r"$\theta_1$")
plt.ylabel(r"Posterior distribution of $\theta = f(\mu, \Sigma)$")
plt.title(4, 5, s=200, marker='*', color='r', label='MLE')
plt.scatter(0], MN[1], s=100, marker='^', color='black', label='MAP')
plt.scatter(MN[
plt.colorbar()
plt.legend()"../images/blr-map.png") plt.savefig(
Sample from posterior
= 20
n_samples for n in range(n_samples):
= var_pos.rvs()
theta_0_s, theta_1_s *x + theta_0_s, color='k',alpha=0.2)
plt.plot(x, theta_1_s plt.scatter(x, y)
Posterior predictions
\[ \begin{aligned} p\left(y_{*} | \mathcal{X}, \mathcal{Y}, \boldsymbol{x}_{*}\right) &=\int p\left(y_{*} | \boldsymbol{x}_{*}, \boldsymbol{\theta}\right) p(\boldsymbol{\theta} | \mathcal{X}, \mathcal{Y}) \mathrm{d} \boldsymbol{\theta} \\ &=\int \mathcal{N}\left(y_{*} | \boldsymbol{\phi}^{\top}\left(\boldsymbol{x}_{*}\right) \boldsymbol{\theta}, \sigma^{2}\right) \mathcal{N}\left(\boldsymbol{\theta} | \boldsymbol{m}_{N}, \boldsymbol{S}_{N}\right) \mathrm{d} \boldsymbol{\theta} \\ &=\mathcal{N}\left(y_{*} | \boldsymbol{\phi}^{\top}\left(\boldsymbol{x}_{*}\right) \boldsymbol{m}_{N}, \boldsymbol{\phi}^{\top}\left(\boldsymbol{x}_{*}\right) \boldsymbol{S}_{N} \boldsymbol{\phi}\left(\boldsymbol{x}_{*}\right)+\sigma^{2}\right) \end{aligned} \]
For a point \(x*\)
Predictive mean = \(X^Tm_N\)
Predictive variance = \(X^TS_NX + \sigma^2\)
x_plus_1.T.shape, SN.shape, x_plus_1.shape
((2, 50), (2, 2), (50, 2))
= x_plus_1@SN@x_plus_1.T
pred_var pred_var.shape
(50, 50)
## Marginal
= pred_var.diagonal() individual_var
= x_plus_1@MN
y_hat_map
='black')
plt.plot(x, y_hat_map, color-individual_var, y_hat_map+individual_var, alpha=0.2, color='black')
plt.fill_between(x.flatten(), y_hat_map plt.scatter(x, y)