import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
# Retina display
%config InlineBackend.figure_format = 'retina'
# Computing the evidence term
from tueplots import bundles
plt.rcParams.update(bundles.beamer_moml())
# Also add despine to the bundle using rcParams
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False
# Increase font size to match Beamer template
plt.rcParams['font.size'] = 16
# Make background transparent
plt.rcParams['figure.facecolor'] = 'none'
prior_mu = torch.zeros(2)
prior_cov = torch.eye(2)
theta = torch.distributions.MultivariateNormal(prior_mu, prior_cov)
theta
MultivariateNormal(loc: torch.Size([2]), covariance_matrix: torch.Size([2, 2]))
theta.loc, theta.covariance_matrix
(tensor([0., 0.]),
tensor([[1., 0.],
[0., 1.]]))
### True data
# True parameters
true_theta = torch.tensor([1.5, 0.5])

# Generate data
x = torch.linspace(-1, 1, 100)
torch.manual_seed(42)
y = true_theta[0] + true_theta[1] * x + torch.randn_like(x) * 0.2
plt.scatter(x, y, marker='x', color='k')
plt.xlabel('x')
plt.ylabel('y')
plt.plot(x, true_theta[0] + true_theta[1] * x, color='k', linestyle='--')
The evidence (marginal likelihood) integrates the likelihood over the prior:

\[ I = \int p(\mathcal{D} \mid \theta) \, p(\theta) \, \mathrm{d}\theta \]

A simple Monte Carlo estimate replaces the integral with an average of the likelihood over prior samples:

\[ I \approx \frac{1}{N} \sum_{i=1}^N p(\mathcal{D} \mid \theta_i), \qquad \theta_i \sim p(\theta) \]
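Read literally, the estimator says: draw prior samples, evaluate the likelihood at each, and average. A minimal sketch follows (the helper name `mc_evidence` and the `log_lik` argument are illustrative, not part of the original notebook; a concrete log likelihood for this model is defined further below):

def mc_evidence(prior, log_lik, n_samples=1000):
    """Simple Monte Carlo estimate of the evidence (illustrative sketch)."""
    samples = prior.sample((n_samples,))                     # theta_i ~ p(theta)
    liks = torch.stack([log_lik(t).exp() for t in samples])  # p(D | theta_i)
    return liks.mean()                                       # (1/N) sum_i p(D | theta_i)

As the log-likelihood values below will show, exponentiating like this underflows for all but the best samples, which is why the log-sum-exp variant used later is preferred.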
# Plot the prior in 2d contour
theta1 = torch.linspace(-3, 3, 100)
theta2 = torch.linspace(-3, 3, 100)

# Pass indexing='ij' explicitly to avoid the torch.meshgrid deprecation warning
theta1, theta2 = torch.meshgrid(theta1, theta2, indexing='ij')

theta_values = torch.stack((theta1, theta2), dim=-1)  # Shape: (100, 100, 2)

z = theta.log_prob(theta_values.view(-1, 2))  # Shape: (10000,)
z = z.view(100, 100)  # Reshape to (100, 100)

plt.contourf(theta1.numpy(), theta2.numpy(), z.numpy(), 20)
plt.gca().set_aspect('equal')
plt.colorbar()
theta.sample((10,))
tensor([[ 0.7262, 0.0912],
[-0.3891, 0.5279],
[-0.3609, -0.0606],
[ 0.0733, 0.8187],
[ 1.4805, 0.3449],
[-1.4241, -0.1163],
[ 0.2176, -0.0467],
[-1.4335, -0.5665],
[-0.4253, 0.2625],
[-1.4391, 0.5214]])
theta_sample = theta.sample((1000,))
theta_sample
tensor([[ 1.0414, -0.3997],
[-2.2933, 0.4976],
[-0.4257, -1.3371],
...,
[-0.5654, 0.2558],
[-1.5377, -0.1796],
[-0.1124, 0.2712]])
def log_likelihood(theta, x, y):
    """Compute the log likelihood of a linear regression model."""
    theta = theta.view(1, 2)  # Shape: (1, 2)
    x = x.view(-1, 1)  # Shape: (100, 1)
    y = y.view(-1, 1)  # Shape: (100, 1)
    mean = theta[:, 0] + theta[:, 1] * x  # Shape: (100, 1)
    return torch.distributions.Normal(mean, 0.2).log_prob(y).sum()  # Scalar
log_likelihood(theta_sample[0], x, y), log_likelihood(theta_sample[1], x, y), log_likelihood(true_theta, x, y)
(tensor(-595.4868), tensor(-18079.3672), tensor(20.7114))
theta_sample[0], theta_sample[1], true_theta
(tensor([ 1.0414, -0.3997]),
tensor([-2.2933, 0.4976]),
tensor([1.5000, 0.5000]))
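Calling log_likelihood once per sample works but is slow when N is large. A vectorized variant using broadcasting (the name `log_likelihood_batched` is illustrative, not from the original notebook):

def log_likelihood_batched(thetas, x, y):
    """Log likelihood for a batch of parameters; thetas has shape (N, 2)."""
    mean = thetas[:, 0:1] + thetas[:, 1:2] * x.view(1, -1)  # Shape: (N, 100)
    return torch.distributions.Normal(mean, 0.2).log_prob(y.view(1, -1)).sum(dim=-1)  # Shape: (N,)

For example, log_likelihood_batched(theta_sample[:2], x, y) should reproduce the two per-sample values computed above.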
N = 100
for i in range(N):
    theta_s = theta.sample()
    plt.plot(x, theta_s[0] + theta_s[1] * x, color='k', alpha=0.1)
plt.plot(x, true_theta[0] + true_theta[1] * x, color='k', linestyle='--')
theta_sample
tensor([[ 1.0414, -0.3997],
[-2.2933, 0.4976],
[-0.4257, -1.3371],
...,
[-0.5654, 0.2558],
[-1.5377, -0.1796],
[-0.1124, 0.2712]])
# Print the log likelihood of the true parameters, and first two samples
ll1 = log_likelihood(true_theta, x, y)
ll2 = log_likelihood(theta_sample[0], x, y)
ll3 = log_likelihood(theta_sample[1], x, y)
print(f'LL true: {ll1:.2f}, LL sample 1: {ll2:.2f}, LL sample 2: {ll3:.2f}')
print(f'Likelihood true: {ll1.exp():.2f}, Likelihood sample 1: {ll2.exp():.2f}, Likelihood sample 2: {ll3.exp():.2f}')
LL true: 20.71, LL sample 1: -595.49, LL sample 2: -18079.37
Likelihood true: 988233536.00, Likelihood sample 1: 0.00, Likelihood sample 2: 0.00
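The fix is to stay in log space throughout. Writing \(\ell_i = \log p(\mathcal{D} \mid \theta_i)\), the log of the Monte Carlo estimate is

\[ \log I \approx \log \frac{1}{N} \sum_{i=1}^N e^{\ell_i} = -\log N + \max_i \ell_i + \log \sum_{i=1}^N e^{\ell_i - \max_i \ell_i} \]

Subtracting the maximum before exponentiating keeps every term representable; the last two terms together are exactly what `torch.logsumexp` computes.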
# It seems we will have numerical problems. Let us use the log-sum-exp trick to compute the log evidence.
N = 15000

# Initialize a list to store log likelihood values
log_likelihood_values = []

# Monte Carlo estimation of the log marginal likelihood
for i in range(N):
    # Sample θ from the prior distribution
    theta_s = theta.sample()

    # Calculate the log likelihood using the function defined above
    log_likelihood_value = log_likelihood(theta_s, x, y)

    # Append the log likelihood value to the list
    log_likelihood_values.append(log_likelihood_value.item())

# Use the log-sum-exp trick to compute the log marginal likelihood
log_marginal_likelihood = -torch.log(torch.tensor(N, dtype=torch.float)) + torch.logsumexp(torch.tensor(log_likelihood_values), dim=0)

print("Log marginal likelihood:", log_marginal_likelihood.item())
Log marginal likelihood: 11.202832221984863
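As a quick sanity check (an illustrative sketch, not part of the original notebook), the manual max-shift computation agrees with `torch.logsumexp`:

ll = torch.tensor(log_likelihood_values)
m = ll.max()
manual_lse = m + torch.log(torch.exp(ll - m).sum())
print(manual_lse, torch.logsumexp(ll, dim=0))  # the two values should match

Without the shift, torch.exp(ll).sum() would be dominated by underflow: most log-likelihood values are far below anything exp can represent, as the minimum below shows.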
torch.tensor(log_likelihood_values).min()
tensor(-36610.6055)
torch.tensor(log_likelihood_values).max()
tensor(19.7690)
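For this conjugate model the evidence is also available in closed form: with design matrix \(X\), prior \(\theta \sim \mathcal{N}(\mu_0, \Sigma_0)\), and noise variance \(\sigma^2\), integrating out \(\theta\) gives

\[ p(\mathcal{D}) = \mathcal{N}\!\left(y \mid X \mu_0, \; X \Sigma_0 X^\top + \sigma^2 I \right) \]

which the next cell evaluates directly.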
# Design matrix X
X = torch.stack([torch.ones_like(x), x], dim=1)

# Variance of the Gaussian noise in the likelihood
sigma_squared = 0.2**2

# Calculate the marginal likelihood (evidence) using the closed-form expression
marginal_likelihood = torch.distributions.MultivariateNormal(
    torch.matmul(X, prior_mu),
    torch.matmul(torch.matmul(X, prior_cov), X.t()) + sigma_squared * torch.eye(len(x)),
)
marginal_likelihood
MultivariateNormal(loc: torch.Size([100]), covariance_matrix: torch.Size([100, 100]))
log_prob_data = marginal_likelihood.log_prob(y)
log_prob_data
tensor(12.3548)
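The analytic log evidence (about 12.35) is close to the Monte Carlo estimate (about 11.20); the remaining gap is Monte Carlo error, which shrinks as N grows. As one further cross-check (an illustrative sketch, not part of the original notebook), the Gaussian log density can also be evaluated by hand from a Cholesky factorization:

import math

# Manual evaluation of log N(y | m, S) as a cross-check on log_prob
m = torch.matmul(X, prior_mu)
S = torch.matmul(torch.matmul(X, prior_cov), X.t()) + sigma_squared * torch.eye(len(x))
L = torch.linalg.cholesky(S)
alpha = torch.cholesky_solve((y - m).view(-1, 1), L)  # solves S a = (y - m)
log_det = 2 * torch.log(torch.diagonal(L)).sum()      # log |S| from the Cholesky factor
quad = (y - m) @ alpha.view(-1)                       # (y - m)^T S^{-1} (y - m)
manual_log_prob = -0.5 * (quad + log_det + len(x) * math.log(2 * math.pi))
print(manual_log_prob)  # should match log_prob_data above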