# Computing the evidence term

import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

# Retina display
%config InlineBackend.figure_format = 'retina'
from tueplots import bundles
plt.rcParams.update(bundles.beamer_moml())


# Also add despine to the bundle using rcParams
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False

# Increase font size to match Beamer template
plt.rcParams['font.size'] = 16
# Make background transparent
plt.rcParams['figure.facecolor'] = 'none'

# Standard normal prior over theta = (intercept, slope)
prior_mu = torch.zeros(2)
prior_cov = torch.eye(2)
theta = torch.distributions.MultivariateNormal(prior_mu, prior_cov)
theta
MultivariateNormal(loc: torch.Size([2]), covariance_matrix: torch.Size([2, 2]))
theta.loc, theta.covariance_matrix
(tensor([0., 0.]),
 tensor([[1., 0.],
         [0., 1.]]))
### True data

# True parameters
true_theta = torch.tensor([1.5, 0.5])

# Generate data
x = torch.linspace(-1, 1, 100)
torch.manual_seed(42)
y = true_theta[0] + true_theta[1] * x + torch.randn_like(x) * 0.2

plt.scatter(x, y, marker='x', color='k')
plt.xlabel('x')
plt.ylabel('y')

plt.plot(x, true_theta[0] + true_theta[1] * x, color='k', linestyle='--')

The evidence (marginal likelihood) is the likelihood averaged over the prior:

\[ I = \int p(\mathcal{D} \mid \theta)\, p(\theta)\, \mathrm{d}\theta \]

A simple Monte Carlo estimate replaces the integral with an average over samples drawn from the prior:

\[ I \approx \frac{1}{N} \sum_{i=1}^N p(\mathcal{D} \mid \theta_i), \quad \text{where } \theta_i \sim p(\theta). \]
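This works because sampling from the prior makes the average an ordinary Monte Carlo estimator: \( \mathbb{E}_{\theta \sim p(\theta)}\left[ p(\mathcal{D} \mid \theta) \right] = \int p(\mathcal{D} \mid \theta)\, p(\theta)\, \mathrm{d}\theta = I \), so the sample mean is an unbiased estimate of the evidence.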

# Plot the prior in 2d contour

theta1 = torch.linspace(-3, 3, 100)
theta2 = torch.linspace(-3, 3, 100)

theta1, theta2 = torch.meshgrid(theta1, theta2, indexing='ij')  # explicit indexing silences the deprecation warning

theta_values = torch.stack((theta1, theta2), dim=-1)  # Shape: (100, 100, 2)

z = theta.log_prob(theta_values.view(-1, 2))  # Shape: (10000,)
z = z.view(100, 100)  # Reshape to (100, 100)

plt.contourf(theta1.numpy(), theta2.numpy(), z.numpy(), 20)
plt.gca().set_aspect('equal')

plt.colorbar()

theta.sample((10,))
tensor([[ 0.7262,  0.0912],
        [-0.3891,  0.5279],
        [-0.3609, -0.0606],
        [ 0.0733,  0.8187],
        [ 1.4805,  0.3449],
        [-1.4241, -0.1163],
        [ 0.2176, -0.0467],
        [-1.4335, -0.5665],
        [-0.4253,  0.2625],
        [-1.4391,  0.5214]])
theta_sample = theta.sample((1000,))
theta_sample
tensor([[ 1.0414, -0.3997],
        [-2.2933,  0.4976],
        [-0.4257, -1.3371],
        ...,
        [-0.5654,  0.2558],
        [-1.5377, -0.1796],
        [-0.1124,  0.2712]])
def log_likelihood(theta, x, y):
    """Compute the log likelihood of a linear regression model with Gaussian noise (std 0.2)."""
    theta = theta.view(1, 2)  # Shape: (1, 2)
    x = x.view(-1, 1)  # Shape: (100, 1)
    y = y.view(-1, 1)  # Shape: (100, 1)
    mean = theta[:, 0] + theta[:, 1] * x  # Shape: (100, 1)
    return torch.distributions.Normal(mean, 0.2).log_prob(y).sum()  # Scalar
log_likelihood(theta_sample[0], x, y), log_likelihood(theta_sample[1], x, y), log_likelihood(true_theta, x, y)
(tensor(-595.4868), tensor(-18079.3672), tensor(20.7114))
theta_sample[0], theta_sample[1], true_theta
(tensor([ 1.0414, -0.3997]),
 tensor([-2.2933,  0.4976]),
 tensor([1.5000, 0.5000]))
# Regression lines for 100 parameter draws from the prior, with the true line dashed
N = 100
for i in range(N):
    theta_s = theta.sample()
    plt.plot(x, theta_s[0] + theta_s[1] * x, color='k', alpha=0.1)
plt.plot(x, true_theta[0] + true_theta[1] * x, color='k', linestyle='--')

# Print the log likelihood of the true parameters, and first two samples
ll1 = log_likelihood(true_theta, x, y)
ll2 = log_likelihood(theta_sample[0], x, y)
ll3 = log_likelihood(theta_sample[1], x, y)
print(f'LL true: {ll1:.2f}, LL sample 1: {ll2:.2f}, LL sample 2: {ll3:.2f}')
print(f'Likelihood true: {ll1.exp():.2f}, Likelihood sample 1: {ll2.exp():.2f}, Likelihood sample 2: {ll3.exp():.2f}')
LL true: 20.71, LL sample 1: -595.49, LL sample 2: -18079.37
Likelihood true: 988233536.00, Likelihood sample 1: 0.00, Likelihood sample 2: 0.00
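Working in log space avoids the underflow. Writing \( \ell_i = \log p(\mathcal{D} \mid \theta_i) \), the log of the Monte Carlo estimate is

\[ \log \hat{I} = \log \frac{1}{N} \sum_{i=1}^N e^{\ell_i} = \operatorname{logsumexp}(\ell_1, \ldots, \ell_N) - \log N, \]

which `torch.logsumexp` evaluates stably by factoring out \( \max_i \ell_i \) before exponentiating.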
# Use the log-sum-exp trick to avoid the underflow seen above
N = 15000
# Initialize a list to store log likelihood values
log_likelihood_values = []

# Monte Carlo estimation of the log marginal likelihood
for i in range(N):
    # Sample θ_i from the prior distribution
    theta_i = theta.sample()

    # Evaluate the log likelihood defined above at this sample
    log_likelihood_value = log_likelihood(theta_i, x, y)

    # Append the scalar log likelihood value to the list
    log_likelihood_values.append(log_likelihood_value.item())

# Use the log-sum-exp trick to compute the log marginal likelihood
log_lls = torch.tensor(log_likelihood_values)
log_marginal_likelihood = torch.logsumexp(log_lls, dim=0) - torch.log(torch.tensor(float(N)))

print("Log marginal likelihood:", log_marginal_likelihood.item())
Log marginal likelihood: 11.202832221984863
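As an aside, the Python loop above can be replaced by a single batched computation. This is a sketch (the names `theta_batch`, `means`, `lls`, and `log_ml` are introduced here, not part of the original code) that reuses the same prior, data, and noise scale:

# Vectorized Monte Carlo estimate of the log evidence (sketch)
theta_batch = theta.sample((N,))                       # Shape: (N, 2)
means = theta_batch[:, 0:1] + theta_batch[:, 1:2] * x  # Broadcast to (N, 100)
lls = torch.distributions.Normal(means, 0.2).log_prob(y).sum(dim=1)  # Shape: (N,)
log_ml = torch.logsumexp(lls, dim=0) - torch.log(torch.tensor(float(N)))
print("Log marginal likelihood (vectorized):", log_ml.item())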
torch.tensor(log_likelihood_values).min()
tensor(-36610.6055)
torch.tensor(log_likelihood_values).max()
tensor(19.7690)
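Because the model is linear-Gaussian, the evidence is also available in closed form: integrating the Gaussian likelihood against the Gaussian prior gives

\[ p(\mathcal{D}) = \mathcal{N}\!\left( y \mid X \mu_0,\; X \Sigma_0 X^\top + \sigma^2 I \right), \]

where \(X\) is the design matrix, \(\mu_0, \Sigma_0\) are the prior mean and covariance, and \(\sigma^2\) is the noise variance. We can evaluate this directly and compare it with the Monte Carlo estimate.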
# Design matrix X: a column of ones for the intercept, then x
X = torch.stack([torch.ones_like(x), x], dim=1)

# Variance of the Gaussian noise in the likelihood
sigma_squared = 0.2**2

# Evidence in closed form: y ~ N(X mu_0, X Sigma_0 X^T + sigma^2 I)
marginal_likelihood = torch.distributions.MultivariateNormal(
    X @ prior_mu,
    X @ prior_cov @ X.t() + sigma_squared * torch.eye(len(x))
)
marginal_likelihood
MultivariateNormal(loc: torch.Size([100]), covariance_matrix: torch.Size([100, 100]))
log_prob_data = marginal_likelihood.log_prob(y)
log_prob_data
tensor(12.3548)
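The exact log evidence, 12.35, is close to the Monte Carlo estimate of 11.20. The gap reflects the high variance of sampling from the prior: as the min/max values above show, only the few samples that land near the true parameters contribute meaningfully to the average.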