import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Retina display
%config InlineBackend.figure_format = 'retina'
from tueplots import bundles
plt.rcParams.update(bundles.beamer_moml())
# Presentation tweaks applied on top of the tueplots bundle loaded above.
plt.rcParams.update({
    # Also add despine to the bundle using rcParams
    'axes.spines.right': False,
    'axes.spines.top': False,
    # Increase font size to match Beamer template
    'font.size': 16,
    # Make background transparent
    'figure.facecolor': 'none',
})

# Disable the matplotlib font-manager logger.
import logging
logging.getLogger('matplotlib.font_manager').disabled = True
from ipywidgets import interact, FloatSlider, IntSlider
Setting Up & Imports
Coin Toss Problem
def plot_beta(alpha, beta):
    """Plot the density of a Beta(alpha, beta) distribution.

    The density is evaluated on 500 evenly spaced points between 0 and 1
    and drawn on the current matplotlib figure, which is then shown.

    Args:
        alpha: Value passed to the Beta distribution as ``concentration1``.
        beta: Value passed to the Beta distribution as ``concentration0``.
    """
    theta_grid = torch.linspace(0, 1, 500)
    prior = torch.distributions.Beta(concentration1=alpha, concentration0=beta)
    density = prior.log_prob(theta_grid).exp()

    plt.plot(theta_grid, density, color='C0')
    plt.grid()
    plt.xlabel(r'$\theta$')
    plt.ylabel(r'$p(\theta)$')
    plt.title('Beta Distribution')
    plt.show()


# Interactive version: one slider per Beta parameter.
interact(
    plot_beta,
    alpha=FloatSlider(min=1, max=11, step=0.5, value=1),
    beta=FloatSlider(min=1, max=11, step=0.5, value=1),
)
<function __main__.plot_beta(alpha, beta)>
# (alpha, beta) pairs whose Beta densities are overlaid on one axis.
combinations = [
    [1, 1],
    [5, 1],
    [1, 3],
    [2, 2],
    [2, 5],
]

fig, ax = plt.subplots(1, 1)

# Every curve is evaluated on the same grid, so it is built once up front.
xs = torch.linspace(0, 1, 500)
for alpha, beta in combinations:
    dist = torch.distributions.Beta(concentration1=alpha, concentration0=beta)
    ys = dist.log_prob(xs).exp()
    ax.plot(xs, ys, label=rf'($\alpha$={alpha}, $\beta$={beta})')

ax.set_xlabel(r'$\theta$')
ax.set_ylabel(r'$p(\theta)$')
ax.set_title('Beta Distribution')
ax.set_ylim(0, 3)
ax.grid()
ax.legend()

plt.savefig('../figures/map/beta_distribution.pdf', bbox_inches='tight')
plt.show()
# Beta prior parameters and the observed coin-toss counts.
alpha, beta = 11, 11
bernoulli_theta = 0.5
n_1, n_0 = 9, 1  # shown in the title as H (n_1) and T (n_0)
sample_size = n_1 + n_0

# Prior density on a 500-point grid over [0, 1].
xs = torch.linspace(0, 1, 500)
dist = torch.distributions.Beta(concentration1=alpha, concentration0=beta)
ys = dist.log_prob(xs).exp()
plt.plot(xs, ys, label='Beta Prior')
plt.title(f"H: {n_1}, T: {n_0}")

# Point estimates: MLE is the observed fraction n_1 / sample_size; MAP is the
# mode of the Beta posterior obtained by adding the counts to the prior.
mle_estimate = n_1 / sample_size
map_estimate = (n_1 + alpha - 1) / (sample_size + alpha + beta - 2)

plt.axvline(mle_estimate, color='k', linestyle='--', label='MLE')
plt.axvline(map_estimate, color='r', linestyle='-.', label='MAP')
plt.axvline(alpha / (alpha + beta), color='g', linestyle=':', label='Prior Mean')

plt.grid()
plt.xlabel(r'$\theta$')
plt.ylabel(r'$p(\theta)$')
plt.legend(bbox_to_anchor=(1.25, 1), borderaxespad=0)
plt.savefig('../figures/map/coin_toss_prior_mle_map.pdf', bbox_inches='tight')
plt.show()
def plot_beta_all(alpha, beta, bernoulli_theta=0.5, sample_size=10):
    """Plot a Beta prior with the MLE, MAP and prior-mean estimates.

    Seeds torch's RNG with 42, draws ``sample_size`` Bernoulli samples with
    success probability ``bernoulli_theta``, and marks three vertical lines
    on top of the Beta(alpha, beta) density: the MLE (fraction of ones),
    the MAP estimate and the prior mean. The figure is shown, not saved.

    Args:
        alpha: Value passed to the Beta prior as ``concentration1``.
        beta: Value passed to the Beta prior as ``concentration0``.
        bernoulli_theta: Success probability used to simulate the tosses.
        sample_size: Number of simulated tosses.
    """
    torch.manual_seed(42)

    prior = torch.distributions.Beta(concentration1=alpha, concentration0=beta)
    xs = torch.linspace(0, 1, 500)
    ys = prior.log_prob(xs).exp()
    plt.plot(xs, ys, label='Beta Prior')

    # The Bernoulli distribution does not change between draws, so it is
    # built once; samples are still taken one at a time, as before.
    coin = torch.distributions.Bernoulli(probs=bernoulli_theta)
    samples = torch.empty(sample_size)
    for s_num in range(sample_size):
        samples[s_num] = coin.sample()

    n_1 = int(samples.sum())
    n_0 = int(sample_size - n_1)
    plt.title(f"H: {n_1}, T: {n_0}")

    mle_estimate = n_1 / sample_size
    plt.axvline(mle_estimate, color='k', linestyle='--', label='MLE')

    map_estimate = (n_1 + alpha - 1) / (sample_size + alpha + beta - 2)
    plt.axvline(map_estimate, color='r', linestyle='-.', label='MAP')

    plt.axvline(alpha / (alpha + beta), color='g', linestyle=':', label='Prior Mean')

    plt.grid()
    plt.xlabel(r'$\theta$')
    plt.ylabel(r'$p(\theta)$')
    plt.legend(bbox_to_anchor=(1.25, 1), borderaxespad=0)
    plt.show()
# One slider per argument of plot_beta_all.
coin_toss_sliders = {
    'alpha': FloatSlider(min=1, max=51, step=1, value=11),
    'beta': FloatSlider(min=1, max=51, step=1, value=11),
    'bernoulli_theta': FloatSlider(min=0, max=1, step=0.1, value=0.5),
    'sample_size': IntSlider(min=10, max=1000, step=10, value=10),
}
interact(plot_beta_all, **coin_toss_sliders)
<function __main__.plot_beta_all(alpha, beta, bernoulli_theta=0.5, sample_size=10)>
Linear Regression
# Synthetic regression data: n_data inputs evenly spaced on [-1, 1].
n_data = 5
x = torch.linspace(-1, 1, n_data)


def f(x):
    """Return the noise-free target 3*x + 1."""
    return 3*x + 1


# Observations are the true function plus Normal(0, 5) noise.
noise = torch.distributions.Normal(0, 5).sample((n_data,))
y = f(x) + noise

plt.figure()
plt.scatter(x, y, marker='x', c='k', s=20, label="Noisy data")
plt.plot(x, f(x), c='k', label="True function")
plt.xlabel('x')
plt.ylabel('y')
plt.title(r'$y_{true} = 3 x + 1$')
# The legend has to be drawn before saving, otherwise the PDF lacks it.
plt.legend()
plt.savefig(f'../figures/map/linreg_data-{n_data}.pdf', bbox_inches='tight')
<matplotlib.legend.Legend at 0x7fcb67cc6220>
def nll(theta):
    """Return the negative log-likelihood of the module-level data (x, y).

    The model mean is ``theta[0] + theta[1] * x`` and the likelihood is a
    Normal with standard deviation fixed at 1.0.

    Args:
        theta: Indexable pair; ``theta[0]`` is the intercept and ``theta[1]``
            the slope.

    Returns:
        The summed negative log-probability of ``y`` under the model.
    """
    intercept, slope = theta[0], theta[1]
    likelihood = torch.distributions.normal.Normal(
        intercept + slope * x,
        torch.tensor(1.0),
    )
    log_likelihood = likelihood.log_prob(y).sum()
    return -log_likelihood
def neg_log_prior(theta):
    """Return the negative log-density of ``theta`` under the prior.

    The prior is an independent Normal(0, 100) on each of the two
    coefficients.

    Args:
        theta: Tensor holding the two coefficients.

    Returns:
        The summed negative log-probability of ``theta`` under the prior.
    """
    prior_mean = torch.tensor([0.0, 0.0])  # Prior mean for intercept and slope
    # Float literals keep the scale in floating point rather than relying on
    # implicit promotion of an integer tensor inside the distribution.
    prior_std = torch.tensor([100.0, 100.0])  # Prior standard deviation for intercept and slope
    dist = torch.distributions.normal.Normal(prior_mean, prior_std)
    return -dist.log_prob(theta).sum()
def neg_log_joint(theta):
    """Return the negative log-joint: ``nll(theta) + neg_log_prior(theta)``."""
    likelihood_term = nll(theta)
    prior_term = neg_log_prior(theta)
    return likelihood_term + prior_term
# Create a grid of theta[0] and theta[1] values
theta0_values = torch.linspace(-4, 4, 100)
theta1_values = torch.linspace(-4, 4, 100)
# indexing='ij' is stated explicitly so that mesh[i, j] pairs theta0_values[i]
# with theta1_values[j], matching how the result grids are filled below.
theta0_mesh, theta1_mesh = torch.meshgrid(theta0_values, theta1_values, indexing='ij')
nll_values = torch.zeros_like(theta0_mesh)
nlp_values = torch.zeros_like(theta0_mesh)
nlj_values = torch.zeros_like(theta0_mesh)

# Evaluate the three objectives at every (theta0, theta1) grid point.
for i, theta0 in enumerate(theta0_values):
    for j, theta1 in enumerate(theta1_values):
        theta_current = torch.tensor([theta0, theta1])
        nll_values[i, j] = nll(theta_current)
        nlp_values[i, j] = neg_log_prior(theta_current)
        nlj_values[i, j] = neg_log_joint(theta_current)
/home/nipun.batra/miniforge3/lib/python3.9/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3483.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
def plot_contour_with_minima(values, theta0_values, theta1_values, title):
    """Draw a filled contour plot of ``values`` over a (theta0, theta1) grid.

    A new 6x6 figure is created with a colorbar and equal aspect ratio.
    Despite the name, this function does not mark the minimum. The figure is
    neither shown nor saved here.

    Args:
        values: 2-D tensor where ``values[i, j]`` corresponds to
            ``theta0_values[i]`` and ``theta1_values[j]``.
        theta0_values: 1-D tensor of theta_0 grid coordinates (x axis).
        theta1_values: 1-D tensor of theta_1 grid coordinates (y axis).
        title: Figure title.
    """
    # Build the mesh from the arguments rather than from module-level
    # globals, so the plot always matches the coordinates that were passed.
    theta0_grid, theta1_grid = torch.meshgrid(theta0_values, theta1_values, indexing='ij')

    plt.figure(figsize=(6, 6))

    contour = plt.contourf(theta0_grid, theta1_grid, values, levels=20, cmap='viridis')
    plt.xlabel(r'$\theta_0$')
    plt.ylabel(r'$\theta_1$')
    plt.title(title)
    plt.colorbar(contour)
    plt.gca().set_aspect('equal', adjustable='box')  # Set aspect ratio to be equal
# Usage example: one contour figure per objective computed on the grid.
for grid_values, panel_title in (
    (nll_values, 'Negative Log-Likelihood'),
    (nlp_values, 'Negative Log-Prior'),
    (nlj_values, 'Negative Log-Joint'),
):
    plot_contour_with_minima(grid_values, theta0_values, theta1_values, panel_title)
# Two panels: data with the fitted line (left) and the NLL surface (right).
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
data_ax, nll_ax = ax[0], ax[1]

# Filled NLL contours.
contour = nll_ax.contourf(theta0_mesh, theta1_mesh, nll_values, levels=20, cmap='viridis')
nll_ax.set_xlabel(r'$\theta_0$')
nll_ax.set_ylabel(r'$\theta_1$')
nll_ax.set_title('NLL (MLE)')

# Adding contour level labels
contour_labels = nll_ax.contour(theta0_mesh, theta1_mesh, nll_values, levels=20, colors='black', linewidths=0.5)
nll_ax.clabel(contour_labels, inline=True, fontsize=8, fmt='%1.1f')

# Find and mark the minimum (argmin returns an index into the flattened grid)
min_indices = torch.argmin(nll_values)
min_theta0 = theta0_mesh.flatten()[min_indices]
min_theta1 = theta1_mesh.flatten()[min_indices]
nll_ax.scatter(min_theta0, min_theta1, color='red', marker='x', label='Minima')

# Draw lines from the minimum point to the axes
nll_ax.axhline(min_theta1, color='gray', linestyle='--')
nll_ax.axvline(min_theta0, color='gray', linestyle='--')

# Add labels to the lines
nll_ax.text(min_theta0, 4.2, f'{min_theta0:.2f}', color='gray', fontsize=10)
nll_ax.text(4.2, min_theta1, f'{min_theta1:.2f}', color='gray', fontsize=10)

# Data, true function and the line given by the grid minimum.
data_ax.scatter(x, y, marker='x', c='k', s=20, label="Noisy data")
data_ax.plot(x, f(x), c='k', label="True function")
data_ax.plot(x, min_theta0 + min_theta1*x, c='b', label="MLE")
data_ax.set_xlabel('x')
data_ax.set_ylabel('y')
data_ax.set_title(r'$y_{true} = 3 x + 1$')
data_ax.legend()

plt.savefig('../figures/map/linreg_mle.pdf', bbox_inches='tight')
plt.show()
# Create a grid of theta[0] and theta[1] values
theta0_values = torch.linspace(-4, 4, 100)
theta1_values = torch.linspace(-4, 4, 100)
# indexing='ij' so that mesh[i, j] pairs theta0_values[i] with theta1_values[j]
theta0_mesh, theta1_mesh = torch.meshgrid(theta0_values, theta1_values, indexing='ij')
nll_values = torch.zeros_like(theta0_mesh)

# Calculate the negative log-joint (likelihood plus prior) for each
# combination of theta[0] and theta[1]. nll_with_prior exists only inside
# lin_reg_map, so the module-level neg_log_joint is used at this scope.
# NOTE: nll_values keeps its name but holds negative log-joint values here.
for i, theta0 in enumerate(theta0_values):
    for j, theta1 in enumerate(theta1_values):
        nll_values[i, j] = neg_log_joint(torch.tensor([theta0, theta1]))

# Create a contour plot
plt.figure()
contour = plt.contourf(theta0_mesh, theta1_mesh, nll_values, levels=20, cmap='viridis')
plt.xlabel(r'$\theta_0$')
plt.ylabel(r'$\theta_1$')

# Adding contour level labels
contour_labels = plt.contour(theta0_mesh, theta1_mesh, nll_values, levels=20, colors='black', linewidths=0.5)
plt.clabel(contour_labels, inline=True, fontsize=8, fmt='%1.1f')

# Find and mark the minimum (argmin returns an index into the flattened grid)
min_indices = torch.argmin(nll_values)
min_theta0 = theta0_mesh.flatten()[min_indices]
min_theta1 = theta1_mesh.flatten()[min_indices]
plt.scatter(min_theta0, min_theta1, color='red', marker='x', label='Minima')

# Draw lines from the minimum point to the axes
plt.axhline(min_theta1, color='gray', linestyle='--')
plt.axvline(min_theta0, color='gray', linestyle='--')

# Add labels to the lines
plt.text(min_theta0, 4.2, f'{min_theta0:.2f}', color='gray', fontsize=10)
plt.text(4.2, min_theta1, f'{min_theta1:.2f}', color='gray', fontsize=10)
plt.legend(bbox_to_anchor=(0, 1.05), loc='lower left')
/home/nipun.batra/miniforge3/lib/python3.9/site-packages/torch/functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3483.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
NameError: name 'nll_with_prior' is not defined
def lin_reg_map(noise_std, prior_std_lambda, n_samples):
    """Compare MLE and Gaussian-prior MAP fits of a line on synthetic data.

    Seeds torch's RNG with 42, generates ``n_samples`` inputs evenly spaced
    on [-1, 1] with targets ``3*x + 1`` plus Normal(0, noise_std) noise, and
    grid-searches (theta_0, theta_1) over [-4, 4] x [-4, 4] (100 points per
    axis) for the minimum of each objective. Three panels are drawn: the data
    with both fitted lines, the NLL surface and the NLL-plus-prior surface.
    The figure is saved to '../figures/map/linreg_mle_map.pdf' and shown.

    The likelihood uses a fixed standard deviation of 1.0, independent of
    ``noise_std``.

    Args:
        noise_std: Standard deviation of the noise added to the targets.
        prior_std_lambda: Standard deviation of the zero-mean Normal prior
            placed on both coefficients.
        n_samples: Number of data points to generate.
    """
    torch.manual_seed(42)

    x = torch.linspace(-1, 1, n_samples)

    def f(x):
        """Return the noise-free target 3*x + 1."""
        return 3*x + 1

    noise = torch.distributions.Normal(0, noise_std).sample((n_samples,))
    y = f(x) + noise

    def nll(theta):
        """Negative log-likelihood of (x, y) for theta = (intercept, slope)."""
        mu = theta[0] + theta[1] * x
        sigma = torch.tensor(1.0)
        dist = torch.distributions.normal.Normal(mu, sigma)
        return -dist.log_prob(y).sum()

    def nll_with_prior(theta):
        """Negative log-joint: NLL minus the log-density of the prior."""
        theta = torch.tensor(theta)

        prior_mean = torch.tensor([0.0, 0.0])  # Prior mean for intercept and slope
        # Prior standard deviation for intercept and slope
        prior_std = torch.tensor([prior_std_lambda, prior_std_lambda], dtype=torch.float32)
        dist_prior = torch.distributions.normal.Normal(prior_mean, prior_std)
        log_prior = dist_prior.log_prob(theta).sum()

        # The log-prior is subtracted: minimising -log p(y|theta) - log p(theta)
        # maximises the posterior. Adding it would favour improbable theta.
        return nll(theta) - log_prior

    fig, ax = plt.subplots(1, 3, figsize=(12, 6))
    ax[0].scatter(x, y, marker='x', c='k', s=20, label="Noisy data")
    ax[0].plot(x, f(x), c='k', label="True function")

    def plot_theta_contour(ax, func, title):
        """Plot ``func`` over the theta grid on ``ax``; return its grid minimiser."""
        # Create a grid of theta[0] and theta[1] values
        theta0_values = torch.linspace(-4, 4, 100)
        theta1_values = torch.linspace(-4, 4, 100)
        # indexing='ij' so that mesh[i, j] pairs theta0_values[i] with theta1_values[j]
        theta0_mesh, theta1_mesh = torch.meshgrid(theta0_values, theta1_values, indexing='ij')
        nll_values = torch.zeros_like(theta0_mesh)

        # Evaluate the objective for each combination of theta[0] and theta[1]
        for i, theta0 in enumerate(theta0_values):
            for j, theta1 in enumerate(theta1_values):
                nll_values[i, j] = func([theta0, theta1])

        # Create a contour plot
        ax.contourf(theta0_mesh, theta1_mesh, nll_values, levels=20, cmap='viridis')
        ax.set_xlabel(r'$\theta_0$')
        ax.set_ylabel(r'$\theta_1$')
        ax.set_title(title)

        # Adding contour level labels
        contour_labels = ax.contour(theta0_mesh, theta1_mesh, nll_values, levels=20, colors='black', linewidths=0.5)
        ax.clabel(contour_labels, inline=True, fontsize=8, fmt='%1.1f')

        # Find and mark the minimum (argmin indexes the flattened grid)
        min_indices = torch.argmin(nll_values)
        min_theta0 = theta0_mesh.flatten()[min_indices]
        min_theta1 = theta1_mesh.flatten()[min_indices]
        ax.scatter(min_theta0, min_theta1, color='red', marker='x', label='Minima')

        # Draw lines from the minimum point to the axes
        ax.axhline(min_theta1, color='gray', linestyle='--')
        ax.axvline(min_theta0, color='gray', linestyle='--')

        # Add labels to the lines
        ax.text(min_theta0, 4.2, f'{min_theta0:.2f}', color='gray', fontsize=10)
        ax.text(4.2, min_theta1, f'{min_theta1:.2f}', color='gray', fontsize=10)
        return min_theta0, min_theta1

    mle_theta0, mle_theta1 = plot_theta_contour(ax[1], nll, 'NLL (MLE)')
    map_theta0, map_theta1 = plot_theta_contour(ax[2], nll_with_prior, 'NLL + Prior (MAP)')

    ax[0].plot(x, mle_theta0 + mle_theta1*x, c='b', label="MLE")
    ax[0].plot(x, map_theta0 + map_theta1*x, c='r', label="MAP")
    ax[0].legend()
    ax[0].set_xlabel('x')
    ax[0].set_ylabel('y')
    plt.savefig('../figures/map/linreg_mle_map.pdf', bbox_inches='tight')

    plt.show()
# One static render with fixed settings.
lin_reg_map(noise_std=3.0, prior_std_lambda=1.0, n_samples=30)

# Interactive version: one slider per argument of lin_reg_map.
lin_reg_sliders = {
    'noise_std': FloatSlider(min=0.1, max=3.0, step=0.1, value=0.1),
    'prior_std_lambda': FloatSlider(min=0.1, max=3.0, step=0.1, value=1.0),
    'n_samples': IntSlider(min=10, max=1000, step=10, value=100),
}
interact(lin_reg_map, **lin_reg_sliders)
<function __main__.lin_reg_map(noise_std, prior_std_lambda, n_samples)>
def lin_reg_map_laplace(noise_std, prior_std_lambda, n_samples):
    """Compare MLE and Laplace-prior MAP fits of a line on synthetic data.

    Seeds torch's RNG with 42, generates ``n_samples`` inputs evenly spaced
    on [-1, 1] with targets ``3*x + 1`` plus Normal(0, noise_std) noise, and
    grid-searches (theta_0, theta_1) over [-4, 4] x [-4, 4] (100 points per
    axis) for the minimum of each objective. Three panels are drawn: the data
    with both fitted lines, the NLL surface and the NLL-plus-prior surface.
    The figure is saved to '../figures/map/linreg_mle_map_laplace.pdf' and
    shown.

    The likelihood uses a fixed standard deviation of 1.0, independent of
    ``noise_std``.

    Args:
        noise_std: Standard deviation of the noise added to the targets.
        prior_std_lambda: Passed as the scale parameter of the zero-centred
            Laplace prior placed on both coefficients.
        n_samples: Number of data points to generate.
    """
    torch.manual_seed(42)

    x = torch.linspace(-1, 1, n_samples)

    def f(x):
        """Return the noise-free target 3*x + 1."""
        return 3*x + 1

    noise = torch.distributions.Normal(0, noise_std).sample((n_samples,))
    y = f(x) + noise

    def nll(theta):
        """Negative log-likelihood of (x, y) for theta = (intercept, slope)."""
        mu = theta[0] + theta[1] * x
        sigma = torch.tensor(1.0)
        dist = torch.distributions.normal.Normal(mu, sigma)
        return -dist.log_prob(y).sum()

    def nll_with_laplace_prior(theta):
        """Negative log-joint: NLL minus the log-density of the Laplace prior."""
        theta = torch.tensor(theta)

        prior_mean = torch.tensor([0.0, 0.0])  # Prior location for intercept and slope
        # Laplace scale for intercept and slope (a scale, not a standard deviation)
        prior_scale = torch.tensor([prior_std_lambda, prior_std_lambda], dtype=torch.float32)
        dist_prior = torch.distributions.laplace.Laplace(prior_mean, prior_scale)
        log_prior = dist_prior.log_prob(theta).sum()

        # The log-prior is subtracted: minimising -log p(y|theta) - log p(theta)
        # maximises the posterior. Adding it would favour improbable theta.
        return nll(theta) - log_prior

    fig, ax = plt.subplots(1, 3, figsize=(12, 6))
    ax[0].scatter(x, y, marker='x', c='k', s=20, label="Noisy data")
    ax[0].plot(x, f(x), c='k', label="True function")

    def plot_theta_contour(ax, func, title):
        """Plot ``func`` over the theta grid on ``ax``; return its grid minimiser."""
        # Create a grid of theta[0] and theta[1] values
        theta0_values = torch.linspace(-4, 4, 100)
        theta1_values = torch.linspace(-4, 4, 100)
        # indexing='ij' so that mesh[i, j] pairs theta0_values[i] with theta1_values[j]
        theta0_mesh, theta1_mesh = torch.meshgrid(theta0_values, theta1_values, indexing='ij')
        nll_values = torch.zeros_like(theta0_mesh)

        # Evaluate the objective for each combination of theta[0] and theta[1]
        for i, theta0 in enumerate(theta0_values):
            for j, theta1 in enumerate(theta1_values):
                nll_values[i, j] = func([theta0, theta1])

        # Create a contour plot
        ax.contourf(theta0_mesh, theta1_mesh, nll_values, levels=20, cmap='viridis')
        ax.set_xlabel(r'$\theta_0$')
        ax.set_ylabel(r'$\theta_1$')
        ax.set_title(title)

        # Adding contour level labels
        contour_labels = ax.contour(theta0_mesh, theta1_mesh, nll_values, levels=20, colors='black', linewidths=0.5)
        ax.clabel(contour_labels, inline=True, fontsize=8, fmt='%1.1f')

        # Find and mark the minimum (argmin indexes the flattened grid)
        min_indices = torch.argmin(nll_values)
        min_theta0 = theta0_mesh.flatten()[min_indices]
        min_theta1 = theta1_mesh.flatten()[min_indices]
        ax.scatter(min_theta0, min_theta1, color='red', marker='x', label='Minima')

        # Draw lines from the minimum point to the axes
        ax.axhline(min_theta1, color='gray', linestyle='--')
        ax.axvline(min_theta0, color='gray', linestyle='--')

        # Add labels to the lines
        ax.text(min_theta0, 4.2, f'{min_theta0:.2f}', color='gray', fontsize=10)
        ax.text(4.2, min_theta1, f'{min_theta1:.2f}', color='gray', fontsize=10)
        return min_theta0, min_theta1

    mle_theta0, mle_theta1 = plot_theta_contour(ax[1], nll, 'NLL (MLE)')
    map_theta0, map_theta1 = plot_theta_contour(ax[2], nll_with_laplace_prior, 'NLL + Prior (MAP)')

    ax[0].plot(x, mle_theta0 + mle_theta1*x, c='b', label="MLE")
    ax[0].plot(x, map_theta0 + map_theta1*x, c='r', label="MAP")
    ax[0].legend()
    ax[0].set_xlabel('x')
    ax[0].set_ylabel('y')
    ax[0].set_title(r'$y_{true} = 3 x + 1$')
    plt.savefig('../figures/map/linreg_mle_map_laplace.pdf', bbox_inches='tight')

    plt.show()
# One static render of the Laplace-prior comparison with fixed settings.
lin_reg_map_laplace(
    noise_std=3.0,
    prior_std_lambda=1.0,
    n_samples=30,
)