import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
# Retina display
%config InlineBackend.figure_format = 'retina'
Biased and Unbiased Estimators
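For reference, the quantities estimated below are the maximum-likelihood estimates of a Gaussian's mean and variance from a sample $x_1, \dots, x_N$:

$$
\hat{\mu} = \frac{1}{N}\sum_{i=1}^{N} x_i,
\qquad
\hat{\sigma}^2_{\text{MLE}} = \frac{1}{N}\sum_{i=1}^{N} (x_i - \hat{\mu})^2 .
$$

The mean estimator is unbiased, while the variance estimator is biased; the experiments below estimate both from many small samples to show this.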
from tueplots import bundles
plt.rcParams.update(bundles.beamer_moml())
# Also add despine to the bundle using rcParams
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False
# Increase font size to match Beamer template
plt.rcParams['font.size'] = 16
# Make background transparent
plt.rcParams['figure.facecolor'] = 'none'
dist = torch.distributions.Normal(0, 1)
# Generate data
data = dist.sample((100,))

# Plot data
_ = sns.displot(data, kde=True)
# Population
norm = torch.distributions.Normal(0, 1)
xs = torch.linspace(-10, 10, 1000)
ys = torch.exp(norm.log_prob(xs))

plt.plot(xs, ys)
plt.title('Population Distribution')
plt.xlabel('x')
plt.ylabel('p(x)')
plt.savefig('../figures/mle/population-dist.pdf')
population = norm.sample((100000,))
plt.scatter(population, torch.zeros_like(population), marker='|', alpha=0.1)
plt.savefig('../figures/mle/population.pdf')
plt.plot(xs, ys)
sample_1 = population[torch.randperm(population.size(0))[:10]]
sample_2 = population[torch.randperm(population.size(0))[:10]]
plt.scatter(sample_1, torch.zeros_like(sample_1), label='sample 1')
plt.scatter(sample_2, torch.zeros_like(sample_2), label='sample 2')
plt.legend()
plt.savefig('../figures/mle/sample.pdf')
norm = torch.distributions.Normal(0, 1)
population = norm.sample((100000,))
def plot_fit(seed, num_samples):
    torch.manual_seed(seed)
    # Select a random sample of size num_samples from the population
    data = population[torch.randperm(population.shape[0])[:num_samples]]
    mu = data.mean()
    # Biased (MLE) variance estimate: divides by N rather than N - 1
    sigma_2 = data.var(correction=0)

    # Plot data scatter
    plt.scatter(data, torch.zeros_like(data), color='black', marker='x', zorder=10, label='samples')
    # Plot true distribution
    x = torch.linspace(-3, 3, 100)
    plt.plot(x, norm.log_prob(x).exp(), color='black', label='true distribution')
    # Plot estimated distribution
    est = torch.distributions.Normal(mu, sigma_2.sqrt())
    plt.plot(x, est.log_prob(x).exp(), color='red', label='estimated distribution')
    plt.legend()
    plt.title(f"Sample size: {num_samples}\n" +
              fr"Sample parameters: $\hat{{\mu}}={mu:0.02f}$, $\hat{{\sigma^2}}={sigma_2:0.02f}$")
    plt.ylim(-0.1, 1.2)
    plt.xlabel("$x$")
    plt.ylabel("$p(x)$")
    plt.savefig(f"../figures/mle/biased-mle-normal-{num_samples}-{seed}.pdf", bbox_inches='tight')
    return mu, sigma_2
N_samples = 100
mus = {}
sigmas = {}
for draw in [3, 4, 5, 10, 100]:
    mus[draw] = torch.zeros(N_samples)
    sigmas[draw] = torch.zeros(N_samples)
    for i in range(N_samples):
        plt.clf()
        mus[draw][i], sigmas[draw][i] = plot_fit(i, draw)
for draw in [3, 4, 5, 10, 100]:
    plt.clf()
    plt.scatter(mus[draw], sigmas[draw])
    plt.axhline(y=1, color='k', linestyle='--', label=r'$\sigma^2$')
    plt.axvline(x=0, color='g', linestyle='-.', label=r'$\mu$')
    plt.xlabel(r'$\hat{\mu}$')
    plt.ylabel(r'$\hat{\sigma^2}$')
    plt.legend()
    plt.title(f'Sample size={draw}\n' +
              fr'$E[\hat{{\mu}}] = {mus[draw].mean():0.2f}$ $E[\hat{{\sigma^2}}] = {sigmas[draw].mean():0.2f}$')
    plt.savefig(f"../figures/mle/biased-mle-normal-scatter-{draw}.pdf", bbox_inches='tight')
    #plt.clf()
df = pd.DataFrame({draw: sigmas[draw].numpy()
                   for draw in [3, 4, 5, 10, 100]}).mean()

df.plot(kind='bar', rot=0)
plt.axhline(1, color='k', linestyle='--')
# Put numbers on top of bars
for i, v in enumerate(df):
    plt.text(i - .1, v + .01, f'{v:.3f}', color='k', fontsize=12)

plt.xlabel("Sample size (N)")
plt.ylabel("Estimated variance")
plt.savefig('../figures/biased-mle-variance-quality.pdf', bbox_inches='tight')
3 0.981293
4 1.014205
5 0.952345
10 1.022295
100 0.985779
dtype: float64
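The factor applied in the next cell is Bessel's correction. For the MLE variance estimator above, a standard calculation gives

$$
E\left[\hat{\sigma}^2_{\text{MLE}}\right] = \frac{N-1}{N}\,\sigma^2 ,
$$

so rescaling each biased estimate by $N/(N-1)$ yields an unbiased estimator of $\sigma^2$; that is exactly what the expression df*(df.index/(df.index-1.0)) below computes.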
df_unbiased = df*(df.index/(df.index-1.0))

df_unbiased.plot(kind='bar', rot=0)
plt.axhline(1, color='k', linestyle='--')
# Put numbers on top of bars
for i, v in enumerate(df_unbiased):
    plt.text(i - .1, v + .01, f'{v:.3f}', color='k', fontsize=12)

plt.xlabel("Sample size (N)")
plt.ylabel("Corrected variance")
plt.savefig('../figures/corrected-mle-variance-quality.pdf', bbox_inches='tight')
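As a sanity check independent of the plotting code above, the same bias can be seen directly from torch's two variance conventions. This is a minimal sketch; the sample size, trial count, and variable names are illustrative and not part of the original notebook.

torch.manual_seed(0)
norm_check = torch.distributions.Normal(0, 1)

N, trials = 5, 10_000
# Draw many samples of size N and average both variance estimators
samples = norm_check.sample((trials, N))
biased = samples.var(dim=1, correction=0).mean()    # divides by N (MLE)
unbiased = samples.var(dim=1, correction=1).mean()  # divides by N - 1 (Bessel-corrected)

print(f"mean biased estimate:   {biased:.3f} (theory: {(N - 1) / N:.3f})")
print(f"mean unbiased estimate: {unbiased:.3f} (theory: 1.000)")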