import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
# Retina display
%config InlineBackend.figure_format = 'retina'
# Biased and Unbiased Estimators
from tueplots import bundles
# Start from the tueplots Beamer bundle, then override individual settings.
plt.rcParams.update(bundles.beamer_moml())

# Also add despine to the bundle using rcParams
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False

# Increase font size to match Beamer template
plt.rcParams['font.size'] = 16

# Make background transparent
plt.rcParams['figure.facecolor'] = 'none'
# Standard normal distribution used as the data-generating process.
dist = torch.distributions.Normal(0, 1)

# Generate data
data = dist.sample((100,))

# Plot data (histogram with a KDE overlay); the returned grid is discarded.
_ = sns.displot(data, kde=True)
/home/nipun.batra/miniforge3/lib/python3.9/site-packages/seaborn/axisgrid.py:88: UserWarning: This figure was using constrained_layout, but that is incompatible with subplots_adjust and/or tight_layout; disabling constrained_layout.
self._figure.tight_layout(*args, **kwargs)
torch.std??
Docstring:
std(input, dim=None, *, correction=1, keepdim=False, out=None) -> Tensor
Calculates the standard deviation over the dimensions specified by :attr:`dim`.
:attr:`dim` can be a single dimension, list of dimensions, or ``None`` to
reduce over all dimensions.
The standard deviation (:math:`\sigma`) is calculated as
.. math:: \sigma = \sqrt{\frac{1}{N - \delta N}\sum_{i=0}^{N-1}(x_i-\bar{x})^2}
where :math:`x` is the sample set of elements, :math:`\bar{x}` is the
sample mean, :math:`N` is the number of samples and :math:`\delta N` is
the :attr:`correction`.
If :attr:`keepdim` is ``True``, the output tensor is of the same size
as :attr:`input` except in the dimension(s) :attr:`dim` where it is of size 1.
Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in the
output tensor having 1 (or ``len(dim)``) fewer dimension(s).
Args:
input (Tensor): the input tensor.
dim (int or tuple of ints): the dimension or dimensions to reduce.
Keyword args:
correction (int): difference between the sample size and sample degrees of freedom.
Defaults to `Bessel's correction`_, ``correction=1``.
.. versionchanged:: 2.0
Previously this argument was called ``unbiased`` and was a boolean
with ``True`` corresponding to ``correction=1`` and ``False`` being
``correction=0``.
keepdim (bool): whether the output tensor has :attr:`dim` retained or not.
out (Tensor, optional): the output tensor.
Example:
>>> a = torch.tensor(
... [[ 0.2035, 1.2959, 1.8101, -0.4644],
... [ 1.5027, -0.3270, 0.5905, 0.6538],
... [-1.5745, 1.3330, -0.5596, -0.6548],
... [ 0.1264, -0.5080, 1.6420, 0.1992]])
>>> torch.std(a, dim=1, keepdim=True)
tensor([[1.0311],
[0.7477],
[1.2204],
[0.9087]])
.. _Bessel's correction: https://en.wikipedia.org/wiki/Bessel%27s_correction
Type: builtin_function_or_method
# Compare standard-deviation defaults across libraries on the same `data`:
# the first and last entries agree, as do the middle two (see the output below).
np.std(data.numpy()), pd.Series(data.numpy()).std(), torch.std(data), torch.std(data, correction=0)
(1.0807172, 1.0861616, tensor(1.0862), tensor(1.0807))
# Population
norm = torch.distributions.Normal(0, 1)

# Evaluate the density on a grid; exp(log_prob) gives p(x).
xs = torch.linspace(-10, 10, 1000)
ys = torch.exp(norm.log_prob(xs))

plt.plot(xs, ys)
plt.title('Population Distribution')
plt.xlabel('x')
plt.ylabel('p(x)')
plt.savefig('../figures/mle/population-dist.pdf')
# Draw the population from `norm` and show every draw as a faint tick at y=0.
population = norm.sample((100000,))
plt.scatter(population, torch.zeros_like(population), marker='|', alpha=0.1)
plt.savefig('../figures/mle/population.pdf')
# Overlay two independent size-10 samples (drawn without replacement via a
# random permutation of the population indices) on the population density.
plt.plot(xs, ys)
sample_1 = population[torch.randperm(population.size(0))[:10]]
sample_2 = population[torch.randperm(population.size(0))[:10]]
plt.scatter(sample_1, torch.zeros_like(sample_1), label='sample 1')
plt.scatter(sample_2, torch.zeros_like(sample_2), label='sample 2')
plt.legend()
plt.savefig('../figures/mle/sample.pdf')
# Re-create the standard normal and a population of 100000 draws; both are
# read as globals by plot_fit below.
norm = torch.distributions.Normal(0, 1)
population = norm.sample((100000,))
def plot_fit(seed, num_samples):
    """Fit a normal to a random sample of the population and plot the fit.

    Seeds torch's RNG with ``seed``, picks ``num_samples`` elements of the
    module-level ``population`` (without replacement, via ``randperm``), and
    estimates the mean and the variance with ``correction=0`` (no Bessel
    correction). The sample, the density of the module-level ``norm`` and the
    estimated density are drawn on the current matplotlib figure, which is
    then saved to ``../figures/mle/biased-mle-normal-{num_samples}-{seed}.pdf``.

    Args:
        seed: value passed to ``torch.manual_seed``.
        num_samples: number of population elements to draw.

    Returns:
        Tuple ``(mu, sigma_2)``: the sample mean and the uncorrected sample
        variance, as returned by ``Tensor.mean`` / ``Tensor.var``.
    """
    torch.manual_seed(seed)

    # Select a random sample of size num_samples from the population
    data = population[torch.randperm(population.shape[0])[:num_samples]]
    mu = data.mean()
    sigma_2 = data.var(correction=0)

    # Plot data scatter
    plt.scatter(data, torch.zeros_like(data), color='black', marker='x', zorder=10, label='samples')

    # Plot true distribution
    x = torch.linspace(-3, 3, 100)
    plt.plot(x, norm.log_prob(x).exp(), color='black', label='true distribution')

    # Plot estimated distribution (Normal takes a scale, hence the sqrt)
    est = torch.distributions.Normal(mu, sigma_2.sqrt())
    plt.plot(x, est.log_prob(x).exp(), color='red', label='estimated distribution')

    plt.legend()
    plt.title(f"Sample size: {num_samples}\n" +fr"Sample parameters: $\hat{{\mu}}={mu:0.02f}$, $\hat{{\sigma^2}}={sigma_2:0.02f}$")
    plt.ylim(-0.1, 1.2)
    plt.xlabel("$x$")
    plt.ylabel("$p(x)$")
    plt.savefig(f"../figures/mle/biased-mle-normal-{num_samples}-{seed}.pdf", bbox_inches='tight')
    return mu, sigma_2
# For each sample size, repeat the fit N_samples times (seeds 0..N_samples-1)
# and record the estimated mean and variance of every repetition.
N_samples = 100
mus = {}
sigmas = {}
for draw in [3, 4, 5, 10, 100]:
    mus[draw] = torch.zeros(N_samples)
    sigmas[draw] = torch.zeros(N_samples)
    for i in range(N_samples):
        # plot_fit draws on the current figure, so clear it between fits.
        plt.clf()
        mus[draw][i], sigmas[draw][i] = plot_fit(i, draw)
# One scatter plot per sample size: estimated mean (x) against estimated
# variance (y), with reference lines at y=1 and x=0 labelled as the true
# variance and true mean.
for draw in [3, 4, 5, 10, 100]:
    plt.clf()
    plt.scatter(mus[draw], sigmas[draw])
    plt.axhline(y=1, color='k', linestyle='--', label=r'$\sigma^2$')
    plt.axvline(x=0, color='g', linestyle='-.', label=r'$\mu$')
    plt.xlabel(r'$\hat{\mu}$')
    plt.ylabel(r'$\hat{\sigma^2}$')
    plt.legend()
    plt.title(f'Sample size={draw}\n'+ fr'$E[\hat{{\mu}}] = {mus[draw].mean():0.2f}$ $E[\hat{{\sigma^2}}] = {sigmas[draw].mean():0.2f}$ ')
    plt.savefig(f"../figures/mle/biased-mle-normal-scatter-{draw}.pdf", bbox_inches='tight')
# Average the uncorrected variance estimates over all repetitions, giving one
# value per sample size (the Series index is the sample size).
df = pd.DataFrame({draw:
                   sigmas[draw].numpy()
                   for draw in [3, 4, 5, 10, 100]}).mean()

df.plot(kind='bar', rot=0)
plt.axhline(1, color='k', linestyle='--')

# Put numbers on top of bars
for i, v in enumerate(df):
    plt.text(i - .1, v + .01, f'{v:.3f}', color='k', fontsize=12)

plt.xlabel("Sample size (N)")
# `sigmas` stores data.var(correction=0), i.e. variances, so the axis is
# labelled as a variance rather than a standard deviation.
plt.ylabel("Estimated variance")
plt.savefig('../figures/biased-mle-variance-quality.pdf', bbox_inches='tight')
3 0.981293
4 1.014205
5 0.952345
10 1.022295
100 0.985779
dtype: float64
# Apply Bessel's correction, N / (N - 1), to the averaged uncorrected
# estimates; the Series index holds the sample size N.
df_unbiased = df*(df.index/(df.index-1.0))

df_unbiased.plot(kind='bar', rot=0)
plt.axhline(1, color='k', linestyle='--')

# Put numbers on top of bars
for i, v in enumerate(df_unbiased):
    plt.text(i - .1, v + .01, f'{v:.3f}', color='k', fontsize=12)

plt.xlabel("Sample size (N)")
# The N / (N - 1) factor is applied to `df` directly, so the plotted quantity
# is what the axis label names: a corrected variance, not a standard deviation.
plt.ylabel("Corrected variance")
plt.savefig('../figures/corrected-mle-variance-quality.pdf', bbox_inches='tight')