import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, reduce, repeat
# Observed values for each city; Delhi has 39 readings, Kolhapur only 2.
vals_Delhi = np.array([182, 322, 252, 269, 245, 214, 230, 223, 229, 327, 219, 216, 272, 208, 320, 310, 187, 230, 192, 332, 213, 198, 269, 226, 242, 299, 241, 249, 203, 270, 273, 228, 294, 233, 220, 311, 208, 245, 271])
vals_Kolhapur = np.array([90, 150])

# Mean and standard deviation (np.std default, i.e. ddof=0) of the full Delhi data.
pop_mean = np.mean(vals_Delhi)
pop_stdev = np.std(vals_Delhi)

# Now, consider different subsets of the data of size K and find the mean and standard deviation of each subset
# and plot the mean and standard deviation of each subset.
K = 5
def plot_subsets(vals, K, num_subset=100):
    """Scatter the mean against the standard deviation of random subsets.

    Draws ``num_subset`` random subsets of size ``K`` from ``vals``, plots
    each subset's (mean, standard deviation) as a point, and marks the
    mean/standard deviation of the full ``vals`` array in red.

    Args:
        vals: 1-D array-like of numeric values to sample from.
        K: Number of values drawn for each subset.
        num_subset: How many subsets to draw (defaults to 100).

    Returns:
        None. The plot is drawn on the current matplotlib axes.
    """
    means = []
    stdevs = []
    for _ in range(num_subset):
        # np.random.choice samples with replacement by default, so K may
        # exceed len(vals) and a subset may repeat values.
        subset = np.random.choice(vals, K)
        means.append(np.mean(subset))
        stdevs.append(np.std(subset))

    plt.scatter(means, stdevs, label='Subsets')
    plt.xlabel('Mean')
    plt.ylabel('Standard Deviation')

    # Reference point: statistics of the data actually passed in, rather than
    # module-level globals, so the marker always matches ``vals``.
    plt.scatter([np.mean(vals)], [np.std(vals)], color='red', label='Population', s=100)

    # Axis limits are fixed, so every call is drawn on the same scale.
    plt.xlim(180, 360)
    plt.ylim(-10, 80)
    plt.legend()
    plt.title(f'Mean vs Standard Deviation of Subsets for K = {K}')
# Compare how the spread of subset statistics changes with subset size.
plot_subsets(vals_Delhi, 2)
plot_subsets(vals_Delhi, 5)
plot_subsets(vals_Delhi, 30)
def plot_distribution(vals, city='Delhi'):
    """Plot a normal curve fitted to ``vals`` with its mean and SEM band.

    Draws the normal density with the mean and (ddof=0) standard deviation
    of ``vals``, a dashed line at the mean, a rug of the individual values,
    and a shaded region of +/- 1.96 standard errors around the mean.

    Args:
        vals: 1-D array-like of numeric values. At least two distinct values
            are needed; otherwise the standard deviation or ``len(vals) - 1``
            is zero and the divisions below are degenerate.
        city: Name used in the plot title.

    Returns:
        None. The plot is drawn on the current matplotlib axes.
    """
    # Fit a normal distribution to the data
    # NOTE: std uses ddof=0 even though the title labels it "Sample STDEV".
    mu, std = np.mean(vals), np.std(vals, ddof=0)

    # Plot the normal distribution
    xs = np.linspace(0, 400, 1000)
    ys = 1/(std * np.sqrt(2 * np.pi)) * np.exp( - (xs - mu)**2 / (2 * std**2) )
    plt.plot(xs, ys)

    # Mark the mean
    plt.axvline(mu, color='r', linestyle='--', label = 'Mean')

    # Mark the values via rag plot
    plt.plot(vals, [0]*len(vals), 'k|', label = 'Values')

    # Dividing the ddof=0 std by sqrt(n - 1) is algebraically the same as
    # dividing the ddof=1 std by sqrt(n).
    standard_error_mean = std / np.sqrt(len(vals)-1)

    # Mark the 95% confidence interval of SEM with shading
    fac = 1.96
    sem_times_fac = standard_error_mean * fac
    plt.fill_between(xs, 0, ys, where = (xs > mu - sem_times_fac) & (xs < mu + sem_times_fac), color = 'r', alpha = 0.5, label = '95% CI of SEM')

    plt.legend()
    plt.title(f'Distribution of values in {city}\n Mean: {mu:.2f}, Sample STDEV: {std:.2f} Standard Error of Mean: {standard_error_mean:.2f}')
# Fitted distribution and SEM band for each city's values.
plot_distribution(vals_Delhi, city='Delhi')
plot_distribution(vals_Kolhapur, city='Kolhapur')