import numpy as np
import torch
import torch.distributions as dist
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
= dist.Categorical(probs=torch.tensor([1/6]*6))
dice
# Sample N dice rolls
= 1000
N
= dice.sample((N,)) + 1
rolls # 1 is added to the sample because the sample is 0-indexed
10] rolls[:
tensor([2, 3, 2, 2, 5, 5, 5, 5, 2, 2])
= plt.subplots(figsize=(12, 4))
fig, ax =0.5, marker='o', markersize=1)
ax.plot(rolls, lw'Roll number') ax.set_xlabel(
Text(0.5, 0, 'Roll number')
pd.Series(rolls).value_counts().sort_index()
1 183
2 160
3 188
4 148
5 165
6 156
Name: count, dtype: int64
# Running average
= torch.cumsum(rolls, dim=0) / torch.arange(1, N+1) runing_avg
=1)
plt.plot(runing_avg, lw'Roll number')
plt.xlabel('Running average')
plt.ylabel(3.5, color='red', lw=1, ls='--') plt.axhline(
def compute_running_avg(N):
= dice.sample((N,)) + 1
rolls = torch.cumsum(rolls, dim=0) / torch.arange(1, N+1)
runing_avg return runing_avg
= torch.distributions.Bernoulli(probs=0.5)
dist = dist.sample((100,)) samples
samples.mean()
tensor(0.5100)
pd.Series(samples.numpy()).value_counts()
1.0 51
0.0 49
Name: count, dtype: int64
# Game of winning and losing money based on a coin flip
= 2000
win_amt = -2000
loss_amt = 100 # Number of coin flips
total_count_N
= torch.distributions.Binomial(total_count_N, probs=0.5) dist
= 5000000 # number of players/Number of times we perform total_count_N coin flips num_samples
= dist.sample((num_samples,)) num_times_wins_across_samples
100] num_times_wins_across_samples[:
tensor([46., 54., 58., 58., 58., 55., 55., 47., 50., 54., 54., 40., 52., 54.,
54., 44., 49., 53., 59., 43., 54., 47., 53., 46., 45., 55., 50., 48.,
51., 56., 49., 42., 50., 52., 48., 53., 58., 47., 46., 46., 58., 60.,
40., 44., 53., 47., 50., 52., 42., 50., 45., 52., 52., 51., 46., 33.,
53., 52., 49., 42., 52., 44., 56., 49., 51., 55., 50., 53., 57., 50.,
49., 53., 56., 53., 49., 48., 55., 50., 50., 48., 59., 53., 54., 37.,
48., 57., 41., 60., 55., 59., 53., 55., 50., 46., 51., 47., 51., 44.,
43., 55.])
if num_samples<=50:
= plt.subplots(figsize=(12, 4))
fig, ax ='bar', rot=0, ax=ax)
pd.Series(num_times_wins_across_samples.numpy()).plot(kind'Sample number')
plt.xlabel('Number of wins') plt.ylabel(
= num_times_wins_across_samples * win_amt + (total_count_N - num_times_wins_across_samples) * loss_amt
net_money_won = pd.Series(net_money_won.numpy()) net_money_won_series
net_money_won_series
0 -16000.0
1 16000.0
2 32000.0
3 32000.0
4 32000.0
...
4999995 -16000.0
4999996 52000.0
4999997 -44000.0
4999998 -4000.0
4999999 -20000.0
Length: 5000000, dtype: float32
net_money_won_series.var()
399787200.0
max() - net_money_won_series.min() net_money_won_series.
192000.0
if num_samples<=50:
= plt.subplots(figsize=(12, 4))
fig, ax ='bar', rot=0, ax=ax)
net_money_won_series.plot(kind0, color='red', lw=1, ls='--') ax.axhline(
=False) net_money_won_series.hist(grid
net_money_won_series.mean()
-6.3688
net_money_won_series.var()
399787200.0
/((win_amt**2) * total_count_N) net_money_won_series.var()
0.999468
# same analysis but starting from Bernoulli distribution
= 2000
win_amt = -2000
loss_amt = 100 # Number of coin flips total_count_N
# Simulate game
= torch.distributions.Bernoulli(probs=0.5) dist
= dist.sample(torch.Size([num_samples, total_count_N])) overall_samples
overall_samples.shape
torch.Size([5000, 100])
= pd.DataFrame(overall_samples.numpy()) overall_samples_df
overall_samples_df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | ... | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 |
1 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | ... | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 |
2 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | ... | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 |
3 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 |
4 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | ... | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 |
5 rows × 100 columns
# win and loss -- replace 0 with -2000 and 1 with 2000
= overall_samples_df.replace({0: loss_amt, 1: win_amt})
win_amount_df win_amount_df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -2000.0 | -2000.0 | 2000.0 | -2000.0 | 2000.0 | 2000.0 | 2000.0 | 2000.0 | 2000.0 | -2000.0 | ... | 2000.0 | -2000.0 | 2000.0 | 2000.0 | 2000.0 | 2000.0 | 2000.0 | -2000.0 | -2000.0 | 2000.0 |
1 | -2000.0 | -2000.0 | -2000.0 | 2000.0 | 2000.0 | -2000.0 | 2000.0 | -2000.0 | -2000.0 | 2000.0 | ... | -2000.0 | 2000.0 | 2000.0 | -2000.0 | -2000.0 | 2000.0 | -2000.0 | 2000.0 | -2000.0 | 2000.0 |
2 | -2000.0 | 2000.0 | 2000.0 | 2000.0 | 2000.0 | 2000.0 | -2000.0 | -2000.0 | 2000.0 | 2000.0 | ... | 2000.0 | 2000.0 | -2000.0 | 2000.0 | 2000.0 | 2000.0 | 2000.0 | -2000.0 | 2000.0 | 2000.0 |
3 | 2000.0 | 2000.0 | 2000.0 | -2000.0 | -2000.0 | -2000.0 | 2000.0 | 2000.0 | -2000.0 | -2000.0 | ... | -2000.0 | -2000.0 | -2000.0 | 2000.0 | 2000.0 | -2000.0 | 2000.0 | 2000.0 | 2000.0 | -2000.0 |
4 | -2000.0 | 2000.0 | -2000.0 | -2000.0 | -2000.0 | 2000.0 | 2000.0 | -2000.0 | -2000.0 | 2000.0 | ... | 2000.0 | -2000.0 | 2000.0 | -2000.0 | 2000.0 | 2000.0 | 2000.0 | -2000.0 | 2000.0 | 2000.0 |
5 rows × 100 columns
# Net money won
= win_amount_df.sum(axis=1)
net_money_won net_money_won
0 16000.0
1 -4000.0
2 40000.0
3 24000.0
4 -4000.0
...
4995 0.0
4996 -12000.0
4997 16000.0
4998 12000.0
4999 40000.0
Length: 5000, dtype: float32
=False) net_money_won.hist(grid
net_money_won.var()
397101630.0
net_money_won.mean()
561.6
# Plotting cumulative sum of net money won for first 5 players
= plt.subplots(figsize=(12, 4))
fig, ax for i in range(5):
#win_amount_df.iloc[i].values.cumsum()
=1) ax.plot(win_amount_df.iloc[i].values.cumsum(), lw
= win_amount_df.cumsum(axis=1)
cumsum_df cumsum_df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -2000.0 | -4000.0 | -2000.0 | -4000.0 | -2000.0 | 0.0 | 2000.0 | 4000.0 | 6000.0 | 4000.0 | ... | 10000.0 | 8000.0 | 10000.0 | 12000.0 | 14000.0 | 16000.0 | 18000.0 | 16000.0 | 14000.0 | 16000.0 |
1 | -2000.0 | -4000.0 | -6000.0 | -4000.0 | -2000.0 | -4000.0 | -2000.0 | -4000.0 | -6000.0 | -4000.0 | ... | -6000.0 | -4000.0 | -2000.0 | -4000.0 | -6000.0 | -4000.0 | -6000.0 | -4000.0 | -6000.0 | -4000.0 |
2 | -2000.0 | 0.0 | 2000.0 | 4000.0 | 6000.0 | 8000.0 | 6000.0 | 4000.0 | 6000.0 | 8000.0 | ... | 30000.0 | 32000.0 | 30000.0 | 32000.0 | 34000.0 | 36000.0 | 38000.0 | 36000.0 | 38000.0 | 40000.0 |
3 | 2000.0 | 4000.0 | 6000.0 | 4000.0 | 2000.0 | 0.0 | 2000.0 | 4000.0 | 2000.0 | 0.0 | ... | 22000.0 | 20000.0 | 18000.0 | 20000.0 | 22000.0 | 20000.0 | 22000.0 | 24000.0 | 26000.0 | 24000.0 |
4 | -2000.0 | 0.0 | -2000.0 | -4000.0 | -6000.0 | -4000.0 | -2000.0 | -4000.0 | -6000.0 | -4000.0 | ... | -10000.0 | -12000.0 | -10000.0 | -12000.0 | -10000.0 | -8000.0 | -6000.0 | -8000.0 | -6000.0 | -4000.0 |
5 rows × 100 columns
= plt.figure(figsize=(14, 6))
fig = fig.add_gridspec(1, 2, width_ratios=[4, 1]) # 4:1 ratio for main plot and histogram
gs
# Main plot (left plot for cumulative sum)
= fig.add_subplot(gs[0])
ax1 = win_amount_df.cumsum(axis=1)
cumsum_df =False, lw=1, alpha=0.1, color='k', ax=ax1)
cumsum_df.T.plot(legend"Number of coin flips")
ax1.set_xlabel("Net money won")
ax1.set_ylabel(
# Right plot (histogram)
= fig.add_subplot(gs[1])
ax2 ='gray', alpha=0.5, orientation='horizontal', bins=30)
ax2.hist(net_money_won, color"Frequency")
ax2.set_xlabel(
# Adjust layout
plt.tight_layout()'coin_flip_game.png', dpi=600) plt.savefig(
= np.linspace(0, 1, 1000)
p
*(1-p), label='p(1-p)') plt.plot(p, p
= 0.5
p = torch.distributions.Binomial(total_count=2, probs=p) dist
= 100000000
N = dist.sample((N,)) samples
samples.mean().item()
0.9999343752861023
### Finding the mean of Gaussian distrubution
= torch.linspace(-10, 10, 1000)
y_lin = 2.0
sigma
= y_lin*torch.exp(-y_lin**2/(2*sigma**2))
out
plt.plot(y_lin, out)
# Drawing coordinate axis at x = 0, y = 0
0, color='black', lw=0.5)
plt.axhline(0, color='black', lw=0.5)
plt.axvline(
='gray', alpha=0.5) plt.fill_between(y_lin, out, color