Introduction to Random Variables

Categories: Probability, Statistics, Mathematics, Random Variables

A comprehensive introduction to random variables, their mathematical foundations, and practical applications. Covers discrete random variables, probability mass functions, and the relationship between histograms and theoretical distributions.

Author: Nipun Batra
Published: December 5, 2024
Keywords: random variables, probability mass function, discrete distributions, sample space, outcomes, histogram vs pmf, dice simulation


Learning Objectives

By the end of this notebook, you will understand:

  1. Mathematical Definition: What random variables are and how they map outcomes to numerical values
  2. Sample Spaces: Understanding the domain of random variables and possible outcomes
  3. Discrete Random Variables: Working with countable outcomes and their probabilities
  4. Probability Mass Functions (PMF): The mathematical description of discrete random variable distributions
  5. Practical Applications: Real-world examples including dice games and two-dice scenarios
  6. Simulation vs Theory: Understanding the relationship between histograms and theoretical PMFs

Introduction

A random variable is one of the most fundamental concepts in probability theory. Despite its name, a random variable is not actually a variable in the traditional sense, nor is it random. Instead, it’s a mathematical function that assigns numerical values to the outcomes of a random experiment.

Formal Definition

Mathematically, a random variable \(X\) is a function that maps each outcome in a sample space \(\Omega\) to a real number:

\[X: \Omega \rightarrow \mathbb{R}\]

This mapping allows us to:

  • Convert non-numerical outcomes (like coin flips) into numbers
  • Apply mathematical operations and statistical analysis
  • Calculate probabilities for ranges of values
  • Build predictive models
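
To make the definition concrete, here is a minimal sketch of a random variable as an ordinary Python function on outcomes. The indicator variable at_least_one_head is a hypothetical example introduced here for illustration, not part of the examples that follow:

# A random variable is just a function from outcomes to real numbers.
# Hypothetical indicator variable: 1 if a two-flip outcome contains a head, else 0.
def at_least_one_head(outcome):
    return 1 if "H" in outcome else 0

at_least_one_head("TT"), at_least_one_head("HT")
(0, 1)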

Why Random Variables Matter

Random variables are essential because they:

  1. Bridge the gap between abstract probability theory and practical applications
  2. Enable mathematical analysis of uncertain events
  3. Provide a framework for statistical inference
  4. Allow us to model real-world phenomena quantitatively

Let’s explore these concepts through hands-on examples and interactive simulations!

Setting Up the Environment

We’ll use standard Python libraries for our probability and statistical computations:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Render figures in high-resolution "retina" mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Example 1: Coin Flipping Experiment

Let’s start with a fundamental example: flipping a coin twice. This will illustrate how random variables work in practice.

Sample Space

For two coin flips, our sample space consists of all possible outcomes:

# Sample space for flipping a coin 2 times
sample_space = ["HH", "HT", "TH", "TT"]

Defining a Random Variable

Now let’s define a random variable \(X\) that counts the number of heads in each outcome:

# Define a random variable X as the number of heads in an outcome
def random_variable_X(outcome):
    return outcome.count("H")

Applying \(X\) to each outcome in the sample space:

random_variable_X("HT"), random_variable_X("TT"), random_variable_X("HH"), random_variable_X("TH")
(1, 0, 2, 1)


Creating the Mapping

Let’s create a systematic mapping from outcomes to random variable values:

# Mapping of outcomes to the random variable values
mapping = {outcome: random_variable_X(outcome) for outcome in sample_space}
mapping
{'HH': 2, 'HT': 1, 'TH': 1, 'TT': 0}

The same mapping can be displayed as a table:

df = pd.DataFrame(mapping, index=["X"]).T
df.index.name = "Outcome"
df

         X
Outcome
HH       2
HT       1
TH       1
TT       0

This table clearly shows how our random variable \(X\) maps each outcome to a numerical value representing the count of heads.


Finding Specific Outcomes

We can use our random variable to find which outcomes correspond to specific values. For example, let’s find all outcomes where \(X = 1\) (exactly one head):

# Find records/samples where X = 1
df["X"] == 1
Outcome
HH    False
HT     True
TH     True
TT    False
Name: X, dtype: bool

df[df["X"] == 1]

         X
Outcome
HT       1
TH       1


Computing Probabilities

Now we can calculate the probability distribution of our random variable. For a fair coin, each outcome is equally likely with probability 1/4:

# Calculate P(X = x) as the fraction of equally likely outcomes where X equals x
def calculate_probability_X(x, df):
    subset = df[df["X"] == x]
    len_subset = len(subset)
    len_df = len(df)
    return len_subset / len_df

calculate_probability_X(0, df), calculate_probability_X(1, df), calculate_probability_X(2, df)
(0.25, 0.5, 0.25)


An equivalent approach is to store the inverse mapping, grouping outcomes by the value \(X\) assigns them:

# Store inverse mapping: value of X -> list of outcomes with that value
inverse_mapping = {x: [] for x in range(3)}
for outcome, value in mapping.items():
    inverse_mapping[value].append(outcome)

print(inverse_mapping)
{0: ['TT'], 1: ['HT', 'TH'], 2: ['HH']}

This creates an inverse mapping that will be useful for probability calculations. Notice how multiple outcomes can map to the same random variable value.

# Redefine calculate_probability_X to work from the inverse mapping instead of the DataFrame
def calculate_probability_X(x, inverse_mapping):
    outcomes = inverse_mapping[x]
    len_outcomes = len(outcomes)
    len_sample_space = len(sample_space)
    return len_outcomes / len_sample_space

calculate_probability_X(0, inverse_mapping), calculate_probability_X(1, inverse_mapping), calculate_probability_X(2, inverse_mapping)
(0.25, 0.5, 0.25)

These probabilities form the Probability Mass Function (PMF) of our random variable:

  • \(P(X = 0) = 0.25\) (no heads)
  • \(P(X = 1) = 0.50\) (one head)
  • \(P(X = 2) = 0.25\) (two heads)

Notice that these probabilities sum to 1, as required for any probability distribution.
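
As a quick sanity check, here is a minimal sketch reusing the inverse-mapping version of calculate_probability_X defined above:

# The PMF values over all x must add up to exactly 1
sum(calculate_probability_X(x, inverse_mapping) for x in range(3))
1.0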

Example 2: Two Dice Roll

Let’s explore a more complex example with two six-sided dice. This will introduce multiple random variables defined on the same sample space.

# Construct the sample space: all 36 ordered pairs of faces
sample_space = [(i, j) for i in range(1, 7) for j in range(1, 7)]
print(sample_space)
[(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (2, 1), (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5), (3, 6), (4, 1), (4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (5, 6), (6, 1), (6, 2), (6, 3), (6, 4), (6, 5), (6, 6)]

Multiple Random Variables

From the same sample space, we can define several different random variables:

  • \(X_1\): Sum of the two dice
  • \(X_2\): Product of the two dice
  • \(X_3\): Maximum of the two dice

Each provides a different numerical perspective on the same outcomes:

# Define X1 as the sum, X2 as the product, and X3 as the maximum of the two dice
def random_variable_X1(outcome):
    return sum(outcome)

def random_variable_X2(outcome):
    return outcome[0] * outcome[1]

def random_variable_X3(outcome):
    return max(outcome)

random_variable_X1((1, 2))
3
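
For completeness, a small check (not in the original notebook) that the other two random variables behave as expected on the same outcome:

random_variable_X2((1, 2)), random_variable_X3((1, 2))
(2, 2)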


Data Analysis with Random Variables

Let’s create a comprehensive dataset and analyze specific values:

# Tabulate the sample space and evaluate X1 on every outcome
df = pd.DataFrame(sample_space, columns=["D1", "D2"])

df["X1"] = df.apply(lambda row: random_variable_X1(row), axis=1)
df.index.name = "Serial No."
df[df["X1"] == 10]

            D1  D2  X1
Serial No.
23           4   6  10
28           5   5  10
33           6   4  10

These are all the ways to get a sum of 10 with two dice. Notice there are exactly 3 ways out of 36 possible outcomes, giving a probability of 3/36 = 1/12 ≈ 0.083.
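
To confirm the arithmetic, here is a minimal sketch using the boolean mask from above:

# P(X1 = 10) = favorable outcomes / total outcomes
(df["X1"] == 10).sum() / len(df)
0.08333333333333333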

Interactive Exploration

This interactive widget allows you to explore how many ways different sums can occur:

# Create an interactive slider over all possible sums of two dice
import ipywidgets as widgets
from ipywidgets import interact

@interact(x=widgets.IntSlider(min=2, max=12, step=1, value=7))
def show_samples(x):
    return df[df["X1"] == x]

Understanding PMF vs Histogram

A crucial concept in probability is understanding the difference between:

  • Probability Mass Function (PMF): the theoretical probability distribution
  • Histogram: the empirical distribution from actual data

Theoretical PMF of a Fair Die

For a fair six-sided die, the theoretical PMF assigns equal probability to each outcome:

# PMF of a fair die: every face has probability 1/6
die_pmf = pd.Series([1/6]*6, index=[1, 2, 3, 4, 5, 6])
die_pmf.plot(kind='bar', rot=0)
plt.xlabel('Face')
plt.ylabel('Probability')
plt.title('PMF of a fair die')

Comparing Theory with Simulation

Now let’s see how histograms from actual die rolls compare to the theoretical PMF. As we increase the number of rolls, the histogram should converge to the PMF:

# Plot a histogram of N simulated rolls with the theoretical PMF overlaid
def die_hist(N):
    rolls = np.random.randint(1, 7, N)
    fig, ax = plt.subplots()
    hist = pd.Series(rolls).value_counts(normalize=True).sort_index()
    hist.plot(kind='bar', rot=0, ax=ax, label='Histogram', alpha=0.5, color='C0')
    ax.set_xlabel('Face')
    ax.set_ylabel('Relative frequency')
    ax.set_title(f'Histogram of {N} rolls of a fair die')

    # Plot the ideal PMF on the same axes
    die_pmf.plot(kind='bar', rot=0, ax=ax, alpha=0.5, label='PMF', color='C1')
    ax.legend()

die_hist(10)
die_hist(100)
die_hist(50000)
die_hist(500000)
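
A non-graphical way to see the same convergence is to track the worst-case gap between the empirical face frequencies and 1/6 as \(N\) grows. This is a minimal sketch, not part of the original notebook:

# Maximum absolute deviation of empirical frequencies from the PMF value 1/6
for N in [10, 100, 50000, 500000]:
    rolls = np.random.randint(1, 7, N)
    freqs = pd.Series(rolls).value_counts(normalize=True).reindex(range(1, 7), fill_value=0)
    print(N, (freqs - 1/6).abs().max())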

Summary

In this notebook, we’ve explored the fundamental concept of random variables through:

Key Concepts Covered

  1. Mathematical Foundation: Random variables as functions mapping outcomes to numbers
  2. Sample Spaces: Understanding the domain of random experiments
  3. Probability Calculations: Computing PMFs from inverse mappings
  4. Multiple Variables: Different random variables on the same sample space
  5. Theory vs Practice: Comparing theoretical PMFs with empirical histograms

Important Insights

  • Random variables transform qualitative outcomes into quantitative analysis
  • The same sample space can support multiple random variables
  • PMFs provide the theoretical foundation for probability calculations
  • Histograms converge to PMFs as sample size increases (Law of Large Numbers)
  • Interactive tools help build intuition about probability distributions

Mathematical Relationships

For any discrete random variable \(X\):

  • \(\sum_{x} P(X = x) = 1\) (probabilities sum to 1)
  • \(P(X = x) \geq 0\) for all \(x\) (probabilities are non-negative)
  • \(P(X \in A) = \sum_{x \in A} P(X = x)\) (probability of sets)
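
These properties are easy to verify numerically; here is a minimal sketch using the two-dice DataFrame df built in Example 2:

# PMF of X1 (sum of two dice) over the 36 equally likely outcomes
pmf_X1 = df["X1"].value_counts(normalize=True).sort_index()
print(pmf_X1.sum())                 # probabilities sum to 1
print((pmf_X1 >= 0).all())          # probabilities are non-negative
print(pmf_X1.loc[[2, 3, 4]].sum())  # P(X1 in {2, 3, 4}) by summing PMF values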

Next Steps

Now that you understand random variables, you’re ready to explore:

  • Continuous random variables and probability density functions
  • Cumulative distribution functions for both discrete and continuous variables
  • Common probability distributions and their applications
  • Joint distributions and relationships between multiple random variables

Random variables are the building blocks of probability theory. Master them, and you’ll have a solid foundation for all of statistics and data science!
