Random Variables

ML
Author

Nipun Batra

Published

December 5, 2024

import matplotlib.pyplot as plt
import numpy as np
import torch 
import pandas as pd
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Sample space for two coin flips: first flip followed by second flip
sample_space = [first + second for first in "HT" for second in "HT"]
# Define a random variable X as the number of Heads in the outcomes
def random_variable_X(outcome):
    """Map an outcome to X, the number of heads ("H") it contains."""
    heads = sum(1 for flip in outcome if flip == "H")
    return heads
# X evaluated on a few individual outcomes
tuple(random_variable_X(outcome) for outcome in ("HT", "TT", "HH", "TH"))
(1, 0, 2, 1)
# Mapping of outcomes to the random variable values
mapping = dict(zip(sample_space, map(random_variable_X, sample_space)))
mapping
{'HH': 2, 'HT': 1, 'TH': 1, 'TT': 0}
# One row per outcome (index "Outcome"), one column "X" holding the value of X
df = pd.Series(mapping, name="X").rename_axis("Outcome").to_frame()
df
X
Outcome
HH 2
HT 1
TH 1
TT 0
# Find records/samples where X = 1
df["X"].eq(1)
Outcome
HH    False
HT     True
TH     True
TT    False
Name: X, dtype: bool
df.loc[df["X"].eq(1)]
X
Outcome
HT 1
TH 1
# Calculate probabilities for X = 0, 1, 2

def calculate_probability_X(x, df):
    """Return P(X = x) as the fraction of rows in ``df`` whose "X" equals ``x``.

    Every row of ``df`` is counted with equal weight.
    """
    matches = df["X"] == x
    favourable = len(df.loc[matches])
    return favourable / len(df)
tuple(calculate_probability_X(value, df) for value in (0, 1, 2))
(0.25, 0.5, 0.25)
# Store inverse mapping

# Group the outcomes by the value X assigns to them.
# The keys are taken from the values X actually produces instead of a
# hard-coded range(3), so a different sample space or random variable does
# not raise KeyError; sorting keeps the printed dict ordered by value.
inverse_mapping = {value: [] for value in sorted(set(mapping.values()))}
for outcome, value in mapping.items():
    inverse_mapping[value].append(outcome)

print(inverse_mapping)
{0: ['TT'], 1: ['HT', 'TH'], 2: ['HH']}
def calculate_probability_X(x, inverse_mapping):
    """Return P(X = x) from a mapping of X values to their outcomes.

    Args:
        x: Value of the random variable.
        inverse_mapping: Dict mapping each value of X to the list of
            outcomes that produce it. Together the lists are taken to be
            the whole sample space, each outcome counted with equal weight.

    Returns:
        The number of outcomes mapped to ``x`` divided by the total number
        of outcomes; ``0.0`` when no outcome maps to ``x``.

    Raises:
        ValueError: If ``inverse_mapping`` contains no outcomes at all.
    """
    # Count the sample space from the mapping itself rather than from the
    # module-level ``sample_space``: that name can be rebound to a different
    # experiment, which would silently change the denominator.
    total_outcomes = sum(len(outcomes) for outcomes in inverse_mapping.values())
    if total_outcomes == 0:
        raise ValueError("inverse_mapping contains no outcomes")
    # A value that X never takes has probability 0 rather than raising KeyError.
    return len(inverse_mapping.get(x, ())) / total_outcomes

tuple(calculate_probability_X(value, inverse_mapping) for value in (0, 1, 2))
(0.25, 0.5, 0.25)
### Two dice example

# Construct the sample space

# Every ordered pair (first die, second die), each face running from 1 to 6
sample_space = [
    (die_one, die_two)
    for die_one in range(1, 7)
    for die_two in range(1, 7)
]
print(sample_space)
[(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (2, 1), (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5), (3, 6), (4, 1), (4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (5, 6), (6, 1), (6, 2), (6, 3), (6, 4), (6, 5), (6, 6)]
# Define a random variable X1 as the sum of the outcomes
# Define a random variable X2 as the product of the outcomes
# Define a random variable X3 as the maximum of the outcomes

def random_variable_X1(outcome):
    """Return X1, the sum of the values in ``outcome``."""
    total = 0
    for face in outcome:
        total = total + face
    return total

def random_variable_X2(outcome):
    """Return X2, the product of the first two values in ``outcome``."""
    first, second = outcome[0], outcome[1]
    return first * second

def random_variable_X3(outcome):
    """Return X3, the largest value in ``outcome``."""
    largest = max(outcome)
    return largest
# Quick check of X1 on a single outcome: 1 + 2 gives 3
random_variable_X1([1, 2])
3
# Tabulate every two-dice outcome together with its sum X1
df = pd.DataFrame(sample_space, columns=["D1", "D2"])
# X1 is computed row by row from the two dice columns
df["X1"] = df.apply(random_variable_X1, axis=1)
df = df.rename_axis("Serial No.")

# Rows whose sum equals 10
df.loc[df["X1"] == 10]
D1 D2 X1
Serial No.
23 4 6 10
28 5 5 10
33 6 4 10
# Create interactive ipywidgets for the sum of the outcomes
import ipywidgets as widgets
from ipywidgets import interact

@interact(x=widgets.IntSlider(min=2, max=12, step=1, value=7))
def show_samples(x):
    """Return the rows of ``df`` whose sum X1 equals the slider value ``x``."""
    has_sum_x = df["X1"] == x
    return df.loc[has_sum_x]