import matplotlib.pyplot as plt
import numpy as np
import torch
import pandas as pd
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
Random Variables
ML
# Sample space for flipping a coin 2 times
sample_space = ["HH", "HT", "TH", "TT"]
# Define a random variable X as the number of Heads in the outcomes
def random_variable_X(outcome):
    """Return the number of heads in a coin-flip outcome.

    Args:
        outcome: A sequence of flips in which "H" marks a head,
            e.g. the string "HT".

    Returns:
        How many times "H" occurs in ``outcome``.
    """
    return outcome.count("H")
# Evaluate X on a few outcomes; as the last expression of the cell the tuple is displayed
random_variable_X("HT"), random_variable_X("TT"), random_variable_X("HH"), random_variable_X("TH")
(1, 0, 2, 1)
# Mapping of outcomes to the random variable values
mapping = {outcome: random_variable_X(outcome) for outcome in sample_space}
mapping
{'HH': 2, 'HT': 1, 'TH': 1, 'TT': 0}
# Build a single row labelled "X" from the mapping, then transpose so that
# each outcome becomes a row and "X" becomes the only column.
df = pd.DataFrame(mapping, index=["X"]).T
df.index.name = "Outcome"
df
| Outcome | X |
|---|---|
| HH | 2 |
| HT | 1 |
| TH | 1 |
| TT | 0 |
# Find records/samples where X = 1 (boolean mask, one entry per outcome)
df["X"] == 1
Outcome
HH False
HT True
TH True
TT False
Name: X, dtype: bool
# Use the boolean mask to keep only the rows where X = 1
df[df["X"] == 1]
| Outcome | X |
|---|---|
| HT | 1 |
| TH | 1 |
# Calculate probabilities for X = 0, 1, 2
def calculate_probability_X(x, df):
    """Return P(X = x) as the fraction of rows of ``df`` whose "X" equals ``x``.

    Every row is counted once, so all outcomes (rows) are treated as
    equally likely.

    Args:
        x: Value of the random variable to look up.
        df: DataFrame with one row per outcome and a column named "X".

    Returns:
        Number of rows with ``X == x`` divided by the total number of rows.

    Raises:
        ZeroDivisionError: If ``df`` has no rows.
    """
    subset = df[df["X"] == x]
    len_subset = len(subset)
    len_df = len(df)
    return len_subset / len_df
# Probabilities of X = 0, 1, 2 computed from the DataFrame
calculate_probability_X(0, df), calculate_probability_X(1, df), calculate_probability_X(2, df)
(0.25, 0.5, 0.25)
# Store inverse mapping: value of X -> list of outcomes that produce it.
# Keys 0, 1, 2 are created up front so the append below never hits a missing key.
inverse_mapping = {x: [] for x in range(3)}
for outcome, value in mapping.items():
    inverse_mapping[value].append(outcome)
print(inverse_mapping)
{0: ['TT'], 1: ['HT', 'TH'], 2: ['HH']}
def calculate_probability_X(x, inverse_mapping):
    """Return P(X = x) using the inverse mapping from values to outcomes.

    NOTE: this definition replaces the earlier DataFrame-based function of
    the same name, and it reads the module-level ``sample_space`` for the
    denominator, so the result depends on what ``sample_space`` is bound to
    when the function is called.

    Args:
        x: Value of the random variable to look up.
        inverse_mapping: Dict mapping each value of X to the list of
            outcomes that produce it.

    Returns:
        ``len(inverse_mapping[x])`` divided by ``len(sample_space)``.

    Raises:
        KeyError: If ``x`` is not a key of ``inverse_mapping``.
    """
    outcomes = inverse_mapping[x]
    len_outcomes = len(outcomes)
    len_sample_space = len(sample_space)
    return len_outcomes / len_sample_space
# Probabilities of X = 0, 1, 2 computed from the inverse mapping
calculate_probability_X(0, inverse_mapping), calculate_probability_X(1, inverse_mapping), calculate_probability_X(2, inverse_mapping)
(0.25, 0.5, 0.25)
### Two dice example
# Construct the sample space: every ordered pair (first die, second die)
sample_space = [(i, j) for i in range(1, 7) for j in range(1, 7)]
print(sample_space)
[(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (2, 1), (2, 2), (2, 3), (2, 4), (2, 5), (2, 6), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5), (3, 6), (4, 1), (4, 2), (4, 3), (4, 4), (4, 5), (4, 6), (5, 1), (5, 2), (5, 3), (5, 4), (5, 5), (5, 6), (6, 1), (6, 2), (6, 3), (6, 4), (6, 5), (6, 6)]
# Define a random variable X1 as the sum of the outcomes
# Define a random variable X2 as the product of the outcomes
# Define a random variable X3 as the maximum of the outcomes
def random_variable_X1(outcome):
    """Return the sum of the values in ``outcome``.

    Args:
        outcome: Iterable of numbers, e.g. the pair ``(d1, d2)`` of a
            two-dice roll.

    Returns:
        ``sum(outcome)``.
    """
    return sum(outcome)
def random_variable_X2(outcome):
    """Return the product of the first two values in ``outcome``.

    Args:
        outcome: Indexable pair of numbers, e.g. ``(d1, d2)``.

    Returns:
        ``outcome[0] * outcome[1]``.
    """
    return outcome[0] * outcome[1]
def random_variable_X3(outcome):
    """Return the largest value in ``outcome``.

    Args:
        outcome: Non-empty iterable of numbers, e.g. ``(d1, d2)``.

    Returns:
        ``max(outcome)``.

    Raises:
        ValueError: If ``outcome`` is empty.
    """
    return max(outcome)
# Quick check of X1 on a single outcome
random_variable_X1([1, 2])
3
# Create a heatmap for the sum of the outcomes
# One row per outcome of the two dice
df = pd.DataFrame(sample_space, columns=["D1", "D2"])

# X1 is evaluated row by row; at this point each row holds only D1 and D2
df["X1"] = df.apply(random_variable_X1, axis=1)
df.index.name = "Serial No."

# Outcomes whose sum is 10
df[df["X1"] == 10]
| Serial No. | D1 | D2 | X1 |
|---|---|---|---|
| 23 | 4 | 6 | 10 |
| 28 | 5 | 5 | 10 |
| 33 | 6 | 4 | 10 |
# Create interactive ipywidgets for the sum of the outcomes
import ipywidgets as widgets
from ipywidgets import interact
@interact(x=widgets.IntSlider(min=2, max=12, step=1, value=7))
def show_samples(x):
    """Return the rows of the module-level ``df`` whose "X1" equals ``x``.

    Registered with ``interact`` so that ``x`` is driven by an integer
    slider ranging from 2 to 12 (initial value 7).

    Args:
        x: Value of X1 to filter on.

    Returns:
        The subset of ``df`` where ``df["X1"] == x``.
    """
    return df[df["X1"] == x]