import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
# Retina display
%config InlineBackend.figure_format = 'retina'
from tueplots import bundles
plt.rcParams.update(bundles.icml2022())
# Also add despine to the bundle using rcParams
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False
# Increase font size to match Beamer template
plt.rcParams['font.size'] = 16
# Make background transparent
plt.rcParams['figure.facecolor'] = 'none'
# Self information function
def self_information(p):
    return -np.log2(p)

# Plot self information function from 0 to 1
x = np.linspace(0.0001, 1, 500)
y = self_information(x)

plt.plot(x, y)
plt.xlabel('Probability (p)')
plt.ylabel('Self information (bits)')
plt.savefig('figures/information-theory/self-information.pdf', bbox_inches='tight')
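As a quick check (not part of the original cells), the self-information of a few reference probabilities comes out in whole bits:
# Sanity check: a fair coin flip carries 1 bit, a 1-in-4 event carries 2 bits
print(self_information(0.5))   # 1.0
print(self_information(0.25))  # 2.0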
categorical_1 = torch.distributions.Categorical(probs=torch.tensor([0.25, 0.25, 0.25, 0.25]))

ser_1 = pd.Series(index=['A', 'B', 'C', 'D'], data=categorical_1.probs.detach().numpy())
ser_1.plot.bar(rot=0)
plt.ylabel('Probability')
plt.savefig('figures/information-theory/categorical-uniform.pdf', bbox_inches='tight')

categorical_2 = torch.distributions.Categorical(probs=torch.tensor([0.5, 0.25, 0.125, 0.125]))

ser_2 = pd.Series(index=['A', 'B', 'C', 'D'], data=categorical_2.probs.detach().numpy())
ser_2.plot.bar(rot=0)
plt.ylabel('Probability')
plt.savefig('figures/information-theory/categorical-nonuniform.pdf', bbox_inches='tight')
# Entropy for a Bernoulli distribution
def entropy_bernoulli(p):
    return -(p * np.log2(p) + (1 - p) * np.log2(1 - p))

# Plot entropy for a Bernoulli distribution
x = np.linspace(0.0001, 0.9999, 500)
y = entropy_bernoulli(x)

plt.plot(x, y)
plt.xlabel('Probability (p)')
plt.ylabel('Entropy (bits)')
plt.savefig('figures/information-theory/entropy-bernoulli.pdf', bbox_inches='tight')
# Figure with 3 categorical distributions on four symbols, each titled with its entropy
# Make it a two-column figure for TUEplots
plt.rcParams.update(bundles.icml2022(nrows=1, ncols=3))

fig, axs = plt.subplots(1, 3, sharey=True)
ser_1.plot.bar(rot=0, ax=axs[0])
axs[0].set_ylabel('Probability')
axs[0].set_title('Entropy: {:.2f} bits'.format(categorical_1.entropy().item() / np.log(2)))  # convert log e (nats) to log 2 (bits)

# Manually also calculate the entropy
p = categorical_1.probs.detach().numpy()
entropy = -(p * np.log2(p)).sum()
print(entropy)

ser_2.plot.bar(rot=0, ax=axs[1])
axs[1].set_title('Entropy: {:.2f} bits'.format(categorical_2.entropy().item() / np.log(2)))

categorical_3 = torch.distributions.Categorical(probs=torch.tensor([0.9, 0.05, 0.025, 0.025]))
ser_3 = pd.Series(index=['A', 'B', 'C', 'D'], data=categorical_3.probs.detach().numpy())
ser_3.plot.bar(rot=0, ax=axs[2])
axs[2].set_title('Entropy: {:.2f} bits'.format(categorical_3.entropy().item() / np.log(2)))

plt.savefig('figures/information-theory/categorical-entropy.pdf', bbox_inches='tight')
2.0
= {"A": 0.4, "B": 0.3, "C": 0.2, "D": 0.1}
p = {"A": 0.15, "B": 0.55, "C": 0.05, "D": 0.25}
q
def entropy(p):
return -(np.array(list(p.values())) * np.log2(np.array(list(p.values())))).sum()
def cross_entropy(p, q):
return -(np.array(list(p.values())) * np.log2(np.array(list(q.values())))).sum()
def kl_divergence(p, q):
return cross_entropy(p, q) - entropy(p)
print(entropy(p))
print(cross_entropy(p, q))
print(kl_divergence(p, q))
1.8464393446710154
2.417920799518975
0.5714814548479596
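As an optional cross-check (assuming scipy is available; not part of the original cells), scipy.stats.entropy with base=2 reproduces both the entropy and the KL divergence:
from scipy.stats import entropy as scipy_entropy  # aliased to avoid clashing with entropy() above

p_arr = np.array(list(p.values()))
q_arr = np.array(list(q.values()))
print(scipy_entropy(p_arr, base=2))         # entropy of p, ~1.846 bits
print(scipy_entropy(p_arr, q_arr, base=2))  # KL(p || q), ~0.571 bits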
# Average length of a code
categorical_1.probs
tensor([0.2500, 0.2500, 0.2500, 0.2500])
symbols = ['A', 'B', 'C', 'D']
probs = [0.25, 0.25, 0.25, 0.25]
codes = ['00', '01', '10', '11']

def average_length(probs, codes, symbols):
    # Create a dictionary with the symbols and their probabilities
    symbol_probs = dict(zip(symbols, probs))

    # Create a dictionary with the symbols and their codes
    symbol_codes = dict(zip(symbols, codes))
    symbol_codes_length = {k: len(v) for k, v in symbol_codes.items()}

    # Calculate the average length of a code
    average_length_code = sum([symbol_probs[symbol] * symbol_codes_length[symbol] for symbol in symbols])
    return average_length_code

average_length([0.25, 0.25, 0.25, 0.25], ['00', '01', '10', '11'], ['A', 'B', 'C', 'D'])
2.0
average_length([0.5, 0.25, 0.125, 0.125], ['00', '01', '10', '11'], ['A', 'B', 'C', 'D'])
2.0
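The fixed-length code wastes bits on the nonuniform distribution. As an illustration (the code words below are one valid prefix code, not taken from the original cells), matching code lengths to the self-information of each symbol brings the average length down to the entropy of 1.75 bits:
# Code lengths 1, 2, 3, 3 match the self-information of the symbols, so the
# average length equals the entropy of the distribution (1.75 bits)
average_length([0.5, 0.25, 0.125, 0.125], ['0', '10', '110', '111'], ['A', 'B', 'C', 'D'])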
import pygraphviz as pgv
from IPython.display import Image
# Heapq imports
import heapq
class Node:
    def __init__(self, symbol, probability):
        self.symbol = symbol
        self.probability = probability
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.probability < other.probability
def huffman_encoding(symbols):
    # Step 1: Calculate the empirical probability of each symbol
    probabilities = {}
    total_symbols = len(symbols)
    for symbol in symbols:
        probabilities[symbol] = symbols.count(symbol) / total_symbols

    # Step 2: Create initial leaf nodes
    nodes = [Node(symbol, probability) for symbol, probability in probabilities.items()]

    # Step 3: Turn the list into a min-heap ordered by probability
    heapq.heapify(nodes)

    # Step 4: Build the Huffman tree by repeatedly merging the two least probable nodes
    while len(nodes) > 1:
        left = heapq.heappop(nodes)
        right = heapq.heappop(nodes)
        parent = Node(None, left.probability + right.probability)
        parent.left = left
        parent.right = right
        heapq.heappush(nodes, parent)

    # Step 5: The remaining node is the root of the tree
    root = nodes[0]

    # Step 6: Assign binary codes (left child -> '0', right child -> '1')
    codes = {}
    def assign_codes(node, code):
        if node.symbol:
            codes[node.symbol] = code
        else:
            assign_codes(node.left, code + '0')
            assign_codes(node.right, code + '1')

    assign_codes(root, '')

    # Step 7: Describe the tree in dot format and convert it to a graph
    # (a simple recursive construction of the dot string; leaves are labelled with
    # their symbol, internal nodes with their total probability)
    def to_dot(node, node_id, lines):
        label = node.symbol if node.symbol else f'{node.probability:.2f}'
        lines.append(f'"{node_id}" [label="{label}"];')
        for child, bit in ((node.left, '0'), (node.right, '1')):
            if child is not None:
                lines.append(f'"{node_id}" -> "{node_id + bit}" [label="{bit}"];')
                to_dot(child, node_id + bit, lines)

    dot_lines = []
    to_dot(root, 'root', dot_lines)
    tree_dot = 'digraph Huffman {\n' + '\n'.join(dot_lines) + '\n}'

    G = pgv.AGraph(tree_dot)
    G.layout(prog='dot')

    return codes, G
# Example usage
symbols = ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C']
codes, tree_dot = huffman_encoding(symbols)

print("Symbol\tCode")
for symbol, code in codes.items():
    print(f"{symbol}\t{code}")

# Plotting the Huffman tree
tree_dot.draw('huffman_tree.png')
ModuleNotFoundError: No module named 'pygraphviz'
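If pygraphviz is installed and the cell above runs, a small check (not part of the original cells) can compare the average Huffman code length against the entropy of the symbol distribution, reusing the average_length helper defined earlier:
# Hypothetical check, assuming `codes` was produced by huffman_encoding above
huffman_symbols = list(codes.keys())
huffman_probs = [symbols.count(s) / len(symbols) for s in huffman_symbols]
avg_len = average_length(huffman_probs, [codes[s] for s in huffman_symbols], huffman_symbols)
source_entropy = -sum(p * np.log2(p) for p in huffman_probs)
print(avg_len, source_entropy)  # the Huffman code stays within 1 bit of the entropy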
Closed form solution for prior predictive distribution

sigma = 1.0

def prior_predictive(x, sigma, prior_mean, prior_cov):
    """Closed form prior predictive distribution for linear regression."""
    prior_pred_mean = prior_mean[0] + prior_mean[1] * x
    # Predictive variance: observation noise plus the prior variance of w0 + w1 * x,
    # i.e. phi(x)^T prior_cov phi(x) with phi(x) = [1, x]
    prior_pred_cov = sigma ** 2 + prior_cov[0, 0] + 2 * x * prior_cov[0, 1] + x ** 2 * prior_cov[1, 1]
    return prior_pred_mean, prior_pred_cov
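A minimal usage sketch (the prior mean and covariance below are illustrative placeholders, not values from the original notebook) evaluates the prior predictive on a grid and shades a two-standard-deviation band:
# Illustrative prior: zero mean, unit variance on both intercept and slope
prior_mean = np.array([0.0, 0.0])
prior_cov = np.eye(2)

xs = np.linspace(-3, 3, 100)
pred_mean, pred_var = prior_predictive(xs, sigma, prior_mean, prior_cov)

plt.plot(xs, pred_mean)
plt.fill_between(xs, pred_mean - 2 * np.sqrt(pred_var), pred_mean + 2 * np.sqrt(pred_var), alpha=0.3)
plt.xlabel('x')
plt.ylabel('y')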