Joint Distribution Properties

Probability
Statistics
Distributions
Mathematics
Exploring properties of joint probability distributions, including independence, covariance, correlation, and bivariate normal distributions with practical examples
Author

Nipun Batra

Published

March 24, 2025

Keywords

joint distributions, independence, covariance, correlation, bivariate normal, uniform distribution, PyTorch

import matplotlib.pyplot as plt
import numpy as np
print(np.__version__)
import torch 
import torch.nn as nn

import pandas as pd
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
1.26.4
X = torch.distributions.Uniform(-1, 1)
Y = torch.distributions.Uniform(-1, 1)

x_samples = X.sample((1000,))
y_samples = Y.sample((1000,))

plt.scatter(x_samples, y_samples)
# aspect ratio
plt.gca().set_aspect('equal')
plt.xlabel('X')
plt.ylabel('Y')
Text(0, 0.5, 'Y')

## E[XY] between U(-1, 1) and U(-1, 1)

E_XY = torch.mean(x_samples * y_samples)
print(E_XY)
tensor(-0.0064)

Analytical solution for the expected value of the product of two random variables: \(X\sim U(0,1)\) and \(Y\sim U(0,1)\).

We know that \(f_{X, Y}(x, y) = 1\) for \(0 \leq x \leq 1\) and \(0 \leq y \leq 1\). The expected value of the product of two random variables is given by:

\[\begin{equation} \begin{aligned} E[XY] &= \int_{0}^{1} \int_{0}^{1} xy \, dx \, dy \\ &= \int_{0}^{1} \left[ \frac{1}{2}x^2y \right]_{0}^{1} \, dy \\ &= \int_{0}^{1} \frac{1}{2}y \, dy \\ &= \left[ \frac{1}{4}y^2 \right]_{0}^{1} \\ &= \frac{1}{4} \end{aligned} \end{equation}\]

def E_XY(X, Y, n=1000):
    x_samples = X.sample((n,))
    y_samples = Y.sample((n,))
    return torch.mean(x_samples * y_samples)
X = torch.distributions.Uniform(0, 1)
Y = torch.distributions.Uniform(0, 1)

print(E_XY(X, Y, 1000000))

# This is the same as E[XY] = E[X]E[Y] for independent random variables
tensor(0.2502)

Finding \(\cos(\theta)\) for \(X\sim U(0,1)\) and \(Y\sim U(0,1)\):

\(E[X^2] = \int_{0}^{1} x^2 \, dx = \left[ \frac{1}{3}x^3 \right]_{0}^{1} = \frac{1}{3}\)

\(\cos(\theta) = \frac{E[XY]}{\sqrt{E[X^2]E[Y^2]}} = \frac{\frac{1}{4}}{\sqrt{\frac{1}{3}\frac{1}{3}}} = \frac{3}{4}\)

For any general \(X\sim U(a,b)\) and \(Y\sim U(a,b)\), the expected value of the product of two random variables is given by:

\[\begin{equation} \begin{aligned} E[XY] &= \int_{a}^{b} \int_{a}^{b} xy \, dx \, dy \\ &= \int_{a}^{b} \left[ \frac{1}{2}x^2y \right]_{a}^{b} \, dy \\ &= \int_{a}^{b} \frac{1}{2}(b^2 - a^2)y \, dy \\ &= \frac{1}{2}(b^2 - a^2) \int_{a}^{b} y \, dy \\ &= \frac{1}{2}(b^2 - a^2) \left[ \frac{1}{2}y^2 \right]_{a}^{b} \\ &= \frac{1}{2}(b^2 - a^2) \left( \frac{1}{2}b^2 - \frac{1}{2}a^2 \right) \\ &= \frac{1}{4}(b^2 - a^2)(b^2 - a^2) \\ &= \frac{1}{4}(b - a)^2(b + a)^2 \end{aligned} \end{equation}\]

\(E[X^2] = \int_{a}^{b} x^2 \, dx = \left[ \frac{1}{3}x^3 \right]_{a}^{b} = \frac{1}{3}(b^3 - a^3)\)

\(\sqrt{E[X^2]E[Y^2]} = \sqrt{\frac{1}{3}(b^3 - a^3)\frac{1}{3}(b^3 - a^3)} = \frac{1}{3}(b^3 - a^3)\)

And \(\cos(\theta)\) for \(X\sim U(a,b)\) and \(Y\sim U(a,b)\) is given by:

\[\begin{equation} \begin{aligned} \cos(\theta) &= \frac{E[XY]}{\sqrt{E[X^2]E[Y^2]}} \\ &= \frac{\frac{1}{4}(b - a)^2(b + a)^2}{\frac{1}{3}(b^3 - a^3)} \\ \end{aligned} \end{equation}\]

### cos(theta)

def E_cos_theta(X, Y, n=1000):
    x_samples = X.sample((n,))
    y_samples = Y.sample((n,))
    EXY = torch.mean(x_samples * y_samples)
    EX2 = torch.mean(x_samples ** 2)
    EY2 = torch.mean(y_samples ** 2)
    print(EXY, EX2, EY2)

    return EXY / torch.sqrt(EX2 * EY2)
X = torch.distributions.Uniform(0, 1)
Y = torch.distributions.Uniform(0, 1)

print(E_cos_theta(X, Y, 10000))
tensor(0.2479) tensor(0.3288) tensor(0.3344)
tensor(0.7475)
# Now increasing the range of the random variables
X = torch.distributions.Uniform(10, 11)
Y = torch.distributions.Uniform(10, 11)

print(E_XY(X, Y))

EX2 = torch.mean(x_samples ** 2)
EY2 = torch.mean(y_samples ** 2)

print(EX2, EY2)
tensor(110.4675)
tensor(0.3374) tensor(0.3341)
X = torch.distributions.Uniform(10, 11)
Y = torch.distributions.Uniform(10, 11)

print(E_cos_theta(X, Y, 10000))
tensor(110.3001) tensor(110.3019) tensor(110.4660)
tensor(0.9992)
## E[XY] between N(0, 1) and N(0, 1)

X = torch.distributions.Normal(0, 1)
Y = torch.distributions.Normal(0, 1)

x_samples = X.sample((1000,))
y_samples = Y.sample((1000,))

plt.scatter(x_samples, y_samples)
# aspect ratio
plt.gca().set_aspect('equal')
plt.xlabel('X')
plt.ylabel('Y')

E_XY = torch.mean(x_samples * y_samples)
print(E_XY)
tensor(0.0701)

dist = torch.distributions.Normal(0, 1)
x = dist.sample((1000,))
plt.hist(x.numpy(), bins=50, density=True)

x_range = torch.linspace(-3, 3, 1000)
y = dist.log_prob(x_range).exp()
plt.plot(x_range.numpy(), y.numpy())

dist.sample([10])
tensor([-1.9083,  0.3758,  0.0051,  0.5140,  0.9852, -0.5989,  0.5222, -0.7744,
         0.9462, -1.7868])
dist_2d_normal = torch.distributions.MultivariateNormal(torch.tensor([0.0, 0.0]), torch.eye(2))
#dist_2d_normal = torch.distributions.MultivariateNormal(torch.tensor([0.0, 0.0]), torch.tensor([[1.0, 0.5], [0.5, 1.0]]))

dist_2d_normal.sample([10])
tensor([[ 0.0438, -0.0310],
        [ 0.0487, -0.3790],
        [-0.7872,  0.9880],
        [ 1.0010, -0.9025],
        [ 0.5449,  0.1047],
        [ 1.6466,  0.0925],
        [ 0.9357,  0.2228],
        [-1.2721,  2.5194],
        [-0.3306, -0.1152],
        [ 1.2249, -1.7330]])
plt.scatter(*dist_2d_normal.sample([1000]).numpy().T, alpha=0.2, color='k')

# Plot 2D normal distribution surface plot of PDF
from mpl_toolkits.mplot3d import Axes3D

x = torch.linspace(-3, 3, 100)
y = torch.linspace(-3, 3, 100)

X, Y = torch.meshgrid(x, y)
xy = torch.stack([X, Y], 2)
z = dist_2d_normal.log_prob(xy).exp()

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X.numpy(), Y.numpy(), z.numpy())

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('PDF')

fig.tight_layout()

df = pd.read_html("http://socr.ucla.edu/docs/resources/SOCR_Data/SOCR_Data_Dinov_020108_HeightsWeights.html")
store_df = df[0]
store_df.columns = store_df.iloc[0]
store_df = store_df.iloc[1:]
store_df = store_df.astype(float)
store_df = store_df.drop(columns=["Index"])
store_df = store_df.dropna()
store_df.head()
Height(Inches) Weight(Pounds)
1 65.78331 112.9925
2 71.51521 136.4873
3 69.39874 153.0269
4 68.21660 142.3354
5 67.78781 144.2971
### Fiting a bi-variate normal distribution to the data
data = torch.tensor(store_df.values)
mean = data.mean(0)
cov = torch.cov(data.T)
dist = torch.distributions.MultivariateNormal(mean, cov)
dist.loc
tensor([ 67.9931, 127.0794], dtype=torch.float64)
dist.covariance_matrix
tensor([[  3.6164,  11.1510],
        [ 11.1510, 135.9765]], dtype=torch.float64)
# Plot the data

plt.scatter(data[:, 0], data[:, 1], alpha=0.1, color='k', facecolors='k')
plt.xlabel("Height")
plt.ylabel("Weight")
Text(0, 0.5, 'Weight')

# plot the PDF
x = torch.linspace(50, 80, 100)
y = torch.linspace(80, 280, 100)
X, Y = torch.meshgrid(x, y)
xy = torch.stack([X, Y], 2)
z = dist.log_prob(xy).exp()

import plotly.graph_objects as go

# Create surface plot with custom hover labels
fig = go.Figure(data=[go.Surface(
    x=X, y=Y, z=z, colorscale="viridis",
    hovertemplate="Height: %{x:0.2f}<br>Weight: %{y:0.2f}<br>PDF: %{z:0.5f}<extra></extra>"
)])

# Maximize figure size and reduce whitespace
fig.update_layout(
    autosize=True,
    width=1200,  # Set wider figure
    height=700,  # Set taller figure
    margin=dict(l=0, r=0, t=40, b=0),  # Remove extra whitespace
    title="2D Gaussian PDF",
    scene=dict(
        xaxis_title="Height",
        yaxis_title="Weight",
        zaxis_title="PDF"
    )
)

# Show plot
fig.show()
# uniform distribution
dist_uniform = torch.distributions.Uniform(0, 1)
x = dist_uniform.sample((1000,))
plt.hist(x.numpy(), bins=50, density=True)

x_range = torch.linspace(0, 1, 1000)
y = dist_uniform.log_prob(x_range).exp()
plt.plot(x_range.numpy(), y.numpy())

dist_uniform_2d = torch.distributions.Uniform(torch.tensor([0.0, 0.0]), torch.tensor([1.0, 1.0]))
dist_uniform_2d.sample([10])
tensor([[0.5493, 0.3478],
        [0.7661, 0.2568],
        [0.7199, 0.2975],
        [0.9114, 0.2916],
        [0.0045, 0.4948],
        [0.0156, 0.7434],
        [0.6856, 0.1037],
        [0.4446, 0.1913],
        [0.1995, 0.5009],
        [0.0716, 0.6085]])
plt.scatter(*dist_uniform_2d.sample([1000]).numpy().T, alpha=0.5)

plt.scatter(*dist_uniform_2d.sample([10000]).numpy().T, alpha=0.1)

# surface plot of PDF

x = torch.linspace(0.0, 1.0, 100)
y = torch.linspace(0.0, 1.0, 100)

X, Y = torch.meshgrid(x, y)
xy = torch.stack([X, Y], 2)

## Important:
## f(x, y) = f(x) * f(y) for independent random variables
## log(f(x, y)) = log(f(x)) + log(f(y))
z1 = dist_uniform_2d.log_prob(xy).sum(-1).exp()
z2 = dist_uniform.log_prob(X).exp() * dist_uniform.log_prob(Y).exp()
assert torch.allclose(z1, z2) 


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X.numpy(), Y.numpy(), z1.numpy())
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('PDF')
Text(0.5, 0, 'PDF')