Basic Imports

import numpy as np
import matplotlib.pyplot as plt
import torch
import seaborn as sns
import pandas as pd

dist = torch.distributions
sns.reset_defaults()
sns.set_context(context="talk", font_scale=1)
%matplotlib inline
%config InlineBackend.figure_format='retina'

Generative model for logistic regression
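A compact summary of the generative process coded below (my notation, matching the code): $x_{i1} \sim \mathcal{N}(0, 1^2)$, $x_{i2} \sim \mathcal{N}(0, 2^2)$, $\theta \sim \mathcal{N}(\mathbf{0},\, 0.5\, I_3)$, then $p_i = \sigma(\theta_0 + \theta_1 x_{i1} + \theta_2 x_{i2})$ and $y_i \sim \mathrm{Bernoulli}(p_i)$.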
x = dist.Normal(loc=torch.tensor([0., 0.]), scale=torch.tensor([1., 2.]))
x_sample = x.sample([100])
x_sample.shape

x_dash = torch.concat((torch.ones(x_sample.shape[0], 1), x_sample), axis=1)
theta = dist.MultivariateNormal(loc=torch.tensor([0., 0., 0.]), covariance_matrix=0.5*torch.eye(3))
theta_sample = theta.sample()

p = torch.sigmoid(x_dash @ theta_sample)
y = dist.Bernoulli(probs=p)
y_sample = y.sample()

plt.scatter(x_sample[:, 0], x_sample[:, 1], c=y_sample, s=40, alpha=0.5)
sns.despine()
theta_sample
tensor([ 0.6368, -0.7526, 1.4652])
from sklearn.linear_model import LogisticRegression
lr_l2 = LogisticRegression()
lr_none = LogisticRegression(penalty='none')
lr_l2.fit(x_sample, y_sample)
lr_none.fit(x_sample, y_sample)
LogisticRegression(penalty='none')
def plot_fit(x_sample, y_sample, theta, model_name):
    # Retrieve the model parameters.
    b = theta[0]
    w1, w2 = theta[1], theta[2]
    # Calculate the intercept and gradient of the decision boundary.
    c = -b/w2
    m = -w1/w2

    # Plot the data and the classification with the decision boundary.
    xmin, xmax = x_sample[:, 0].min()-0.2, x_sample[:, 0].max()+0.2
    ymin, ymax = x_sample[:, 1].min()-0.2, x_sample[:, 1].max()+0.2
    xd = np.array([xmin, xmax])
    yd = m*xd + c
    plt.plot(xd, yd, 'k', lw=1, ls='--')
    plt.fill_between(xd, yd, ymin, color='tab:blue', alpha=0.2)
    plt.fill_between(xd, yd, ymax, color='tab:orange', alpha=0.2)

    plt.scatter(*x_sample[y_sample==0].T, s=20, alpha=0.5)
    plt.scatter(*x_sample[y_sample==1].T, s=20, alpha=0.5)

    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    plt.ylabel(r'$x_2$')
    plt.xlabel(r'$x_1$')
    theta_print = np.round(theta, 1)
    plt.title(f"{model_name}\n{theta_print}")
    sns.despine()
plot_fit(
    x_sample,
    y_sample,
    theta_sample,
    r"Generating $\theta$",
)
plot_fit(
    x_sample,
    y_sample,
    np.concatenate((lr_l2.intercept_.reshape(-1, 1), lr_l2.coef_), axis=1).flatten(),
    r"Sklearn $\ell_2$ penalty",
)
plot_fit(
    x_sample,
    y_sample,
    np.concatenate((lr_none.intercept_.reshape(-1, 1), lr_none.coef_), axis=1).flatten(),
    r"Sklearn No penalty",
)
MLE estimate PyTorch
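The quantity minimised in this section is the negative log-likelihood of the Bernoulli model, i.e. the usual logistic-regression loss. With $\tilde{x}_i$ denoting the input prepended with a 1 for the bias (x_dash in the code):

$\mathrm{NLL}(\theta) = -\sum_{i=1}^{N} \big[ y_i \log \sigma(\tilde{x}_i^\top \theta) + (1 - y_i) \log\big(1 - \sigma(\tilde{x}_i^\top \theta)\big) \big]$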
def neg_log_likelihood(theta, x, y):
    x_dash = torch.concat((torch.ones(x.shape[0], 1), x), axis=1)
    p = torch.sigmoid(x_dash @ theta)
    y_dist = dist.Bernoulli(probs=p)
    return -torch.sum(y_dist.log_prob(y))

neg_log_likelihood(theta_sample, x_sample, y_sample)
tensor(33.1907)
theta_learn_loc = torch.tensor([1., 1., 1.], requires_grad=True)
neg_log_likelihood(theta_learn_loc, x_sample, y_sample)
plot_fit(
    x_sample,
    y_sample,
    theta_learn_loc.detach(),
    r"Torch without training",
)
theta_learn_loc = torch.tensor([0., 0., 0.], requires_grad=True)
loss_array = []
loc_array = []

opt = torch.optim.Adam([theta_learn_loc], lr=0.05)
for i in range(101):
    loss_val = neg_log_likelihood(theta_learn_loc, x_sample, y_sample)
    loss_val.backward()
    # Store a detached copy so the history is not just references to the same mutating tensor.
    loc_array.append(theta_learn_loc.detach().clone())
    loss_array.append(loss_val.item())
    if i % 10 == 0:
        print(f"Iteration: {i}, Loss: {loss_val.item():0.2f}")
    opt.step()
    opt.zero_grad()
Iteration: 0, Loss: 69.31
Iteration: 10, Loss: 44.14
Iteration: 20, Loss: 35.79
Iteration: 30, Loss: 32.73
Iteration: 40, Loss: 31.67
Iteration: 50, Loss: 31.25
Iteration: 60, Loss: 31.08
Iteration: 70, Loss: 31.00
Iteration: 80, Loss: 30.97
Iteration: 90, Loss: 30.95
Iteration: 100, Loss: 30.94
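loss_array is collected above but not visualised in the original cell; a small optional sketch (my addition) of the optimisation curve:

plt.plot(loss_array)
plt.xlabel("Iteration")
plt.ylabel("Negative log-likelihood")
sns.despine()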
plot_fit(
    x_sample,
    y_sample,
    theta_learn_loc.detach(),
    r"Torch MLE",
)
MAP estimate PyTorch
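For MAP we minimise the negative log posterior, i.e. the NLL plus the negative log prior. With the $\mathcal{N}(\mathbf{0}, 2 I_3)$ prior defined below, up to an additive constant,

$-\log p(\theta \mid \mathcal{D}) = \mathrm{NLL}(\theta) + \frac{1}{2 \cdot 2}\lVert\theta\rVert^2$

which is an $\ell_2$ penalty on $\theta$ (here including the bias term).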
prior_theta = dist.MultivariateNormal(loc=torch.tensor([0., 0., 0.]), covariance_matrix=2*torch.eye(3))

# Negative log-density of the prior; added to the NLL below to form the MAP objective.
logprob = lambda theta: -prior_theta.log_prob(theta)
theta_learn_loc = torch.tensor([0., 0., 0.], requires_grad=True)
loss_array = []
loc_array = []

opt = torch.optim.Adam([theta_learn_loc], lr=0.05)
for i in range(101):
    loss_val = neg_log_likelihood(theta_learn_loc, x_sample, y_sample) + logprob(theta_learn_loc)
    loss_val.backward()
    # Store a detached copy, as in the MLE loop.
    loc_array.append(theta_learn_loc.detach().clone())
    loss_array.append(loss_val.item())
    if i % 10 == 0:
        print(f"Iteration: {i}, Loss: {loss_val.item():0.2f}")
    opt.step()
    opt.zero_grad()
Iteration: 0, Loss: 73.11
Iteration: 10, Loss: 48.06
Iteration: 20, Loss: 39.89
Iteration: 30, Loss: 37.01
Iteration: 40, Loss: 36.10
Iteration: 50, Loss: 35.78
Iteration: 60, Loss: 35.67
Iteration: 70, Loss: 35.64
Iteration: 80, Loss: 35.62
Iteration: 90, Loss: 35.62
Iteration: 100, Loss: 35.62
plot_fit(
    x_sample,
    y_sample,
    theta_learn_loc.detach(),
    r"Torch MAP",
)
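Because the prior variance is 2 and sklearn weights the data term by C in its $\ell_2$-penalised objective, the MAP estimate should roughly match LogisticRegression(C=2.0), except that sklearn does not penalise the intercept. A quick optional sanity check (lr_map_like is a name introduced here, not from the original notebook):

lr_map_like = LogisticRegression(C=2.0).fit(x_sample, y_sample)
print(np.concatenate((lr_map_like.intercept_, lr_map_like.coef_.flatten())))
print(theta_learn_loc.detach().numpy())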
References
- Plotting code borrowed from here: https://scipython.com/blog/plotting-the-decision-boundary-of-a-logistic-regression-model/