Probability Calibration
ml
Author

Nipun Batra

Published

October 27, 2022

Code
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

https://towardsdatascience.com/introduction-to-reliability-diagrams-for-probability-calibration-ed785b3f5d44

Code
p = np.array([0.9, 0.2, 0.7, 0.4, 0.8, 0.1, 0.2, 0.8, 0.5, 0.9])
Code
true_labels = np.ones_like(p)
true_labels[[1, 6, 7, 8]] = 0
true_labels
array([1., 0., 1., 1., 1., 1., 0., 0., 0., 1.])
Code
num_splits = 3
Code
splits_arr = np.linspace(0, 1, num_splits + 1)
splits = [(x, y) for (x, y) in zip(splits_arr[:-1], splits_arr[1:])]
splits
[(0.0, 0.3333333333333333),
 (0.3333333333333333, 0.6666666666666666),
 (0.6666666666666666, 1.0)]
Code
pd.cut(pd.Series(p), bins=splits_arr)
0      (0.667, 1.0]
1      (0.0, 0.333]
2      (0.667, 1.0]
3    (0.333, 0.667]
4      (0.667, 1.0]
5      (0.0, 0.333]
6      (0.0, 0.333]
7      (0.667, 1.0]
8    (0.333, 0.667]
9      (0.667, 1.0]
dtype: category
Categories (3, interval[float64, right]): [(0.0, 0.333] < (0.333, 0.667] < (0.667, 1.0]]
Code
splits = np.digitize(p, splits_arr)
splits
array([3, 1, 3, 2, 3, 1, 1, 3, 2, 3])
Code
p_group = {}
labels_pos = {}
for group in np.unique(splits):
    p_group[group] = p[splits==group]
    #frac_pos[group] = true_labels[splits==group].sum()*1.0/len(p_group[group])
    labels_pos[group] = true_labels[splits==group]
    #print(np.arange(10)[splits==group])
Code
p_group
{1: array([0.2, 0.1, 0.2]),
 2: array([0.4, 0.5]),
 3: array([0.9, 0.7, 0.8, 0.8, 0.9])}
Code
labels_pos
{1: array([0., 1., 0.]), 2: array([1., 0.]), 3: array([1., 1., 1., 0., 1.])}
Code
p_group_mean = {k:np.mean(v) for k, v in p_group.items()}
p_group_mean
{1: 0.16666666666666666, 2: 0.45, 3: 0.8200000000000001}
Code
fracs = {k:np.sum(v)*1.0/len(v) for k, v in labels_pos.items()}
fracs
{1: 0.3333333333333333, 2: 0.5, 3: 0.8}
Code
plt.plot(p_group_mean.values(), fracs.values(), marker='*')
plt.xlim((-0.05, 1.05))
plt.ylim((-0.05, 1.05))
plt.gca().set_aspect("equal")
plt.xlabel("p")
plt.ylabel("Relative frequency")

plt.plot([0, 1], [0, 1], color='k', ls='--', label='Ideal')
plt.legend()

Let us wrap into a function

Code
def calib_curve(true, pred, n_bins = 10):
    splits_arr = np.linspace(0, 1, n_bins + 1)
    splits = np.digitize(pred, splits_arr)
    p_group = {}
    labels_pos = {}
    for group in np.unique(splits):
        p_group[group] = pred[splits==group]
        labels_pos[group] = true[splits==group]
    p_group_mean = {k:np.mean(v) for k, v in p_group.items()}
    fracs = {k:np.sum(v)*1.0/len(v) for k, v in labels_pos.items()}
    counts = np.array([len(v) for v in labels_pos.values()])
    return  np.array(list(p_group_mean.values())), np.array(list(fracs.values())), counts
Code
from sklearn.calibration import calibration_curve, CalibrationDisplay

prob_true, prob_pred = calibration_curve(true_labels, p, n_bins=3)
Code
prob_true
array([0.33333333, 0.5       , 0.8       ])
Code
prob_pred
array([0.16666667, 0.45      , 0.82      ])
Code
p_ours, p_hat_ours, count = calib_curve(true_labels, p, 3)
p_ours
array([0.16666667, 0.45      , 0.82      ])

Expected Calibration Error

Code
(np.abs(p_ours-p_hat_ours)*count).mean()
0.23333333333333336
Code
from sklearn.datasets import make_classification
Code
X, y = make_classification(n_features=2, n_informative=2, n_redundant=0, random_state=0)
Code
plt.scatter(X[:, 0], X[:, 1], c = y)

Code
from sklearn.linear_model import LogisticRegression
Code
lr = LogisticRegression()
Code
lr.fit(X, y)
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Code
display = CalibrationDisplay.from_estimator(
        lr,
        X,
        y,
        n_bins=11,
)

Code
pred_p = lr.predict_proba(X)[:, 1]
Code
probs, fractions, counts  = calib_curve(y, pred_p, 11)
Code
plt.plot(probs, fractions, marker='^')
plt.xlabel("p")
plt.ylabel("Relative frequency")

plt.plot([0, 1], [0, 1], color='k', ls='--', label='Ideal')
plt.legend()

Code
plt.hist(pred_p);

Code
(np.abs(probs-fractions)*counts).mean()
0.9530316314463302
Code
counts
array([18, 14, 13,  4,  2,  4,  2,  2,  6,  7, 28])