import numpy as np
import sklearn
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
Reference: https://towardsdatascience.com/introduction-to-reliability-diagrams-for-probability-calibration-ed785b3f5d44
p = np.array([0.9, 0.2, 0.7, 0.4, 0.8, 0.1, 0.2, 0.8, 0.5, 0.9])
true_labels = np.ones_like(p)
true_labels[[1, 6, 7, 8]] = 0
true_labels
array([1., 0., 1., 1., 1., 1., 0., 0., 0., 1.])
num_splits = 3
splits_arr = np.linspace(0, 1, num_splits + 1)
splits = [(x, y) for (x, y) in zip(splits_arr[:-1], splits_arr[1:])]
splits
[(0.0, 0.3333333333333333),
(0.3333333333333333, 0.6666666666666666),
(0.6666666666666666, 1.0)]
pd.cut(pd.Series(p), bins=splits_arr)
0 (0.667, 1.0]
1 (0.0, 0.333]
2 (0.667, 1.0]
3 (0.333, 0.667]
4 (0.667, 1.0]
5 (0.0, 0.333]
6 (0.0, 0.333]
7 (0.667, 1.0]
8 (0.333, 0.667]
9 (0.667, 1.0]
dtype: category
Categories (3, interval[float64, right]): [(0.0, 0.333] < (0.333, 0.667] < (0.667, 1.0]]
splits = np.digitize(p, splits_arr)
splits
array([3, 1, 3, 2, 3, 1, 1, 3, 2, 3])
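Note the boundary conventions differ: pd.cut uses right-closed intervals like (0.0, 0.333] by default, while np.digitize (with its default right=False) assigns x to bin i when splits_arr[i-1] <= x < splits_arr[i]. A quick illustrative check (not from the original notebook):

np.digitize(np.array([0.0, 1.0]), splits_arr)
# array([1, 4]) -- a prediction of exactly 1.0 falls past the last edge into an extra bin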
p_group = {}
labels_pos = {}
for group in np.unique(splits):
    p_group[group] = p[splits == group]
    labels_pos[group] = true_labels[splits == group]
p_group
{1: array([0.2, 0.1, 0.2]),
2: array([0.4, 0.5]),
3: array([0.9, 0.7, 0.8, 0.8, 0.9])}
labels_pos
{1: array([0., 1., 0.]), 2: array([1., 0.]), 3: array([1., 1., 1., 0., 1.])}
p_group_mean = {k: np.mean(v) for k, v in p_group.items()}
p_group_mean
{1: 0.16666666666666666, 2: 0.45, 3: 0.8200000000000001}
fracs = {k: np.sum(v) * 1.0 / len(v) for k, v in labels_pos.items()}
fracs
{1: 0.3333333333333333, 2: 0.5, 3: 0.8}
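As a cross-check, the same per-bin fractions can be obtained with pandas; a sketch reusing the variables above:

pd.Series(true_labels).groupby(pd.cut(pd.Series(p), bins=splits_arr)).mean()
# (0.0, 0.333] -> 0.333..., (0.333, 0.667] -> 0.5, (0.667, 1.0] -> 0.8 -- matches fracs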
plt.plot(list(p_group_mean.values()), list(fracs.values()), marker='*')
plt.xlim((-0.05, 1.05))
plt.ylim((-0.05, 1.05))
plt.gca().set_aspect("equal")
plt.xlabel("p")
plt.ylabel("Relative frequency")
plt.plot([0, 1], [0, 1], color='k', ls='--', label='Ideal')
plt.legend()
Let us wrap this into a function.
def calib_curve(true, pred, n_bins=10):
    splits_arr = np.linspace(0, 1, n_bins + 1)
    splits = np.digitize(pred, splits_arr)
    p_group = {}
    labels_pos = {}
    for group in np.unique(splits):
        p_group[group] = pred[splits == group]
        labels_pos[group] = true[splits == group]
    p_group_mean = {k: np.mean(v) for k, v in p_group.items()}
    fracs = {k: np.sum(v) * 1.0 / len(v) for k, v in labels_pos.items()}
    counts = np.array([len(v) for v in labels_pos.values()])
    return np.array(list(p_group_mean.values())), np.array(list(fracs.values())), counts
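One caveat: because np.digitize is right-open, a prediction of exactly 1.0 would land in an extra (n_bins + 1)-th group that sklearn's binning does not produce. A minimal guard, assuming we want such values folded into the top bin (a hypothetical variant, not in the original), is to clip the bin indices:

splits = np.clip(np.digitize(pred, splits_arr), 1, n_bins)  # keep indices in 1..n_bins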
from sklearn.calibration import calibration_curve, CalibrationDisplay
prob_true, prob_pred = calibration_curve(true_labels, p, n_bins=3)
prob_true
array([0.33333333, 0.5 , 0.8 ])
prob_pred
array([0.16666667, 0.45 , 0.82 ])
p_ours, p_hat_ours, count = calib_curve(true_labels, p, 3)
p_ours
array([0.16666667, 0.45 , 0.82 ])
Expected Calibration Error
(np.abs(p_ours - p_hat_ours) * count).mean()
0.23333333333333336
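Note this averages |mean_prob_b - frac_b| * n_b over the bins. The conventional ECE instead weights each bin by n_b / N, i.e. ECE = sum_b (n_b / N) * |frac_b - mean_prob_b|; a sketch with the arrays above:

np.sum(np.abs(p_ours - p_hat_ours) * count) / count.sum()
# 0.07 for these ten samples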
from sklearn.datasets import make_classification
X, y = make_classification(n_features=2, n_informative=2, n_redundant=0, random_state=0)
plt.scatter(X[:, 0], X[:, 1], c=y)
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X, y)
LogisticRegression()
display = CalibrationDisplay.from_estimator(
    lr,
    X,
    y,
    n_bins=11,
)
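If the probabilities are already computed, sklearn also offers CalibrationDisplay.from_predictions, which should produce the same plot here:

CalibrationDisplay.from_predictions(y, lr.predict_proba(X)[:, 1], n_bins=11)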
pred_p = lr.predict_proba(X)[:, 1]
probs, fractions, counts = calib_curve(y, pred_p, 11)
plt.plot(probs, fractions, marker='^')
plt.xlabel("p")
plt.ylabel("Relative frequency")
plt.plot([0, 1], [0, 1], color='k', ls='--', label='Ideal')
plt.legend()
plt.hist(pred_p);
(np.abs(probs - fractions) * counts).mean()
0.9530316314463302
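As before, the sample-weighted ECE divides by the total count (here counts.sum() == 100) rather than the number of bins:

np.sum(np.abs(probs - fractions) * counts) / counts.sum()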
counts
array([18, 14, 13, 4, 2, 4, 2, 2, 6, 7, 28])