import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Retina display
%config InlineBackend.figure_format = 'retina'
from tueplots import bundles
plt.rcParams.update(bundles.beamer_moml())
# Also despine the axes via rcParams
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False
# Increase font size to match the Beamer template
plt.rcParams['font.size'] = 16
# Make the figure background transparent
plt.rcParams['figure.facecolor'] = 'none'
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression

X, y = make_moons(n_samples=1000, noise=0.1, random_state=0)
clf = LogisticRegression()
clf.fit(X, y)
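The next cell uses a DataFrame `df` with columns `p_hat` and `y` that is not constructed in this excerpt; here is a minimal sketch of how it could be built from the fitted classifier, assuming `p_hat` is the predicted probability of class 1:

# Sketch (assumption): build the DataFrame `df` used below, where p_hat is the
# predicted probability of the positive class and y is the true label.
p_hat = clf.predict_proba(X)[:, 1]
df = pd.DataFrame({'p_hat': p_hat, 'y': y})
df.head()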
# Let us look at samples where p_hat is close to 0.5 (say between 0.45 and 0.55)
df_close_phat_val = df[(df.p_hat > 0.45) & (df.p_hat < 0.55)]
df_close_phat_val.head()
       p_hat  y
6   0.510870  0
15  0.481702  0
20  0.533697  0
41  0.522857  1
60  0.483962  1
len(df_close_phat_val)
31
# Histogram of p_hat values
df.p_hat.hist(bins=50)
plt.xlabel(r'$\hat{p}$')
plt.ylabel('Frequency')
plt.title(r'Histogram of $\hat{p}$ values');
# Out of these samples, how many are actually positive?
df_close_phat_val.y.sum()
18
# Fraction of positive samples
df_close_phat_val.y.sum() / len(df_close_phat_val)
0.5806451612903226
# This is the reliability of the classifier at p_hat = 0.5
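Repeating the same computation for every probability bin, not only around 0.5, gives the full reliability diagram; a minimal sketch, assuming the `df` with `p_hat` and `y` from above:

# Sketch: reliability diagram for the binary classifier.
# Compare the mean predicted probability in each bin with the observed fraction of positives.
bins = np.linspace(0, 1, 11)
binned = df.groupby(pd.cut(df.p_hat, bins=bins))
plt.plot(binned.p_hat.mean(), binned.y.mean(), 'o-', label='classifier')
plt.plot([0, 1], [0, 1], 'k:', label='perfectly calibrated')
plt.xlabel(r'Mean $\hat{p}$ in bin')
plt.ylabel('Fraction of positives')
plt.legend()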
Slide 26 from https://classifier-calibration.github.io/assets/slides/clacal_tutorial_ecmlpkdd_2020_intro.pdf
# Now, multi-class classification and reliability diagrams
from sklearn.datasets import make_classification, make_blobs

X, y = make_blobs(n_samples=1000, centers=3, n_features=2, random_state=0)
# Plot the dataset
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis')
clf = LogisticRegression()
clf.fit(X, y)
LogisticRegression()
preds = pd.DataFrame(clf.predict_proba(X))
preds
            0         1         2
0    0.068741  0.005454  0.925805
1    0.983384  0.015859  0.000757
2    0.021072  0.000240  0.978688
3    0.042703  0.001372  0.955925
4    0.000208  0.000026  0.999766
..        ...       ...       ...
995  0.005001  0.994489  0.000510
996  0.050768  0.015765  0.933467
997  0.006151  0.911409  0.082440
998  0.000271  0.000021  0.999708
999  0.021539  0.962650  0.015811

1000 rows × 3 columns
preds['y'] = y
preds.head(10)
          0         1         2  y
0  0.068741  0.005454  0.925805  2
1  0.983384  0.015859  0.000757  0
2  0.021072  0.000240  0.978688  2
3  0.042703  0.001372  0.955925  2
4  0.000208  0.000026  0.999766  2
5  0.179771  0.797045  0.023183  1
6  0.120874  0.003821  0.875305  0
7  0.957242  0.000002  0.042756  0
8  0.000908  0.998876  0.000216  1
9  0.836772  0.162509  0.000718  0
# Find the most probable class (column with the highest probability) for each sample
most_probable_class = preds[[0, 1, 2]].idxmax(axis=1)
# Add the most probable class to the DataFrame
preds['y_hat'] = most_probable_class
preds.head()
          0         1         2  y  y_hat
0  0.068741  0.005454  0.925805  2      2
1  0.983384  0.015859  0.000757  0      0
2  0.021072  0.000240  0.978688  2      2
3  0.042703  0.001372  0.955925  2      2
4  0.000208  0.000026  0.999766  2      2
# Let us simplify the dataframe to only include the most probable class probability
# and whether the prediction was correct or not
new_df = preds.copy()
new_df["Correct"] = new_df.y == new_df.y_hat
new_df["Most_Probable_Class_Probability"] = new_df[[0, 1, 2]].max(axis=1)
new_df.head()
          0         1         2  y  y_hat  Correct  Most_Probable_Class_Probability
0  0.068741  0.005454  0.925805  2      2     True                         0.925805
1  0.983384  0.015859  0.000757  0      0     True                         0.983384
2  0.021072  0.000240  0.978688  2      2     True                         0.978688
3  0.042703  0.001372  0.955925  2      2     True                         0.955925
4  0.000208  0.000026  0.999766  2      2     True                         0.999766
# Drop the other columns
new_df = new_df.drop(columns=[0, 1, 2, 'y', 'y_hat'])
new_df.head()
   Correct  Most_Probable_Class_Probability
0     True                         0.925805
1     True                         0.983384
2     True                         0.978688
3     True                         0.955925
4     True                         0.999766
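Before binning, a quick one-number sanity check (a sketch): for a well-calibrated classifier, the average confidence of the predicted class should roughly match the overall accuracy.

# Sketch: compare overall accuracy with the average confidence of the predicted class.
print("Accuracy:        ", new_df.Correct.mean())
print("Mean confidence: ", new_df.Most_Probable_Class_Probability.mean())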
# Let us look at samples where the most probable class probability is close to 0.5
# (say between 0.45 and 0.55)
df_close_phat_val = new_df[(new_df.Most_Probable_Class_Probability > 0.45) & (new_df.Most_Probable_Class_Probability < 0.55)]
df_close_phat_val.head()
# Now, let us compute the reliability of the classifier in each bin
bins = np.linspace(0, 1, 11)
# Cut the data by the bins
new_df['bin'] = pd.cut(new_df.Most_Probable_Class_Probability, bins=bins)
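The per-bin reliability can then be plotted; a minimal sketch, assuming the bins created above:

# Sketch: reliability diagram for the multi-class classifier.
# For each confidence bin, plot mean confidence against the fraction of correct predictions.
grouped = new_df.groupby('bin')
plt.plot(grouped.Most_Probable_Class_Probability.mean(), grouped.Correct.mean(), 'o-', label='classifier')
plt.plot([0, 1], [0, 1], 'k:', label='perfectly calibrated')
plt.xlabel('Mean predicted probability (confidence)')
plt.ylabel('Fraction correct (accuracy)')
plt.legend()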
# Now, let us look at the above for regression with aleatoric uncertainty
# (the observation noise here has a constant standard deviation of 0.1)
X = np.linspace(-3, 3, 1000).reshape(-1, 1)
y = np.sin(X.flatten()) + np.random.normal(0, 0.1, X.shape[0])
plt.scatter(X, y, s=10, alpha=0.5)
# Use BayesianRidge for regression
from sklearn.linear_model import BayesianRidge

reg = BayesianRidge()
reg.fit(X, y)
BayesianRidge()
# Predict on a linspace
X_test = np.linspace(-3, 3, 100).reshape(-1, 1)
y_pred, y_std = reg.predict(X_test, return_std=True)
# Plot the predictions with a one-standard-deviation band
plt.plot(X_test, y_pred)
plt.fill_between(X_test.flatten(), y_pred - y_std, y_pred + y_std, alpha=0.5)
plt.scatter(X, y, alpha=0.1)
# Within 1*sigma we would like to see 68% of the data points
# Within 2*sigma we would like to see 95% of the data points
# Let us see how many points are within k*sigma of the mean
k = 1
y_pred, y_std = reg.predict(X, return_std=True)
# Check how many points are within k*sigma of the mean
within_k_sigma = np.abs(y - y_pred) < k * y_std
# Fraction of points within k*sigma of the mean
within_k_sigma.mean()
0.585
def within_k_sigma(y, y_pred, y_std, k=1):
    """Return a boolean mask of points within k*sigma of the predicted mean."""
    return np.abs(y - y_pred) < k * y_std
# Plot within_k_sigma for k = 1, 2, 3, ...
ks = np.arange(1, 6)
within_k_sigma_fractions = [within_k_sigma(y, y_pred, y_std, k=k).mean() for k in ks]
plt.plot(ks, within_k_sigma_fractions, 'o-')
plt.xlabel('k')
plt.ylabel('Fraction of points within k*sigma of the mean')
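For a well-specified Gaussian predictive distribution, the fraction of points within k*sigma should follow 2*Phi(k) - 1 (about 68% for k = 1 and 95% for k = 2); a sketch overlaying that reference curve on the plot above:

# Sketch: overlay the ideal Gaussian coverage 2*Phi(k) - 1 for comparison.
from scipy.stats import norm
ideal_coverage = 2 * norm.cdf(ks) - 1
plt.plot(ks, within_k_sigma_fractions, 'o-', label='empirical')
plt.plot(ks, ideal_coverage, 's--', label='ideal Gaussian')
plt.xlabel('k')
plt.ylabel('Fraction of points within k*sigma of the mean')
plt.legend()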
# But, ideally, we want 95% of the points to be within 2*sigma of the mean, i.e. within the 95% CI.
# We need a function to map a CI to k.
from scipy.stats import norm

def ci_to_k(ci):
    """Return the value of k corresponding to the given confidence interval."""
    return norm.ppf((1 + ci) / 2)
ci_to_k(0.95)
1.959963984540054
ci_to_k(0.68)
0.9944578832097535
# Make a reliability diagram for regression
percentiles = np.linspace(0, 100, 11)
quantiles = percentiles / 100
ks = ci_to_k(quantiles)
# Get the fraction of points within k*sigma of the mean for each k
within_k_sigma_fractions = [within_k_sigma(y, y_pred, y_std, k=k).mean() for k in ks]
plt.plot(quantiles, within_k_sigma_fractions, 'o-')
# Plot the ideal (perfectly calibrated) line
plt.plot([0, 1], [0, 1], 'k:')
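One way (a sketch, not part of the original notebook) to summarize the diagram above in a single number is the mean absolute gap between the empirical coverage and the nominal quantile level:

# Sketch: mean absolute deviation between empirical and nominal coverage.
calibration_gap = np.abs(np.array(within_k_sigma_fractions) - quantiles)
print("Mean calibration gap:", calibration_gap.mean())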