import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Generate 100 two-feature samples drawn from 2 blob centers (fixed seed).
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=42, cluster_std=8.0)

# Use the first 40 samples for training and the remaining 60 for testing.
train_samples = 40
X_train = X[:train_samples]
y_train = y[:train_samples]
X_test = X[train_samples:]
y_test = y[train_samples:]
# Plot training data with small markers
plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], marker='o', label='class 0 Train', color='b')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], marker='s', label='class 1 Train', color='r')
# Plot test data with larger, semi-transparent markers
plt.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], marker='o', s=100, label='class 0 Test', color='b', alpha=0.3)
plt.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], marker='s', s=100, label='class 1 Test', color='r', alpha=0.3)
from sklearn.linear_model import LogisticRegression
# Unregularized logistic regression. `penalty=None` replaces the string
# 'none', which scikit-learn deprecated in 1.2 and removes in 1.4.
lr = LogisticRegression(penalty=None, max_iter=1000)
lr.fit(X_train, y_train)
/Users/nipun/miniconda3/lib/python3.9/site-packages/sklearn/linear_model/ FutureWarning: `penalty='none'`has been deprecated in 1.2 and will be removed in 1.4. To keep the past behaviour, set `penalty=None`.
LogisticRegression(max_iter=1000, penalty='none')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with
LogisticRegression(max_iter=1000, penalty='none')
# Per-class probabilities for the test set; column 1 is P(class 1).
pred_test = lr.predict_proba(X_test)
pred_test[:5]
array([[0.37896698, 0.62103302],
[0.0502929 , 0.9497071 ],
[0.69240206, 0.30759794],
[0.1239149 , 0.8760851 ],
[0.27543045, 0.72456955]])
# Tabulate the predicted probabilities next to the ground-truth labels.
df = pd.DataFrame(pred_test, columns=['class 0', 'class 1'])
df['gt'] = y_test
# NOTE(review): the display call was lost in extraction; head() matches the
# 5-row table shown in the cell output - confirm against the notebook.
df.head()
class 0 | class 1 | gt | |
0 | 0.378967 | 0.621033 | 0 |
1 | 0.050293 | 0.949707 | 1 |
2 | 0.692402 | 0.307598 | 1 |
3 | 0.123915 | 0.876085 | 1 |
4 | 0.275430 | 0.724570 | 1 |
# Hard prediction: class 1 when its probability exceeds 0.5.
df['pred'] = (df['class 1'] > 0.5).astype(int)
# NOTE(review): the display call was lost in extraction; head() matches the
# 5-row table shown in the cell output - confirm against the notebook.
df.head()
class 0 | class 1 | gt | pred | |
0 | 0.378967 | 0.621033 | 0 | 1 |
1 | 0.050293 | 0.949707 | 1 | 1 |
2 | 0.692402 | 0.307598 | 1 | 0 |
3 | 0.123915 | 0.876085 | 1 | 1 |
4 | 0.275430 | 0.724570 | 1 | 1 |
def confusion_p_r(model, X, y, threshold=0.5, eps=1e-8, img=False):
    """Compute precision and recall for class 1 at a probability threshold.

    Parameters
    ----------
    model : object exposing ``predict_proba(X)``; column 1 of the result is
        used as the class-1 probability.
    X : features passed through to ``model.predict_proba``.
    y : ground-truth labels (0/1), one per row of ``X``.
    threshold : a sample is predicted as class 1 when its class-1
        probability is ``>= threshold``.
    eps : small constant added to the denominators so that an empty
        denominator yields 0.0 instead of a division by zero.
    img : when True, also print the confusion matrix as a labelled table.

    Returns
    -------
    (precision, recall) : tuple of floats.
    """
    pred_prob = model.predict_proba(X)
    pred = (pred_prob[:, 1] >= threshold).astype(int)

    df = pd.DataFrame(pred_prob, columns=['class 0', 'class1'])
    df['gt'] = y
    df['pred'] = pred

    # Confusion-matrix cells, treating class 1 as the positive class.
    TP = ((df['gt'] == 1) & (df['pred'] == 1)).sum()
    TN = ((df['gt'] == 0) & (df['pred'] == 0)).sum()
    FP = ((df['gt'] == 0) & (df['pred'] == 1)).sum()
    FN = ((df['gt'] == 1) & (df['pred'] == 0)).sum()

    precision = TP / (TP + FP + eps)
    recall = TP / (TP + FN + eps)

    # Optionally print the confusion matrix
    if img:
        # Compute confusion matrix
        cm = confusion_matrix(y, pred)
        print(pd.DataFrame(cm, index=['True 0', 'True 1'], columns=['Pred 0', 'Pred 1']))

    return precision, recall
# Precision/recall at the default 0.5 threshold, printing the confusion matrix.
confusion_p_r(lr, X_test, y_test, img=True)
Pred 0 Pred 1
True 0 24 7
True 1 9 20
(0.7407407404663923, 0.6896551721759809)
# Report precision and recall at 21 evenly spaced thresholds in [0, 1].
for threshold in np.linspace(0, 1, 21):
    precision, recall = confusion_p_r(lr, X_test, y_test, threshold)
    print(f'Threshold: {threshold:.2f} Precision: {precision:.2f} Recall: {recall:.2f}')
Threshold: 0.00 Precision: 0.48 Recall: 1.00
Threshold: 0.05 Precision: 0.51 Recall: 1.00
Threshold: 0.10 Precision: 0.53 Recall: 1.00
Threshold: 0.15 Precision: 0.58 Recall: 0.97
Threshold: 0.20 Precision: 0.59 Recall: 0.93
Threshold: 0.25 Precision: 0.63 Recall: 0.90
Threshold: 0.30 Precision: 0.65 Recall: 0.83
Threshold: 0.35 Precision: 0.66 Recall: 0.79
Threshold: 0.40 Precision: 0.69 Recall: 0.76
Threshold: 0.45 Precision: 0.72 Recall: 0.72
Threshold: 0.50 Precision: 0.74 Recall: 0.69
Threshold: 0.55 Precision: 0.76 Recall: 0.66
Threshold: 0.60 Precision: 0.78 Recall: 0.62
Threshold: 0.65 Precision: 0.81 Recall: 0.59
Threshold: 0.70 Precision: 0.88 Recall: 0.52
Threshold: 0.75 Precision: 0.87 Recall: 0.48
Threshold: 0.80 Precision: 0.93 Recall: 0.48
Threshold: 0.85 Precision: 0.90 Recall: 0.31
Threshold: 0.90 Precision: 1.00 Recall: 0.24
Threshold: 0.95 Precision: 1.00 Recall: 0.14
Threshold: 1.00 Precision: 0.00 Recall: 0.00
# With a 0.99 threshold, inspect the confusion matrix alongside precision/recall.
confusion_p_r(lr, X_test, y_test, threshold=0.99, img=True)
Pred 0 Pred 1
True 0 31 0
True 1 29 0
(0.0, 0.0)
# Plot precision and recall as functions of the decision threshold
class1_prob = lr.predict_proba(X_test)[:, 1]
min_prob = class1_prob.min()
max_prob = class1_prob.max()
# Sweep only the range of probabilities the model actually produced.
thresholds = np.linspace(min_prob, max_prob, 100)
precisions = []
recalls = []
for threshold in thresholds:
    precision, recall = confusion_p_r(lr, X_test, y_test, threshold)
    precisions.append(precision)
    recalls.append(recall)

plt.plot(thresholds, precisions, label='Precision')
plt.plot(thresholds, recalls, label='Recall')
# NOTE(review): the x-axis label text was lost in extraction; 'Threshold' is
# assumed from the plotted x values - confirm against the notebook.
plt.xlabel('Threshold')
plt.legend()
# Plot Precision-Recall curve
# NOTE(review): the marker value was lost in extraction; 'o' is a placeholder
# to confirm against the notebook.
plt.plot(recalls, precisions, marker='o')
plt.xlabel('Recall')
plt.ylabel('Precision')
# make plot square
plt.gca().set_aspect('equal', adjustable='box')
from sklearn.metrics import precision_recall_curve
# Same curve computed by scikit-learn from the class-1 probabilities.
precision, recall, thresholds = precision_recall_curve(y_test, lr.predict_proba(X_test)[:, 1])

# NOTE(review): the marker value was lost in extraction; 'o' is a placeholder
# to confirm against the notebook.
plt.plot(recall, precision, marker='o')
plt.xlabel('Recall')
plt.ylabel('Precision')
Text(0, 0.5, 'Precision')
from sklearn.metrics import PrecisionRecallDisplay
# Draw the precision-recall curve straight from the fitted estimator,
# including the chance-level baseline.
display = PrecisionRecallDisplay.from_estimator(
    lr, X_test, y_test, name="Logistic", plot_chance_level=True
)
_ = display.ax_.set_title("2-class Precision-Recall curve")
# Per-class sample counts: (train 0, train 1, test 0, test 1).
(y_train == 0).sum(), (y_train == 1).sum(), (y_test == 0).sum(), (y_test == 1).sum()
(19, 21, 31, 29)
# Share of class-1 samples: 21 of 40 in the training split, 29 of 60 in the test split.
21/(19+21), 29/(29+31)
(0.525, 0.48333333333333334)
# Plot the decision boundary
# Grid covering the data range with a margin of 1 on each side, step 0.1.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# Class-1 probability at every grid point, reshaped back onto the grid.
Z = lr.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.2, cmap='coolwarm', levels=20)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
# Colorbar
# NOTE(review): the call under this comment was lost in extraction;
# plt.colorbar() is the assumed original - confirm against the notebook.
plt.colorbar()
# Plot test data with larger markers
plt.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], marker='o', s=100, label='class 0 Test', color='b', alpha=0.3)
plt.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], marker='s', s=100, label='class 1 Test', color='r', alpha=0.3)