import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Synthetic binary classification data: 100 points, 2 features, 2 blob centres.
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=42, cluster_std=8.0)

# The first 40 samples form the training split; the remaining 60 are held out.
train_samples = 40
X_train = X[:train_samples]
y_train = y[:train_samples]

X_test = X[train_samples:]
y_test = y[train_samples:]
# Plot training data with small markers
plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], marker='o', label='class 0 Train', color='b')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], marker='s', label='class 1 Train', color='r')

# Plot test data with larger, semi-transparent markers
plt.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], marker='o', s=100, label='class 0 Test', color='b', alpha=0.3)
plt.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], marker='s', s=100, label='class 1 Test', color='r', alpha=0.3)

plt.legend()
from sklearn.linear_model import LogisticRegression
# Logistic regression without regularisation. `penalty=None` replaces the
# string 'none', which scikit-learn deprecated in 1.2 and removes in 1.4.
lr = LogisticRegression(penalty=None, max_iter=1000)

lr.fit(X_train, y_train)
/Users/nipun/miniconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:1183: FutureWarning: `penalty='none'`has been deprecated in 1.2 and will be removed in 1.4. To keep the past behaviour, set `penalty=None`.
warnings.warn(
LogisticRegression(max_iter=1000, penalty='none')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(max_iter=1000, penalty='none')
# Per-class probabilities for every test sample (one column per class).
pred_test = lr.predict_proba(X_test)

# Show the first five rows.
pred_test[:5]
array([[0.37896698, 0.62103302],
[0.0502929 , 0.9497071 ],
[0.69240206, 0.30759794],
[0.1239149 , 0.8760851 ],
[0.27543045, 0.72456955]])
# Tabulate the predicted probabilities next to the ground-truth label.
df = pd.DataFrame(pred_test, columns=['class 0', 'class 1'])
df['gt'] = y_test
df.head()
| | class 0 | class 1 | gt |
---|---|---|---|
0 | 0.378967 | 0.621033 | 0 |
1 | 0.050293 | 0.949707 | 1 |
2 | 0.692402 | 0.307598 | 1 |
3 | 0.123915 | 0.876085 | 1 |
4 | 0.275430 | 0.724570 | 1 |
# Hard prediction: class 1 whenever its probability exceeds 0.5.
df['pred'] = (df['class 1'] > 0.5).astype(int)
df.head()
| | class 0 | class 1 | gt | pred |
---|---|---|---|---|
0 | 0.378967 | 0.621033 | 0 | 1 |
1 | 0.050293 | 0.949707 | 1 | 1 |
2 | 0.692402 | 0.307598 | 1 | 0 |
3 | 0.123915 | 0.876085 | 1 | 1 |
4 | 0.275430 | 0.724570 | 1 | 1 |
def confusion_p_r(model, X, y, threshold=0.5, eps=1e-8, img=False):
    """Return precision and recall of a binary classifier at a given threshold.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict_proba(X)``. Column 1 of the
        returned array is used as the positive-class (label 1) score.
    X : array-like
        Samples forwarded unchanged to ``model.predict_proba``.
    y : array-like
        Ground-truth labels, encoded as 0 / 1. Compared positionally with the
        predictions, so a pandas Series with any index is handled correctly.
    threshold : float, default 0.5
        A sample is predicted as class 1 when its score is ``>= threshold``.
    eps : float, default 1e-8
        Added to both denominators to avoid division by zero. As a result,
        precision is reported as 0.0 when no sample is predicted positive.
    img : bool, default False
        When true, print the 2x2 confusion matrix (rows are true labels,
        columns are predicted labels).

    Returns
    -------
    tuple
        ``(precision, recall)``.
    """
    scores = np.asarray(model.predict_proba(X))[:, 1]
    pred = (scores >= threshold).astype(int)
    # Work on plain arrays: assigning a Series into a DataFrame column would
    # align on index labels rather than on position.
    gt = np.asarray(y).ravel()

    TP = np.sum((gt == 1) & (pred == 1))
    TN = np.sum((gt == 0) & (pred == 0))
    FP = np.sum((gt == 0) & (pred == 1))
    FN = np.sum((gt == 1) & (pred == 0))

    precision = TP / (TP + FP + eps)
    recall = TP / (TP + FN + eps)

    if img:
        # Build the matrix from the four counts so that it is always 2x2, even
        # when only one class occurs in both the labels and the predictions.
        cm = np.array([[TN, FP], [FN, TP]])
        print(pd.DataFrame(cm, index=['True 0', 'True 1'], columns=['Pred 0', 'Pred 1']))

    return precision, recall
# Precision/recall on the test split at the default threshold, printing the confusion matrix.
confusion_p_r(lr, X_test, y_test, img=True)
Pred 0 Pred 1
True 0 24 7
True 1 9 20
(0.7407407404663923, 0.6896551721759809)
# Sweep the decision threshold over 21 evenly spaced values in [0, 1] and report both metrics.
for threshold in np.linspace(0, 1, 21):
    precision, recall = confusion_p_r(lr, X_test, y_test, threshold)
    print(f'Threshold: {threshold:.2f} Precision: {precision:.2f} Recall: {recall:.2f}')
Threshold: 0.00 Precision: 0.48 Recall: 1.00
Threshold: 0.05 Precision: 0.51 Recall: 1.00
Threshold: 0.10 Precision: 0.53 Recall: 1.00
Threshold: 0.15 Precision: 0.58 Recall: 0.97
Threshold: 0.20 Precision: 0.59 Recall: 0.93
Threshold: 0.25 Precision: 0.63 Recall: 0.90
Threshold: 0.30 Precision: 0.65 Recall: 0.83
Threshold: 0.35 Precision: 0.66 Recall: 0.79
Threshold: 0.40 Precision: 0.69 Recall: 0.76
Threshold: 0.45 Precision: 0.72 Recall: 0.72
Threshold: 0.50 Precision: 0.74 Recall: 0.69
Threshold: 0.55 Precision: 0.76 Recall: 0.66
Threshold: 0.60 Precision: 0.78 Recall: 0.62
Threshold: 0.65 Precision: 0.81 Recall: 0.59
Threshold: 0.70 Precision: 0.88 Recall: 0.52
Threshold: 0.75 Precision: 0.87 Recall: 0.48
Threshold: 0.80 Precision: 0.93 Recall: 0.48
Threshold: 0.85 Precision: 0.90 Recall: 0.31
Threshold: 0.90 Precision: 1.00 Recall: 0.24
Threshold: 0.95 Precision: 1.00 Recall: 0.14
Threshold: 1.00 Precision: 0.00 Recall: 0.00
# Same evaluation at a threshold of 0.99, printing the confusion matrix.
confusion_p_r(lr, X_test, y_test, threshold=0.99, img=True)
Pred 0 Pred 1
True 0 31 0
True 1 29 0
(0.0, 0.0)
# Plot precision and recall as functions of the decision threshold
test_scores = lr.predict_proba(X_test)[:, 1]  # computed once, reused for min and max
min_prob = test_scores.min()
max_prob = test_scores.max()
thresholds = np.linspace(min_prob, max_prob, 100)
precisions = []
recalls = []

for threshold in thresholds:
    precision, recall = confusion_p_r(lr, X_test, y_test, threshold)

    precisions.append(precision)
    recalls.append(recall)

plt.plot(thresholds, precisions, label='Precision')
plt.plot(thresholds, recalls, label='Recall')

plt.xlabel('Threshold')
plt.legend()
# Plot Precision-Recall curve from the values collected in the threshold sweep
plt.plot(recalls, precisions, marker='o')
plt.xlabel('Recall')
plt.ylabel('Precision')
# make plot square
plt.gca().set_aspect('equal', adjustable='box')
from sklearn.metrics import precision_recall_curve
# Same curve computed by scikit-learn from the positive-class scores.
precision, recall, thresholds = precision_recall_curve(y_test, lr.predict_proba(X_test)[:, 1])

plt.plot(recall, precision, marker='o')
plt.xlabel('Recall')
plt.ylabel('Precision')
Text(0, 0.5, 'Precision')
from sklearn.metrics import PrecisionRecallDisplay
# Precision-recall plot built directly from the fitted estimator, with the chance level drawn.
display = PrecisionRecallDisplay.from_estimator(
    lr, X_test, y_test, name="Logistic", plot_chance_level=True
)
_ = display.ax_.set_title("2-class Precision-Recall curve")
# Number of class-0 and class-1 samples in the training and test splits.
(y_train == 0).sum(), (y_train == 1).sum(), (y_test == 0).sum(), (y_test == 1).sum()
(19, 21, 31, 29)
# Fraction of class-1 samples in each split, computed from the labels rather
# than from hand-copied counts so it stays correct if the data changes.
(y_train == 1).mean(), (y_test == 1).mean()
(0.525, 0.48333333333333334)
# Plot the decision boundary
# Grid covering the full data range, padded by 1 on each side, at 0.1 resolution.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# Class-1 probability at every grid point, reshaped back onto the grid.
Z = lr.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.2, cmap='coolwarm', levels=20)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

# Colorbar
plt.colorbar()

# Plot test data with larger markers
plt.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], marker='o', s=100, label='class 0 Test', color='b', alpha=0.3)
plt.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], marker='s', s=100, label='class 1 Test', color='r', alpha=0.3)

plt.legend()