import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
Parametric v/s Non-Parametric
Parametric v/s Non-Parametric
Aim: Given a dataset $(X, y)$, learn a function $f$ that maps $X$ to $y$, i.e. $y = f(X)$.

We will consider two cases:

- Parametric: $f$ is a function of a fixed number of parameters, e.g. $f(x) = ax + b$.
- Non-parametric: $f$ is a function whose number of parameters grows with the size of the dataset.
# Dataset for classification. Trivial dataset with 1 point in each class.
# X holds one 2-D point per row; y holds the class label of each row.
X = np.array([[0, 0], [1, 1]])
y = np.array([0, 1])
# Plot the dataset: first feature on the x-axis, second on the y-axis,
# points coloured by class label.
plt.scatter(X[:, 0], X[:, 1], c=y, s=100, cmap='viridis')
<matplotlib.collections.PathCollection at 0x7f772d036640>
# Plot the decision boundary of a logistic regression classifier, KNN classifier, decision tree classifier, and a neural network classifier.
def plot_decision_boundary(model, X, y, h=0.02):
    """Draw a model's predicted regions and the data points on the current axes.

    Parameters
    ----------
    model : object with a ``predict`` method
        Called on an array with two columns (one row per grid point); the
        result is reshaped to the grid and drawn as filled contours.
    X : array
        Indexed as ``X[:, 0]`` and ``X[:, 1]``; these two columns give the
        x- and y-coordinates of the scattered points and the plot extent.
    y : array-like
        Passed to ``plt.scatter`` as the colour of each point.
    h : float, optional
        Step of the evaluation grid in data units (default 0.02, the value
        that used to be hard-coded).
    """
    # Pad the data range by 0.5 on every side so points are not on the edge.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5

    # Dense grid covering the padded range; every grid node gets a prediction.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Translucent predicted regions underneath, data points on top.
    plt.contourf(xx, yy, Z, cmap='viridis', alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=100, cmap='viridis')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
# Instantiate the models
lr = LogisticRegression()
knn = KNeighborsClassifier(n_neighbors=1)
dt = DecisionTreeClassifier()
nn = MLPClassifier(hidden_layer_sizes=(100, 100), activation='logistic', max_iter=10000)

# Plot the decision boundaries: one subplot per model in a 2x2 grid,
# each titled with the model's class name.
plt.figure(figsize=(12, 8))
for i, model in enumerate([lr, knn, dt, nn], start=1):
    plt.subplot(2, 2, i)
    model.fit(X, y)
    plot_decision_boundary(model, X, y)
    plt.title(model.__class__.__name__)
Learnt functions
Logistic Regression
# Sketch of the function logistic regression learns: a linear score passed
# through a sigmoid and thresholded at 0.5. `w`, `b` and `sigmoid` are
# illustrative names that are not defined in the visible part of this notebook.
logits = X @ w + b
prob = sigmoid(logits)
y_pred = prob > 0.5
Decision Tree
from sklearn.tree import export_graphviz
import graphviz

# Export the decision tree `dt` as DOT source (out_file=None returns it as a string).
dot_data = export_graphviz(dt, out_file=None, feature_names=['x1', 'x2'], class_names=['0', '1'], filled=True, rounded=True, special_characters=True)
graph = graphviz.Source(dot_data)
# Bare expression at the end of the cell so the notebook renders the graph.
graph
MLP
# Conceptual sketch of the function the MLP learns (illustrative only).
# NOTE(review): scikit-learn's `predict` returns class labels rather than
# logits, and `sigmoid` is not defined in the visible part of this notebook —
# confirm this is meant as pseudocode and not as runnable code.
logits = nn.predict(X)
probs = sigmoid(logits)
y_pred = probs>0.5
KNN
# Sketch of a learnt function written as explicit rules over the two
# features; the remaining regions are elided in the original (`...`).
# `X1` and `X2` are illustrative names for the two input features.
if X1 < 0.5 and X2 < 0.5:
    y = 0
elif X1 < 0.5 and X2 >= 0.5:
    ...
# Sophisticated dataset with 2 classes
from sklearn.datasets import make_blobs

# 100 two-dimensional points drawn around 2 centres; random_state fixes the draw.
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=0, cluster_std=2)

# Plot the dataset, coloured by class label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis')
<matplotlib.collections.PathCollection at 0x7f7a011d2d30>
# Fresh, unfitted models for the new dataset.
lr = LogisticRegression()
knn = KNeighborsClassifier(n_neighbors=1)
dt = DecisionTreeClassifier()
nn = MLPClassifier(hidden_layer_sizes=(100, 100), activation='logistic', max_iter=10000)

# Plot the decision boundaries: one subplot per model in a 2x2 grid,
# each titled with the model's class name.
plt.figure(figsize=(12, 8))
for i, model in enumerate([lr, knn, dt, nn], start=1):
    plt.subplot(2, 2, i)
    model.fit(X, y)
    plot_decision_boundary(model, X, y)
    plt.title(model.__class__.__name__)
# Export the decision tree `dt` as DOT source (out_file=None returns it as a string).
dot_data = export_graphviz(dt, out_file=None, feature_names=['x1', 'x2'], class_names=['0', '1'], filled=True, rounded=True, special_characters=True)
graph = graphviz.Source(dot_data)
# Bare expression at the end of the cell so the notebook renders the graph.
graph
# Depth and number of leaves of the decision tree `dt`; the echoed output follows.
dt.get_depth(), dt.get_n_leaves()
(9, 24)
# Coefficients of the logistic regression model `lr`; the echoed output follows.
lr.coef_
array([[ 0.19758375, -0.7298237 ]])
# Now, more noise
# Same generator settings as before except for a larger cluster_std (5 instead of 2).
X, y = make_blobs(n_samples=100, centers=2, n_features=2, random_state=0, cluster_std=5)

# Plot the dataset, coloured by class label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis')
<matplotlib.collections.PathCollection at 0x7f772abcffa0>
# Fresh, unfitted models for the noisier dataset.
lr = LogisticRegression()
knn = KNeighborsClassifier(n_neighbors=1)
dt = DecisionTreeClassifier()
nn = MLPClassifier(hidden_layer_sizes=(100, 100), activation='logistic', max_iter=10000)

# Plot the decision boundaries: one subplot per model in a 2x2 grid,
# each titled with the model's class name.
plt.figure(figsize=(12, 8))
for i, model in enumerate([lr, knn, dt, nn], start=1):
    plt.subplot(2, 2, i)
    model.fit(X, y)
    plot_decision_boundary(model, X, y)
    plt.title(model.__class__.__name__)
# Export the decision tree `dt` as DOT source (out_file=None returns it as a string).
dot_data = export_graphviz(dt, out_file=None, feature_names=['x1', 'x2'], class_names=['0', '1'], filled=True, rounded=True, special_characters=True)
graph = graphviz.Source(dot_data)
# Bare expression at the end of the cell so the notebook renders the graph.
graph
# Depth and number of leaves of the decision tree `dt`; the echoed output follows.
dt.get_depth(), dt.get_n_leaves()
(11, 36)
# Coefficients of the logistic regression model `lr`; the echoed output follows.
lr.coef_
array([[ 0.03933004, -0.10323595]])