import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from io import StringIO
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from latexify import latexify, format_axes
Decision Trees: Real Input and Discrete Output
= """Day,Temperature,PlayTennis
dataset D1,40,No
D2,48,No
D3,60,Yes
D4,72,Yes
D5,80,Yes
D6,90,No"""
= StringIO(dataset)
f = pd.read_csv(f, sep=",") df
df
| | Day | Temperature | PlayTennis |
|---|---|---|---|
| 0 | D1 | 40 | No |
| 1 | D2 | 48 | No |
| 2 | D3 | 60 | Yes |
| 3 | D4 | 72 | Yes |
| 4 | D5 | 80 | Yes |
| 5 | D6 | 90 | No |
latexify(columns=2)
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
import graphviz
dt = DecisionTreeClassifier(criterion='entropy', max_depth=1)

X = df[['Temperature']]
y = df['PlayTennis']
dt.fit(X, y)
DecisionTreeClassifier(criterion='entropy', max_depth=1)
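As a quick sanity check (a hedged sketch, not part of the original notebook), we can recompute the entropy-based split by hand: for a real-valued feature, the candidate thresholds are the midpoints between consecutive sorted values, and the threshold with the highest information gain should match the one the fitted tree reports. The `entropy` and `best_threshold` helpers below are ad-hoc names introduced only for this check.

# Sketch: manually recompute the entropy-based split found by the depth-1 tree.
# `entropy` and `best_threshold` are ad-hoc helpers, not part of the pipeline above.
def entropy(labels):
    # Shannon entropy (base 2) of a 1-D array of class labels
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))

def best_threshold(values, labels):
    values, labels = np.asarray(values), np.asarray(labels)
    order = np.argsort(values)
    v, lab = values[order], labels[order]
    parent = entropy(lab)
    best_t, best_gain = None, -np.inf
    # Candidate thresholds: midpoints between consecutive sorted values
    for t in (v[:-1] + v[1:]) / 2:
        left, right = lab[v <= t], lab[v > t]
        gain = parent - (len(left) * entropy(left) + len(right) * entropy(right)) / len(lab)
        if gain > best_gain:
            best_t, best_gain = t, gain
    return best_t, best_gain

# For this toy dataset the best split should be at 54.0 (midpoint of 48 and 60).
best_threshold(df['Temperature'].values, df['PlayTennis'].values)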
# Function to store the decision tree's graphviz representation as a PDF
def save_decision_tree_as_pdf(dt, feature_names, class_names, filename):
    dot_data = export_graphviz(dt, out_file=None, feature_names=feature_names,
                               class_names=class_names, filled=True,
                               rounded=True, special_characters=True)
    graph = graphviz.Source(dot_data)
    graph.render(filename, format='pdf')

save_decision_tree_as_pdf(dt, ['Temperature'], ['No', 'Yes'], '../figures/decision-trees/real-ip-1')
dt2 = DecisionTreeClassifier(criterion='entropy', max_depth=2)
dt2.fit(X, y)
save_decision_tree_as_pdf(dt2, ['Temperature'], ['No', 'Yes'], '../figures/decision-trees/real-ip-2')
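If graphviz is unavailable, the same trees can be inspected as plain text with sklearn's `export_text`; this is just an optional check, not part of the figure-generation pipeline above, and the thresholds printed should match those in the exported PDFs.

# Optional: text view of the fitted trees (thresholds should match the PDFs).
from sklearn.tree import export_text
print(export_text(dt, feature_names=['Temperature']))
print(export_text(dt2, feature_names=['Temperature']))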
Heavily borrowed from and inspired by https://jakevdp.github.io/PythonDataScienceHandbook/05.08-random-forests.html
from sklearn.datasets import make_blobs
X, y = make_blobs(n_samples=300, centers=4,
                  random_state=0, cluster_std=1.2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=10, cmap='rainbow')
format_axes(plt.gca())
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
Text(0, 0.5, '$x_2$')
from sklearn.tree import DecisionTreeClassifier
def visualize_tree(depth, X, y, ax=None, cmap='rainbow'):
    model = DecisionTreeClassifier(max_depth=depth)
    ax = ax or plt.gca()
    print(model, depth)

    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, s=10, cmap=cmap,
               clim=(y.min(), y.max()), zorder=3)
    ax.axis('tight')
    format_axes(plt.gca())
    plt.xlabel(r"$x_1$")
    plt.ylabel(r"$x_2$")
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # Fit the estimator and evaluate it on a grid covering the plot
    model.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Create a color plot with the results
    # (contourf ignores the 'clim' kwarg, which triggers the UserWarning below)
    n_classes = len(np.unique(y))
    contours = ax.contourf(xx, yy, Z, alpha=0.2,
                           levels=np.arange(n_classes + 1) - 0.5,
                           cmap=cmap, clim=(y.min(), y.max()),
                           zorder=1)

    ax.set(xlim=xlim, ylim=ylim)

    plt.tight_layout()
    plt.savefig(f"../figures/decision-trees/dt-{depth}.pdf", bbox_inches="tight")
    plt.clf()

for depth in range(1, 11):
    visualize_tree(depth, X, y)
DecisionTreeClassifier(max_depth=1) 1
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=2) 2
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=3) 3
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=4) 4
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=5) 5
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=6) 6
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=7) 7
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=8) 8
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=9) 9
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
DecisionTreeClassifier(max_depth=10) 10
/var/folders/1x/wmgn24mn1bbd2vgbqlk98tbc0000gn/T/ipykernel_73517/232881262.py:24: UserWarning: The following kwargs were not used by contour: 'clim'
contours = ax.contourf(xx, yy, Z, alpha=0.2,
<Figure size 400x246.914 with 0 Axes>
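The saved boundary plots suggest that deeper trees carve out increasingly irregular regions around individual points. As a rough sketch (not in the original notebook; the 70/30 split and random_state below are arbitrary choices), a held-out test set makes the overfitting quantitative: training accuracy keeps climbing with depth while test accuracy plateaus or drops.

# Sketch: train/test accuracy versus depth on the same blob data (assumed split).
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
for depth in range(1, 11):
    model = DecisionTreeClassifier(max_depth=depth).fit(X_train, y_train)
    print(f"depth={depth:2d}  train={model.score(X_train, y_train):.3f}  "
          f"test={model.score(X_test, y_test):.3f}")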