import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from latexify import latexify, format_axes

Decision Trees Discrete Input and Discrete Output
ML
df = pd.read_csv("../datasets/tennis-discrete-output.csv", index_col=0)
df

| Day | Outlook | Temp | Humidity | Windy | Play |
|---|---|---|---|---|---|
| D1 | Sunny | Hot | High | Weak | No |
| D2 | Sunny | Hot | High | Strong | No |
| D3 | Overcast | Hot | High | Weak | Yes |
| D4 | Rain | Mild | High | Weak | Yes |
| D5 | Rain | Cool | Normal | Weak | Yes |
| D6 | Rain | Cool | Normal | Strong | No |
| D7 | Overcast | Cool | Normal | Strong | Yes |
| D8 | Sunny | Mild | High | Weak | No |
| D9 | Sunny | Cool | Normal | Weak | Yes |
| D10 | Rain | Mild | Normal | Weak | Yes |
| D11 | Sunny | Mild | Normal | Strong | Yes |
| D12 | Overcast | Mild | High | Strong | Yes |
| D13 | Overcast | Hot | Normal | Weak | Yes |
| D14 | Rain | Mild | High | Strong | No |
def entropy(ser):
    """
    Calculate the Shannon entropy (in bits) of a categorical variable.

    Parameters:
    - ser: pd.Series of categorical data. Missing values (NaN/None) are
      ignored: probabilities are computed over the observed values only,
      so they always sum to 1.

    Returns:
    - Entropy value H(S) = -sum(p_i * log2(p_i)) as a numpy float.
      Returns 0.0 for an empty series or a series with a single distinct
      value.
    """
    # Count the occurrences of each unique value (value_counts drops NaN).
    value_counts = ser.value_counts()

    # Categorical dtypes list unobserved categories with a count of 0.
    # Drop them so log2(0) is never evaluated; by convention 0*log2(0) = 0.
    value_counts = value_counts[value_counts > 0]

    # Nothing observed: entropy of an empty distribution is defined as 0.
    if value_counts.empty:
        return np.float64(0.0)

    # Normalise by the number of observed values, not len(ser), because
    # len(ser) also counts the missing entries excluded above.
    probabilities = value_counts / value_counts.sum()

    # Calculate entropy using the formula: H(S) = -p1*log2(p1) - p2*log2(p2) - ...
    entropy_value = -np.sum(probabilities * np.log2(probabilities))

    # Adding 0.0 turns a negative zero (pure node) into a plain 0.0.
    return entropy_value + 0.0
entropy(df["Play"])
0.9402859586706311