Decision Trees: Discrete Input and Discrete Output

ML

Author: Nipun Batra

Published: January 21, 2024

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Retina mode
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

from latexify import latexify, format_axes
df = pd.read_csv("../datasets/tennis-discrete-output.csv", index_col=0)
df
      Outlook  Temp  Humidity   Windy  Play
Day
D1      Sunny   Hot      High    Weak    No
D2      Sunny   Hot      High  Strong    No
D3   Overcast   Hot      High    Weak   Yes
D4       Rain  Mild      High    Weak   Yes
D5       Rain  Cool    Normal    Weak   Yes
D6       Rain  Cool    Normal  Strong    No
D7   Overcast  Cool    Normal  Strong   Yes
D8      Sunny  Mild      High    Weak    No
D9      Sunny  Cool    Normal    Weak   Yes
D10      Rain  Mild    Normal    Weak   Yes
D11     Sunny  Mild    Normal  Strong   Yes
D12  Overcast  Mild      High  Strong   Yes
D13  Overcast   Hot    Normal    Weak   Yes
D14      Rain  Mild      High  Strong    No
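
Before defining an entropy helper, it is worth checking the class balance of the target column. A minimal sketch using the df loaded above; the dataset has 9 "Yes" days and 5 "No" days:

# Count the labels of the target column "Play" (9 Yes, 5 No out of 14 days)
df["Play"].value_counts()
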
def entropy(ser):
    """
    Calculate entropy for a categorical variable.

    Parameters:
    - ser: pd.Series of categorical data

    Returns:
    - Entropy value
    """
    # Count the occurrences of each unique value in the series
    value_counts = ser.value_counts()

    # Calculate the probabilities of each unique value
    probabilities = value_counts / len(ser)

    # Calculate entropy using the formula: H(S) = -p1*log2(p1) - p2*log2(p2) - ...
    entropy_value = -np.sum(probabilities * np.log2(probabilities))

    return entropy_value
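
As a quick sanity check (not in the original notebook), entropy should be 0 bits for a pure series and 1 bit for a perfectly balanced two-class series:

# Illustrative checks: a pure series carries no uncertainty,
# while a 50/50 binary series carries exactly 1 bit
entropy(pd.Series(["Yes"] * 6))        # 0 bits
entropy(pd.Series(["Yes", "No"] * 3))  # 1 bit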
    
entropy(df["Play"])
0.9402859586706311
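
This matches the hand computation for the 9 "Yes" and 5 "No" days out of 14: H(Play) = -(9/14) log2(9/14) - (5/14) log2(5/14) ≈ 0.940. A minimal check of that arithmetic:

# Recompute the same quantity directly from the 9 Yes / 5 No split
p_yes, p_no = 9 / 14, 5 / 14
-(p_yes * np.log2(p_yes) + p_no * np.log2(p_no))  # approximately 0.940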