import pandas as pd
# DT Regression
Decision Tree Regression
df = pd.read_csv("mins-played.csv")
df
| | Day | Outlook | Temp | Humidity | Wind | Minutes Played |
|---|---|---|---|---|---|---|
| 0 | D1 | Sunny | Hot | High | Weak | 20 |
| 1 | D2 | Sunny | Hot | High | Strong | 24 |
| 2 | D3 | Overcast | Hot | High | Weak | 40 |
| 3 | D4 | Rain | Mild | High | Weak | 50 |
| 4 | D5 | Rain | Cool | Normal | Weak | 60 |
| 5 | D6 | Rain | Cool | Normal | Strong | 10 |
| 6 | D7 | Overcast | Cool | Normal | Strong | 4 |
| 7 | D8 | Sunny | Mild | High | Weak | 10 |
| 8 | D9 | Sunny | Cool | Normal | Weak | 60 |
| 9 | D10 | Rain | Mild | Normal | Weak | 40 |
| 10 | D11 | Sunny | Mild | High | Strong | 45 |
| 11 | D12 | Overcast | Mild | High | Strong | 40 |
| 12 | D13 | Overcast | Hot | Normal | Weak | 35 |
| 13 | D14 | Rain | Mild | High | Strong | 20 |
# Spread of the target over the whole dataset.
# NOTE: pandas' Series.std() defaults to ddof=1 (sample standard deviation),
# whereas the commented-out np.std() below would default to ddof=0.
df["Minutes Played"].std()
# Output: 18.3111087402348
import numpy as np
# np.std(df["Minutes Played"].values)

# Std of "Minutes Played" within each Wind subset, weighted by the fraction
# of rows that fall in that subset.
df.query("Wind=='Weak'")["Minutes Played"].std()*len(df.query("Wind=='Weak'"))/len(df)
# Output: 10.180585192846463
df.query("Wind=='Strong'")["Minutes Played"].std()*len(df.query("Wind=='Strong'"))/len(df)
# Output: 6.933944897151599
# For every distinct Temp value, compute the std of "Minutes Played" within
# that subset, weighted by the subset's share of all rows. Results are kept in
# `out`, keyed by Temp value, so they can be summed afterwards.
out = {}
for temp in df["Temp"].unique():
    print(temp)
    # Filter once and reuse: the Series has one entry per matching row, so its
    # length equals the number of rows in the subset.
    minutes = df.query("Temp==@temp")["Minutes Played"]
    out[temp] = minutes.std()*len(minutes)/len(df)
    print(out[temp])
    print()
# Output:
# Hot
# 2.6636888135137133
# Mild
# 6.696785704762413
# Cool
# 8.770699519880226
# Reduction in standard deviation obtained by splitting on Temp: the overall
# std of the target minus the sum of the weighted per-Temp stds stored in `out`.
df["Minutes Played"].std() - pd.Series(out).sum()
# Output: 0.17993470207844808