import pandas as pd
DT Regression
Decision Tree Regression
# Load the data set; the path is relative to the working directory.
df = pd.read_csv("mins-played.csv")
# Bare expression so the notebook renders the frame as the cell output.
df
| | Day | Outlook | Temp | Humidity | Wind | Minutes Played |
---|---|---|---|---|---|---|
0 | D1 | Sunny | Hot | High | Weak | 20 |
1 | D2 | Sunny | Hot | High | Strong | 24 |
2 | D3 | Overcast | Hot | High | Weak | 40 |
3 | D4 | Rain | Mild | High | Weak | 50 |
4 | D5 | Rain | Cool | Normal | Weak | 60 |
5 | D6 | Rain | Cool | Normal | Strong | 10 |
6 | D7 | Overcast | Cool | Normal | Strong | 4 |
7 | D8 | Sunny | Mild | High | Weak | 10 |
8 | D9 | Sunny | Cool | Normal | Weak | 60 |
9 | D10 | Rain | Mild | Normal | Weak | 40 |
10 | D11 | Sunny | Mild | High | Strong | 45 |
11 | D12 | Overcast | Mild | High | Strong | 40 |
12 | D13 | Overcast | Hot | Normal | Weak | 35 |
13 | D14 | Rain | Mild | High | Strong | 20 |
"Minutes Played"].std() df[
18.3111087402348
import numpy as np
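# np.std(df["Minutes Played"].values)  # NOTE: np.std defaults to ddof=0 (population std), so it will not match pandas' .std() (ddof=1) above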
"Wind=='Weak'")["Minutes Played"].std()*len(df.query("Wind=='Weak'"))/len(df) df.query(
10.180585192846463
"Wind=='Strong'")["Minutes Played"].std()*len(df.query("Wind=='Strong'"))/len(df) df.query(
6.933944897151599
# Size-weighted std of the target for every distinct value of "Temp".
# `out` maps each Temp value to its weighted std and is reused below.
out = {}
for temp in df["Temp"].unique():
    print(temp)
    # Select the subset once; `@temp` refers to the loop variable inside query().
    subset = df.query("Temp==@temp")
    # Subset std (pandas default ddof=1) scaled by the subset's share of rows.
    out[temp] = subset["Minutes Played"].std() * len(subset) / len(df)
    print(out[temp])
    print()
Hot
2.6636888135137133
Mild
6.696785704762413
Cool
8.770699519880226
"Minutes Played"].std() - pd.Series(out).sum() df[
0.17993470207844808