import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
'seaborn-whitegrid')
plt.style.use(%matplotlib inline
Bias-Variance Charts
Interactive tutorial on bias-variance charts, with practical implementations and visualizations.
# Synthetic "house price" data: sin(x) + 0.5 sampled every 2 degrees on
# [0, 90) (x is in radians), plus Gaussian noise.
x = np.arange(0, 90, 2) * np.pi / 180
np.random.seed(10)  # Setting seed for reproducibility
var = 0.25  # passed as the `scale` (standard deviation) of np.random.normal
dy = 2 * var  # half-height of the error bar drawn in later cells
y = np.sin(x) + 0.5 + np.random.normal(0, var, len(x))
y_true = np.sin(x) + 0.5

# Polynomial feature table: columns x, x_2, ..., x_<max_deg>, then the target y.
max_deg = 20
data_x = [x**p for p in range(1, max_deg + 1)] + [y]
data_c = ['x'] + ['x_{}'.format(p) for p in range(2, max_deg + 1)] + ['y']
data = pd.DataFrame(np.column_stack(data_x), columns=data_c)
# Slide 1: the noise-free curve on unlabeled axes.
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)
plt.legend()
plt.savefig('images/true.pdf', transparent=True)

# Slide 2: the noisy observations are added to the current figure and it is
# saved again under a second name.
plt.plot(data['x'], data['y'], '.', label='Actual Prices')
plt.legend()
plt.savefig('images/data.pdf', transparent=True)
# True curve, the noisy observations, and a single error bar of half-height
# dy drawn at sample index 15.
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)
# Alternative kept from the original cell: shade the whole band instead.
# plt.fill_between(data['x'], y_true-dy, y_true+dy, color='green',alpha=0.2, label='Variance')
plt.errorbar(data['x'][15], y_true[15], yerr=dy, fmt='k', capsize=5, label='Variance')
plt.plot(data['x'], data['y'], '.', label='Actual Prices')
plt.legend()
plt.savefig('images/data_var.pdf', transparent=True)
Bias New
# Regenerate the noisy sin(x) + 0.5 sample (same seed as before) with a
# 16-degree polynomial feature table.
x = np.arange(0, 90, 2) * np.pi / 180
np.random.seed(10)  # Setting seed for reproducibility
var = 0.25  # passed as the `scale` (standard deviation) of np.random.normal
dy = 2 * var
y = np.sin(x) + 0.5 + np.random.normal(0, var, len(x))
y_true = np.sin(x) + 0.5

# Columns x, x_2, ..., x_<max_deg>, then the target y.
max_deg = 16
data_x = [x**p for p in range(1, max_deg + 1)] + [y]
data_c = ['x'] + ['x_{}'.format(p) for p in range(2, max_deg + 1)] + ['y']
data = pd.DataFrame(np.column_stack(data_x), columns=data_c)
# Bias build-up, step 1: true curve plus the noisy observations.
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)
plt.plot(data['x'], data['y'], '.', label='Actual Prices')
plt.legend()
plt.savefig('images/biasn_1.pdf', transparent=True)
# Bias build-up, step 2: add a constant prediction at the sample mean of y.
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)
plt.plot(data['x'], data['y'], '.', label='Actual Prices')
# The mean is computed once instead of once per plotted point.
mean_prediction = np.full(len(data), data['y'].mean())
plt.plot(data['x'], mean_prediction, ':r', label='Prediction')
plt.legend()
plt.savefig('images/biasn_2.pdf', transparent=True)
# Bias build-up, step 3: shade the gap between the true curve and the
# constant mean prediction.
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)
plt.plot(data['x'], data['y'], '.', label='Actual Prices')
# The mean is computed once and reused for both the line and the shaded area.
mean_prediction = np.full(len(data), data['y'].mean())
plt.plot(data['x'], mean_prediction, ':r', label='Prediction')
plt.fill_between(x, y_true, mean_prediction, color='green', alpha=0.2, label='Bias')
plt.legend()
plt.savefig('images/biasn_3.pdf', transparent=True)
Bias Old
# First training sample: angles 0..68 degrees (in radians), noise seeded with 10.
x1 = np.arange(0, 70, 2) * np.pi / 180
np.random.seed(10)  # Setting seed for reproducibility
var = 0.25  # passed as the `scale` (standard deviation) of np.random.normal
dy = 2 * var
y1 = np.sin(x1) + 0.5 + np.random.normal(0, var, len(x1))
# The true curve is evaluated on the full grid `x` defined by an earlier cell.
y_true = np.sin(x) + 0.5
# Second training sample: angles 20..88 degrees (in radians), noise seeded
# with 40. `var` and the full grid `x` come from earlier cells.
x2 = np.arange(20, 90, 2) * np.pi / 180
np.random.seed(40)
y2 = np.sin(x2) + 0.5 + np.random.normal(0, var, len(x2))
y_true = np.sin(x) + 0.5
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

# One panel per training sample, each drawn against the same true curve.
for panel, (xs, ys) in zip(ax, [(x1, y1), (x2, y2)]):
    panel.plot(x, y_true, 'g', label='True Function')
    panel.set_xlabel('Size (sq.ft)')
    panel.set_ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
    panel.plot(xs, ys, '.', label='Actual Prices')

plt.savefig('images/bias1.pdf', transparent=True, bbox_inches='tight')
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

# One panel per training sample, each drawn against the same true curve.
for panel, (xs, ys) in zip(ax, [(x1, y1), (x2, y2)]):
    panel.plot(x, y_true, 'g', label='True Function')
    panel.set_xlabel('Size (sq.ft)')
    panel.set_ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
    panel.plot(xs, ys, '.', label='Actual Prices')

# Only the left panel gets its constant (mean-of-y1) prediction on this slide.
ax[0].plot(x, np.full(len(x), y1.mean()), 'r:', label='Prediction')

# A single figure-level legend, built from the left panel's entries.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=3)
plt.savefig('images/bias2.pdf', transparent=True, bbox_inches='tight')
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

# One panel per training sample: true curve, that sample's points, and the
# constant prediction at that sample's mean.
for panel, (xs, ys) in zip(ax, [(x1, y1), (x2, y2)]):
    panel.plot(x, y_true, 'g', label='True Function')
    panel.set_xlabel('Size (sq.ft)')
    panel.set_ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
    panel.plot(xs, ys, '.', label='Actual Prices')
    panel.plot(x, np.full(len(x), ys.mean()), 'r:', label='Prediction')

# A single figure-level legend, built from the left panel's entries.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=3)
plt.savefig('images/bias3.pdf', transparent=True, bbox_inches='tight')
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)

# Four constant "models": the two sample means, plus two hand-shifted copies
# (-0.3 and +0.1) standing in for further training sets.
constant_fits = [
    (y2.mean(), r'$f_{\hat\theta(train1)}$'),
    (y1.mean(), r'$f_{\hat\theta(train2)}$'),
    (y2.mean() - 0.3, r'$f_{\hat\theta(train3)}$'),
    (y1.mean() + 0.1, r'$f_{\hat\theta(train4)}$'),
]
for level, fit_label in constant_fits:
    plt.plot(x, np.full(len(x), level), 'r-.', label=fit_label)

plt.legend(loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=5)
plt.savefig('images/bias4.pdf', transparent=True, bbox_inches='tight')
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 3))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)

# Average of the four constant models y2, y1, y2 - 0.3 and y1 + 0.1.
average_level = (2*y2.mean() + 2*y1.mean() - 0.2) / 4
plt.plot(x, np.full(len(x), average_level), 'r-.', label=r'$f_\bar{\theta}$')

plt.legend(loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=5)
plt.savefig('images/bias5.pdf', transparent=True, bbox_inches='tight')
plt.plot(x, y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)

# Average of the four constant models y2, y1, y2 - 0.3 and y1 + 0.1. It is
# computed once and reused for both the line and the shaded bias region.
fit = np.full(len(x), (2*y2.mean() + 2*y1.mean() - 0.2) / 4)
plt.plot(x, fit, 'r-.', label=r'$f_\bar{\theta}$')
plt.fill_between(x, y_true, fit, color='green', alpha=0.2, label='Bias')

plt.legend(loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=5)
plt.savefig('images/bias6.pdf', transparent=True, bbox_inches='tight')
Varying Degree on Bias
# Noisy sin(x) + 0.5 sample (seed 10) with a 16-degree polynomial feature
# table, rebuilt here so this section can run on its own.
x = np.arange(0, 90, 2) * np.pi / 180
np.random.seed(10)  # Setting seed for reproducibility
var = 0.25  # passed as the `scale` (standard deviation) of np.random.normal
dy = 2 * var
y = np.sin(x) + 0.5 + np.random.normal(0, var, len(x))
y_true = np.sin(x) + 0.5

# Columns x, x_2, ..., x_<max_deg>, then the target y.
max_deg = 16
data_x = [x**p for p in range(1, max_deg + 1)] + [y]
data_c = ['x'] + ['x_{}'.format(p) for p in range(2, max_deg + 1)] + ['y']
data = pd.DataFrame(np.column_stack(data_x), columns=data_c)
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

seed = 10  # kept from the original cell; not used by this cell
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(10, 4))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

# Left panel: the degree-0 "model" is just the sample mean of y.
mean_fit = np.full(len(x), y.mean())
ax[0].plot(x, mean_fit, 'r-.', label=r'$f_\bar{\theta}$')
ax[0].plot(data['x'], data['y'], '.b', label='Actual Prices')
ax[0].plot(data['x'], y_true, 'g', label='True Function')
ax[0].fill_between(x, y_true, mean_fit, color='green', alpha=0.2, label='Bias')
ax[0].set_title("Degree = 0")

# Remaining panel(s): a polynomial least-squares fit of each listed degree.
for i, deg in enumerate([1], start=1):
    predictors = ['x'] + ['x_%d' % p for p in range(2, deg + 1)]

    # LinearRegression(normalize=True) was removed in scikit-learn 1.2; scaling
    # the features in a pipeline is the replacement given in its deprecation notice.
    regressor = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
    regressor.fit(data[predictors], data['y'])
    y_pred = regressor.predict(data[predictors])

    ax[i].plot(data['x'], data['y'], '.b', label='Actual Prices')
    ax[i].plot(data['x'], y_pred, '-.r', label=r'$f_\bar{\theta}$')
    ax[i].plot(data['x'], y_true, 'g', label='True Function')
    ax[i].fill_between(x, y_true, y_pred, color='green', alpha=0.2, label='Bias')
    ax[i].set_title(f"Degree = {deg}")

# A single figure-level legend, built from the left panel's entries.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', frameon=True, fancybox=True, framealpha=1, ncol=4)
plt.subplots_adjust(wspace=0.01, hspace=0)
plt.savefig('images/bias7.pdf', transparent=True, bbox_inches='tight')
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

seed = 10  # kept from the original cell; not used by this cell
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(10, 4))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

# One panel per polynomial degree; the shaded area is the gap between the
# fitted curve and the true function.
for i, deg in enumerate([2, 3]):
    predictors = ['x'] + ['x_%d' % p for p in range(2, deg + 1)]

    # LinearRegression(normalize=True) was removed in scikit-learn 1.2; scaling
    # the features in a pipeline is the replacement given in its deprecation notice.
    regressor = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
    regressor.fit(data[predictors], data['y'])
    y_pred = regressor.predict(data[predictors])

    ax[i].plot(data['x'], data['y'], '.b', label='Actual Prices')
    ax[i].plot(data['x'], y_pred, '-.r', label=r'$f_\bar{\theta}$')
    ax[i].plot(data['x'], y_true, 'g', label='True Function')
    ax[i].fill_between(x, y_true, y_pred, color='green', alpha=0.2, label='Bias')
    ax[i].set_title(f"Degree = {deg}")

# A single figure-level legend, built from the left panel's entries.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', frameon=True, fancybox=True, framealpha=1, ncol=4)
plt.subplots_adjust(wspace=0.01, hspace=0)
plt.savefig('images/bias8.pdf', transparent=True, bbox_inches='tight')
Variance
# Noisy sin(x) + 0.5 sample (seed 10) with a 25-degree polynomial feature
# table; the variance cells below fit degree-25 models on these columns.
x = np.arange(0, 90, 2) * np.pi / 180
np.random.seed(10)  # Setting seed for reproducibility
var = 0.25  # passed as the `scale` (standard deviation) of np.random.normal
dy = 2 * var
y = np.sin(x) + 0.5 + np.random.normal(0, var, len(x))
y_true = np.sin(x) + 0.5

# Columns x, x_2, ..., x_<max_deg>, then the target y.
max_deg = 25
data_x = [x**p for p in range(1, max_deg + 1)] + [y]
data_c = ['x'] + ['x_{}'.format(p) for p in range(2, max_deg + 1)] + ['y']
data = pd.DataFrame(np.column_stack(data_x), columns=data_c)
# First training sample: angles 0..68 degrees (in radians), noise seeded with 10.
x1 = np.arange(0, 70, 2) * np.pi / 180
np.random.seed(10)  # Setting seed for reproducibility
var = 0.25  # passed as the `scale` (standard deviation) of np.random.normal
dy = 2 * var
y1 = np.sin(x1) + 0.5 + np.random.normal(0, var, len(x1))
# The true curve is evaluated on the full grid `x` defined by an earlier cell.
y_true = np.sin(x) + 0.5

# Second training sample: angles 20..88 degrees (in radians), noise seeded with 40.
x2 = np.arange(20, 90, 2) * np.pi / 180
np.random.seed(40)
y2 = np.sin(x2) + 0.5 + np.random.normal(0, var, len(x2))
y_true = np.sin(x) + 0.5
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)

# Offset of the first constant model (y2.mean()) from the average of all four.
dy = y2.mean() - (2*y2.mean() + 2*y1.mean() - 0.2) / 4

# Four constant "models": the two sample means, plus two hand-shifted copies.
constant_fits = [
    (y2.mean(), 'r-.', r'$f_{\hat\theta(train1)}$'),
    (y1.mean(), 'b-.', r'$f_{\hat\theta(train2)}$'),
    (y2.mean() - 0.3, 'c-.', r'$f_{\hat\theta(train3)}$'),
    (y1.mean() + 0.1, 'y-.', r'$f_{\hat\theta(train4)}$'),
]
for level, line_style, fit_label in constant_fits:
    plt.plot(x, np.full(len(x), level), line_style, label=fit_label)

# The shaded band runs from y2.mean() - 0.3 up to y2.mean().
upper = np.full(len(x), y2.mean())
plt.fill_between(x, upper, upper - 0.3, color='green', alpha=0.2, label='Variance')

plt.legend(loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=4)
plt.savefig('images/var1.pdf', transparent=True, bbox_inches='tight')
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xlabel('Size (sq.ft)')
plt.ylabel(r'Price (\$)')  # raw string: same text, no invalid-escape warning
plt.xticks([], [])
plt.yticks([], [])
plt.ylim(0, 2)
plt.xlim(0, 1.6)

# Average of the four constant models, and the offset of the first model
# (y2.mean()) from that average; the offset is used as the error-bar size.
average_level = (2*y2.mean() + 2*y1.mean() - 0.2) / 4
dy = y2.mean() - average_level

# Four constant "models": the two sample means, plus two hand-shifted copies.
constant_fits = [
    (y2.mean(), 'r-.', r'$f_{\hat\theta(train1)}$'),
    (y1.mean(), 'b-.', r'$f_{\hat\theta(train2)}$'),
    (y2.mean() - 0.3, 'c-.', r'$f_{\hat\theta(train3)}$'),
    (y1.mean() + 0.1, 'y-.', r'$f_{\hat\theta(train4)}$'),
]
for level, line_style, fit_label in constant_fits:
    plt.plot(x, np.full(len(x), level), line_style, label=fit_label)

# Error bars on every 4th grid point, centred on the average model.
plt.errorbar(x[::4], np.full(len(x), average_level)[::4], yerr=dy, fmt='k', capsize=3, label='Variance')

plt.legend(loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=4)
plt.savefig('images/var2.pdf', transparent=True, bbox_inches='tight')
Variance Variation
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

fig, ax = plt.subplots(nrows=2, ncols=2, sharey=True, figsize=(10, 8))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

modles = []  # (sic) original variable name kept; one prediction curve per seed

# Loop-invariant settings, hoisted out of the resampling loop.
deg = 25
# `>= 2` semantics, matching the other cells (the original tested `deg > 2`).
predictors = ['x'] + ['x_%d' % p for p in range(2, deg + 1)]
data_c_s = ['x'] + ['x_{}'.format(p) for p in range(2, max_deg + 1)] + ['y']

# Refit the same degree-25 polynomial on four independently seeded noise
# draws, one panel each.
for i, seed in enumerate([2, 4, 8, 16]):
    np.random.seed(seed)
    y_random = np.sin(x) + 0.5 + np.random.normal(0, var, len(x))
    data_x_s = [x**p for p in range(1, max_deg + 1)] + [y_random]
    data_s = pd.DataFrame(np.column_stack(data_x_s), columns=data_c_s)

    # LinearRegression(normalize=True) was removed in scikit-learn 1.2; scaling
    # the features in a pipeline is the replacement given in its deprecation notice.
    regressor = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
    regressor.fit(data_s[predictors], data_s['y'])
    y_pred = regressor.predict(data_s[predictors])
    modles.append(y_pred)

    panel = ax[i // 2][i % 2]  # row-major position in the 2x2 grid
    panel.plot(data_s['x'], data_s['y'], '.b', label='Data Point')
    panel.plot(data_s['x'], y_pred, 'r-.', label='Prediction')
    panel.plot(data['x'], y_true, 'g-', label='True Function')

# A single figure-level legend, built from the top-left panel's entries.
handles, labels = ax[0][0].get_legend_handles_labels()
fig.legend(handles, labels, loc='center', frameon=True, fancybox=True, framealpha=1, ncol=4)
plt.subplots_adjust(wspace=0.01, hspace=0)
plt.savefig('images/var3.pdf', transparent=True, bbox_inches='tight')
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

modles = []  # (sic) original variable name kept; one prediction curve per seed

# Loop-invariant settings, hoisted out of the resampling loop.
deg = 25
# `>= 2` semantics, matching the other cells (the original tested `deg > 2`).
predictors = ['x'] + ['x_%d' % p for p in range(2, deg + 1)]
data_c_s = ['x'] + ['x_{}'.format(p) for p in range(2, max_deg + 1)] + ['y']

# Fit the degree-25 polynomial on 49 independently seeded noise draws.
for seed in range(1, 50):
    np.random.seed(seed)
    y_random = np.sin(x) + 0.5 + np.random.normal(0, var, len(x))
    data_x_s = [x**p for p in range(1, max_deg + 1)] + [y_random]
    data_s = pd.DataFrame(np.column_stack(data_x_s), columns=data_c_s)

    # LinearRegression(normalize=True) was removed in scikit-learn 1.2; scaling
    # the features in a pipeline is the replacement given in its deprecation notice.
    regressor = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
    regressor.fit(data_s[predictors], data_s['y'])
    y_pred = regressor.predict(data_s[predictors])
    modles.append(y_pred)

fig, ax = plt.subplots(nrows=1, ncols=1, sharey=True, figsize=(8, 4))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 1.6), ylim=(0, 2))

modles = np.array(modles)  # one row per fitted model, one column per grid point

# Error bars of +/- 2 standard deviations across the fits, on every 4th grid
# point and centred on the mean fit, followed by three individual fits.
ax.errorbar(x[::4], modles.mean(axis=0)[::4], yerr=2*modles.std(axis=0)[::4], fmt=':k', capsize=3, label='Variance')
ax.plot(x, modles[1], 'c-.', label=r'$f_{\hat\theta(train1)}$')
ax.plot(x, modles[2], 'y-.', label=r'$f_{\hat\theta(train2)}$')
ax.plot(x, modles[3], 'm-.', label=r'$f_{\hat\theta(train3)}$')
ax.plot(data['x'], y_true, 'g-', label='True Function')
ax.set_xlabel('Size (sq.ft)')

handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, loc='upper center', frameon=True, fancybox=True, framealpha=1, ncol=5)
plt.subplots_adjust(wspace=0.01, hspace=0)
plt.savefig('images/var4.pdf', transparent=True, bbox_inches='tight')
Bias-Variance Tradeoff
# Data for the trade-off section: sin(x) + 0.5*x - 0.05*x**2 on 201 evenly
# spaced points over [0, 4*pi], plus Gaussian noise.
x = np.linspace(0, 4*np.pi, 201)
np.random.seed(10)  # Setting seed for reproducibility
var = 1  # passed as the `scale` (standard deviation) of np.random.normal
p = np.poly1d([1, 2, 3])  # NOTE(review): not used by any cell visible here
y = np.sin(x) + 0.5*x - 0.05*x**2 + np.random.normal(0, var, len(x))
y_true = np.sin(x) + 0.5*x - 0.05*x**2

# Columns x, x_2, ..., x_<max_deg>, then the target y.
max_deg = 20
data_x = [x**k for k in range(1, max_deg + 1)] + [y]
data_c = ['x'] + ['x_{}'.format(k) for k in range(2, max_deg + 1)] + ['y']
data = pd.DataFrame(np.column_stack(data_x), columns=data_c)
# True curve and noisy observations on tick-less axes. Axis labels and limits
# were commented out in the original cell and are left off here.
plt.plot(data['x'], y_true, 'g', label='True Function')
plt.xticks([], [])
plt.yticks([], [])

plt.plot(data['x'], data['y'], '.', label='Data Points')
plt.legend()
plt.savefig('images/bv-1.pdf', transparent=True, bbox_inches='tight')
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

seed = 10
fig, ax = plt.subplots(nrows=2, ncols=3, sharey=True, figsize=(10, 5))
plt.setp(ax, xticks=[], xticklabels=[], yticks=[], yticklabels=[], xlim=(0, 4*np.pi))

degs = [1, 3, 7]

# Top row: one fit per degree on the fixed data set, with the gap to the true
# function shaded.
for i, deg in enumerate(degs):
    predictors = ['x'] + ['x_%d' % k for k in range(2, deg + 1)]

    # LinearRegression(normalize=True) was removed in scikit-learn 1.2; scaling
    # the features in a pipeline is the replacement given in its deprecation notice.
    regressor = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
    regressor.fit(data[predictors], data['y'])
    y_pred = regressor.predict(data[predictors])

    ax[0][i].plot(data['x'], y_pred, '-.r', label='Prediction')
    ax[0][i].plot(data['x'], y_true, 'g', label='True Function')
    ax[0][i].fill_between(x, y_true, y_pred, color='green', alpha=0.2, label='Bias')
    ax[0][i].set_title(f"Degree = {deg}")

# Bottom row: refit each degree on 49 independently seeded noise draws and
# show the spread of the predictions. The feature columns depend only on x,
# so they are built once rather than once per draw.
data_c_s = ['x'] + ['x_{}'.format(k) for k in range(2, max_deg + 1)] + ['y']
power_columns = [x**k for k in range(1, max_deg + 1)]

for i, deg in enumerate(degs):
    predictors = ['x'] + ['x_%d' % k for k in range(2, deg + 1)]
    models = []

    for seed in range(1, 50):
        np.random.seed(seed)
        y_random = np.sin(x) + 0.5*x - 0.05*x**2 + np.random.normal(0, var, len(x))
        data_x_s = power_columns + [y_random]
        data_s = pd.DataFrame(np.column_stack(data_x_s), columns=data_c_s)

        regressor = make_pipeline(StandardScaler(with_mean=False), LinearRegression())
        regressor.fit(data_s[predictors], data_s['y'])
        y_pred = regressor.predict(data_s[predictors])
        models.append(y_pred)

    models = np.array(models)  # one row per fitted model, one column per grid point
    # +/- 2 standard deviations across the fits, on every 7th grid point.
    ax[1][i].errorbar(x[::7], models.mean(axis=0)[::7], yerr=2*models.std(axis=0)[::7], fmt=':k', capsize=3, label='Variance')
    ax[1][i].plot(data['x'], y_true, 'g-', label='True Function')

# Merge the legend entries of the first column's two panels. 'True Function'
# appears in both, so keep only the first handle seen for each label.
legend_entries = {}
for panel in (ax[0][0], ax[1][0]):
    for handle, label in zip(*panel.get_legend_handles_labels()):
        legend_entries.setdefault(label, handle)
handles, labels = list(legend_entries.values()), list(legend_entries.keys())
fig.legend(handles, labels, loc='center', frameon=True, fancybox=True, framealpha=1, ncol=5)
plt.subplots_adjust(wspace=0.01, hspace=0)
plt.savefig('images/bv-2.pdf', transparent=True, bbox_inches='tight')