import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
# Interactive widget
from ipywidgets import interact
# Retina mode
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
Basis Expansion in Linear Regression
ML
# Download CO2 data from NOAA
url = 'https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.csv'
names = 'year,month,decimal date,average,deseasonalized,ndays,sdev,unc'.split(',')
# no index column
df = pd.read_csv(url, skiprows=72, names=names, index_col=False)
df
| | year | month | decimal date | average | deseasonalized | ndays | sdev | unc |
---|---|---|---|---|---|---|---|---|
0 | 1960 | 10 | 1960.7896 | 313.83 | 316.85 | -1 | -9.99 | -0.99 |
1 | 1960 | 11 | 1960.8743 | 315.00 | 316.89 | -1 | -9.99 | -0.99 |
2 | 1960 | 12 | 1960.9563 | 316.19 | 316.96 | -1 | -9.99 | -0.99 |
3 | 1961 | 1 | 1961.0411 | 316.89 | 316.84 | -1 | -9.99 | -0.99 |
4 | 1961 | 2 | 1961.1260 | 317.70 | 317.05 | -1 | -9.99 | -0.99 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
761 | 2024 | 3 | 2024.2083 | 425.38 | 423.92 | 22 | 0.99 | 0.40 |
762 | 2024 | 4 | 2024.2917 | 426.57 | 424.03 | 24 | 0.98 | 0.38 |
763 | 2024 | 5 | 2024.3750 | 426.90 | 423.61 | 29 | 0.76 | 0.27 |
764 | 2024 | 6 | 2024.4583 | 426.91 | 424.44 | 20 | 0.65 | 0.28 |
765 | 2024 | 7 | 2024.5417 | 425.55 | 425.10 | 24 | 0.69 | 0.27 |
766 rows × 8 columns
df.index = pd.to_datetime(df[['year', 'month']].apply(lambda x: '{}-{}'.format(x[0], x[1]), axis=1))
df.average.plot(figsize=(6, 4), title='CO2 Levels at Mauna Loa Observatory')
plt.xlabel('Year')
plt.ylabel('CO2 Level')
Text(0, 0.5, 'CO2 Level')
# Create X and y
# X = months since first measurement
X = np.array(range(len(df)))
y = df.average.values
plt.plot(X, y)
plt.xlabel('Months since first measurement')
plt.ylabel('CO2 Level')
Text(0, 0.5, 'CO2 Level')
# Normalize X and y
s1 = StandardScaler()
s2 = StandardScaler()

X_norm = s1.fit_transform(X.reshape(-1, 1))
y_norm = s2.fit_transform(y.reshape(-1, 1))
X_norm.mean(), X_norm.std()
(0.0, 0.9999999999999999)
df = pd.DataFrame({"x": X.flatten(), "transformed": X_norm.flatten()})
df
| | x | transformed |
---|---|---|
0 | 0 | -1.729791 |
1 | 1 | -1.725269 |
2 | 2 | -1.720746 |
3 | 3 | -1.716224 |
4 | 4 | -1.711702 |
... | ... | ... |
761 | 761 | 1.711702 |
762 | 762 | 1.716224 |
763 | 763 | 1.720746 |
764 | 764 | 1.725269 |
765 | 765 | 1.729791 |
766 rows × 2 columns
df["re-transformed"] = s1.inverse_transform(df["transformed"].values.reshape(-1, 1))
df
| | x | transformed | re-transformed |
---|---|---|---|
0 | 0 | -1.729791 | 0.0 |
1 | 1 | -1.725269 | 1.0 |
2 | 2 | -1.720746 | 2.0 |
3 | 3 | -1.716224 | 3.0 |
4 | 4 | -1.711702 | 4.0 |
... | ... | ... | ... |
761 | 761 | 1.711702 | 761.0 |
762 | 762 | 1.716224 | 762.0 |
763 | 763 | 1.720746 | 763.0 |
764 | 764 | 1.725269 | 764.0 |
765 | 765 | 1.729791 | 765.0 |
766 rows × 3 columns
df.mean()
x 382.5
transformed 0.0
re-transformed 382.5
dtype: float64
df.std()
x 221.269444
transformed 1.000653
re-transformed 221.269444
dtype: float64
x_test = np.array([800])
s1.transform(x_test.reshape(-1, 1))
array([[1.88807266]])
y_norm.mean(), y_norm.std()
(-2.003619202665557e-15, 0.9999999999999998)
plt.plot(X_norm, y_norm)
plt.xlabel('(Normalized) Months since first measurement')
plt.ylabel('(Normalized) CO2 Level')
Text(0, 0.5, '(Normalized) CO2 Level')
Task 1: Interpolation
np.random.seed(42)
train_idx = np.random.choice(range(len(X_norm)), size=int(len(X_norm) * 0.7), replace=False)
test_idx = list(set(range(len(X_norm))) - set(train_idx))

X_train = X[train_idx]
y_train = y[train_idx]

X_test = X[test_idx]
y_test = y[test_idx]

X_norm_train = X_norm[train_idx]
y_norm_train = y_norm[train_idx]

X_norm_test = X_norm[test_idx]
y_norm_test = y_norm[test_idx]
plt.plot(X_norm_train, y_norm_train, 'o', label='train', markersize=1)
plt.plot(X_norm_test, y_norm_test, 'o', label='test', ms=3)
plt.xlabel('(Normalized) Months since first measurement')
plt.ylabel('(Normalized) CO2 Level')
plt.legend()
errors = {}
X_lin_1d = np.linspace(X_norm.min(), X_norm.max(), 100).reshape(-1, 1)
Model 1: Vanilla Linear Regression
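As a baseline, a plain linear model is fit directly on the (normalized) month index, with no basis expansion:

\[\hat{y} = w_0 + w_1 x\]

The helper below fits any given model on the normalized data, maps the predictions back to the original CO2 scale, and records the train and test MSE.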
def plot_fit_predict(model, X_norm_train, y_norm_train, X_norm_test, y_norm_test, X_lin, title, plot=True):
    model.fit(X_norm_train, y_norm_train)
    y_hat_train = model.predict(X_norm_train).reshape(-1, 1)
    y_hat_test = model.predict(X_norm_test).reshape(-1, 1)

    # Transform back to original scale
    y_hat_train = s2.inverse_transform(y_hat_train)
    y_hat_test = s2.inverse_transform(y_hat_test)

    y_hat_lin = s2.inverse_transform(model.predict(X_lin).reshape(-1, 1))

    errors[title] = {"train": mean_squared_error(y_train, y_hat_train),
                     "test": mean_squared_error(y_test, y_hat_test)}
    if plot:
        plt.plot(X_train, y_train, 'o', label='train', markersize=1)
        plt.plot(X_test, y_test, 'o', label='test', ms=3)
        plt.plot(s1.inverse_transform(X_lin_1d), y_hat_lin, label='model')
        plt.xlabel('Months since first measurement')
        plt.ylabel('CO2 Levels')
        plt.legend()
        plt.title('{}\n Train MSE: {:.2f} | Test MSE: {:.2f}'.format(title, errors[title]["train"], errors[title]["test"]))

    return errors[title]
model = LinearRegression()
plot_fit_predict(model, X_norm_train, y_norm_train, X_norm_test, y_norm_test, X_lin_1d, "Linear Regression")
{'train': 22.02534268790436, 'test': 18.72254862852729}
MLP
# use sk-learn for MLP
mlp_model = MLPRegressor(hidden_layer_sizes=[128, 256, 512, 256, 128], max_iter=10000)

plot_fit_predict(mlp_model, X_norm_train, y_norm_train, X_norm_test, y_norm_test, X_lin_1d, "MLP Regression")
/Users/nipun/mambaforge/lib/python3.10/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1624: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
{'train': 5.935971669202183, 'test': 6.511128786459153}
Polynomial Regression of degree “d”
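With a degree-\(d\) polynomial basis, the model stays linear in its parameters but becomes nonlinear in the input:

\[\hat{y} = w_0 + w_1 x + w_2 x^2 + \dots + w_d x^d\]

The helper below builds the columns \(x, x^2, \dots, x^d\) and standardizes each column before the linear model is fit.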
def create_poly_features(X, d):
    """
    X is (N, 1) array
    d is degree of polynomial
    returns normalized polynomial features of X
    """
    X_poly = np.zeros((len(X), d))
    X_poly[:, 0] = X[:, 0]
    for i in range(1, d):
        X_poly[:, i] = X[:, 0] ** (i + 1)

    # Normalize each column
    X_poly = StandardScaler().fit_transform(X_poly)
    return X_poly
xs = np.linspace(-5, 5, 50).reshape(-1, 1)
poly_f = create_poly_features(xs, 4)
for i in range(4):
    plt.plot(xs, poly_f[:, i], label='x^{}'.format(i+1))
plt.legend()
def show_poly_features(degree):
    X_poly = create_poly_features(X_norm, degree)

    plt.plot(X_norm, X_poly)
    plt.xlabel('X')
    plt.ylabel('Polynomial Features')
    plt.title('Degree {}'.format(degree))

show_poly_features(2)
interact(show_poly_features, degree=(1, 10, 1))
<function __main__.show_poly_features(degree)>
model2 = LinearRegression()
degree = 4
Xf_norm_train = create_poly_features(X_norm_train.reshape(-1, 1), degree)
Xf_norm_test = create_poly_features(X_norm_test.reshape(-1, 1), degree)
X_lin_poly = create_poly_features(X_lin_1d, degree)

plot_fit_predict(model2, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, f"Polynomial Regression (d={degree})")
{'train': 4.911310087864347, 'test': 6.6713097074692875}
model2.coef_, model2.intercept_
(array([[0.97361847, 0.09894695, 0.02772375, 0.03416679]]),
array([-0.00901074]))
X_lin_poly.shape
(100, 4)
model2.coef_.shape
(1, 4)
X_lin_1d.shape
(100, 1)
plt.plot(X_lin_1d, model2.intercept_.repeat(len(X_lin_1d)), label='Degree 0')
plt.plot(X_lin_1d, X_lin_poly[:, 0:1]@model2.coef_[:, 0], label='Degree 1')
plt.plot(X_lin_1d, X_lin_poly[:, 1:2]@model2.coef_[:, 1], label='Degree 2')
plt.plot(X_lin_1d, X_lin_poly[:, 2:3]@model2.coef_[:, 2], label='Degree 3')
plt.plot(X_lin_1d, X_lin_poly[:, 3:4]@model2.coef_[:, 3], label='Degree 4')
plt.legend()
def show_additive(model, X_lin_1d, max_degree):
    ys = model.intercept_.repeat(len(X_lin_1d))
    # plt.fill_between(X_lin_1d.squeeze(), s2.inverse_transform(ys.reshape(-1, 1)).squeeze(), alpha=0.1)
    print(ys.shape, X_lin_1d.shape)
    label = '{:0.2f}x'.format(model.intercept_[0])

    for i in range(1, max_degree + 1):
        yd = X_lin_poly[:, i-1:i]@model.coef_[:, i-1]
        ys = ys + yd
        label += ' + {:0.2f} x^{}'.format(model.coef_[:, i-1][0], i)

    ys = s2.inverse_transform(ys.reshape(-1, 1))
    plt.plot(X_lin_1d, ys, label=label)
    plt.plot(X_norm_train, y_train, 'o', label='train', markersize=1)

    plt.legend()

show_additive(model2, X_lin_1d, 3)
(100,) (100, 1)
from ipywidgets import interact, fixed
m = model2
interact(show_additive, model=fixed(m), X_lin_1d=fixed(X_lin_1d), max_degree=(1, len(m.coef_[0]), 1))
<function __main__.show_additive(model, X_lin_1d, max_degree)>
for degree in range(1, 10):
    Xf_norm_train = create_poly_features(X_norm_train.reshape(-1, 1), degree)
    Xf_norm_test = create_poly_features(X_norm_test.reshape(-1, 1), degree)

    X_lin_poly = create_poly_features(X_lin_1d, degree)

    plot_fit_predict(model2, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, f"Polynomial Regression (d={degree})", plot=False)
errors_df = pd.DataFrame(errors).T
errors_df
| | train | test |
---|---|---|
Linear Regression | 22.025343 | 18.722549 |
MLP Regression | 5.935972 | 6.511129 |
Polynomial Regression (d=4) | 4.911310 | 6.671310 |
Polynomial Regression (d=1) | 22.025343 | 20.401004 |
Polynomial Regression (d=2) | 5.120285 | 6.983209 |
Polynomial Regression (d=3) | 5.001988 | 6.904987 |
Polynomial Regression (d=5) | 4.802187 | 6.752464 |
Polynomial Regression (d=6) | 4.800177 | 6.755818 |
Polynomial Regression (d=7) | 4.741101 | 6.819747 |
Polynomial Regression (d=8) | 4.735053 | 6.846223 |
Polynomial Regression (d=9) | 4.710354 | 6.859936 |
1 | 22.025343 | 20.401004 |
2 | 5.120285 | 6.983209 |
3 | 5.001988 | 6.904987 |
4 | 4.911310 | 6.671310 |
5 | 4.802187 | 6.752464 |
6 | 4.800177 | 6.755818 |
7 | 4.741101 | 6.819747 |
8 | 4.735053 | 6.846223 |
9 | 4.710354 | 6.859936 |
10 | 4.704567 | 6.857088 |
11 | 4.592348 | 6.814545 |
12 | 4.586188 | 6.859695 |
13 | 4.586165 | 6.858467 |
14 | 4.584720 | 6.872560 |
15 | 4.579522 | 6.854161 |
16 | 4.577088 | 6.856695 |
17 | 4.577062 | 6.857773 |
18 | 4.576811 | 6.862393 |
19 | 4.536762 | 6.881145 |
errors_df.plot(kind='bar', figsize=(12, 6), title='MSE for Train and Test Sets')
# bias-variance tradeoff
errors_poly = {}

for degree in range(1, 20):
    Xf_norm_train = create_poly_features(X_norm_train.reshape(-1, 1), degree)
    Xf_norm_test = create_poly_features(X_norm_test.reshape(-1, 1), degree)

    X_lin_poly = create_poly_features(X_lin_1d, degree)

    plot_fit_predict(model2, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, degree, plot=False)

    # getting errors for polynomial regression only, for plotting
    errors_poly[degree] = errors[degree]
errors_poly_df = pd.DataFrame(errors_poly).T
best_degree = np.argmin(errors_poly_df.test) + 1
min_error = errors_poly_df.test.min()
print(f"Best degree: {best_degree}, Min error: {min_error}")
# set figure size
plt.figure(figsize=(12, 6))
plt.plot(errors_poly_df.index.values, errors_poly_df.train.values, label='train')
plt.plot(errors_poly_df.index.values, errors_poly_df.test.values, label='test')
plt.axvline(best_degree, color='black', linestyle='--', label='best degree')
plt.xticks(np.arange(min(errors_poly_df.index), max(errors_poly_df.index)+1, 1.0))
plt.ylim(4.5, 7.5)  # set y limit - to show the difference between train and test clearly
plt.xlabel('Degree')
plt.ylabel('MSE')
plt.title('Bias-Variance Tradeoff')
plt.legend()
plt.show()
Best degree: 4, Min error: 6.6713097074692875
Ridge Regression with polynomial basis
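Ridge regression keeps the same polynomial basis but adds an \(\ell_2\) penalty on the weights, which shrinks the higher-degree coefficients and reduces the variance of the fit:

\[\min_{w} \; \lVert y - \Phi w \rVert_2^2 + \alpha \lVert w \rVert_2^2\]

where \(\Phi\) is the matrix of (standardized) polynomial features and \(\alpha\) (0.3 below) controls the regularization strength.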
# initialize the ridge regression model
model_ridge = Ridge(alpha=0.3)

errors_ridge = {}

for degree in range(1, 20):
    Xf_norm_train = create_poly_features(X_norm_train.reshape(-1, 1), degree)
    Xf_norm_test = create_poly_features(X_norm_test.reshape(-1, 1), degree)

    X_lin_poly = create_poly_features(X_lin_1d, degree)

    plot_fit_predict(model_ridge, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, f'ridge_{degree}', plot=False)

    # getting errors for ridge regression only, for plotting
    errors_ridge[degree] = errors[f'ridge_{degree}']
errors_ridge_df = pd.DataFrame(errors_ridge).T
best_degree_ridge = np.argmin(errors_ridge_df.test) + 1
min_error = errors_ridge_df.test.min()
print(f"Best degree: {best_degree_ridge}, Min error: {min_error}")
# set figure size
plt.figure(figsize=(12, 6))
plt.plot(errors_ridge_df.index.values, errors_ridge_df.train.values, label='train')
plt.plot(errors_ridge_df.index.values, errors_ridge_df.test.values, label='test')
plt.axvline(best_degree_ridge, color='black', linestyle='--', label='best degree')
plt.xticks(np.arange(min(errors_ridge_df.index), max(errors_ridge_df.index)+1, 1.0))
plt.ylim(4.5, 7.5)  # set y limit - to show the difference between train and test clearly
plt.xlabel('Degree')
plt.ylabel('MSE')
plt.title('Bias-Variance Tradeoff')
plt.legend()
plt.show()
Best degree: 4, Min error: 6.535907094411071
plot_fit_predict(model_ridge, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, f"Ridge Regression with Polynomial Features (d={best_degree_ridge})", plot=True)
{'train': 4.767435271070607, 'test': 6.39886853568042}
Gaussian Basis Functions
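Each Gaussian basis function is a bump of width \(\sigma\) centred at one of \(d\) locations \(\mu_1, \dots, \mu_d\) spread evenly over the input range:

\[\phi_j(x) = \exp\left(-\frac{(x - \mu_j)^2}{2\sigma^2}\right), \quad j = 1, \dots, d\]

The helper below evaluates all \(d\) such features for every input point.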
def create_guassian_basis(X, d, std=1):
    """
    X is (N, 1) array
    d is number of basis functions
    std is the shared width of each basis function
    """
    means = np.linspace(X.min(), X.max(), d)
    X = np.repeat(X, d, axis=1)
    means = np.repeat(means.reshape(-1, 1), len(X), axis=1).T

    return np.exp(-(X - means) ** 2 / (2 * std ** 2))
def show_gaussian_basis(d, stdev):
    xs = np.linspace(-5, 5, 100).reshape(-1, 1)
    X_gauss = create_guassian_basis(xs, d, std=stdev)

    plt.plot(xs, X_gauss)
    plt.xlabel('X')
    plt.ylabel('Gaussian Basis')
    plt.title('Degree {} \nStddev {}'.format(d, stdev))
    plt.ylim(0, 1)

show_gaussian_basis(4, 1)
interact(show_gaussian_basis, d=(1, 10, 1), stdev=(0.1, 20, 0.1))
<function __main__.show_gaussian_basis(d, stdev)>
model_gauss = LinearRegression()
degree = 4
Xf_norm_train = create_guassian_basis(X_norm_train.reshape(-1, 1), degree)
Xf_norm_test = create_guassian_basis(X_norm_test.reshape(-1, 1), degree)

X_lin_poly = create_guassian_basis(X_lin_1d, degree)

plot_fit_predict(model2, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, f"Gaussian Basis Linear Regression (d={degree})")
{'train': 5.390651367676581, 'test': 5.618947635873222}
errors.clear()
for degree in range(3, 7):
    Xf_norm_train = create_guassian_basis(X_norm_train.reshape(-1, 1), degree)
    Xf_norm_test = create_guassian_basis(X_norm_test.reshape(-1, 1), degree)

    X_lin_poly = create_guassian_basis(X_lin_1d, degree)

    plot_fit_predict(model2, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, f"Gaussian Basis Linear Regression (d={degree})", plot=False)
errors_df = pd.DataFrame(errors).T
errors_df.plot(kind='bar', figsize=(12, 6), title='MSE for Train and Test Sets')
errors.clear()
# Bias Variance Tradeoff wrt Sigma
num_gaussians = 3
for std in [0.1, 0.5, 1, 2, 5, 10]:
    Xf_norm_train = create_guassian_basis(X_norm_train.reshape(-1, 1), num_gaussians, std)
    Xf_norm_test = create_guassian_basis(X_norm_test.reshape(-1, 1), num_gaussians, std)

    X_lin_poly = create_guassian_basis(X_lin_1d, num_gaussians, std)

    plot_fit_predict(model2, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_poly, f"Gaussian Basis Linear Regression (d=3, std={std})", plot=False)
# Plot the train and test errors for different values of sigma
errors_df = pd.DataFrame(errors).T

test_errors = errors_df.test.values
train_errors = errors_df.train.values

log_test_errors = np.log(test_errors)
log_train_errors = np.log(train_errors)

stds = [0.1, 0.5, 1, 2, 5, 10]

plt.plot(stds, log_test_errors, label='Log Test Loss')
plt.plot(stds, log_train_errors, label='Log Train Loss')

plt.scatter(stds, log_test_errors)
plt.scatter(stds, log_train_errors)

plt.legend()
plt.xlabel('Sigma')
plt.ylabel('Log(MSE)')
plt.title('Bias Variance Tradeoff wrt Sigma')
Text(0.5, 1.0, 'Bias Variance Tradeoff wrt Sigma')
Gaussian Process
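The kernel used below is a sum of components, each modelling one aspect of the CO2 series:

\[k(x, x') = k_{\text{trend}}(x, x') + k_{\text{seasonal}}(x, x') + k_{\text{irregularities}}(x, x') + k_{\text{noise}}(x, x')\]

with an RBF term for the long-term trend, an RBF \(\times\) ExpSineSquared term for the yearly seasonality, a RationalQuadratic term for medium-term irregularities, and an RBF plus WhiteKernel term for noise.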
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.gaussian_process.kernels import WhiteKernel
long_term_trend_kernel = 50.0**2 * RBF(length_scale=50.0)

seasonal_kernel = (
    2.0**2
    * RBF(length_scale=100.0)
    * ExpSineSquared(length_scale=1.0, periodicity=1.0, periodicity_bounds="fixed")
)

irregularities_kernel = 0.5**2 * RationalQuadratic(length_scale=1.0, alpha=1.0)

noise_kernel = 0.1**2 * RBF(length_scale=0.1) + WhiteKernel(
    noise_level=0.1**2, noise_level_bounds=(1e-5, 1e5)
)

co2_kernel = (
    long_term_trend_kernel + seasonal_kernel + irregularities_kernel + noise_kernel
)
co2_kernel
50**2 * RBF(length_scale=50) + 2**2 * RBF(length_scale=100) * ExpSineSquared(length_scale=1, periodicity=1) + 0.5**2 * RationalQuadratic(alpha=1, length_scale=1) + 0.1**2 * RBF(length_scale=0.1) + WhiteKernel(noise_level=0.01)
# Using GP for the interpolation problem
from sklearn.gaussian_process import GaussianProcessRegressor
def plot_fit_gp(X_norm_train, y_norm_train, X_norm_test, y_norm_test, X_lin, title, plot=True):
    gaussian_process = GaussianProcessRegressor(
        kernel=co2_kernel, n_restarts_optimizer=9
    )
    gaussian_process.fit(X_norm_train, y_norm_train)

    y_hat_train, std_prediction_train = gaussian_process.predict(X_norm_train, return_std=True)
    y_hat_test, std_prediction_test = gaussian_process.predict(X_norm_test, return_std=True)

    y_hat_train = y_hat_train.reshape(-1, 1)
    y_hat_test = y_hat_test.reshape(-1, 1)

    # Transform back to original scale
    y_hat_train = s2.inverse_transform(y_hat_train)
    y_hat_test = s2.inverse_transform(y_hat_test)

    y_hat_lin, std_prediction_lin = gaussian_process.predict(X_lin, return_std=True)
    y_hat_lin = y_hat_lin.reshape(-1, 1)
    y_hat_lin = s2.inverse_transform(y_hat_lin)

    errors[title] = {"train": mean_squared_error(y_train, y_hat_train),
                     "test": mean_squared_error(y_test, y_hat_test)}
    if plot:
        plt.plot(X_train, y_train, 'o', label='train', markersize=1)
        plt.plot(X_test, y_test, 'o', label='test', ms=3)
        plt.plot(s1.inverse_transform(X_lin_1d), y_hat_lin, label='model')
        plt.fill_between(s1.inverse_transform(X_lin_1d).reshape(-1),
                         (y_hat_lin - 1.96*std_prediction_lin.reshape(-1, 1)).reshape(-1),
                         (y_hat_lin + 1.96*std_prediction_lin.reshape(-1, 1)).reshape(-1),
                         alpha=0.5, label='95% Confidence interval')
        plt.xlabel('Months since first measurement')
        plt.ylabel('CO2 Levels')
        plt.legend()
        plt.title('{}\n Train MSE: {:.2f} | Test MSE: {:.2f}'.format(title, errors[title]["train"], errors[title]["test"]))

    return errors[title]

plot_fit_gp(X_norm_train, y_norm_train, X_norm_test, y_norm_test, X_lin_1d, "Gaussian Process Regression")
KeyboardInterrupt: the cell was interrupted while GaussianProcessRegressor.fit was optimizing the kernel hyperparameters (traceback truncated).
Fourier Features
Reference: https://bmild.github.io/fourfeat/
Fourier Feature Mapping
For every input x, we map it to a higher dimensional space using the following function:
\[\gamma(x) = [\cos(2\pi Bx), \sin(2\pi Bx)]^{T} \]
where \(B\) is a random Gaussian matrix whose entries are drawn independently from a normal distribution \(\mathcal{N}(0, \sigma^{2})\).
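A minimal sketch of this mapping for our one-dimensional input, taking \(B\) to be a vector of frequencies sampled from \(\mathcal{N}(0, \sigma^2)\) (the helper name fourier_feature_map is only for illustration):

def fourier_feature_map(x, B):
    # gamma(x) = [cos(2*pi*B*x), sin(2*pi*B*x)] for a column vector x
    proj = 2 * np.pi * x @ B.reshape(1, -1)   # shape (N, len(B))
    return np.hstack([np.cos(proj), np.sin(proj)])

B = 5 * np.random.randn(5)                    # sigma = 5, five random frequencies
gamma_X = fourier_feature_map(X_norm, B)      # shape (N, 10)

The manual featurization in the next cell plots the same idea, one frequency at a time (dropping the \(2\pi\) factor).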
np.random.seed(42)
sigma = 5
NUM_features = 5
fs = sigma*np.random.randn(NUM_features)
print(fs)

for i in range(NUM_features):
    plt.plot(X_norm, np.sin(fs[i]*X_norm), label=f'feature {i}-sin')
    plt.plot(X_norm, np.cos(fs[i]*X_norm), label=f'feature {i}-cos')

plt.legend()
plt.title('Fourier Featurization of X manually')
[ 2.48357077 -0.69132151 3.23844269 7.61514928 -1.17076687]
Text(0.5, 1.0, 'Fourier Featurization of X manually')
# Explicit implementation of RFF
def create_random_features(X, gamma, NUM_features):
    """
    X is (N, 1) array
    gamma is a scalar
    NUM_features is a scalar
    """
    X_rff = np.zeros((len(X), 2*NUM_features + 1))
    X_rff[:, 0] = X[:, 0]
    for i in range(NUM_features):
        b = np.random.randn()
        X_rff[:, i+1] = np.sin(2*np.pi*gamma*b*X[:, 0])
        X_rff[:, i+NUM_features+1] = np.cos(2*np.pi*gamma*b*X[:, 0])

    # Normalize each column
    X_rff = StandardScaler().fit_transform(X_rff)
    return X_rff
Sklearn Implementation
# Sklearn's implementation of RFF
from sklearn.kernel_approximation import RBFSampler
RBFSampler?
Init signature: RBFSampler(*, gamma=1.0, n_components=100, random_state=None)
Docstring:
Approximate a RBF kernel feature map using random Fourier features.
It implements a variant of Random Kitchen Sinks.[1]
Read more in the :ref:`User Guide <rbf_kernel_approx>`.
Parameters
----------
gamma : 'scale' or float, default=1.0
Parameter of RBF kernel: exp(-gamma * x^2).
If ``gamma='scale'`` is passed then it uses
1 / (n_features * X.var()) as value of gamma.
.. versionadded:: 1.2
The option `"scale"` was added in 1.2.
n_components : int, default=100
Number of Monte Carlo samples per original feature.
Equals the dimensionality of the computed feature space.
random_state : int, RandomState instance or None, default=None
Pseudo-random number generator to control the generation of the random
weights and random offset when fitting the training data.
Pass an int for reproducible output across multiple function calls.
See :term:`Glossary <random_state>`.
Attributes
----------
random_offset_ : ndarray of shape (n_components,), dtype={np.float64, np.float32}
Random offset used to compute the projection in the `n_components`
dimensions of the feature space.
random_weights_ : ndarray of shape (n_features, n_components), dtype={np.float64, np.float32}
Random projection directions drawn from the Fourier transform
of the RBF kernel.
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
.. versionadded:: 1.0
See Also
--------
AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.
Nystroem : Approximate a kernel map using a subset of the training data.
PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch.
SkewedChi2Sampler : Approximate feature map for
"skewed chi-squared" kernel.
sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.
Notes
-----
See "Random Features for Large-Scale Kernel Machines" by A. Rahimi and
Benjamin Recht.
[1] "Weighted Sums of Random Kitchen Sinks: Replacing
minimization with randomization in learning" by A. Rahimi and
Benjamin Recht.
(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)
Examples
--------
>>> from sklearn.kernel_approximation import RBFSampler
>>> from sklearn.linear_model import SGDClassifier
>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]
>>> y = [0, 0, 1, 1]
>>> rbf_feature = RBFSampler(gamma=1, random_state=1)
>>> X_features = rbf_feature.fit_transform(X)
>>> clf = SGDClassifier(max_iter=5, tol=1e-3)
>>> clf.fit(X_features, y)
SGDClassifier(max_iter=5)
>>> clf.score(X_features, y)
1.0
File: ~/mambaforge/lib/python3.10/site-packages/sklearn/kernel_approximation.py
Type: type
Subclasses:
r = RBFSampler(n_components=5)
plt.plot(X_norm, r.fit_transform(X_norm.reshape(-1, 1)))
plt.title('Fourier Featurization using RBFSampler (sklearn)')
Text(0.5, 1.0, 'Fourier Featurization using RBFSampler (sklearn)')
r = RBFSampler(n_components=4, gamma=0.1)
plt.plot(X_norm, r.fit_transform(X_norm.reshape(-1, 1)))
plt.title('Fourier Featurization using RBFSampler (sklearn)')
Text(0.5, 1.0, 'Fourier Featurization using RBFSampler (sklearn)')
r = RBFSampler(n_components=4, gamma=20)
plt.plot(X_norm, r.fit_transform(X_norm.reshape(-1, 1)))
plt.title('Fourier Featurization using RBFSampler (sklearn)')
Text(0.5, 1.0, 'Fourier Featurization using RBFSampler (sklearn)')
# Implicit implementation of RFF using sklearn
def create_rff(X, gamma, NUM_features):
    # Random Fourier Features
    # https://scikit-learn.org/stable/modules/generated/sklearn.kernel_approximation.RBFSampler.html
    rbf_feature = RBFSampler(gamma=gamma, n_components=NUM_features, random_state=1)
    X_features = rbf_feature.fit_transform(X)
    return X_features
model3 = LinearRegression()
gamma = 2.0
NUM_features = 10

Xf_norm_train = create_rff(X_norm_train.reshape(-1, 1), gamma, NUM_features)
Xf_norm_test = create_rff(X_norm_test.reshape(-1, 1), gamma, NUM_features)

X_lin_rff = create_rff(X_lin_1d, gamma, NUM_features)

plot_fit_predict(model3, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_rff, f"Random Fourier Features (gamma={gamma}, NUM_features={NUM_features})")
{'train': 4.630893826442012, 'test': 4.626609156411284}
Bias-Variance Tradeoff for Fourier Features
w.r.t gamma
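As noted in the RBFSampler docstring above, \(\gamma\) is the width parameter of the RBF kernel being approximated,

\[k(x, x') = \exp\left(-\gamma \, \lVert x - x' \rVert^2\right)\]

so a larger \(\gamma\) means a narrower kernel and a more flexible, higher-variance fit, while a smaller \(\gamma\) gives a smoother, higher-bias fit. The sweep below keeps the number of random features fixed at 100 and varies only \(\gamma\).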
model4 = LinearRegression()

# NUM_features_values = [1, 2, 3, 4, 5, 10, 20, 50, 100]
gamma_values = [0.01, 0.1, 1, 2, 5, 10]

errors_rff = {}

for gamma in gamma_values:
    # gamma = 2.0
    NUM_features_ = 100
    Xf_norm_train = create_rff(X_norm_train.reshape(-1, 1), gamma, NUM_features_)
    Xf_norm_test = create_rff(X_norm_test.reshape(-1, 1), gamma, NUM_features_)

    X_lin_rff = create_rff(X_lin_1d, gamma, NUM_features_)

    plot_fit_predict(model4, Xf_norm_train, y_norm_train, Xf_norm_test, y_norm_test, X_lin_rff, gamma, plot=False)

    errors_rff[gamma] = errors[gamma]
pd.DataFrame(errors_rff).T.plot(logy=True, logx=True)
Extrapolation using Gaussian Process
X_norm_train = X_norm[:int(len(X_norm)*0.7)]
X_norm_test = X_norm[int(len(X_norm)*0.7):]

y_norm_train = y_norm[:int(len(y_norm)*0.7)]
y_norm_test = y_norm[int(len(y_norm)*0.7):]

X_train = X[:int(len(X)*0.7)]
X_test = X[int(len(X)*0.7):]

y_train = y[:int(len(y)*0.7)]
y_test = y[int(len(y)*0.7):]
plt.plot(X_norm_train, y_norm_train, 'o', label='train', markersize=1)
plt.plot(X_norm_test, y_norm_test, 'o', label='test', ms=3)

plot_fit_gp(X_norm_train, y_norm_train, X_norm_test, y_norm_test, X_lin_1d, "Gaussian Process Extrapolation Regression")
NameError: name 'plot_fit_gp' is not defined
Before this…
Dataset 1: Sine wave with noise
# Generate some data
rng = np.random.RandomState(1)
x = 15 * rng.rand(200)
y = np.sin(x) + 0.1 * rng.randn(200)
df = pd.DataFrame({'x': x, 'y': y})

# plot the data
plt.scatter(df.x, df.y)
plt.show()
# Train test split
train = df.sample(frac=0.8, random_state=1)
test = df.drop(train.index)

plt.scatter(train.x, train.y, color='blue', label='train')
plt.scatter(test.x, test.y, color='orange', label='test')

plt.legend()
plt.show()
def plot_predictions(train, test, yhat_train, yhat_test):
    # add yhat_train to train and yhat_test to test
    train['yhat'] = yhat_train
    test['yhat'] = yhat_test

    # sort train and test by x
    train = train.sort_values(by='x')
    test = test.sort_values(by='x')

    # Train and test error
    train_rmse = np.sqrt(np.mean((train.yhat - train.y)**2))
    test_rmse = np.sqrt(np.mean((test.yhat - test.y)**2))

    plt.scatter(train.x, train.y, color='blue', label='train')
    plt.scatter(test.x, test.y, color='orange', label='test')
    plt.plot(train.x, train.yhat, color='red', label='train prediction')
    plt.plot(test.x, test.yhat, color='green', label='test prediction')
    plt.title('Train RMSE: {:.3f}, Test RMSE: {:.3f}'.format(train_rmse, test_rmse))
    plt.legend()
    plt.show()

    return train_rmse, test_rmse
# Hyperparameter tuning using grid search and showing bias variance tradeoff
def hyperparameter_tuning(params, train, test, model):
    train_rmse = []
    test_rmse = []

    for d in params:
        yhat_train, yhat_test = model(d, train, test)
        train_rmse.append(np.sqrt(np.mean((yhat_train - train.y)**2)))
        test_rmse.append(np.sqrt(np.mean((yhat_test - test.y)**2)))

    plt.plot(params, train_rmse, label='train')
    plt.plot(params, test_rmse, label='test')
    plt.xlabel('params')
    plt.ylabel('RMSE')
    plt.legend()
    plt.show()

    optimal_param = params[np.argmin(test_rmse)]

    return optimal_param
rmse_dict = {}
Model 1: MLP
# use sk-learn for MLP
mlp_model = MLPRegressor(hidden_layer_sizes=[128, 256, 512, 256, 128], max_iter=10000)
mlp_model.fit(np.array(train.x).reshape(-1, 1), train.y)
MLPRegressor(hidden_layer_sizes=[128, 256, 512, 256, 128], max_iter=10000)
yhat_train = mlp_model.predict(np.array(train.x).reshape(-1, 1))
yhat_test = mlp_model.predict(np.array(test.x).reshape(-1, 1))
train_rmse, test_rmse = plot_predictions(train, test, yhat_train, yhat_test)
rmse_dict['MLP'] = (train_rmse, test_rmse)
Model 2: Vanilla Linear Regression
lr1 = LinearRegression()
lr1.fit(np.array(train.x).reshape(-1, 1), train.y)
LinearRegression()
yhat_train = lr1.predict(np.array(train.x).reshape(-1, 1))
yhat_test = lr1.predict(np.array(test.x).reshape(-1, 1))
train_rmse, test_rmse = plot_predictions(train, test, yhat_train, yhat_test)
rmse_dict['Vanilla LR'] = (train_rmse, test_rmse)
Model 3: Polynomial regression with degree d
def poly_regression(d, train, test):
    lr = LinearRegression()
    pf = PolynomialFeatures(degree=d)

    X_train = pf.fit_transform(train.x.values.reshape(-1, 1))
    X_test = pf.fit_transform(test.x.values.reshape(-1, 1))

    lr.fit(X_train, train.y)

    yhat_train = lr.predict(X_train)
    yhat_test = lr.predict(X_test)

    return yhat_train, yhat_test
# Hyperparameter tuning using grid search and showing bias variance tradeoff
degrees = range(1, 20)
best_degree = hyperparameter_tuning(degrees, train, test, poly_regression)
yhat_train, yhat_test = poly_regression(best_degree, train, test)
train_rmse, test_rmse = plot_predictions(train, test, yhat_train, yhat_test)
rmse_dict['Polynomial Regression'] = (train_rmse, test_rmse)
print("Best degree: ", best_degree)
Best degree: 10
Model 4: Linear regression with sine and cosine basis functions
def sine_basis_regression(num_basis, train, test):
    lr = LinearRegression()
    for i in range(1, num_basis+1):
        train[f"sine_{i}"] = np.sin(i*train.x)
        train[f"cosine_{i}"] = np.cos(i*train.x)
        test[f"sine_{i}"] = np.sin(i*test.x)
        test[f"cosine_{i}"] = np.cos(i*test.x)

    X_train = train.drop(['y'], axis=1)
    X_test = test.drop(['y'], axis=1)

    lr.fit(X_train, train.y)

    yhat_train = lr.predict(X_train)
    yhat_test = lr.predict(X_test)

    return yhat_train, yhat_test
basis = range(1, 20)
best_num_basis = hyperparameter_tuning(basis, train, test, sine_basis_regression)
yhat_train, yhat_test = sine_basis_regression(best_num_basis, train, test)
train_rmse, test_rmse = plot_predictions(train, test, yhat_train, yhat_test)
rmse_dict['Sine Basis Regression'] = (train_rmse, test_rmse)
print("Best number of basis: ", best_num_basis)
Best number of basis: 14
Model 5: Linear regression with Gaussian basis functions
# Source: https://jakevdp.github.io/PythonDataScienceHandbook/05.06-linear-regression.html#Gaussian-basis-functions
class GaussianFeatures(BaseEstimator, TransformerMixin):
    """
    Uniformly spaced Gaussian features for one-dimensional input
    Constructor with N centers and width_factor as hyperparameters
    N comes from the number of basis functions
    width_factor is the width of each basis function
    """

    def __init__(self, N, width_factor=2.0):
        self.N = N
        self.width_factor = width_factor

    @staticmethod
    def _gauss_basis(x, y, width, axis=None):
        arg = (x - y) / width
        return np.exp(-0.5 * np.sum(arg ** 2, axis))

    def fit(self, X, y=None):
        # create N centers spread along the data range
        self.centers_ = np.linspace(X.min(), X.max(), self.N)
        self.width_ = self.width_factor * (self.centers_[1] - self.centers_[0])
        return self

    def transform(self, X):
        return self._gauss_basis(X[:, :, np.newaxis], self.centers_, self.width_, axis=1)
# Hyperparameter tuning
basis = range(2, 20)

train_rmse = []
test_rmse = []
for d in basis:
    model = make_pipeline(GaussianFeatures(d), LinearRegression())
    model.fit(np.array(train.x).reshape(-1, 1), train.y)
    yhat_train = model.predict(np.array(train.x).reshape(-1, 1))
    yhat_test = model.predict(np.array(test.x).reshape(-1, 1))
    train_rmse.append(np.sqrt(np.mean((yhat_train - train.y)**2)))
    test_rmse.append(np.sqrt(np.mean((yhat_test - test.y)**2)))

best_num_basis = basis[np.argmin(test_rmse)]
print("Best number of basis: ", best_num_basis)

plt.plot(basis, train_rmse, label='train')
plt.plot(basis, test_rmse, label='test')
plt.xlabel('degree')
plt.ylabel('RMSE')
plt.legend()
plt.show()
Best number of basis: 13
gauss_model = make_pipeline(GaussianFeatures(best_num_basis), LinearRegression())
gauss_model.fit(np.array(train.x).reshape(-1, 1), train.y)
yhat_train = gauss_model.predict(train.x.values.reshape(-1, 1))
yhat_test = gauss_model.predict(test.x.values.reshape(-1, 1))
train_rmse, test_rmse = plot_predictions(train, test, yhat_train, yhat_test)
rmse_dict['Gaussian Basis Regression'] = (train_rmse, test_rmse)
Plotting RMSE for different variants of linear regression
# create a bar plot of train and test RMSE
train_rmse = [rmse_dict[key][0] for key in rmse_dict.keys()]
test_rmse = [rmse_dict[key][1] for key in rmse_dict.keys()]
labels = [key for key in rmse_dict.keys()]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots(figsize=(10, 5))
rects1 = ax.bar(x - width/2, train_rmse, width, label='Train RMSE')
rects2 = ax.bar(x + width/2, test_rmse, width, label='Test RMSE')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('RMSE')
ax.set_title('RMSE by model')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
fig.tight_layout()
Dataset 2: CO2 Dataset
df
| | x | y |
---|---|---|
0 | 6.255330 | -0.020070 |
1 | 10.804867 | -0.920032 |
2 | 0.001716 | 0.024965 |
3 | 4.534989 | -0.916051 |
4 | 2.201338 | 0.776696 |
... | ... | ... |
195 | 13.979581 | 1.115462 |
196 | 0.209274 | 0.163526 |
197 | 3.515431 | -0.332839 |
198 | 9.251675 | 0.161240 |
199 | 14.235245 | 0.996049 |
200 rows × 2 columns
df.index = pd.to_datetime(df[['year', 'month']].apply(lambda x: '{}-{}'.format(x[0], x[1]), axis=1))
KeyError: "None of [Index(['year', 'month'], dtype='object')] are in the [columns]"
df
| | x | y |
---|---|---|
0 | 6.255330 | -0.020070 |
1 | 10.804867 | -0.920032 |
2 | 0.001716 | 0.024965 |
3 | 4.534989 | -0.916051 |
4 | 2.201338 | 0.776696 |
... | ... | ... |
195 | 13.979581 | 1.115462 |
196 | 0.209274 | 0.163526 |
197 | 3.515431 | -0.332839 |
198 | 9.251675 | 0.161240 |
199 | 14.235245 | 0.996049 |
200 rows × 2 columns
df.average.plot()
AttributeError: 'DataFrame' object has no attribute 'average'
train_cutoff = 2000
train = df[df.year < train_cutoff]
test = df[df.year >= train_cutoff]
df.average.plot()
train.average.plot(color='blue')
test.average.plot(color='orange')
len(train), len(test)
months_from_start = range(len(df))
months_from_start = np.array(months_from_start).reshape(-1, 1)
# use sk-learn for MLP
mlp_model = MLPRegressor(hidden_layer_sizes=[512, 512, 512, 512, 512], max_iter=5000)
mlp_model.fit(months_from_start[:len(train)], train.average.values)
yhat_train = mlp_model.predict(months_from_start[:len(train)])
yhat_test = mlp_model.predict(months_from_start[len(train):])

yhat_train = pd.Series(yhat_train, index=train.index)
yhat_test = pd.Series(yhat_test, index=test.index)
df.average.plot()
yhat_train.plot()
yhat_test.plot()
# Train error
train_rmse = np.sqrt(np.mean((yhat_train - train.average)**2))
test_rmse = np.sqrt(np.mean((yhat_test - test.average)**2))
plt.title('Train RMSE: {:.2f}, Test RMSE: {:.2f}'.format(train_rmse, test_rmse))
# normalize data
train_scaled = (train - train.mean()) / train.std()
test_scaled = (test - test.mean()) / test.std()
months_from_start_scaled = (months_from_start - months_from_start.mean()) / months_from_start.std()

# train_scaled = (train - train.mean()) / train.std()
# test_scaled = (test - test.mean()) / test.std()
# months_from_start_scaled = (months_from_start - months_from_start.mean()) / months_from_start.std()

mlp_model = MLPRegressor(hidden_layer_sizes=512, max_iter=1000)
mlp_model.fit(months_from_start_scaled[:len(train)], train_scaled.average.values)
yhat_train = mlp_model.predict(months_from_start_scaled[:len(train)])
yhat_test = mlp_model.predict(months_from_start_scaled[len(train):])

yhat_train_scaled = pd.Series(yhat_train, index=train.index)
yhat_test_scaled = pd.Series(yhat_test, index=test.index)

yhat_train = yhat_train_scaled * train.std() + train.mean()
# yhat_test = yhat_test_scaled * test.std() + test.mean()
df.average.plot()
yhat_train.plot()
yhat_test.plot()
# Train error
train_rmse = np.sqrt(np.mean((yhat_train - train.average)**2))
test_rmse = np.sqrt(np.mean((yhat_test - test.average)**2))
plt.title('Train RMSE: {:.2f}, Test RMSE: {:.2f}'.format(train_rmse, test_rmse))
Model 2: Vanilla Linear Regression
lr1 = LinearRegression()
lr1.fit(months_from_start[:len(train)], train.average.values)
yhat1_test = lr1.predict(months_from_start[len(train):])
yhat1_train = lr1.predict(months_from_start[:len(train)])

yhat_train = pd.Series(yhat1_train, index=train.index)
yhat_test = pd.Series(yhat1_test, index=test.index)
df.average.plot()
yhat_train.plot()
yhat_test.plot()
# Train error
train_rmse = np.sqrt(np.mean((yhat_train - train.average)**2))
test_rmse = np.sqrt(np.mean((yhat_test - test.average)**2))
plt.title('Train RMSE: {:.2f}, Test RMSE: {:.2f}'.format(train_rmse, test_rmse))
Model 3: Polynomial regression with degree d
def poly_regression(d, train, test):
    months_from_start = range(len(df))
    months_from_start = np.array(months_from_start).reshape(-1, 1)

    lr = LinearRegression()
    pf = PolynomialFeatures(degree=d)
    X_train = pf.fit_transform(months_from_start[:len(train)])
    X_test = pf.fit_transform(months_from_start[len(train):])

    lr.fit(X_train, train.average.values)

    yhat_test = lr.predict(X_test)
    yhat_train = lr.predict(X_train)

    yhat_train = pd.Series(yhat_train, index=train.index)
    yhat_test = pd.Series(yhat_test, index=test.index)

    df.average.plot()
    yhat_train.plot()
    yhat_test.plot()

    # Train and test error
    train_rmse = np.sqrt(np.mean((yhat_train - train.average)**2))
    test_rmse = np.sqrt(np.mean((yhat_test - test.average)**2))
    plt.title('Train RMSE: {:.2f}, Test RMSE: {:.2f}'.format(train_rmse, test_rmse))
poly_regression(2, train, test)
Model 4: Linear Regression with sine and cosine basis functions
### Adding sine and cosine terms
def sine_cosine_features(X, n):
    """
    X: array of shape (n_samples, 1)
    n: number of sine and cosine features to add
    """
    features = [X]
    for i in range(1, n+1):
        features.append(np.sin(i*X))
        features.append(np.cos(i*X))
    return np.hstack(features)
X = np.linspace(-1, 1, 100).reshape(-1, 1)
_ = plt.plot(X, sine_cosine_features(X, 0))
Model 5: Gaussian basis functions
Model 6: Linear Regression with polynomial and sine/cosine basis functions
def poly_sine_cosine_regression(n, train, test):
    months_from_start = range(len(df))
    months_from_start = np.array(months_from_start).reshape(-1, 1)

    lr = LinearRegression()

    X_train = sine_cosine_features(months_from_start[:len(train)], n)
    X_test = sine_cosine_features(months_from_start[len(train):], n)
    print(X_train.shape, X_test.shape)

    lr.fit(X_train, train.average.values)

    yhat_test = lr.predict(X_test)
    yhat_train = lr.predict(X_train)

    yhat_train = pd.Series(yhat_train, index=train.index)
    yhat_test = pd.Series(yhat_test, index=test.index)

    yhat_train.plot(alpha=0.2, lw=4)
    yhat_test.plot(alpha=0.2, lw=4)
    df.average.plot(color='k', lw=1)

    # Train and test error
    train_rmse = np.sqrt(np.mean((yhat_train - train.average)**2))
    test_rmse = np.sqrt(np.mean((yhat_test - test.average)**2))
    plt.title('Train RMSE: {:.2f}, Test RMSE: {:.2f}'.format(train_rmse, test_rmse))
poly_sine_cosine_regression(6, train, test)
Model 7: Random Fourier Features
def rff_featurise(X, n_components=100):
    # Random Fourier Features
    # https://scikit-learn.org/stable/modules/generated/sklearn.kernel_approximation.RBFSampler.html
    from sklearn.kernel_approximation import RBFSampler
    rbf_feature = RBFSampler(gamma=1, n_components=n_components, random_state=1)
    X_features = rbf_feature.fit_transform(X)
    return X_features
def poly_rff_regression(n, train, test):
    months_from_start = range(len(df))
    months_from_start = np.array(months_from_start).reshape(-1, 1)

    lr = LinearRegression()

    X_train = rff_featurise(months_from_start[:len(train)], n)
    X_test = rff_featurise(months_from_start[len(train):], n)
    print(X_train.shape, X_test.shape)

    lr.fit(X_train, train.average.values)

    yhat_test = lr.predict(X_test)
    yhat_train = lr.predict(X_train)

    yhat_train = pd.Series(yhat_train, index=train.index)
    yhat_test = pd.Series(yhat_test, index=test.index)

    yhat_train.plot(alpha=0.2, lw=4)
    yhat_test.plot(alpha=0.2, lw=4)
    df.average.plot(color='k', lw=1)

    # Train and test error
    train_rmse = np.sqrt(np.mean((yhat_train - train.average)**2))
    test_rmse = np.sqrt(np.mean((yhat_test - test.average)**2))
    plt.title('Train RMSE: {:.2f}, Test RMSE: {:.2f}'.format(train_rmse, test_rmse))
poly_rff_regression(440, train, test)