```python
# Support for maths
import numpy as np
# Plotting tools
from matplotlib import pyplot as plt
# we use the following for plotting figures in jupyter
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

# GPy: Gaussian processes library
import GPy
from IPython.display import display
```
YouTube: https://www.youtube.com/watch?v=V1bF37-_ytQ
Disclaimer
This blog post is forked from GPSS 2019 Lab 1. This is produced only for educational purposes. All credit goes to the GPSS organisers.
Covariance functions, aka kernels
We will define a covariance function, from here on referred to as a kernel, using GPy. The most commonly used kernel in machine learning is the Gaussian-form radial basis function (RBF) kernel. It is also commonly referred to as the exponentiated quadratic or squared exponential kernel – all are equivalent.
The (1-dimensional) RBF kernel has a Gaussian form, defined as:
\[ \kappa_\mathrm{rbf}(x,x') = \sigma^2\exp\left(-\frac{(x-x')^2}{2\ell^2}\right) \]
It has two parameters: the variance, \(\sigma^2\), and the lengthscale, \(\ell\).
In GPy, we define our kernels using the input dimension as the first argument, in the simplest case input_dim=1
for 1-dimensional regression. We can also explicitly define the parameters, but for now we will use the default values:
```python
# Create a 1-D RBF kernel with default parameters
k = GPy.kern.RBF(1)
# Preview the kernel's parameters
k
```
| rbf. | value | constraints | priors |
|---|---|---|---|
| variance | 1.0 | +ve | |
| lengthscale | 1.0 | +ve | |
We can see from the above table that our kernel has two parameters, variance and lengthscale, both with value 1.0. There is also information on the constraints and priors on each parameter, but we will look at this later.
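As a quick sanity check that GPy computes the formula above, we can evaluate the kernel by hand for one pair of points and compare it with k.K. This is a minimal sketch reusing the k defined above; the particular points are arbitrary.

```python
# Sketch: evaluate the RBF formula by hand and compare with GPy's k.K
x, x_prime = 1.0, 2.5  # arbitrary pair of inputs
variance, lengthscale = k.variance[0], k.lengthscale[0]
manual = variance * np.exp(-(x - x_prime)**2 / (2 * lengthscale**2))
gpy_val = k.K(np.array([[x]]), np.array([[x_prime]]))[0, 0]
print(manual, gpy_val)  # the two values should agree
```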
Visualising the kernel
We can visualise our kernel in a few different ways. We can plot the shape of the kernel by plotting \(k(x,0)\) over some sample space \(x\) which, looking at the equation above, clearly has a Gaussian shape. This describes the covariance between each sample location and \(0\).
```python
# Our sample space: 100 samples in the interval [-4,4]
X = np.linspace(-4., 4., 100)[:, None]  # we need [:, None] to reshape X into a column vector for use in GPy

# First, sample kernel at x' = 0
K = k.K(X, np.array([[0.]]))  # k(x,0)

plt.plot(X, K)
plt.title("$\kappa_{rbf}(x,x')$");
```
Writing an animation routine to visualise the kernel as the lengthscale changes
```python
from matplotlib.animation import FuncAnimation
from matplotlib import rc

fig, ax = plt.subplots()

ls = [0.05, 0.25, 0.5, 1., 2., 4.]

def update(iteration):
    ax.cla()
    k = GPy.kern.RBF(1)
    k.lengthscale = ls[iteration]
    # Calculate the new covariance function at k(x,0)
    C = k.K(X, np.array([[0.]]))
    # Plot the resulting covariance vector
    ax.plot(X, C)
    ax.set_title("$\kappa_{rbf}(x,x')$\nLength scale = %s" % k.lengthscale[0])
    ax.set_ylim((0, 1.2))

num_iterations = len(ls)
anim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations, 1), interval=500)

plt.close()
rc('animation', html='jshtml')
anim
```
From the animation above, we can see that increasing the lengthscale makes a point more strongly correlated with points farther away. Using such a kernel for GP regression, a larger lengthscale therefore produces a smoother fit.
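We can make this concrete with a small numerical check (a sketch; the points 0 and 2 and the lengthscales used are arbitrary): the covariance between x = 0 and x = 2 grows as the lengthscale grows.

```python
# Sketch: covariance between x = 0 and x = 2 for increasing lengthscales
for l in [0.5, 1.0, 2.0, 4.0]:
    k_tmp = GPy.kern.RBF(1, lengthscale=l)
    print(l, k_tmp.K(np.array([[0.]]), np.array([[2.]]))[0, 0])
```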
Writing an animation routine to visualise the kernel as the variance changes
```python
import os
from matplotlib.animation import FuncAnimation
from matplotlib import rc

fig, ax = plt.subplots()

variances = [0.01, 0.05, 0.25, 0.5, 1., 2., 4.]

def update(iteration):
    ax.cla()
    k = GPy.kern.RBF(1)
    k.variance = variances[iteration]
    # Calculate the new covariance function at k(x,0)
    C = k.K(X, np.array([[0.]]))
    # Plot the resulting covariance vector
    ax.plot(X, C)
    ax.set_title("$\kappa_{rbf}(x,x')$\nVariance = %s" % k.variance[0])
    ax.set_ylim((0, 2))

num_iterations = len(variances)  # iterate over all the variances
anim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations, 1), interval=500)

plt.close()
rc('animation', html='jshtml')
anim
```
Alternatively, we can construct a full covariance matrix, \(\mathbf{K}_{xx} \triangleq k(x,x')\), with samples \(x = x'\). The resulting GP prior is a multivariate normal distribution over the space of samples \(x\): \(\mathcal{N}(\mathbf{0}, \mathbf{K}_{xx})\). The elements of the matrix represent the covariance between the respective points in \(x\) and \(x'\), and the diagonal is exactly the variance \(\sigma^2\) (here 1).
```python
X = np.linspace(-4., 4., 30)[:, None]
K = k.K(X, X)

# Plot the covariance of the sample space
plt.pcolor(X.T, X, K)

# Format and annotate plot
plt.gca().invert_yaxis(), plt.gca().axis("image")
plt.xlabel("x"), plt.ylabel("x'"), plt.colorbar()
plt.title("$\kappa_{rbf}(x,x')$");
```
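Since the prior over function values on this grid is \(\mathcal{N}(\mathbf{0}, \mathbf{K}_{xx})\), we can also draw a few functions from it directly. This is a minimal sketch reusing X and K from the cell above; the small jitter on the diagonal is only for numerical stability.

```python
# Sketch: draw samples from the GP prior N(0, K_xx)
jitter = 1e-8 * np.eye(X.shape[0])
samples = np.random.multivariate_normal(np.zeros(X.shape[0]), K + jitter, size=5)
plt.figure()
plt.plot(X.flatten(), samples.T)
plt.title("Samples from the GP prior");
```

Each line is one draw from the prior; with the default lengthscale of 1 the sampled functions vary smoothly over the interval.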
```python
fig, ax = plt.subplots()
cax = fig.add_axes([0.87, 0.2, 0.05, 0.65])

def update(iteration):
    ax.cla()
    cax.cla()
    k = GPy.kern.RBF(1)
    k.lengthscale = ls[iteration]
    # Calculate the new covariance matrix k(x,x')
    K = k.K(X, X)

    # Plot the covariance of the sample space
    im = ax.pcolor(X.T, X, K)

    # Format and annotate plot
    ax.invert_yaxis()
    ax.axis("image")
    ax.set_title("Length scale = %s" % k.lengthscale[0])
    fig.colorbar(im, cax=cax, orientation='vertical')

fig.tight_layout()

num_iterations = len(ls)
anim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations, 1), interval=500)

plt.close()
rc('animation', html='jshtml')
anim
```
The above animation shows the impact of increasing the lengthscale on the covariance matrix.
```python
fig, ax = plt.subplots()
cax = fig.add_axes([0.87, 0.2, 0.05, 0.65])

def update(iteration):
    ax.cla()
    cax.cla()
    k = GPy.kern.RBF(1)
    k.variance = variances[iteration]
    # Calculate the new covariance matrix k(x,x')
    K = k.K(X, X)

    # Plot the covariance of the sample space
    im = ax.pcolor(X.T, X, K)

    # Format and annotate plot
    ax.invert_yaxis()
    ax.axis("image")
    ax.set_title("Variance = %s" % k.variance[0])
    fig.colorbar(im, cax=cax, orientation='vertical')

fig.tight_layout()

num_iterations = len(variances)  # iterate over all the variances
anim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations, 1), interval=500)

plt.close()
rc('animation', html='jshtml')
anim
```
The above animation shows the impact of increasing the variance on the covariance matrix. Notice how the scale on the colorbar changes.
GP Regression
Creating a data set
```python
# lambda function, call f(x) to generate data
f = lambda x: 0.4*x**2 - 0.15*x**3 + 0.5*x**2 - 0.002*x**5 + 0.0002*x**6 + 0.5*(x-2)**2

np.random.seed(0)
# 30 equally spaced sample locations
X = np.linspace(0.05, 4.95, 30)[:, None]
np.random.shuffle(X)

# y = f(X) + epsilon
Y = f(X) + np.random.normal(0., 0.1, (30, 1))  # note that np.random.normal takes mean and s.d. (not variance), 0.1^2 = 0.01

train_X = X[:10]
train_Y = Y[:10]

test_X = X[10:]
test_Y = Y[10:]

# Plot observations
plt.plot(train_X, train_Y, "kx", mew=2, label='Train points')
plt.plot(test_X, test_Y, "mo", mew=2, label='Test points')

# Annotate plot
plt.xlabel("x"), plt.ylabel("f")
plt.legend()
```
Fitting the above data using GPR with an RBF kernel while varying the lengthscale (noiseless case)
Here we assume that observations (train instances) are noise free. Thus, the GP fit must pass exactly through the train points.
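In the animation below this is done by setting the model's Gaussian noise to zero. A closely related approach (a sketch, using a tiny value rather than exactly zero for numerical stability; the name m_noiseless is just for illustration) is to fix GPy's noise variance before fitting:

```python
# Sketch: force an (effectively) noiseless fit by fixing the noise variance
m_noiseless = GPy.models.GPRegression(train_X, train_Y, GPy.kern.RBF(1))
m_noiseless.Gaussian_noise.variance = 1e-8
m_noiseless.Gaussian_noise.variance.fix()  # keep it fixed during any optimisation
m_noiseless.plot();
```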
```python
from sklearn.metrics import mean_absolute_error

fig, ax = plt.subplots()

ls = [0.05, 0.25, 0.5, 1., 2., 4.]

def update(iteration):
    ax.cla()
    k = GPy.kern.RBF(1)
    k.lengthscale = ls[iteration]
    m = GPy.models.GPRegression(train_X, train_Y, k)
    m.Gaussian_noise = 0.0
    m.plot(ax=ax)
    ax.plot(test_X, test_Y, "mo", mew=2, label='Test points')
    ax.legend()
    ax.set_title("Length scale = %s, MAE = %s" % (k.lengthscale[0],
                 mean_absolute_error(test_Y, m.predict_noiseless(test_X)[0].flatten())))

fig.tight_layout()

num_iterations = len(ls)
anim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations, 1), interval=500)

plt.close()
rc('animation', html='jshtml')
anim
```
We can see that increasing the length scale makes the fit smoother.
Fitting the above data using GPR with an RBF kernel while varying the lengthscale (noisy case)
Here we assume that observations (train instances) have noise. Thus, the GP fit may not pass exactly through the train points.
```python
from sklearn.metrics import mean_absolute_error

fig, ax = plt.subplots()

ls = [0.05, 0.25, 0.5, 1., 2., 4.]

def update(iteration):
    ax.cla()
    k = GPy.kern.RBF(1)
    k.lengthscale = ls[iteration]
    m = GPy.models.GPRegression(train_X, train_Y, k)
    m.plot(ax=ax)
    ax.plot(test_X, test_Y, "mo", mew=2, label='Test points')
    ax.legend()
    ax.set_title("Length scale = %s, MAE = %s" % (k.lengthscale[0],
                 mean_absolute_error(test_Y, m.predict_noiseless(test_X)[0].flatten())))

fig.tight_layout()

num_iterations = len(ls)
anim = FuncAnimation(fig, update, frames=np.arange(0, num_iterations, 1), interval=500)

plt.close()
rc('animation', html='jshtml')
anim
```
Optimizing kernel parameters
Thus far we have been hard-coding the kernel parameters. Can we learn them? Yes: we can learn them by maximising the marginal likelihood of the data with respect to the parameters. This might sound a little odd – learning prior parameters from data in a Bayesian setting – but it is standard practice, usually referred to as empirical Bayes or type-II maximum likelihood.
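Concretely, for a zero-mean GP with Gaussian observation noise \(\sigma_n^2\), optimising the model maximises the log marginal likelihood of the data with respect to the kernel (and noise) parameters \(\theta\):

\[ \log p(\mathbf{y} \mid X, \theta) = -\frac{1}{2}\mathbf{y}^\top \left(\mathbf{K}_{xx} + \sigma_n^2 \mathbf{I}\right)^{-1}\mathbf{y} - \frac{1}{2}\log\left|\mathbf{K}_{xx} + \sigma_n^2 \mathbf{I}\right| - \frac{n}{2}\log 2\pi \]

GPy's m.optimize() minimises the negative of this quantity, which is the Objective value reported below.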
```python
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(train_X, train_Y, k)
m.optimize()
# Display the optimised model
m
```
Model: GP regression
Objective: 6.219091605935309
Number of Parameters: 3
Number of Optimization Parameters: 3
Updates: True
| GP_regression. | value | constraints | priors |
|---|---|---|---|
| rbf.variance | 12.433073229232102 | +ve | |
| rbf.lengthscale | 2.34663339745307 | +ve | |
| Gaussian_noise.variance | 0.017278356392144867 | +ve | |
```python
m.plot()
ax = plt.gca()
ax.plot(test_X, test_Y, "mo", mew=2, label='Test points')
ax.legend()
```
```python
mean_absolute_error(test_Y, m.predict(test_X)[0].flatten())  # 0.1261412756644803
```
Above, we see the fit for the learnt kernel parameters.
Other kernels
We have thus far discussed the RBF kernel. Let us now have a quick look at the periodic kernel before we look at combining different kernels.
```python
k = GPy.kern.StdPeriodic(1, period=2)
data = np.linspace(-4., 4., 100)[:, None]
C = k.K(data, np.array([[0.]]))

plt.plot(data, C)
```
The periodic kernel does what you might expect – the covariance between two points depends periodically on their separation.
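For reference, a common parameterisation of the (1-dimensional) periodic kernel with period \(p\) is shown below; GPy's StdPeriodic may scale the terms slightly differently, but the structure is the same:

\[ \kappa_\mathrm{per}(x,x') = \sigma^2\exp\left(-\frac{2\sin^2\left(\pi(x-x')/p\right)}{\ell^2}\right) \]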
Let us now try to fit the periodic kernel to our previously generated data.
```python
m = GPy.models.GPRegression(train_X, train_Y, k)
m.optimize()

m.plot()
ax = plt.gca()
ax.plot(test_X, test_Y, "mo", mew=2, label='Test points')
ax.legend()
```
As we might have expected, the fit is not very good. Let us create a dataset with genuine periodicity, on which the periodic kernel should do well.
```python
# lambda function, call g(x) to generate data
g = lambda x: np.sin(6*x) + 0.5*np.cos(x)

np.random.seed(0)
# 30 equally spaced sample locations
X2 = np.linspace(0.05, 4.95, 30)[:, None]
np.random.shuffle(X2)

# y = g(X) + epsilon
Y2 = g(X2) + np.random.normal(0., 0.1, (30, 1))  # note that np.random.normal takes mean and s.d. (not variance), 0.1^2 = 0.01

train_X2 = X2[:10]
train_Y2 = Y2[:10]

test_X2 = X2[10:]
test_Y2 = Y2[10:]

# Plot observations
plt.plot(train_X2, train_Y2, "kx", mew=2, label='Train points')
plt.plot(test_X2, test_Y2, "mo", mew=2, label='Test points')

# Annotate plot
plt.xlabel("x"), plt.ylabel("g")
plt.legend()
```
```python
m = GPy.models.GPRegression(train_X2, train_Y2, k)
m.optimize()
m
```
Model: GP regression
Objective: 6.688944496233991
Number of Parameters: 4
Number of Optimization Parameters: 4
Updates: True
| GP_regression. | value | constraints | priors |
|---|---|---|---|
| std_periodic.variance | 1.6241936960384638 | +ve | |
| std_periodic.period | 2.0498165862939817 | +ve | |
| std_periodic.lengthscale | 0.5405924861920798 | +ve | |
| Gaussian_noise.variance | 0.026138350160739007 | +ve | |
```python
m.plot()
ax = plt.gca()
ax.plot(test_X2, test_Y2, "mo", mew=2, label='Test points')
ax.legend()
```
From the plot above, we can see that using the periodic kernel alone is not very useful. Maybe we need to combine the RBF kernel with the periodic kernel? We will try two combinations – adding the two kernels and multiplying them – to obtain two new kernels. Both are valid choices, since the sum and the product of two kernels are themselves kernels.
```python
k1 = GPy.kern.StdPeriodic(1, period=2)
k2 = GPy.kern.RBF(1, lengthscale=1)
k_combined_1 = k1 + k2
k_combined_2 = k1 * k2
```
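As a quick numerical check (a sketch reusing the data grid from above), the covariance matrix of a sum (or product) of kernels is just the elementwise sum (or product) of the individual covariance matrices – which is also why these combinations are again valid kernels:

```python
# Sketch: verify that sum/product kernels combine covariance matrices elementwise
A = k1.K(data, data)
B = k2.K(data, data)
print(np.allclose(k_combined_1.K(data, data), A + B))  # expected: True
print(np.allclose(k_combined_2.K(data, data), A * B))  # expected: True
```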
Let us now visualise the combined kernels alongside the original ones.
```python
C1 = k_combined_1.K(data, np.array([[0.]]))
C2 = k_combined_2.K(data, np.array([[0.]]))

plt.plot(data, k1.K(data, np.array([[0.]])), label="Periodic")
plt.plot(data, k2.K(data, np.array([[0.]])), label="RBF")
plt.plot(data, C1, label="Periodic+RBF")
plt.plot(data, C2, label="Periodic*RBF")
plt.legend()
```
We can also visualise the covariance matrices corresponding to the two new kernels we have created.
```python
cov = k_combined_1.K(data, data)

# Plot the covariance of the sample space
plt.pcolor(data.T, data, cov)

# Format and annotate plot
plt.gca().invert_yaxis(), plt.gca().axis("image")
plt.xlabel("x"), plt.ylabel("x'"), plt.colorbar()
plt.title("RBF+Periodic");
```
```python
cov = k_combined_2.K(data, data)

# Plot the covariance of the sample space
plt.pcolor(data.T, data, cov)

# Format and annotate plot
plt.gca().invert_yaxis(), plt.gca().axis("image")
plt.xlabel("x"), plt.ylabel("x'"), plt.colorbar()
plt.title("RBF*Periodic");
```
```python
m = GPy.models.GPRegression(train_X2, train_Y2, k_combined_1)
m.optimize()

m.plot()
ax = plt.gca()
ax.plot(test_X2, test_Y2, "mo", mew=2, label='Test points')
ax.legend()
```
```python
m = GPy.models.GPRegression(train_X2, train_Y2, k_combined_2)
m.optimize()

m.plot()
print(m)
ax = plt.gca()
ax.plot(test_X2, test_Y2, "mo", mew=2, label='Test points')
ax.legend()
```
```
Name : GP regression
Objective : 4.136252889408651
Number of Parameters : 6
Number of Optimization Parameters : 6
Updates : True

Parameters:
  GP_regression.                |                  value  |  constraints  |  priors
  mul.std_periodic.variance     |     1.7461089633948401  |      +ve      |
  mul.std_periodic.period       |      2.041745856285753  |      +ve      |
  mul.std_periodic.lengthscale  |     0.6637866448282967  |      +ve      |
  mul.rbf.variance              |     1.7461089633948303  |      +ve      |
  mul.rbf.lengthscale           |      24.98629931665634  |      +ve      |
  Gaussian_noise.variance       |  4.955970365230769e-16  |      +ve      |
```
From the two fits above, we can see the combined kernels in action; the product kernel, for instance, fits the training data almost perfectly (its estimated noise variance is essentially zero).
2D GP
Having studied GPR in 1 dimension, we will now look at GPR for 2-dimensional inputs. Let us first create a small dataset.
```python
X = np.array([[3, 2], [1, 4], [1, 1], [3, 4], [2, 2], [2, 3], [3, 1], [3, 3.5], [2.5, 3.5]])
y = np.array([1, 1, 3, 2, 5.5, 4.5, 0.5, 3, 3.5])

X.shape, y.shape  # ((9, 2), (9,))

plt.scatter(X[:, 0], X[:, 1], s=y*50)
```
The above visualisation shows the dataset, where the size of the marker denotes the value at that location. We will now fit an RBF kernel (with a 2-dimensional input) to this dataset.
```python
k_2d = GPy.kern.RBF(input_dim=2, lengthscale=1)
X.shape  # (9, 2)

m = GPy.models.GPRegression(X, y.reshape(-1, 1), k_2d)
m.optimize()
```
Generating predictions over the entire grid for visualisation.
```python
x_t = np.linspace(0, 4.5, 40)
y_t = np.linspace(0, 4.5, 40)

XX, YY = np.meshgrid(x_t, y_t)

Z_pred = np.zeros_like(YY)
for i in range(40):
    for j in range(40):
        Z_pred[i, j] = m.predict(np.array([XX[i, j], YY[i, j]]).reshape(1, 2))[0]

Z_pred.shape  # (40, 40)

plt.contourf(XX, YY, Z_pred, levels=30)
plt.colorbar()
```
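As an aside, the double loop above is not strictly necessary: predict accepts an (N, 2) array, so the whole grid can be evaluated in a single call. A minimal sketch (the names grid, Z_pred_vec and Z_var_vec are just for illustration):

```python
# Sketch: vectorised prediction over the whole grid in one call
grid = np.column_stack([XX.ravel(), YY.ravel()])  # shape (1600, 2)
mean, var = m.predict(grid)                       # each of shape (1600, 1)
Z_pred_vec = mean.reshape(XX.shape)
Z_var_vec = var.reshape(XX.shape)
```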
The above plot shows the prediction in the 2d space. We can alternatively view the 3d surface plot.
```python
ax = plt.axes(projection='3d')
ax.plot_surface(XX, YY, Z_pred, rstride=1, cstride=1,
                cmap='viridis', edgecolor='none')
ax.set_title('surface');
ax.view_init(30, 35)
```
```python
Z_var = np.zeros_like(YY)
for i in range(40):
    for j in range(40):
        Z_var[i, j] = m.predict(np.array([XX[i, j], YY[i, j]]).reshape(1, 2))[1]

plt.contourf(XX, YY, Z_var, levels=30)
plt.colorbar()
```
Above, we see the predictive variance: as expected, it is lowest near the observed points and grows as we move away from them.
Air quality 2d map
Now, we will use GPs to predict air quality in New Delhi. See my previous post on how to get AQ data for Delhi: https://nipunbatra.github.io/blog/air%20quality/2018/06/21/aq-india-map.html
I will create a function to visualise the AQ estimates from GPs with different kernels.
The shapefile for Delhi can be downloaded from here.
```python
import pandas as pd
import os

df = pd.read_csv(os.path.expanduser("~/Downloads/2018-04-06.csv"))
df = df[(df.country=='IN') & (df.city=='Delhi') & (df.parameter=='pm25')].dropna().groupby("location").mean()
df
```
| location | value | latitude | longitude |
|---|---|---|---|
| Burari Crossing, New Delhi - IMD | 245.583333 | 28.725650 | 77.201157 |
| CRRI Mathura Road, New Delhi - IMD | 265.666667 | 28.551200 | 77.273574 |
| DTU, New Delhi - CPCB | 214.333333 | 28.750050 | 77.111261 |
| IGI Airport Terminal - 3, New Delhi - IMD | 130.666667 | 28.562776 | 77.118005 |
| IHBAS, Dilshad Garden, New Delhi - CPCB | 212.583333 | 28.680275 | 77.201157 |
| ITO, New Delhi - CPCB | 220.500000 | 28.631694 | 77.249439 |
| Lodhi Road, New Delhi - IMD | 176.083333 | 28.591825 | 77.227307 |
| Mandir Marg, New Delhi - DPCC | 82.000000 | 28.637269 | 77.200560 |
| NSIT Dwarka, New Delhi - CPCB | 184.583333 | 28.609090 | 77.032541 |
| North Campus, DU, New Delhi - IMD | 147.833333 | 28.657381 | 77.158545 |
| Pusa, New Delhi - IMD | 112.000000 | 28.610304 | 77.099694 |
| R K Puram, New Delhi - DPCC | 103.600000 | 28.564610 | 77.167010 |
| Shadipur, New Delhi - CPCB | 213.833333 | 28.651478 | 77.147311 |
| Sirifort, New Delhi - CPCB | 222.250000 | 28.550425 | 77.215938 |
| US Diplomatic Post: New Delhi | 46.625000 | 28.635760 | 77.224450 |
```python
import geopandas

gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.longitude, df.latitude))

gdf.plot()
```
```python
def plot_air_vis(df, k, shp, title):
    m = GPy.models.GPRegression(df[['longitude', 'latitude']], df[['value']], k)
    m.optimize(max_iters=2000)

    y_t = np.linspace(28.38, 28.9, 40)
    x_t = np.linspace(76.82, 77.4, 40)

    XX, YY = np.meshgrid(x_t, y_t)
    Z_pred = np.zeros_like(YY)
    Z_var = np.zeros_like(YY)
    for i in range(40):
        for j in range(40):
            Z_pred[i, j], Z_var[i, j] = m.predict_noiseless(np.array([XX[i, j], YY[i, j]]).reshape(1, 2))

    data = geopandas.read_file(shp)  # shapefile passed in as an argument
    fig = plt.figure(figsize=(6, 6))
    plt.contourf(XX, YY, Z_pred, levels=30, alpha=0.6, cmap='Purples')
    plt.colorbar()
    gdf.plot(ax=plt.gca(), markersize=gdf['value'], color='k')
    data.plot(color='k', ax=plt.gca(), zorder=-1, alpha=0.4)
    plt.gca().set_aspect("equal")
    for a in [100, 150, 200, 250]:
        plt.scatter([], [], c='k', alpha=1, s=a,
                    label=str(a) + '$\mu g/m^3$')
    plt.legend(scatterpoints=1, frameon=True,
               labelspacing=1, loc='upper left', ncol=2)

    plt.title(title + "\t" + str(m.objective_function()))
```
```python
k_2d = GPy.kern.RBF(input_dim=2, lengthscale=1)
k_2d_rbf_2 = GPy.kern.RBF(input_dim=2, lengthscale=3) * k_2d
k_2d_rbf_3 = GPy.kern.RBF(input_dim=2, lengthscale=3) + k_2d_rbf_2
k_matern32 = GPy.kern.Matern32(input_dim=2)
k_matern52 = GPy.kern.Matern52(input_dim=2)

k_rbf_matern = k_matern32 * k_matern52 + k_matern32 * k_2d_rbf_3

fp = os.path.expanduser("~/Downloads/wards delimited.shp")
```

```python
plot_air_vis(df, k_2d, fp, "RBF")
plot_air_vis(df, k_matern32, fp, "Matern32")
plot_air_vis(df, k_matern52, fp, "matern52")
plot_air_vis(df, k_2d_rbf_2, fp, "RBF*RBF")
plot_air_vis(df, k_2d_rbf_3, fp, "RBF*RBF+RBF")
plot_air_vis(df, k_rbf_matern, fp, "Matern32*Matern52+Matern32*RBF")
```
There you go. Till next time!