SVM with RBF kernel

Author: Nipun Batra
Published: April 25, 2023

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from sklearn import svm
from sklearn.datasets import make_blobs, make_classification

# Generate some random data for demonstration
X, y = make_blobs(n_samples=20, centers=2, random_state=6)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)

# Create an SVM model with a linear kernel; a large C approximates a hard margin
model = svm.SVC(kernel='linear', C=1e6)

# Fit the model to the data
model.fit(X, y)

# Access the alpha coefficients
alphas = np.abs(model.dual_coef_.ravel())  # dual_coef_ stores alpha_i * y_i; the absolute values recover alpha_i
print(alphas, model.dual_coef_)
[0.14522303 0.11368673 0.0315363 ] [[-0.14522303  0.11368673  0.0315363 ]]
model.support_vectors_ 
array([[ 8.98426675, -4.87449712],
       [ 7.98907212, -8.45336239],
       [ 7.28183008, -8.2229685 ]])
# Plot the support vectors
plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
            linewidth=1, facecolors='none', edgecolors='k')

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)

model.support_vectors_.shape, model.support_vectors_
((3, 2),
 array([[ 8.98426675, -4.87449712],
        [ 7.98907212, -8.45336239],
        [ 7.28183008, -8.2229685 ]]))
model.dual_coef_.shape, model.dual_coef_, y[model.support_]
((1, 3), array([[-0.14522303,  0.11368673,  0.0315363 ]]), array([0, 1, 1]))
ys_minus1_and_plus1 = np.where(y[model.support_] == 0, -1, 1)
ys_minus1_and_plus1
array([-1,  1,  1])
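As a quick sanity check (an addition, not part of the original derivation): sklearn's dual_coef_ stores alpha_i * y_i for each support vector, so multiplying the recovered alphas by the +/-1 labels should reconstruct it exactly.

# dual_coef_ should equal alpha_i * y_i, support vector by support vector
np.allclose(alphas * ys_minus1_and_plus1, model.dual_coef_.ravel())  # expect: True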
w = np.dot(alphas * ys_minus1_and_plus1, model.support_vectors_)
print("w found using dual coefficients using our calculation:", w)
print("w found using model.coef_:", model.coef_)
w found using dual coefficients using our calculation: [-0.16682897 -0.51246787]
w found using model.coef_: [[-0.16682897 -0.51246787]]
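Equivalently (a small aside, the loop-free form of the same calculation): since dual_coef_ already stores alpha_i * y_i, w falls out of a single matrix product.

# One-liner equivalent: (1, n_sv) @ (n_sv, 2) -> (1, 2)
w_direct = (model.dual_coef_ @ model.support_vectors_).ravel()
np.allclose(w_direct, model.coef_.ravel())  # expect: True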
# Intercept: any support vector satisfies y_i (w . x_i + b) = 1, i.e. b = y_i - w . x_i;
# average over all support vectors for numerical stability

nsv = model.support_vectors_.shape[0]
b_sum = 0.0
for i in range(nsv):
    b_sum += ys_minus1_and_plus1[i] - np.dot(w, model.support_vectors_[i])

b = b_sum / nsv

b_sklearn = model.intercept_

print("b found using dual coefficients using our calculation:", b)
print("b found using model.intercept_:", b_sklearn)
b found using dual coefficients using our calculation: -1.9992140529976397
b found using model.intercept_: [-1.99921422]
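The same average can be written without the loop; a minimal vectorized sketch of the identical calculation:

# b = mean over support vectors of (y_i - w . x_i)
b_vec = np.mean(ys_minus1_and_plus1 - model.support_vectors_ @ w)
np.allclose(b_vec, model.intercept_[0])  # expect: True (up to numerical precision)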
### Method 1: Contour plot
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)

ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)

YY, XX = np.meshgrid(yy, xx)

xy = np.vstack([XX.ravel(), YY.ravel()]).T

Z = np.dot(xy, w) + b

Z = Z.reshape(XX.shape)

plt.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])

plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
            linewidth=1, facecolors='none', edgecolors='k')

plt.show()

### Method 2: Plotting the lines w.x + b = 0 and w.x + b = ±1 directly

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)

ax = plt.gca()

xlim = ax.get_xlim()
ylim = ax.get_ylim()

# Decision line: w1*x1 + w2*x2 + b = 0
# => x2 = (-w1*x1 - b) / w2
# (x1 on the x-axis, x2 on the y-axis)

w = model.coef_[0]
b = model.intercept_[0]

x1 = np.linspace(xlim[0], xlim[1], 30)
x2 = (-w[0] * x1 - b) / w[1]

plt.plot(x1, x2, 'k-')

# Now plot the margins
# w1x1 + w2x2 + b = 1
# x2 = (-w1x1 - b + 1) / w2

x2 = (-w[0] * x1 - b + 1) / w[1]
plt.plot(x1, x2, 'k--')

# w1x1 + w2x2 + b = -1
# x2 = (-w1x1 - b - 1) / w2
x2 = (-w[0] * x1 - b - 1) / w[1]
plt.plot(x1, x2, 'k--')

plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
            linewidth=1, facecolors='none', edgecolors='k')
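One number worth reading off this plot (an addition, using the standard hard-margin formula): the perpendicular distance between the two dashed margin lines is 2 / ||w||.

# Width of the margin band between the dashed lines
margin_width = 2 / np.linalg.norm(w)
print(margin_width)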

### Method 3: Using sklearn's decision_function

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)

ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)

YY, XX = np.meshgrid(yy, xx)

xy = np.vstack([XX.ravel(), YY.ravel()]).T

Z = model.decision_function(xy).reshape(XX.shape)

plt.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])

plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
            linewidth=1, facecolors='none', edgecolors='k')
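To confirm the methods agree (a verification added here, not in the original notebook): decision_function should reproduce the explicit w . x + b from Method 1 on the same grid.

# decision_function vs. the explicit linear form on the same grid points
Z_manual = xy @ model.coef_.ravel() + model.intercept_[0]
np.allclose(Z_manual, model.decision_function(xy))  # expect: True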

### Method 4: Using the dot product phi(x_j) . phi(x_test) for each support vector x_j

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)


ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)

def phi_linear(x):
    # Identity feature map: for the linear kernel, phi(x) = x
    return x

YY, XX = np.meshgrid(yy, xx)

xy = np.vstack([XX.ravel(), YY.ravel()]).T

phi_linear_xy = phi_linear(xy)

phi_linear_support_vectors = phi_linear(model.support_vectors_)

alphas = model.dual_coef_

b = model.intercept_[0]

Z = np.ones(len(xy))

for index, phi_x_test in enumerate(phi_linear_xy):
    decision = 0
    for i in range(len(alphas[0])):
        decision += alphas[0][i] * np.dot(phi_linear_support_vectors[i], phi_x_test)
    decision += b
    
    Z[index] = decision
    
plt.contour(XX, YY, Z.reshape(XX.shape), colors='k', levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])

plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
            linewidth=1, facecolors='none', edgecolors='k')

x_test = xy[0]

phi_linear_support_vectors[0]@x_test*alphas[0][0] + phi_linear_support_vectors[1]@x_test*alphas[0][1] + phi_linear_support_vectors[2]@x_test*alphas[0][2] + b
3.025383870347727
model.decision_function([x_test])
array([3.02538387])
model.decision_function(xy).shape
(900,)
Z.shape
(900,)
np.allclose(Z, model.decision_function(xy))
True
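The double loop above collapses to a single matrix expression (an equivalent vectorized sketch of the same computation):

# (n_grid, 2) @ (2, n_sv) @ (n_sv,) + b -> (n_grid,)
Z_vec = phi_linear_xy @ phi_linear_support_vectors.T @ alphas[0] + b
np.allclose(Z_vec, Z)  # expect: True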
### Method 5: Using K(x_sv, x_test) for each support vector x_sv

def linear_kernel(x1, x2):
    # K(x1, x2) = x1 . x2 = phi(x1) . phi(x2) with phi = identity
    return np.dot(x1, x2)

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)

ax = plt.gca()

xlim = ax.get_xlim()
ylim = ax.get_ylim()

xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)

YY, XX = np.meshgrid(yy, xx)

xy = np.vstack([XX.ravel(), YY.ravel()]).T

Z = np.ones(len(xy))

for index, x_test in enumerate(xy):
    decision = 0
    for i in range(len(alphas[0])):
        decision += alphas[0][i] * linear_kernel(model.support_vectors_[i], x_test)
    decision += b
    
    Z[index] = decision
    
plt.contour(XX, YY, Z.reshape(XX.shape), colors='k', levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])

plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=100,
            linewidth=1, facecolors='none', edgecolors='k')
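Method 5 is exactly the hook that makes the RBF kernel drop in: swap linear_kernel for a Gaussian kernel and retrain with kernel='rbf', and the plotting loop is unchanged. A minimal sketch (gamma=0.5 is an arbitrary illustrative choice, not tuned; sklearn's default is gamma='scale'):

def rbf_kernel(x1, x2, gamma=0.5):
    # K(x1, x2) = exp(-gamma * ||x1 - x2||^2)
    return np.exp(-gamma * np.sum((x1 - x2) ** 2))

model_rbf = svm.SVC(kernel='rbf', C=1e6, gamma=0.5)
model_rbf.fit(X, y)

alphas_rbf = model_rbf.dual_coef_
b_rbf = model_rbf.intercept_[0]

Z_rbf = np.zeros(len(xy))
for index, x_test in enumerate(xy):
    decision = b_rbf
    for i in range(len(alphas_rbf[0])):
        decision += alphas_rbf[0][i] * rbf_kernel(model_rbf.support_vectors_[i], x_test)
    Z_rbf[index] = decision

np.allclose(Z_rbf, model_rbf.decision_function(xy))  # expect: True

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cm.Paired)
plt.contour(XX, YY, Z_rbf.reshape(XX.shape), colors='k', levels=[-1, 0, 1], alpha=0.5,
            linestyles=['--', '-', '--'])
plt.scatter(model_rbf.support_vectors_[:, 0], model_rbf.support_vectors_[:, 1], s=100,
            linewidth=1, facecolors='none', edgecolors='k')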