import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib
from mpl_toolkits import mplot3d
from math import sqrt
= 'gray'
SPINE_COLOR import numpy as np
import matplotlib.pyplot as plt
'gnuplot2')
plt.get_cmap(
%matplotlib inline
# Based on: https://www.analyticsvidhya.com/blog/2016/01/complete-tutorial-ridge-lasso-regression-python/
# Lasso Regression
# Interactive tutorial on lasso regression with practical implementations and visualizations
# Build the synthetic data set used by the later cells and plot it:
# noisy samples of the line y = 4x + 7 plus polynomial features of x.

# Define input array with angles 60, 64, ..., 296 degrees converted to radians
x = np.array([i*np.pi/180 for i in range(60,300,4)])
np.random.seed(10)  # Setting seed for reproducibility
y = 4*x + 7 + np.random.normal(0,3,len(x))

# Noise-free target values at the same x positions.
y_true = 4*x + 7
max_deg = 20

# Feature columns x**1 ... x**max_deg followed by the noisy target.
data_x = [x**(i+1) for i in range(max_deg)] + [y]
# Matching column names: 'x', 'x_2', ..., 'x_20', then 'y'.
data_c = ['x'] + ['x_{}'.format(i+1) for i in range(1,max_deg)] + ['y']
data = pd.DataFrame(np.column_stack(data_x),columns=data_c)
# Constant column so an intercept can be fitted as an ordinary coefficient.
data["ones"] = 1

plt.plot(data['x'],data['y'],'.', label='Data Points')
plt.plot(data['x'], y_true,'g', label='True Function')
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.savefig('true_function.pdf', transparent=True, bbox_inches="tight")
def cost(theta_0, theta_1, x, y):
    """Return the mean squared error of the line ``theta_1 * x + theta_0``.

    Args:
        theta_0: Intercept of the candidate line.
        theta_1: Slope of the candidate line.
        x: Sequence of input values.
        y: Sequence of observed targets, same length as ``x``.

    Returns:
        Sum of squared residuals divided by ``len(x)``.

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    # Iterate positionally so the result does not depend on how ``x`` and
    # ``y`` are indexed (lists, arrays and pandas Series all work).
    s = sum((y_i - (x_i*theta_1 + theta_0))**2 for x_i, y_i in zip(x, y))
    return s/len(x)
# Evaluate the least-squares cost on a regular grid of parameters.
# x_grid holds the theta_0 values and y_grid the theta_1 values, each
# covering [-4, 15) in steps of 0.2.
x_grid, y_grid = np.mgrid[-4:15:.2, -4:15:.2]

# Look the columns up once instead of once per grid cell.
x_obs = data['x']
y_obs = data['y']

cost_matrix = np.zeros_like(x_grid)
for i in range(x_grid.shape[0]):
    for j in range(x_grid.shape[1]):
        cost_matrix[i, j] = cost(x_grid[i, j], y_grid[i, j], x_obs, y_obs)
def cost_lasso(theta_0, theta_1, x, y, lamb):
    """Return the L1-penalised mean squared error of a candidate line.

    The penalty ``lamb * (|theta_0| + |theta_1|)`` is added to every
    per-sample squared residual before averaging, so the result equals the
    mean squared error plus one copy of the penalty.

    Args:
        theta_0: Intercept of the candidate line.
        theta_1: Slope of the candidate line.
        x: Sequence of input values.
        y: Sequence of observed targets, same length as ``x``.
        lamb: Weight of the L1 penalty.

    Returns:
        Sum of the penalised squared residuals divided by ``len(x)``.

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    # The penalty does not depend on the sample, so compute it once.
    penalty = lamb*(abs(theta_0) + abs(theta_1))
    s = sum(
        (y_i - (x_i*theta_1 + theta_0))**2 + penalty
        for x_i, y_i in zip(x, y)
    )
    return s/len(x)
# Plot the lasso cost surface over the (theta_0, theta_1) grid for several
# penalty weights and save one PDF per weight.
x_grid, y_grid = np.mgrid[-4:15:.2, -4:15:.2]

#lambda = 1000 cost curve tends to lasso objective.
lamb_list = [10,100,1000]

# Look the columns up once instead of once per grid cell.
x_obs = data['x']
y_obs = data['y']

for lamb in lamb_list:
    lasso_cost_matrix = np.zeros_like(x_grid)
    for i in range(x_grid.shape[0]):
        for j in range(x_grid.shape[1]):
            lasso_cost_matrix[i, j] = cost_lasso(x_grid[i, j], y_grid[i, j], x_obs, y_obs, lamb)

    # A fresh figure per lambda: plt.axes() adds a new Axes to the current
    # figure, so reusing one figure would stack the surfaces on top of each
    # other in the later PDFs.
    fig = plt.figure(figsize=(7,7))
    ax = plt.axes(projection='3d')
    ax.plot_surface(x_grid, y_grid, lasso_cost_matrix, cmap='viridis', edgecolor='none')
    ax.set_title('Least squares objective function')
    ax.set_xlabel(r"$\theta_0$")
    ax.set_ylabel(r"$\theta_1$")
    ax.set_xlim([-4,15])
    ax.set_ylim([-4,15])

    # Angle grids, only used by the disabled wireframe sketch below.
    u = np.linspace(0, np.pi, 30)
    v = np.linspace(0, 2 * np.pi, 30)

    # x = np.outer(500*np.sin(u), np.sin(v))
    # y = np.outer(500*np.sin(u), np.cos(v))
    # z = np.outer(500*np.cos(u), np.ones_like(v))
    # ax.plot_wireframe(x, y, z)
    ax.view_init(45, 120)
    plt.savefig('lasso_lamb_{}_surface.pdf'.format(lamb), transparent=True, bbox_inches="tight")
def yy(p,soln):
    """Return points on the curve ``|x|**p + |y|**p == soln**p``.

    100 evenly spaced x values in ``[-soln, soln]`` are sampled; for each
    one both the positive and the negative y solution are emitted, so every
    x appears twice in a row.

    Args:
        p: Exponent of the norm; must be non-zero.
        soln: Radius of the curve along each axis.

    Returns:
        Tuple ``(xx_final, yy_final)`` of two lists with 200 entries each,
        ordered ``x0, x0, x1, x1, ...`` and ``y0, -y0, y1, -y1, ...``.
    """
    radius_p = soln**p
    xx_final = []
    yy_final = []
    for x in np.linspace(-soln,soln,100):
        # abs(x) covers both signs of x. The clamp keeps a tiny negative
        # rounding error at the end points from producing NaN under a
        # fractional power.
        y = max(radius_p - abs(x)**p, 0.0)**(1.0/p)
        xx_final.extend((x, x))
        yy_final.extend((y, -y))
    return xx_final, yy_final
# Fill the region outlined by the curve points.
# NOTE(review): xx_final / yy_final are only assigned in a later cell
# (from yy(p, soln)); this cell presumably relies on notebook execution order.
fig,ax = plt.subplots()
ax.fill(xx_final,yy_final)
from matplotlib.patches import Rectangle

# Contour plot of the least-squares cost with the constraint region drawn
# on top, saved as lasso_base_contour.pdf.
soln = 3
p = 0.5
levels = np.sort(np.array([2,10,20,50,100,150,200,400,600,800,1000]))
fig,ax = plt.subplots()
plt.contourf(x_grid, y_grid, cost_matrix, levels,alpha=.7)
plt.colorbar()
plt.axhline(0, color='black', alpha=.5, dashes=[2, 4],linewidth=1)
plt.axvline(0, color='black', alpha=0.5, dashes=[2, 4],linewidth=1)

# NOTE(review): yy1 / yy2 are not defined in the visible cells; they must be
# provided elsewhere before this cell runs.
xx = np.linspace(-soln,soln,100)
y1 = yy1(xx,soln,p)
y2 = yy2(xx,soln,p)

x_final = np.hstack((xx,xx))
y_final = np.hstack((y1,y2))

# Filled region bounded by |x|**p + |y|**p = soln**p.
xx_final, yy_final = yy(p,soln)

plt.fill(xx_final,yy_final)
CS = plt.contour(x_grid, y_grid, cost_matrix, levels, linewidths=1,colors='black')
plt.clabel(CS, inline=1, fontsize=8)
plt.title("Least squares objective function")
plt.xlabel(r"$\theta_0$")
plt.ylabel(r"$\theta_1$")
# Square of side sqrt(2)*soln rotated by -45 degrees about (-soln, 0), i.e.
# the diamond |theta_0| + |theta_1| = soln. The angle must be a number,
# not the string '-45'.
p1 = Rectangle((-soln, 0), np.sqrt(2)*soln,np.sqrt(2)*soln, angle = -45, color='g', label=r'$|\theta_0|+|\theta_1|=3$')
plt.scatter([7], [4],marker='*', color='r',s=25,label='Actual Solution')
plt.gca().add_patch(p1)
plt.legend()
plt.gca().set_aspect('equal')

plt.savefig('lasso_base_contour.pdf', transparent=True, bbox_inches="tight")

plt.show()
from matplotlib.patches import Rectangle

# Same contour plot of the least-squares cost, with the curves (xx, y1) and
# (xx, y2) from the preceding cell drawn as small dots.
# NOTE(review): this writes to the same file name as the preceding cell and
# will overwrite it.
levels = np.sort(np.array([2,10,20,50,100,150,200,400,600,800,1000]))
fig,ax = plt.subplots()
plt.contourf(x_grid, y_grid, cost_matrix, levels,alpha=.7)
plt.colorbar()
plt.axhline(0, color='black', alpha=.5, dashes=[2, 4],linewidth=1)
plt.axvline(0, color='black', alpha=0.5, dashes=[2, 4],linewidth=1)

plt.scatter(xx, y1, s=0.1,color='k',)
plt.scatter(xx, y2, s=0.1,color='k',)

CS = plt.contour(x_grid, y_grid, cost_matrix, levels, linewidths=1,colors='black')
plt.clabel(CS, inline=1, fontsize=8)
plt.title("Least squares objective function")
plt.xlabel(r"$\theta_0$")
plt.ylabel(r"$\theta_1$")
# Square of side sqrt(2)*soln rotated by -45 degrees about (-soln, 0), i.e.
# the diamond |theta_0| + |theta_1| = soln. The angle must be a number,
# not the string '-45'.
p1 = Rectangle((-soln, 0), np.sqrt(2)*soln,np.sqrt(2)*soln, angle = -45, color='g', label=r'$|\theta_0|+|\theta_1|=3$')
plt.scatter([7], [4],marker='*', color='r',s=25,label='Actual Solution')
plt.gca().add_patch(p1)
plt.legend()
plt.gca().set_aspect('equal')

plt.savefig('lasso_base_contour.pdf', transparent=True, bbox_inches="tight")

plt.show()
# Sum of the first two fitted coefficients (displayed as the cell result).
regressor.coef_[0] + regressor.coef_[1]
5.74196012460497
## Function generator.
# Side-by-side gradient-descent traces on x**2 (left) and |x| (right), both
# starting from 4 with step size alpha. One figure per iteration; the first
# and every tenth one is saved to PDF.
iterations = 60
p = 4
q = 4
alpha = 0.1

x = np.linspace(-5,5,1000)
y1 = x**2
y2 = abs(x)

for i in range(iterations):
    fig,ax = plt.subplots(1,2)
    ax[0].plot(x,y1)
    ax[1].plot(x,y2)
    prev = p
    qrev = q
    # d/dp p**2 = 2p
    p = p - 2*alpha*p
    # The (sub)gradient of |q| is sign(q); without the sign the iterate keeps
    # decreasing past the minimum once q becomes negative.
    q = q - alpha*np.sign(q)
    val = p

    ax[0].arrow(prev,prev**2,p-prev,p**2-prev**2,head_width=0.5)
    ax[1].arrow(qrev,abs(qrev),q - qrev ,abs(q) - abs(qrev),head_width = 0.5)
    ax[0].scatter([prev],[prev**2],s=100)
    ax[1].scatter([qrev],abs(qrev),s=100)
    ax[0].set_xlabel("x")
    ax[1].set_xlabel("x")
    ax[1].set_ylabel("Cost")
    ax[0].set_ylabel("Cost")
    ax[1].set_xlim(-5,5)
    ax[1].set_ylim(0,5)
    ax[1].set_title("Iteration"+str(i+1)+" (lr: "+str(alpha)+")")
    ax[0].set_title("Iteration"+str(i+1)+" (lr: "+str(alpha)+")")
    # Save iteration 1 and every 10th iteration (files ..._0.pdf to ..._6.pdf).
    if i == 0 or i % 10 == 9:
        plt.savefig("GD_iteration_"+str((i+1)//10)+".pdf", format='pdf',transparent=True)

    plt.show()
    # Release the figure so 60 iterations do not accumulate open figures.
    plt.close(fig)
# Annotated descent figures: the left panel tracks (x1, y1) on x**2, the
# right panel tracks (x2, y2) on |x|. One PDF is written per iteration.
# Relies on `x` and `alpha` defined by an earlier cell.
x1,y1 = 5**0.5,5
y2,x2 = 5,5
x_shift = 0
y_shift = -0.5
iterations = 11
for i in range(iterations):
    fig, ax = plt.subplots(nrows=1, ncols=2)

    ax[0].set_ylim(-1,10)
    ax[0].set_xlim(-5,5)
    ax[1].set_xlim(-5,5)
    ax[1].set_ylim(-1,10)
    ax[0].plot(x,x**2, color = 'blue')
    ax[1].plot(x,abs(x),color = 'red')
    ax[0].scatter(x1,y1,color = 'black')

    # Label each point with its current cost value.
    ax[0].annotate(str(round(y1,3)), (x1 + x_shift, y1+y_shift))
    ax[1].annotate(str(y2), (x2 + x_shift, y2 + y_shift))
    ax[1].scatter(x2,y2,color = 'black')
    fig.suptitle('Iteration {}'.format(i))
    # A truncated `if(iteratio)` fragment preceded this call and could not
    # parse; every iteration is saved (the file name already carries i).
    plt.savefig('GD_Iteration_{}.pdf'.format(i))

    # Advance both traces: y1 shrinks geometrically, y2 by a fixed 0.5.
    y1 = y1 - alpha*y1
    y2 = y2 - 0.5
    x2 = y2
    x1 = y1**0.5
from sklearn.linear_model import Lasso
from matplotlib.patches import Rectangle

# For each penalty weight alpha: fit a lasso on ['ones', 'x'], plot the fit
# (left) and the cost contours with the L1 ball through the solution (right).
for alpha in np.linspace(1,2,5):
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

    deg = 1
    predictors = ['ones','x']
    if deg >= 2:
        predictors.extend(['x_%d'%i for i in range(2,deg+1)])

    # `normalize` is not passed: it was ignored whenever fit_intercept=False
    # and the parameter no longer exists in scikit-learn >= 1.2.
    regressor = Lasso(alpha=alpha, fit_intercept=False)
    regressor.fit(data[predictors],data['y'])
    y_pred = regressor.predict(data[predictors])

    # Plot
    ax[0].scatter(data['x'],data['y'], label='Train')
    ax[0].plot(data['x'], y_pred,'k', label='Prediction')
    ax[0].plot(data['x'], y_true,'g.', label='True Function')
    ax[0].legend()
    # Raw f-string keeps the backslash in $\mu$ without an invalid escape.
    ax[0].set_title(rf"Degree: {deg} | $\mu$: {alpha} | Max Coef: {max(regressor.coef_, key=abs):.2f}")

    # Diamond |theta_0| + |theta_1| = total: a square rotated by -45 degrees
    # about its corner at (-total, 0).
    total = abs(regressor.coef_[0]) + abs(regressor.coef_[1])
    p1 = Rectangle((-total, 0), np.sqrt(2)*total, np.sqrt(2)*total, angle = -45, alpha=0.6, color='g', label=r'$|\theta_0|+|\theta_1|={:.2f}$'.format(total))
    ax[1].add_patch(p1)

    # Contour
    levels = np.sort(np.array([2,10,20,50,100,150,200,400,600,800,1000]))
    ax[1].contourf(x_grid, y_grid, cost_matrix, levels,alpha=.7)
    #ax[1].colorbar()
    ax[1].axhline(0, color='black', alpha=.5, dashes=[2, 4],linewidth=1)
    ax[1].axvline(0, color='black', alpha=0.5, dashes=[2, 4],linewidth=1)

    # Drawn explicitly on the right panel (previously via the pyplot
    # current-axes state, which pointed at the same Axes).
    CS = ax[1].contour(x_grid, y_grid, cost_matrix, levels, linewidths=1,colors='black')
    ax[1].clabel(CS, inline=1, fontsize=8)
    ax[1].set_title("Least squares objective function")
    ax[1].set_xlabel(r"$\theta_0$")
    ax[1].set_ylabel(r"$\theta_1$")
    ax[1].scatter(regressor.coef_[0],regressor.coef_[1] ,marker='x', color='r',s=25,label='Lasso Solution')
    ax[1].scatter([7], [4],marker='*', color='r',s=25,label='Actual Solution')
    ax[1].set_xlim([-4,15])
    ax[1].set_ylim([-4,15])
    ax[1].legend()

    plt.savefig('lasso_{}.pdf'.format(alpha), transparent=True, bbox_inches="tight")

    plt.show()
    plt.clf()
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
from sklearn.linear_model import Lasso

# Fit a degree-19 polynomial lasso for a small and a very large penalty and
# save one figure per (alpha, degree) pair.
for deg in [19]:
    predictors = ['ones', 'x']
    if deg >= 2:
        predictors.extend(['x_%d'%i for i in range(2,deg+1)])

    for alpha in [1, 1e10]:
        # `normalize=False` was the default and the parameter no longer
        # exists in scikit-learn >= 1.2, so it is not passed.
        regressor = Lasso(alpha=alpha, fit_intercept=False)
        regressor.fit(data[predictors],data['y'])
        y_pred = regressor.predict(data[predictors])
        plt.scatter(data['x'],data['y'], label='Train')
        plt.plot(data['x'], y_pred,'k', label='Prediction')
        plt.plot(data['x'], y_true,'g.', label='True Function')

        plt.legend()
        # Raw f-string keeps the backslash in $\mu$ without an invalid escape.
        plt.title(rf"Degree: {deg} | $\mu$: {alpha} | Max Coeff: {max(regressor.coef_, key=abs):.2f}")
        plt.savefig('lasso_{}_{}.pdf'.format(alpha, deg), transparent=True, bbox_inches="tight")

        plt.show()
        # Clear the current figure so the next alpha starts from empty axes.
        plt.clf()
<Figure size 432x288 with 0 Axes>
import pandas as pd

# Regularisation path: absolute lasso coefficients versus penalty weight for
# the data in dataset.xlsx. All columns except the first and last are used as
# features; the last column is the target.
data = pd.read_excel("dataset.xlsx")
cols = data.columns
alph_list = np.logspace(-5,1,num=20, endpoint=False)
coef_list = []

for alpha in alph_list:
    # NOTE(review): `normalize` was removed in scikit-learn 1.2. It is kept
    # here because, with the default fit_intercept=True, it changes the fit;
    # migrating needs an explicit scaling step — confirm the target version.
    regressor = Lasso(alpha=alpha,normalize=True)
    regressor.fit(data[cols[1:-1]],data[cols[-1]])

    coef_list.append(regressor.coef_)

# One row per feature, one column per alpha.
coef_list = np.abs(np.array(coef_list).T)
for i in range(len(cols[1:-1])):
    # Braces around the index so two-digit subscripts render in full.
    plt.loglog(alph_list, coef_list[i] , label=r"$\theta_{{{}}}$".format(i))
plt.xlabel(r'$\mu$ value')
plt.ylabel('Coefficient Value')

plt.legend()
plt.savefig('lasso_reg.pdf', transparent=True, bbox_inches="tight")