Gradient Descent

Interactive tutorial on gradient descent with practical implementations and visualizations
Author

Nipun Batra

Published

July 24, 2025

Open In Colab
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Apply seaborn's despine to the current figure; nothing has been plotted yet,
# which is why the cell output below reports a figure with 0 Axes.
sns.despine()
<Figure size 432x288 with 0 Axes>
# Starting point (x, y) for the two-variable gradient descent run further down.
init_x = 2
init_y = 5
%matplotlib notebook
import numpy as np
from mpl_toolkits.mplot3d import Axes3D  
# Axes3D import has side effects, it enables using projection='3d' in add_subplot
import matplotlib.pyplot as plt
import random

def fun(x, y):
    """Evaluate the quadratic surface used in the 2-D descent demo.

    Computes 14 + 3x^2 + 14y^2 - 12x - 28y + 12xy elementwise.

    Args:
        x: Scalar or array-like of x coordinates.
        y: Scalar or array-like of y coordinates (broadcast against ``x``).

    Returns:
        NumPy scalar/array holding the surface height at each (x, y).
    """
    xs = np.array(x)
    ys = np.array(y)
    # Accumulate the terms in the same left-to-right order as the formula.
    height = 14 + 3 * (xs ** 2)
    height = height + 14 * (ys ** 2)
    height = height - 12 * xs
    height = height - 28 * ys
    return height + 12 * xs * ys


# Gradient descent on fun(x, y) starting from (init_x, init_y): every iteration
# draws the surface together with all points visited so far, then takes one step.
x_ = init_x
y_ = init_y
lst_x = [x_]
lst_y = [y_]
alpha = 0.005  # learning rate

# The surface grid does not depend on the iteration, so evaluate it once.
x = y = np.arange(-4.0, 4.0, 0.05)
X, Y = np.meshgrid(x, y)
zs = np.array(fun(np.ravel(X), np.ravel(Y)))
Z = zs.reshape(X.shape)

for i in range(10):

    fig = plt.figure(figsize=(10,10))
    ax = fig.add_subplot(111, projection='3d')
    x_ = lst_x[-1]
    y_ = lst_y[-1]
    # Path so far (all visited points) drawn on top of the cost surface.
    ax.scatter3D(lst_x,lst_y,fun(lst_x,lst_y),lw=10,alpha=1,cmap='hsv')
    ax.plot_surface(X, Y, Z,color='orange',cmap='hsv')

    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')
    plt.title("Iteration "+str(i+1))
    # Step against the gradient of fun:
    #   d/dx = 6*x + 12*y - 12
    #   d/dy = 28*y + 12*x - 28
    # The squared terms 3*x**2 and 14*y**2 differentiate to 6*x and 28*y.
    lst_x.append(x_ - alpha * (6*x_ - 12 + 12*y_))
    lst_y.append(y_ - alpha * (28*y_ - 28 + 12*x_))

    plt.show()

x = np.linspace(-5,5,1000)
y = x**2
plt.plot(x,y)
plt.title("Cost Function")

# 1-D gradient descent on the cost p**2 (gradient 2*p): one figure per step,
# each saved as iteration-<k>.eps. The curve (x, y) is taken from earlier cells.
plt.rcParams['axes.facecolor'] = '#fafafa'
p, alpha, iterations = 4.1, 0.05, 20

for i in range(iterations):
    step = i + 1
    prev = p
    p = prev - (alpha*2*prev)
    prev_cost = prev**2

    plt.figure()
    plt.plot(x, y)
    # Arrow from the current point to the next iterate along the curve.
    plt.arrow(prev, prev_cost, p - prev, p**2 - prev_cost, head_width=0.5)
    plt.scatter([prev], [prev_cost], s=100)
    plt.xlabel("x")
    plt.ylabel("Cost")
    plt.title("Iteration {} (lr: {})".format(step, alpha))
    plt.savefig("iteration-{}.eps".format(step), format='eps', transparent=True)

    plt.show()

# Emit one Beamer frame per saved "iteration-<k>.eps" figure.
# Every LaTeX backslash is escaped explicitly: "\i" and "\e" are not valid
# Python escape sequences and only worked by accident (with a warning).
frames = []
for i in range(iterations):
    figure_path = "gradient-descent/iteration-" + str(i + 1) + ".eps"
    frames.append(
        "\\begin{frame}{Gradient Descent}\n"
        "  \\begin{center}\n"
        "       \\includegraphics[totalheight=6cm]{" + figure_path + "}\n"
        "   \\end{center}\n"
        "\\end{frame}\n\n"
    )
# Join once instead of growing the string inside the loop.
s = "".join(frames)
print(s)
\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-1.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-2.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-3.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-4.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-5.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-6.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-7.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-8.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-9.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-10.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-11.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-12.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-13.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-14.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-15.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-16.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-17.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-18.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-19.eps}
   \end{center}
\end{frame}

\begin{frame}{Gradient Descent}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/iteration-20.eps}
   \end{center}
\end{frame}

def func(x):
    """Return sin(x) + sin(x/2) + sin(x/3), evaluated elementwise.

    Args:
        x: Scalar or NumPy array of inputs.

    Returns:
        The sum of the three sinusoids, with the same shape as ``x``.
    """
    full_rate = np.sin(x)
    half_rate = np.sin(x / 2)
    third_rate = np.sin(x / 3)
    return full_rate + half_rate + third_rate
# Plot func over the non-positive part of [-10, 10], mark the point at
# x = -7.2 with an annotated arrow, and save the figure as local-minima.eps.
fig, ax = plt.subplots()
grid = np.linspace(-10, 10, 100)
x = grid[grid <= 0]
y = func(x)

val = -7.2
marker_height = func(np.array([val]))

plt.scatter([val], marker_height)
ax.annotate(
    'local minima',
    xy=(val, func(val)),
    xytext=(val, 1),
    arrowprops={'facecolor': 'black', 'shrink': 0.05},
)
# Hide the tick marks on both axes.
plt.xticks([])
plt.yticks([])
plt.plot(x, y)
plt.savefig("local-minima.eps", format='eps', transparent=True)

# Same 1-D descent on y = x**2, but with a large learning rate (0.95): each
# update maps p to p - 1.9*p, so the iterate jumps across the minimum.
# One figure per step is saved as overshooting-<k>.eps.
x = np.linspace(-5, 5, 1000)
y = x**2

p, alpha, iterations = 4.1, .95, 10

for i in range(iterations):
    step = i + 1
    prev = p
    p = prev - (alpha*2*prev)
    prev_cost = prev**2

    plt.figure()
    plt.plot(x, y)
    # Arrow from the current point to the next iterate along the curve.
    plt.arrow(prev, prev_cost, p - prev, p**2 - prev_cost, head_width=0.5)
    plt.scatter([prev], [prev_cost], s=100)
    plt.xlabel("x")
    plt.ylabel("Cost")
    plt.title("Iteration {} (lr: {})".format(step, alpha))
    plt.savefig("overshooting-{}.eps".format(step), format='eps', transparent=True)
    plt.show()

# Emit one Beamer frame per saved "overshooting-<k>.eps" figure.
# Every LaTeX backslash is escaped explicitly: "\i" and "\e" are not valid
# Python escape sequences and only worked by accident (with a warning).
frames = []
for i in range(iterations):
    figure_path = "gradient-descent/overshooting-" + str(i + 1) + ".eps"
    frames.append(
        "\\begin{frame}{Overshooting}\n"
        "  \\begin{center}\n"
        "       \\includegraphics[totalheight=6cm]{" + figure_path + "}\n"
        "   \\end{center}\n"
        "\\end{frame}\n\n"
    )
# Join once instead of growing the string inside the loop.
s = "".join(frames)
print(s)
\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-1.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-2.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-3.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-4.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-5.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-6.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-7.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-8.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-9.eps}
   \end{center}
\end{frame}

\begin{frame}{Overshooting}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/overshooting-10.eps}
   \end{center}
\end{frame}

# Same 1-D descent on y = x**2 with a small learning rate (0.01): each update
# only shrinks p by 2%, so progress toward the minimum is slow.
# One figure per step is saved as undershooting-<k>.eps.
x = np.linspace(-5, 5, 1000)
y = x**2

p, alpha, iterations = 4.1, .01, 10

for i in range(iterations):
    step = i + 1
    prev = p
    p = prev - (alpha*2*prev)
    prev_cost = prev**2

    plt.figure()
    plt.plot(x, y)
    # Arrow from the current point to the next iterate along the curve.
    plt.arrow(prev, prev_cost, p - prev, p**2 - prev_cost, head_width=0.5)
    plt.scatter([prev], [prev_cost], s=100)
    plt.xlabel("x")
    plt.ylabel("Cost")
    plt.title("Iteration {} (lr: {})".format(step, alpha))
    plt.savefig("undershooting-{}.eps".format(step), format='eps', transparent=True)
    plt.show()

# Emit one Beamer frame per saved "undershooting-<k>.eps" figure.
# Every LaTeX backslash is escaped explicitly: "\i" and "\e" are not valid
# Python escape sequences and only worked by accident (with a warning).
frames = []
for i in range(iterations):
    figure_path = "gradient-descent/undershooting-" + str(i + 1) + ".eps"
    frames.append(
        "\\begin{frame}{Slow Convergence}\n"
        "  \\begin{center}\n"
        "       \\includegraphics[totalheight=6cm]{" + figure_path + "}\n"
        "   \\end{center}\n"
        "\\end{frame}\n\n"
    )
# Join once instead of growing the string inside the loop.
s = "".join(frames)
print(s)
\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-1.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-2.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-3.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-4.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-5.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-6.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-7.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-8.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-9.eps}
   \end{center}
\end{frame}

\begin{frame}{Slow Convergence}
  \begin{center}
       \includegraphics[totalheight=6cm]{gradient-descent/undershooting-10.eps}
   \end{center}
\end{frame}

# Illustrative cost-vs-iteration curves: "GD" is the smooth curve 1/x and
# "SGD" is the same curve perturbed by 0.2 * (uniform[0, 1) - 0.5).
x = np.linspace(1, 10, 100)
y = 1 / x
plt.plot(y, label="GD")

noise = np.random.random(x.size)
# The first three samples are zeroed, so those SGD points sit exactly 0.1
# below the GD curve.
# NOTE(review): if they were meant to coincide with GD, 0.5 would be the
# neutral value — confirm intent.
noise[:3] = 0
sgd_cost = y + 0.2 * (noise - 0.5)
plt.plot(sgd_cost, label="SGD")

plt.legend()
plt.title("Iterations vs Cost")
plt.xlabel("Iteration")
plt.ylabel("Cost")
plt.savefig("gd-sgd.eps", format='eps', transparent=True)

# Ten gradient-descent updates on val**2 (gradient 2*val) with learning rate
# 0.05, printing the iterate after every step.
val, alpha = 4.1, 0.05

for i in range(10):
    val -= alpha * 2 * val
    print(val)
3.6899999999999995
3.3209999999999997
2.9888999999999997
2.6900099999999996
2.4210089999999997
2.1789080999999997
1.9610172899999996
1.7649155609999996
1.5884240048999996
1.4295816044099996