Convolution Operation Stride

Interactive tutorial on convolution operation stride with practical implementations and visualizations

Author

Nipun Batra

Published

July 24, 2025

Open In Colab

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from matplotlib import patches

# Toy 5x5 input whose entries are drawn at random from the digits 0-9.
inp = np.random.choice(range(10), (5, 5))

# 3x3 filter: every row is [1, 0, -1].
filter_conv = np.array([[1, 0, -1]] * 3)

plt.imshow(inp, cmap='Greys')

fig, ax = plt.subplots(ncols=3, figsize=(12, 4))

# Annotated heatmaps of the input and the filter, drawn without colour bars.
heatmap_style = dict(annot=True, cbar=None, cmap='Purples')
sns.heatmap(inp, ax=ax[0], **heatmap_style)
sns.heatmap(filter_conv, ax=ax[1], **heatmap_style)

# Shade the top-left 3x3 window of the input, i.e. the first filter position.
ax[0].add_patch(
    patches.Rectangle(
        (0, 0), 3, 3,
        linewidth=5, edgecolor='grey', facecolor='black', alpha=0.5,
    )
)

ax[0].set_title("Input")
ax[1].set_title("Filter")

Text(0.5, 1.0, 'Filter')

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.animation

#####################
# Array preparation
#####################

# Demo configuration: input side length, padding, stride and filter size.
n, p, s, f = 6, 0, 2, 3

# Random n x n integer input with entries in {0, ..., 4}.
a = np.random.randint(0, 5, size=(n, n))

# f x f kernel whose rows all read [1, 0, -1]; the reshape only succeeds
# for f == 3 because the repeated pattern has three entries.
kernel = np.array([1, 0, -1] * f).reshape(f, f)

def create_animation(a, kernel, p, s, fname, frate, figsize=(8, 4)):
    """Animate a strided 2-D filter pass over ``a`` and save it to ``fname``.

    The figure holds three grids: the (optionally zero-padded) input, the
    kernel, and the output. Each frame highlights one input window and
    writes the sum of the element-wise product of that window with the
    kernel into the matching output cell.

    Parameters
    ----------
    a : 2-D array
        Input values. With ``p > 0`` they are copied into an integer array.
    kernel : 2-D array
        Filter; its own shape determines the window size.
    p : int
        Number of zero cells added on every side of ``a``.
    s : int
        Stride, i.e. how many cells the window moves between positions.
    fname : str
        Output path handed to ``Animation.save`` (imagemagick writer).
    frate : number
        Forwarded as ``FuncAnimation``'s ``interval`` (delay between frames
        in milliseconds); despite the name it is not a frame rate.
    figsize : tuple
        Figure size in inches.

    Raises
    ------
    ValueError
        If ``s`` is smaller than 1 or the kernel does not fit inside the
        padded input.
    """
    if s < 1:
        raise ValueError("stride s must be a positive integer")

    # Take the window size from the kernel itself rather than a global `f`.
    kernel_h, kernel_w = kernel.shape

    if p:
        # visualization array: the input framed by p zeros on every side
        va = np.zeros((a.shape[0]+2*p, a.shape[1]+2*p), dtype=int)
        va[p:-p, p:-p] = a
        # padding cells stay at colour value 0, original cells get 0.5
        va_color = np.zeros((a.shape[0]+2*p, a.shape[1]+2*p))
        va_color[p:-p, p:-p] = 0.5
    else:
        va = a
        va_color = np.zeros_like(a)

    # Window positions per axis: floor((size + 2p - f) / s) + 1.
    out_rows = (va.shape[0] - kernel_h) // s + 1
    out_cols = (va.shape[1] - kernel_w) // s + 1
    if out_rows < 1 or out_cols < 1:
        raise ValueError("kernel does not fit inside the (padded) input")
    # output array (only its shape and a per-frame highlight are used)
    res = np.zeros((out_rows, out_cols))

    #####################
    # Create initial plot
    #####################
    fig = plt.figure(figsize=figsize)

    def add_axes_inches(fig, rect):
        # fig.add_axes expects figure fractions; rect is given in inches.
        w, h = fig.get_size_inches()
        return fig.add_axes([rect[0]/w, rect[1]/h, rect[2]/w, rect[3]/h])

    # All three grids share one cell size so they are drawn to the same scale.
    axwidth = 3.
    cellsize = axwidth/va.shape[1]
    axheight = cellsize*va.shape[0]

    ax_va = add_axes_inches(fig, [cellsize, cellsize, axwidth, axheight])
    ax_kernel = add_axes_inches(fig, [cellsize*2+axwidth,
                                      (2+res.shape[0])*cellsize-kernel.shape[0]*cellsize,
                                      kernel.shape[1]*cellsize,
                                      kernel.shape[0]*cellsize])
    ax_res = add_axes_inches(fig, [cellsize*3+axwidth+kernel.shape[1]*cellsize,
                                   2*cellsize,
                                   res.shape[1]*cellsize,
                                   res.shape[0]*cellsize])
    ax_kernel.set_title("Kernel", size=12)

    im_va = ax_va.imshow(va_color, vmin=0., vmax=1.3, cmap="Blues")
    ax_va.set_title("Image size: {}X{}\n Padding: {} and Strides: {}".format(
        a.shape[0], a.shape[1], p, s))
    for i in range(va.shape[0]):
        for j in range(va.shape[1]):
            ax_va.text(j, i, va[i, j], va="center", ha="center")

    ax_kernel.imshow(np.zeros_like(kernel), vmin=-1, vmax=1, cmap="Pastel1")
    for i in range(kernel.shape[0]):
        for j in range(kernel.shape[1]):
            ax_kernel.text(j, i, kernel[i, j], va="center", ha="center")

    im_res = ax_res.imshow(res, vmin=0, vmax=1.3, cmap="Greens")
    # One (initially empty) text artist per output cell, filled in per frame.
    res_texts = [
        [ax_res.text(j, i, "", va="center", ha="center")
         for j in range(res.shape[1])]
        for i in range(res.shape[0])
    ]

    # Report the real (stride-aware) output shape.
    ax_res.set_title("Output size: {}X{}".format(res.shape[0], res.shape[1]))

    for ax in [ax_va, ax_kernel, ax_res]:
        ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
        ax.yaxis.set_major_locator(mticker.IndexLocator(1, 0))
        ax.xaxis.set_major_locator(mticker.IndexLocator(1, 0))
        ax.grid(color="k")

    ###############
    # Animation
    ###############
    def init():
        for row in res_texts:
            for text in row:
                text.set_text("")

    def animate(ij):
        i, j = ij
        # Window for output cell (i, j): top-left corner at (s*i, s*j).
        rows = slice(s*i, s*i + kernel_h)
        cols = slice(s*j, s*j + kernel_w)

        # calculate result
        res_ij = (kernel*va[rows, cols]).sum()
        res_texts[i][j].set_text(res_ij)

        # highlight the current input window
        c = va_color.copy()
        c[rows, cols] = 1.
        im_va.set_array(c)

        # highlight the output cell being written
        r = res.copy()
        r[i, j] = 1
        im_res.set_array(r)

    row_idx, col_idx = np.indices(res.shape)
    # A list (not a bare zip) gives FuncAnimation a known frame count, so
    # save() writes every window position instead of a default-sized prefix.
    frames = list(zip(row_idx.flat, col_idx.flat))
    ani = matplotlib.animation.FuncAnimation(fig, animate, init_func=init,
                                             frames=frames, interval=frate)
    ani.save(fname, writer="imagemagick")

# Render the demo input with the padding/stride configured above; 400 is
# passed as `frate`, which create_animation forwards as the frame interval.
create_animation(a, kernel, p, s, 'demo.gif', 400)

from keras.datasets import mnist

# Image dimensions (not used by the animation call in this cell).
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Same kernel over the first training image: no padding, stride 1,
# frate 2 and a wider (20, 4) figure.
create_animation(x_train[0], kernel, 0, 1, 'mnist.gif', 2, (20, 4))

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.animation

#####################
# Array preparation
#####################

# Demo configuration: input side length, padding, stride and filter size.
n, p, s, f = 6, 0, 2, 3

# Random n x n integer input with entries in {0, ..., 4}.
a = np.random.randint(0, 5, size=(n, n))

# f x f kernel whose rows all read [1, 0, -1]; the reshape only succeeds
# for f == 3 because the repeated pattern has three entries.
kernel = np.array([1, 0, -1] * f).reshape(f, f)

def create_static(a, kernel, p, s, fname, frate, figsize=(8, 4)):
    """Draw a strided 2-D filter pass over ``a`` as one figure and save it.

    The figure holds three grids: the (optionally zero-padded) input, the
    kernel, and the output. Every window position is evaluated in row-major
    order, so the saved image shows the fully populated output with the
    last window and last output cell highlighted.

    Parameters
    ----------
    a : 2-D array
        Input values. With ``p > 0`` they are copied into an integer array.
    kernel : 2-D array
        Filter; its own shape determines the window size.
    p : int
        Number of zero cells added on every side of ``a``.
    s : int
        Stride, i.e. how many cells the window moves between positions.
    fname : str
        Output path handed to ``Figure.savefig``.
    frate : number
        Unused; kept so the signature matches ``create_animation``.
    figsize : tuple
        Figure size in inches.

    Raises
    ------
    ValueError
        If ``s`` is smaller than 1 or the kernel does not fit inside the
        padded input.
    """
    if s < 1:
        raise ValueError("stride s must be a positive integer")

    # Take the window size from the kernel itself rather than a global `f`.
    kernel_h, kernel_w = kernel.shape

    if p:
        # visualization array: the input framed by p zeros on every side
        va = np.zeros((a.shape[0]+2*p, a.shape[1]+2*p), dtype=int)
        va[p:-p, p:-p] = a
        # padding cells stay at colour value 0, original cells get 0.5
        va_color = np.zeros((a.shape[0]+2*p, a.shape[1]+2*p))
        va_color[p:-p, p:-p] = 0.5
    else:
        va = a
        va_color = np.zeros_like(a)

    # Window positions per axis: floor((size + 2p - f) / s) + 1.
    out_rows = (va.shape[0] - kernel_h) // s + 1
    out_cols = (va.shape[1] - kernel_w) // s + 1
    if out_rows < 1 or out_cols < 1:
        raise ValueError("kernel does not fit inside the (padded) input")
    # output array (only its shape and a highlight mask are used)
    res = np.zeros((out_rows, out_cols))

    #####################
    # Create initial plot
    #####################
    fig = plt.figure(figsize=figsize)

    def add_axes_inches(fig, rect):
        # fig.add_axes expects figure fractions; rect is given in inches.
        w, h = fig.get_size_inches()
        return fig.add_axes([rect[0]/w, rect[1]/h, rect[2]/w, rect[3]/h])

    # All three grids share one cell size so they are drawn to the same scale.
    axwidth = 3.
    cellsize = axwidth/va.shape[1]
    axheight = cellsize*va.shape[0]

    ax_va = add_axes_inches(fig, [cellsize, cellsize, axwidth, axheight])
    ax_kernel = add_axes_inches(fig, [cellsize*2+axwidth,
                                      (2+res.shape[0])*cellsize-kernel.shape[0]*cellsize,
                                      kernel.shape[1]*cellsize,
                                      kernel.shape[0]*cellsize])
    ax_res = add_axes_inches(fig, [cellsize*3+axwidth+kernel.shape[1]*cellsize,
                                   2*cellsize,
                                   res.shape[1]*cellsize,
                                   res.shape[0]*cellsize])
    ax_kernel.set_title("Kernel", size=12)

    im_va = ax_va.imshow(va_color, vmin=0., vmax=1.3, cmap="Blues")
    ax_va.set_title("Image size: {}X{}\n Padding: {} and Strides: {}".format(
        a.shape[0], a.shape[1], p, s))
    for i in range(va.shape[0]):
        for j in range(va.shape[1]):
            ax_va.text(j, i, va[i, j], va="center", ha="center")

    ax_kernel.imshow(np.zeros_like(kernel), vmin=-1, vmax=1, cmap="Pastel1")
    for i in range(kernel.shape[0]):
        for j in range(kernel.shape[1]):
            ax_kernel.text(j, i, kernel[i, j], va="center", ha="center")

    im_res = ax_res.imshow(res, vmin=0, vmax=1.3, cmap="Greens")
    # One (initially empty) text artist per output cell.
    res_texts = [
        [ax_res.text(j, i, "", va="center", ha="center")
         for j in range(res.shape[1])]
        for i in range(res.shape[0])
    ]

    # Report the real (stride-aware) output shape.
    ax_res.set_title("Output size: {}X{}".format(res.shape[0], res.shape[1]))

    for ax in [ax_va, ax_kernel, ax_res]:
        ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
        ax.yaxis.set_major_locator(mticker.IndexLocator(1, 0))
        ax.xaxis.set_major_locator(mticker.IndexLocator(1, 0))
        ax.grid(color="k")

    ###############
    # Drawing
    ###############
    def animate(ij):
        i, j = ij
        # Window for output cell (i, j): top-left corner at (s*i, s*j).
        rows = slice(s*i, s*i + kernel_h)
        cols = slice(s*j, s*j + kernel_w)

        # calculate result
        res_ij = (kernel*va[rows, cols]).sum()
        res_texts[i][j].set_text(res_ij)

        # highlight the current input window
        c = va_color.copy()
        c[rows, cols] = 1.
        im_va.set_array(c)

        # highlight the output cell being written
        r = res.copy()
        r[i, j] = 1
        im_res.set_array(r)

    row_idx, col_idx = np.indices(res.shape)
    # animate() expects a single (i, j) pair, so apply it position by
    # position instead of handing it the whole zip object.
    for ij in zip(row_idx.flat, col_idx.flat):
        animate(ij)
    fig.savefig(fname)

from keras import backend as K

# input image dimensions
img_rows, img_cols = 28, 28

# Reload MNIST as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Per-sample shape: the singleton channel axis goes first or last depending
# on the backend's image data format.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# Add the channel axis to every image while keeping the sample axis as is.
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

import tensorflow.keras as keras
import tensorflow.keras.layers as layers

# Two single-filter 3x3 convolution models over 28x28x1 inputs; they differ
# only in the activation (linear vs. ReLU).
model_vertical_edge = keras.Sequential()
model_vertical_edge.add(
    layers.Conv2D(
        filters=1,
        kernel_size=(3, 3),
        activation='linear',
        input_shape=(28, 28, 1),
    )
)

model_vertical_edge_relu = keras.Sequential()
model_vertical_edge_relu.add(
    layers.Conv2D(
        filters=1,
        kernel_size=(3, 3),
        activation='relu',
        input_shape=(28, 28, 1),
    )
)

# Replace the linear model's kernel with filter_conv, reshaped to the
# layer's own kernel shape; the remaining weights are written back untouched.
T = model_vertical_edge.layers[0].get_weights()
T[0] = filter_conv.reshape(T[0].shape)
model_vertical_edge.layers[0].set_weights(T)

# Filter response for training sample 2 (pixel values scaled by 1/255).
linear_response = model_vertical_edge.predict(x_train[2:3]/255)
sns.heatmap(linear_response.reshape(26, 26), cmap='Greys')
plt.savefig("4-vertical-edge-linear.pdf", transparent=True)

# Same kernel for the ReLU model.
T = model_vertical_edge_relu.layers[0].get_weights()
T[0] = filter_conv.reshape(T[0].shape)
model_vertical_edge_relu.layers[0].set_weights(T)

relu_response = model_vertical_edge_relu.predict(x_train[2:3]/255)
sns.heatmap(relu_response.reshape(26, 26), cmap='Greys')
plt.savefig("4-vertical-edge-relu.pdf", transparent=True)

# The unfiltered, scaled input image for reference.
scaled_input = x_train[2:3]/255
sns.heatmap(scaled_input.reshape(28, 28), cmap='Greys')
plt.savefig("mnist-4.pdf", transparent=True)

# Single-filter 3x3 ReLU convolution model that will hold the transposed filter.
model_horizontal_edge_relu = keras.Sequential()
model_horizontal_edge_relu.add(layers.Conv2D(filters=1, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))

# Install filter_conv.T as the kernel; the other weights are written back unchanged.
T = model_horizontal_edge_relu.layers[0].get_weights()
T[0] = filter_conv.T.reshape(T[0].shape)
model_horizontal_edge_relu.layers[0].set_weights(T)

# Training sample 2 through the transposed-filter model.
sns.heatmap(model_horizontal_edge_relu.predict(x_train[2:3]/255).reshape(26, 26),cmap='Greys')
plt.savefig("4-horizontal-edge-relu.pdf", transparent=True)

# Training sample 0: scaled input, then both filter responses.
# (File names suggest this sample is the digit 5.)
sns.heatmap((x_train[0:1]/255).reshape(28, 28),cmap='Greys')
plt.savefig("mnist-5.pdf", transparent=True)

sns.heatmap(model_horizontal_edge_relu.predict(x_train[0:1]/255).reshape(26, 26),cmap='Greys')
plt.savefig("5-horizontal-edge-relu.pdf", transparent=True)

sns.heatmap(model_vertical_edge_relu.predict(x_train[0:1]/255).reshape(26, 26),cmap='Greys')
plt.savefig("5-vertical-edge-relu.pdf", transparent=True)

# Training sample 5: scaled input, then both filter responses.
# (File names suggest this sample is the digit 2.)
sns.heatmap((x_train[5:6]/255).reshape(28, 28),cmap='Greys')
plt.savefig("mnist-2.pdf", transparent=True)

sns.heatmap(model_horizontal_edge_relu.predict(x_train[5:6]/255).reshape(26, 26),cmap='Greys')
plt.savefig("2-horizontal-edge-relu.pdf", transparent=True)

sns.heatmap(model_vertical_edge_relu.predict(x_train[5:6]/255).reshape(26, 26),cmap='Greys')
plt.savefig("2-vertical-edge-relu.pdf", transparent=True)

# Training sample 6: scaled input, then both filter responses.
# (File names suggest this sample is the digit 1.)
sns.heatmap((x_train[6:7]/255).reshape(28, 28),cmap='Greys')
plt.savefig("mnist-1.pdf", transparent=True)

sns.heatmap(model_horizontal_edge_relu.predict(x_train[6:7]/255).reshape(26, 26),cmap='Greys')
plt.savefig("1-horizontal-edge-relu.pdf", transparent=True)

sns.heatmap(model_vertical_edge_relu.predict(x_train[6:7]/255).reshape(26, 26),cmap='Greys')
plt.savefig("1-vertical-edge-relu.pdf", transparent=True)

CIFAR

from keras.datasets import cifar10

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170500096/170498071 [==============================] - 91s 1us/step

# First layers of a CIFAR-10 model: 32 3x3 filters with 'same' padding,
# followed by ReLU. Built through the `keras` / `layers` aliases imported
# earlier, because the bare names Sequential, Conv2D and Activation are
# never imported in this notebook.
model = keras.Sequential()
model.add(layers.Conv2D(32, (3, 3), padding='same',
                        input_shape=x_train.shape[1:]))
model.add(layers.Activation('relu'))

# Single-filter 3x3 ReLU convolution over 32x32 RGB inputs.
model_horizontal_edge_relu = keras.Sequential()
model_horizontal_edge_relu.add(layers.Conv2D(filters=1, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)))

model_horizontal_edge_relu.layers[0].get_weights()[0].shape

(3, 3, 3, 1)

# Conv2D kernels are laid out as (height, width, in_channels, filters), so
# the 2-D filter has to span the first two axes and be copied along the
# third (channel) axis. Broadcasting `filter_conv.T` into an empty
# (3, 3, 3) array would instead copy it along the height axis and leave a
# kernel that varies with width only, i.e. not a horizontal-edge filter.
filter_3d_horizontal = np.repeat(filter_conv.T[:, :, np.newaxis], 3, axis=2).astype(float)

filter_3d_horizontal

array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 0.,  0.,  0.],
        [ 0.,  0.,  0.],
        [ 0.,  0.,  0.]],

       [[-1., -1., -1.],
        [-1., -1., -1.],
        [-1., -1., -1.]]])

# Install the 3-channel filter as the layer's kernel; the other weights are
# written back unchanged.
T = model_horizontal_edge_relu.layers[0].get_weights()
T[0] = filter_3d_horizontal.reshape(T[0].shape)
model_horizontal_edge_relu.layers[0].set_weights(T)

# CIFAR-10 training sample 4 in full colour, titled with its label entry.
plt.imshow(x_train[4])
plt.title(y_train[4])
plt.savefig("cifar-10-car.pdf", transparent=True)

# The same sample split into its three colour planes (last-axis index 0, 1, 2),
# each drawn with a matching single-hue colormap.
plt.imshow(x_train[4][:, :, 0], cmap='Reds')
plt.savefig("cifar-10-car-red.pdf", transparent=True)

plt.imshow(x_train[4][:, :, 1], cmap='Greens')
plt.savefig("cifar-10-car-green.pdf", transparent=True)

plt.imshow(x_train[4][:, :, 2], cmap='Blues')
plt.savefig("cifar-10-car-blue.pdf", transparent=True)

# Filter response reshaped to 30x30.
# NOTE(review): this uses sample 6, not sample 4 shown above, and the raw
# (unscaled) pixel values — confirm both are intended.
sns.heatmap(model_horizontal_edge_relu.predict(x_train[6:7]).reshape(30, 30),cmap='Greys')

x_train.shape[1:]

(32, 32, 3)

model_horizontal_edge_relu.predict(x_train[4:5]).shape

(1, 30, 30, 1)

# Single-filter 3x3 ReLU convolution over 32x32 RGB inputs.
model_vertical_edge_relu = keras.Sequential()
model_vertical_edge_relu.add(layers.Conv2D(filters=1, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)))

# Conv2D kernels are laid out as (height, width, in_channels, filters), so
# the 2-D filter has to span the first two axes and be copied along the
# third (channel) axis. Broadcasting `filter_conv` into an empty (3, 3, 3)
# array would instead place its [1, 0, -1] pattern on the channel axis,
# giving a colour-difference kernel rather than a vertical-edge filter.
filter_3d_vertical = np.repeat(filter_conv[:, :, np.newaxis], 3, axis=2).astype(float)

# Install the 3-channel filter as the layer's kernel; the other weights are
# written back unchanged.
T = model_vertical_edge_relu.layers[0].get_weights()
T[0] = filter_3d_vertical.reshape(T[0].shape)
model_vertical_edge_relu.layers[0].set_weights(T)

# Rescale the filter values from [-1, 1] to [0, 1] so imshow can draw them as RGB.
plt.imshow((filter_3d_horizontal+1)/2)

# With the channel axis last, the filter can be shown directly (no transpose).
plt.imshow((filter_3d_vertical+1)/2)

filter_3d_vertical

array([[[ 1.,  1.,  1.],
        [ 0.,  0.,  0.],
        [-1., -1., -1.]],

       [[ 1.,  1.,  1.],
        [ 0.,  0.,  0.],
        [-1., -1., -1.]],

       [[ 1.,  1.,  1.],
        [ 0.,  0.,  0.],
        [-1., -1., -1.]]])

(filter_3d_vertical+1)/2

array([[[1. , 1. , 1. ],
        [0.5, 0.5, 0.5],
        [0. , 0. , 0. ]],

       [[1. , 1. , 1. ],
        [0.5, 0.5, 0.5],
        [0. , 0. , 0. ]],

       [[1. , 1. , 1. ],
        [0.5, 0.5, 0.5],
        [0. , 0. , 0. ]]])

sns.heatmap(model_vertical_edge_relu.predict(x_train[6:7]).reshape(30, 30),cmap='Greys')

model_vertical_edge_relu.layers[0].get_weights()[0][0].shape

(3, 3, 1)

# Import the subpackage explicitly: a bare `import scipy` is not guaranteed
# to make `scipy.signal` available as an attribute.
import scipy.signal
from skimage import color

# Training sample 6 as a single 32x32 RGB image, converted to grayscale.
img = x_train[6:7].reshape(32, 32, 3)
img = color.rgb2gray(img)

# 3x3 sharpening kernel: 5 at the centre, -1 on the four direct neighbours.
sharpen_kernel = np.array([[0,-1,0],[-1,5,-1],[0,-1,0]])
# 'valid' keeps only positions where the kernel lies fully inside the image.
image_sharpen = scipy.signal.convolve2d(img, sharpen_kernel, 'valid')

from skimage import io

# Load the two sample photographs from the repository's assets folder.
beach = io.imread("../neural-networks/assets/cnn/beach.jpg")

beach.shape

(1704, 2272, 3)

buildings = io.imread("../neural-networks/assets/cnn/buildings.jpg")

buildings.shape

(1704, 2272, 3)

# Full-colour beach photo with the axes hidden.
plt.imshow(beach)
plt.axis('OFF')

# The beach photo split by last-axis index (0, 1, 2), each plane drawn with
# a matching single-hue colormap.
plt.imshow(beach[:, :, 0], cmap='Reds')
plt.axis('off')

plt.imshow(beach[:, :, 1], cmap='Greens')

plt.imshow(beach[:, :, 2], cmap='Blues')

# 3x3 kernel whose rows all read [1, 0, -1]; reused for every colour plane.
vertical_kernel = np.array([[1, 0, -1]] * 3)

# Beach photo: filter each colour plane separately ('valid' = no padding).
image_out_beach_red = scipy.signal.convolve2d(
    beach[:, :, 0], vertical_kernel, mode='valid')
plt.imshow(image_out_beach_red, cmap='Greys')

image_out_beach_green = scipy.signal.convolve2d(
    beach[:, :, 1], vertical_kernel, mode='valid')
plt.imshow(image_out_beach_green, cmap='Greens')
plt.axis('off')

image_out_beach_blue = scipy.signal.convolve2d(
    beach[:, :, 2], vertical_kernel, mode='valid')
plt.imshow(image_out_beach_blue, cmap='Blues')
plt.axis('off')

# Buildings photo, plane index 2, with the same kernel.
image_out_buildings_blue = scipy.signal.convolve2d(
    buildings[:, :, 2], vertical_kernel, mode='valid')
plt.imshow(image_out_buildings_blue, cmap='Greys')
plt.axis('off')

# Transposing the kernel turns its columns into rows.
horizontal_kernel = np.transpose(vertical_kernel)
horizontal_kernel

array([[ 1,  1,  1],
       [ 0,  0,  0],
       [-1, -1, -1]])

image_out_buildings_blue_horizontal = scipy.signal.convolve2d(
    buildings[:, :, 2], horizontal_kernel, mode='valid')
plt.imshow(image_out_buildings_blue_horizontal, cmap='Greys')
plt.axis('off')