import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline
# config retina mode
%config InlineBackend.figure_format = 'retina'
Introduction to NumPy
Introduction to Numerical Computing with NumPy
Heavily Inspired by Jake VanderPlas’s Python Data Science Handbook
Array representation
# Load the street photo from disk and convert it to a uint8 NumPy array so
# that the pixel values can be inspected and manipulated directly.
img_path = "../datasets/images/street.jpg"
img = Image.open(img_path)
img_array = np.array(img).astype(np.uint8)

plt.imshow(img_array)
# remove axis
_ = plt.axis('off')
img_array
array([[[ 48, 51, 56],
[ 41, 44, 49],
[ 31, 34, 39],
...,
[153, 154, 149],
[129, 125, 116],
[120, 112, 101]],
[[ 47, 50, 55],
[ 41, 44, 49],
[ 30, 33, 38],
...,
[151, 152, 147],
[126, 122, 113],
[117, 109, 98]],
[[ 46, 49, 54],
[ 39, 42, 47],
[ 29, 32, 37],
...,
[147, 148, 143],
[121, 117, 108],
[113, 105, 94]],
...,
[[ 80, 93, 99],
[ 85, 98, 104],
[ 91, 104, 110],
...,
[ 83, 88, 92],
[ 85, 90, 94],
[ 88, 93, 97]],
[[ 69, 80, 86],
[ 75, 86, 92],
[ 82, 93, 99],
...,
[ 86, 94, 97],
[ 86, 94, 97],
[ 87, 95, 98]],
[[ 58, 65, 75],
[ 64, 71, 81],
[ 73, 80, 90],
...,
[ 90, 98, 101],
[ 88, 96, 99],
[ 88, 96, 99]]], dtype=uint8)
img_array.shape
(2000, 3000, 3)
img_array.dtype
dtype('uint8')
# rotate image by 90 degrees
rotated_img_array = np.rot90(img_array)

# Show the rotated picture without axis ticks.
plt.imshow(rotated_img_array.astype(np.uint8))
plt.axis('off')
# 0, 0 th pixel: indexing the first two axes returns that pixel's channel values
img_array[0, 0]
array([48, 51, 56], dtype=uint8)
# Increase R value of first quarter to max
# Work on a copy so the original img_array stays untouched.
new_img = img_array.copy()
# Top-left quadrant (first half of the rows and of the columns), channel 0.
new_img[:new_img.shape[0]//2, :new_img.shape[1]//2, 0] = 255

plt.imshow(new_img)
plt.axis('off')
%pip install pydub -q
Note: you may need to restart the kernel to use updated packages.
# load audio
from pydub import AudioSegment

audio_path = "../datasets/audio/pm-answer.mp3"

audio = AudioSegment.from_file(audio_path)
audio

# Expose the raw samples of the clip as a NumPy array.
audio_arr = np.array(audio.get_array_of_samples())
audio_arr
array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
# Plot the waveform against the sample index.
plt.plot(audio_arr)
plt.xlabel('Sample')
plt.ylabel('Amplitude')
Text(0, 0.5, 'Amplitude')
audio_arr.shape
(82368,)
audio.frame_rate
24000
# Convert plot to time as x-axis
# Sample k occurs at k / frame_rate seconds, so the axis must stop one sample
# short of the total duration; endpoint=False keeps the spacing at exactly
# 1 / frame_rate.
time = np.linspace(
    0,
    len(audio_arr) / audio.frame_rate,
    num=len(audio_arr),
    endpoint=False,
)

plt.plot(time, audio_arr)
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
Text(0, 0.5, 'Amplitude')
# Add a smoothing effect
from scipy.signal import savgol_filter

# Savitzky-Golay filter: window length 51 samples, polynomial order 3.
smoothed_audio_arr = savgol_filter(audio_arr, 51, 3)

plt.plot(time, smoothed_audio_arr)
from IPython.display import Audio

# Build players for the original and the smoothed samples at the clip's
# own frame rate.
Audio(audio_arr, rate=audio.frame_rate)
Audio(smoothed_audio_arr, rate=audio.frame_rate)
from sklearn.feature_extraction.text import CountVectorizer

# Sample text data
documents = [
    "The quick brown fox jumps over the lazy dog",
    "Never jump over the lazy dog quickly",
]

# Convert text to a bag-of-words representation
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(documents)

print("Feature names:", vectorizer.get_feature_names_out())
# X is converted with toarray() so the counts print as a plain 2D array.
print("Bag-of-words representation:\n", X.toarray())
Feature names: ['brown' 'dog' 'fox' 'jump' 'jumps' 'lazy' 'never' 'over' 'quick'
'quickly' 'the']
Bag-of-words representation:
[[1 1 1 0 1 1 0 1 1 0 2]
[0 1 0 1 0 1 1 1 0 1 1]]
Why not use Python lists instead of NumPy arrays?
import time

# Square the same ten million integers twice -- once with a Python list
# comprehension, once with a vectorized NumPy expression -- and report the
# elapsed time of each. perf_counter() is the clock intended for measuring
# short intervals.
n_nums = 10000000

# Using a Python list
lst = list(range(n_nums))
start = time.perf_counter()
lst_squared = [x**2 for x in lst]
end = time.perf_counter()
print(f"Python list computation time: {end - start: .2f} seconds")

# Using a NumPy array
arr = np.arange(n_nums)
start = time.perf_counter()
arr_squared = arr ** 2
end = time.perf_counter()
print(f"NumPy array computation time: {end - start: .2f} seconds")
Python list computation time: 0.21 seconds
NumPy array computation time: 0.01 seconds
Import & Version Check
import numpy as np
print("Using NumPy version:", np.__version__)
Using NumPy version: 2.1.2
Creating Arrays
NumPy arrays can come from Python lists or built-in functions.
# From a Python list
py_list = [1, 2, 3, 4]
# np.array builds a new ndarray from the list's elements.
arr_from_list = np.array(py_list)
print("Array from list:", arr_from_list)
Array from list: [1 2 3 4]
print(py_list)
[1, 2, 3, 4]
print(arr_from_list)
[1 2 3 4]
type(py_list), type(arr_from_list)
(list, numpy.ndarray)
# Seven zeros as a plain Python list ...
py_list = [0, 0, 0, 0, 0, 0, 0]

np.array(py_list)

# ... and the same thing created directly by NumPy as 32-bit integers.
zeros_arr = np.zeros(7, dtype=np.int32)
zeros_arr, py_list
(array([0, 0, 0, 0, 0, 0, 0], dtype=int32), [0, 0, 0, 0, 0, 0, 0])
# Using built-in functions
# A tuple shape gives a 2D array; a bare integer gives a 1D array.
zeros_arr = np.zeros((2, 3))
print("Zeros array:\n", zeros_arr)

zeros_1d = np.zeros(3)
print("1D Zeros array:", zeros_1d)
Zeros array:
[[0. 0. 0.]
[0. 0. 0.]]
1D Zeros array: [0. 0. 0.]
# A 3x2 array filled with ones.
ones_arr = np.ones((3, 2))
print("Ones array:\n", ones_arr)
Ones array:
[[1. 1.]
[1. 1.]
[1. 1.]]
list(range(0, 10, 2))
[0, 2, 4, 6, 8]
# NumPy counterpart of range(0, 10, 2): start, stop (exclusive), step.
range_arr = np.arange(0, 10, 2)
print("range_arr =", range_arr)
range_arr = [0 2 4 6 8]
# Unlike range(), np.arange also accepts a non-integer step.
np.arange(0, 10, 2.5)
array([0. , 2.5, 5. , 7.5])
def f(x):
    """Return the element-wise sine of ``x`` (a scalar or an array)."""
    return np.sin(x)


# Sample f from 0 up to (but not including) 2*pi in steps of 0.001.
x_range = np.arange(0, 2*np.pi, 0.001)
y = f(x_range)
x_range
array([0.000e+00, 1.000e-03, 2.000e-03, ..., 6.281e+00, 6.282e+00,
6.283e+00])
plt.plot(x_range, y)
# Five evenly spaced values from 0 to 1, both ends included.
linspace_arr = np.linspace(0, 1, 5)
print("linspace_arr =", linspace_arr)
linspace_arr = [0. 0.25 0.5 0.75 1. ]
# 3x3 identity matrix: ones on the diagonal, zeros elsewhere.
identity_mat_arr = np.eye(3)
print("Identity matrix array:\n", identity_mat_arr)
Identity matrix array:
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
Array Attributes
`shape`, `size`, `ndim`, and `dtype` are particularly important.
# 3x4 array of random integers drawn from 1..9 (the upper bound is exclusive).
random_arr = np.random.randint(1, 10, size=(3,4))

print("Array:\n", random_arr)
print("Shape:", random_arr.shape)
print("Size:", random_arr.size)
print("Dimensions:", random_arr.ndim)
print("Data Type:", random_arr.dtype)
Array:
[[1 1 2 8]
[8 4 5 2]
[3 2 9 8]]
Shape: (3, 4)
Size: 12
Dimensions: 2
Data Type: int64
Taking help
A trailing `?` (for example `np.zeros?`) and Tab completion after a `.` are useful for exploring the API.
np.zeros?
Docstring: zeros(shape, dtype=float, order='C', *, like=None) Return a new array of given shape and type, filled with zeros. Parameters ---------- shape : int or tuple of ints Shape of the new array, e.g., ``(2, 3)`` or ``2``. dtype : data-type, optional The desired data-type for the array, e.g., `numpy.int8`. Default is `numpy.float64`. order : {'C', 'F'}, optional, default: 'C' Whether to store multi-dimensional data in row-major (C-style) or column-major (Fortran-style) order in memory. like : array_like, optional Reference object to allow the creation of arrays which are not NumPy arrays. If an array-like passed in as ``like`` supports the ``__array_function__`` protocol, the result will be defined by it. In this case, it ensures the creation of an array object compatible with that passed in via this argument. .. versionadded:: 1.20.0 Returns ------- out : ndarray Array of zeros with the given shape, dtype, and order. See Also -------- zeros_like : Return an array of zeros with shape and type of input. empty : Return a new uninitialized array. ones : Return a new array setting values to one. full : Return a new array of given shape filled with value. Examples -------- >>> import numpy as np >>> np.zeros(5) array([ 0., 0., 0., 0., 0.]) >>> np.zeros((5,), dtype=int) array([0, 0, 0, 0, 0]) >>> np.zeros((2, 1)) array([[ 0.], [ 0.]]) >>> s = (2,2) >>> np.zeros(s) array([[ 0., 0.], [ 0., 0.]]) >>> np.zeros((2,), dtype=[('x', 'i4'), ('y', 'i4')]) # custom dtype array([(0, 0), (0, 0)], dtype=[('x', '<i4'), ('y', '<i4')]) Type: builtin_function_or_method
help(np.zeros)
Help on built-in function zeros in module numpy:
zeros(...)
zeros(shape, dtype=float, order='C', *, like=None)
Return a new array of given shape and type, filled with zeros.
Parameters
----------
shape : int or tuple of ints
Shape of the new array, e.g., ``(2, 3)`` or ``2``.
dtype : data-type, optional
The desired data-type for the array, e.g., `numpy.int8`. Default is
`numpy.float64`.
order : {'C', 'F'}, optional, default: 'C'
Whether to store multi-dimensional data in row-major
(C-style) or column-major (Fortran-style) order in
memory.
like : array_like, optional
Reference object to allow the creation of arrays which are not
NumPy arrays. If an array-like passed in as ``like`` supports
the ``__array_function__`` protocol, the result will be defined
by it. In this case, it ensures the creation of an array object
compatible with that passed in via this argument.
.. versionadded:: 1.20.0
Returns
-------
out : ndarray
Array of zeros with the given shape, dtype, and order.
See Also
--------
zeros_like : Return an array of zeros with shape and type of input.
empty : Return a new uninitialized array.
ones : Return a new array setting values to one.
full : Return a new array of given shape filled with value.
Examples
--------
>>> import numpy as np
>>> np.zeros(5)
array([ 0., 0., 0., 0., 0.])
>>> np.zeros((5,), dtype=int)
array([0, 0, 0, 0, 0])
>>> np.zeros((2, 1))
array([[ 0.],
[ 0.]])
>>> s = (2,2)
>>> np.zeros(s)
array([[ 0., 0.],
[ 0., 0.]])
>>> np.zeros((2,), dtype=[('x', 'i4'), ('y', 'i4')]) # custom dtype
array([(0, 0), (0, 0)],
dtype=[('x', '<i4'), ('y', '<i4')])
# size is the total number of elements: 2 * 3 = 6.
a = np.zeros((2, 3))
a.size
6
# Gotcha
# Shape of (N,) v/s (N, 1)
# An integer shape gives a 1D array; a tuple shape gives a 2D array.
a = np.zeros(3)
print("Shape of a:", a.shape)
print("a:", a)

b = np.zeros((3, 1))
print("Shape of b:", b.shape)
print("b:\n", b)

c = np.zeros((1, 3))
print("Shape of c:", c.shape)
print("c:\n", c)
Shape of a: (3,)
a: [0. 0. 0.]
Shape of b: (3, 1)
b:
[[0.]
[0.]
[0.]]
Shape of c: (1, 3)
c:
[[0. 0. 0.]]
In the above code, `a` is a vector (1D array), `b` is a matrix (2D array) with 3 rows and 1 column, and `c` is a 2D array with 1 row and 3 columns.
Indexing & Slicing
- Indexing for single elements: `arr[r, c]`
- Slicing for subarrays: `arr[start:stop:step]`
Remember that slices in NumPy are views—changing a slice changes the original array.
# Example array
x = np.array([[10, 20, 30], [40, 50, 60], [70, 80, 90]])
print("Original x:\n", x)
Original x:
[[10 20 30]
[40 50 60]
[70 80 90]]
# Accessing a single element
# If we want to select the second element of the first row, we need to specify row and column
print("Second element of the First Row:", x[0, 1])
Second element of the First Row: 20
# Note: We can also use x[0][1] to get the same result but it is less efficient because it first creates
# an array containing the first row and then selects the element from that row.
print("Second element of the First Row:", x[0][1])
Second element of the First Row: 20
print("x = ", x)
# Slicing examples
print("x[:1] =", x[:1]) # Slices up to the first row (row index 0)
print("x[1:] =", x[1:]) # Starts slicing from the second row (row index 1)
print("x[::2] =", x[::2]) # Selects every second row (row indices 0 and 2 in this case)
x = [[10 20 30]
[40 50 60]
[70 80 90]]
x[:1] = [[10 20 30]]
x[1:] = [[40 50 60]
[70 80 90]]
x[::2] = [[10 20 30]
[70 80 90]]
print("x = ", x)
# Slicing examples
print("x[:1] =", x[:1, :]) # Slices up to the first row (row index 0)
print("x[1:] =", x[1:, :]) # Starts slicing from the second row (row index 1)
print("x[::2] =", x[::2, :]) # Selects every second row (row indices 0 and 2 in this case)
x = [[10 20 30]
[40 50 60]
[70 80 90]]
x[:1] = [[10 20 30]]
x[1:] = [[40 50 60]
[70 80 90]]
x[::2] = [[10 20 30]
[70 80 90]]
# Changing a view changes the original array
# Start from a 4x5 array of random integers in 0..9.
arr2d = np.random.randint(10, size=(4,5))
print("\narr2d:\n", arr2d)
arr2d:
[[6 8 0 8 6]
[2 2 3 1 5]
[7 9 0 0 8]
[5 1 8 6 5]]
# Basic slicing returns a view onto arr2d's data, not a copy.
sub = arr2d[:2, :3]
print("\nSubarray:", sub)
Subarray: [[6 8 0]
[2 2 3]]
# Writing through the view also changes the corresponding element of arr2d.
sub[0,0] = 99
print("\nChanged subarray => arr2d:")
print(arr2d)
Changed subarray => arr2d:
[[99 8 0 8 6]
[ 2 2 3 1 5]
[ 7 9 0 0 8]
[ 5 1 8 6 5]]
# Create a copy of the array and then change the value
arr2d = np.random.randint(10, size=(4,5))

print("\narr2d:\n", arr2d)

# .copy() detaches the slice from arr2d, so later writes stay local to it.
arr2d_copy = arr2d[:2, :3].copy()
print("\nCopy of subarray:", arr2d_copy)

arr2d_copy[0,0] = 99

print("\nChanged copy of subarray ")
print(arr2d_copy)
print("\nSame original array => arr2d:")
print(arr2d)
arr2d:
[[5 1 8 0 0]
[0 1 4 6 3]
[3 8 6 9 6]
[3 5 2 8 3]]
Copy of subarray: [[5 1 8]
[0 1 4]]
Changed copy of subarray
[[99 1 8]
[ 0 1 4]]
Same original array => arr2d:
[[5 1 8 0 0]
[0 1 4 6 3]
[3 8 6 9 6]
[3 5 2 8 3]]
# Print the samples and their shape, then build a player for the full clip.
print(audio_arr)
print(audio_arr.shape)
Audio(audio_arr, rate=audio.frame_rate)
[0 0 0 ... 0 0 0]
(82368,)
# Get last 2 seconds of audio
# frame_rate samples make up one second, so a negative slice of
# 2 * frame_rate samples keeps the tail of the clip.
last_2_seconds = audio_arr[-2 * audio.frame_rate:]
Audio(last_2_seconds, rate=audio.frame_rate)
Reshaping
Use `reshape` to change the shape without altering the data.
# 1D array holding the integers 1..9.
grid = np.arange(1, 10)
print("Array, shape, dimensions:")
print(grid, grid.shape, grid.ndim)
Array, shape, dimensions:
[1 2 3 4 5 6 7 8 9] (9,) 1
# Rearrange the nine elements of grid into a 3x3 array.
grid_3x3 = grid.reshape((3,3))
print("\nArray, shape, dimensions:")
print(grid_3x3, grid_3x3.shape, grid_3x3.ndim)
Array, shape, dimensions:
[[1 2 3]
[4 5 6]
[7 8 9]] (3, 3) 2
# Same nine elements with an extra leading axis of length 1.
grid_temp = grid.reshape((1, 3,3))
print("\nArray, shape, dimensions:")
print(grid_temp, grid_temp.shape, grid_temp.ndim)
Array, shape, dimensions:
[[[1 2 3]
[4 5 6]
[7 8 9]]] (1, 3, 3) 3
# Deliberately invalid: the 9 elements of grid cannot fill a 2x5 shape,
# so this raises ValueError.
grid.reshape((2, 5))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[60], line 1 ----> 1 grid.reshape((2, 5)) ValueError: cannot reshape array of size 9 into shape (2,5)
# Example usage
# Random 28x28 grayscale image. randint's upper bound is exclusive, so 256
# is needed for the values to cover the whole 0..255 range.
random_2d_img = np.random.randint(0, 256, size=(28, 28))
plt.imshow(random_2d_img, cmap='gray')
print(random_2d_img.shape)
(28, 28)
# Flatten the 2D image to 1D
flattened_img = random_2d_img.flatten()
print("Flattened image shape:", flattened_img.shape)
Flattened image shape: (784,)
# Flatten again, this time by reshaping to the total number of elements.
N = flattened_img.size
flattened_img_using_reshape = random_2d_img.reshape(N)

print("Flattened image using reshape:", flattened_img_using_reshape.shape)
Flattened image using reshape: (784,)
# Using -1 in reshape
# -1 asks NumPy to work out that dimension from the array's size.
flattened_img_using_reshape = random_2d_img.reshape(-1)

print("Flattened image using reshape with -1:", flattened_img_using_reshape.shape)
Flattened image using reshape with -1: (784,)
flattened_img.shape
(784,)
# Using -1 in reshape in one dimension
# Fix one dimension at 28 and let NumPy infer the other.
two_d_img_1 = flattened_img.reshape(28, -1)
print("2D image shape:", two_d_img_1.shape)

two_d_img_2 = flattened_img.reshape(-1, 28)
print("2D image shape:", two_d_img_2.shape)

# Check if two arrays are equal
np.all(two_d_img_1 == two_d_img_2)
2D image shape: (28, 28)
2D image shape: (28, 28)
np.True_
Concatenation
`np.concatenate`, `np.vstack`, and `np.hstack` can help combine arrays.
# Join two 1D arrays end to end.
arrA = np.array([1, 2, 3])
arrB = np.array([4, 5, 6])
print("Concatenate:", np.concatenate([arrA, arrB]))

# Stack two 2x2 arrays vertically (more rows) and horizontally (more columns).
gridA = np.array([[1,2],[3,4]])
gridB = np.array([[5,6],[7,8]])
print("\nVStack:\n", np.vstack([gridA, gridB]))
print("\nHStack:\n", np.hstack([gridA, gridB]))
Concatenate: [1 2 3 4 5 6]
VStack:
[[1 2]
[3 4]
[5 6]
[7 8]]
HStack:
[[1 2 5 6]
[3 4 7 8]]
Universal Functions (Ufuncs)
Ufuncs are vectorized, element-by-element functions that allow fast operations on entire arrays without explicit Python loops. Each arithmetic operator (+, -, *, /, etc.) in NumPy is backed by a ufunc, and there are many more specialized ufuncs for math, stats, etc.
# Create a simple array
x = np.arange(5)
print("x:", x)

# Perform elementwise operations via ufuncs
y = x * 2      # multiplication
z = np.exp(x)  # exponential
print("y = x * 2:", y)
print("z = np.exp(x):", z)
x: [0 1 2 3 4]
y = x * 2: [0 2 4 6 8]
z = np.exp(x): [ 1. 2.71828183 7.3890561 20.08553692 54.59815003]
# Pure-Python equivalent of the elementwise doubling: an explicit loop
# (here a list comprehension) over every element.
x_list = range(5)
mul_two = [value * 2 for value in x_list]
print(mul_two)
[0, 2, 4, 6, 8]
Aggregations
Aggregations summarize array values into a single numeric result (or one result per axis). Common examples include minimum, maximum, sum, mean, median, standard deviation, etc.
# Ten random integers from 1..99 to aggregate over.
data = np.random.randint(1, 100, size=10)
print("data:", data)

# Basic aggregations
print("Sum:", np.sum(data))
print("Min:", np.min(data))
print("Max:", np.max(data))
print("Mean:", np.mean(data))
print("Standard Deviation:", np.std(data))

# On a 2D array, axis=0 reduces down the rows (one result per column) and
# axis=1 reduces across the columns (one result per row).
matrix = np.random.randint(0, 10, size=(3,4))
print("matrix:\n", matrix)
print("Min of each column:", np.min(matrix, axis=0))
print("Max of each row:", np.max(matrix, axis=1))
data: [38 61 9 74 1 5 60 77 71 94]
Sum: 490
Min: 1
Max: 94
Mean: 49.0
Standard Deviation: 31.849646779831012
matrix:
[[7 1 2 5]
[7 3 5 5]
[9 6 1 8]]
Min of each column: [7 1 1 5]
Max of each row: [7 7 9]
Broadcasting
Allows operations on arrays of different shapes by stretching dimensions when possible.
See this nice video
# Elementwise product of two arrays with the same shape.
a = np.array([1.0, 2.0, 3.0])
b = np.array([2.0, 2.0, 2.0])

c = a*b
print("c = a*b:", c)
print(c.shape)
c = a*b: [2. 4. 6.]
(3,)
# The scalar is broadcast across every element of a.
scalar = 2.0
d = a * scalar

print("d = a * scalar:", d)
print(d.shape)
d = a * scalar: [2. 4. 6.]
(3,)
# A (2, 3) array plus a (2, 1) array: Y's single column is broadcast
# across X's three columns.
X = np.array([[2, 6, 8], [4, 5, 3]])
print(X.shape)

Y = np.array([[2], [1]])
print(Y.shape)

Z = X + Y
print(Z.shape)
(2, 3)
(2, 1)
(2, 3)
Reference: https://numpy.org/doc/stable/user/basics.broadcasting.html
# a has shape (4, 3) and b has shape (3,); b is added to every row of a.
a = np.array([[ 0.0,  0.0,  0.0],
              [10.0, 10.0, 10.0],
              [20.0, 20.0, 20.0],
              [30.0, 30.0, 30.0]])
b = np.array([1.0, 2.0, 3.0])
print(a)
print(b)

# Broadcasting
print("a + b:\n", a + b)
[[ 0. 0. 0.]
[10. 10. 10.]
[20. 20. 20.]
[30. 30. 30.]]
[1. 2. 3.]
a + b:
[[ 1. 2. 3.]
[11. 12. 13.]
[21. 22. 23.]
[31. 32. 33.]]
Boolean Masks
Create a mask to select certain elements.
# Ten random integers from 1..19 and a boolean array marking those above 10.
data = np.random.randint(1, 20, size=10)
mask = data > 10
print("data:", data)
print("mask:", mask)
# Indexing with the mask keeps only the elements where it is True.
print("Values > 10:", data[mask])
data: [12 8 14 5 10 13 4 14 2 1]
mask: [ True False True False False True False True False False]
Values > 10: [12 14 13 14]
Sorting & Partitioning
- `np.sort(arr)` returns a sorted copy.
- `arr.sort()` sorts in-place.
- `np.argsort` returns the indices that would sort the array.
unsorted_arr = np.array([2,1,4,3,5])
# np.sort returns a new sorted array and leaves its argument untouched ...
print("Sorted copy:", np.sort(unsorted_arr))
print("Original:", unsorted_arr)
# ... whereas the .sort() method reorders the array itself.
unsorted_arr.sort()
print("In-place sort:", unsorted_arr)
Sorted copy: [1 2 3 4 5]
Original: [2 1 4 3 5]
In-place sort: [1 2 3 4 5]
Acknowledgments
Thanks to Shreyans Jain (BTech, IIT Gandhinagar) for creating the first version of this notebook.