import autograd.numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Make autograd's ArrayBox objects print like plain arrays
np.numpy_boxes.ArrayBox.__repr__ = lambda self: str(self._value)
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
y = np.array([[0], [1], [1], [0]])
X.shape, y.shape
((4, 2), (4, 1))
N, N_0 = X.shape
N, N_2 = y.shape
N_1 = 2
W = [np.array([0]), np.array([[1, 1], [1, 1]]), np.array([[1, -2]])]
b = [np.array([0]), np.array([[0], [-1]]), np.array([[0]])]
B = []

A = [X]
A.extend([None]*(len(W)-1))
Z = [None]*(len(W))
def relu(z):
    temp = z.copy()
    temp[temp < 0] = 0
    return temp
def sigmoid(z):
    return 1./(1 + np.exp(-z))
for i in range(1, len(W)):
    Z[i] = A[i-1]@(W[i].T) + b[i].T
    A[i] = relu(Z[i])
A[2] == y
array([[ True],
[ True],
[ True],
[ True]])
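To see why these hand-picked weights solve XOR, here is a minimal trace of the forward pass for the single input [1, 1], reusing the np imported above (the names x1, W1, b1, W2, h and out are purely illustrative and not part of the notebook):

x1 = np.array([1, 1])                      # a single XOR input
W1, b1 = np.array([[1, 1], [1, 1]]), np.array([0, -1])
W2 = np.array([1, -2])
h = np.maximum(x1@W1.T + b1, 0)            # ReLU hidden units -> [2, 1]
out = h@W2                                 # 2*1 + 1*(-2) = 0 = XOR(1, 1)
print(h, out)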
Excellent! Now let us start from random weight initialisations and use backpropagation to arrive at the same result.
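Concretely, the forward pass implemented below and the gradient-descent update we will apply are, as a sketch in the row-per-sample convention of the code (with $\alpha$ the learning rate):

$$Z_l = A_{l-1} W_l^\top + b_l^\top, \qquad A_l = \sigma(Z_l), \qquad W_l \leftarrow W_l - \alpha\,\frac{\partial \mathcal{L}}{\partial W_l}, \qquad b_l \leftarrow b_l - \alpha\,\frac{\partial \mathcal{L}}{\partial b_l}$$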
shapes = [X.shape[1], 2, 1]
activations = ['empty', 'sigmoid', 'sigmoid']
activation_func = {'sigmoid': sigmoid, 'relu': relu}
W = [None]*(len(shapes))
b = [None]*(len(shapes))

np.random.seed(0)
# Dummy
W[0] = np.array([0])
b[0] = np.array([0])

for i in range(1, len(shapes)):
    W[i] = np.random.randn(shapes[i], shapes[i-1])
    b[i] = np.random.randn(shapes[i], 1)

Z = [None]*(len(W))
Z[0] = np.array([0])

A = [X]
A.extend([None]*(len(W)-1))
def make_plot(iteration, loss, W, b, cmap='PRGn', close=True):
    h = 100
    xx, yy = np.meshgrid(np.linspace(-0.1, 1.1, h),
                         np.linspace(-0.1, 1.1, h))
    XX = np.c_[xx.ravel(), yy.ravel()]
    A = [XX]
    A.extend([None]*(len(W)-1))
    Z = [None]*(len(W))
    for i in range(1, len(W)):
        Z[i] = A[i-1]@(W[i].T) + b[i].T
        A[i] = sigmoid(Z[i])
    pred = A[2].reshape(xx.shape)
    pred[pred > 0.5] = 1
    pred[pred <= 0.5] = 0

    contours = plt.contourf(xx, yy, pred, h, cmap=cmap, alpha=0.2)
    plt.colorbar()
    plt.title(f"Iteration: {iteration}\n Loss: {loss}")
    plt.scatter(X[:, 0], X[:, 1], c=y.flatten(), cmap=cmap, s=200)
    plt.savefig(f"/home/nipunbatra-pc/Desktop/xor/{iteration:04}.png")
    if close:
        plt.clf()

make_plot(0, 2.9, W, b, close=False)
def objective(W, b):
    for i in range(1, len(W)):
        Z[i] = A[i-1]@(W[i].T) + b[i].T
        A[i] = activation_func[activations[i]](Z[i])
    y_hat = A[2]
    loss = (-y.T@np.log(y_hat) - (1-y).T@np.log(1-y_hat)).squeeze()
    return loss

objective(W, b)
array(2.9991465)
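The quantity being minimised is the binary cross-entropy summed over the four samples; written out in the notation of the code, with $\hat{y} = A_2$:

$$\mathcal{L}(W, b) = -\sum_{n=1}^{N}\Big[\,y_n \log \hat{y}_n + (1-y_n)\log(1-\hat{y}_n)\Big]$$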
from autograd import elementwise_grad as egrad
from autograd import grad
grad_objective = grad(objective, argnum=[0, 1])
(del_W0_auto, del_W1_auto, del_W2_auto), (del_b0_auto, del_b1_auto, del_b2_auto) = grad_objective(W, b)
del_W2_auto
array([[0.60353799, 0.35399637]])
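Before writing the gradients by hand, recall the standard simplification for a sigmoid output unit paired with cross-entropy: the error signal at the output layer is simply $A_2 - y$. In the row-per-sample notation used here, this gives (a sketch):

$$\frac{\partial \mathcal{L}}{\partial Z_2} = A_2 - y, \qquad \frac{\partial \mathcal{L}}{\partial W_2} = (A_2 - y)^\top A_1, \qquad \frac{\partial \mathcal{L}}{\partial b_2} = \sum_{n=1}^{N} (A_2 - y)_n$$

These are exactly the expressions checked against autograd below.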
del_W2_ours = (A[2]-y).T@A[1]
del_W2_ours, del_W2_auto
([[0.60353799 0.35399637]], array([[0.60353799, 0.35399637]]))
del_b2_ours = (A[2]-y).sum(axis=0).reshape(-1, 1)
del_b2_ours, del_b2_auto
([[0.6632421]], array([[0.6632421]]))
del_A1_ours = (A[2]-y)@W[2]
del_Z1_ours = np.multiply(del_A1_ours, sigmoid(Z[1])*(1-sigmoid(Z[1])))
del_W1_ours = del_Z1_ours.T@A[0]
np.allclose(del_W1_ours, del_W1_auto)
True
del_b1_ours = (del_Z1_ours.sum(axis=0)).reshape(-1, 1)
np.allclose(del_b1_ours, del_b1_auto)
True
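For completeness, the chain-rule steps behind these hidden-layer gradients are, as a sketch in the code's notation (with $\odot$ denoting element-wise multiplication and $\sigma'(z) = \sigma(z)(1-\sigma(z))$):

$$\frac{\partial \mathcal{L}}{\partial A_1} = \frac{\partial \mathcal{L}}{\partial Z_2}\, W_2, \qquad \frac{\partial \mathcal{L}}{\partial Z_1} = \frac{\partial \mathcal{L}}{\partial A_1} \odot \sigma'(Z_1), \qquad \frac{\partial \mathcal{L}}{\partial W_1} = \Big(\frac{\partial \mathcal{L}}{\partial Z_1}\Big)^\top A_0, \qquad \frac{\partial \mathcal{L}}{\partial b_1} = \sum_{n=1}^{N} \Big(\frac{\partial \mathcal{L}}{\partial Z_1}\Big)_n$$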
epochs = 140
alpha = 1
losses = np.zeros(epochs)

print_every = 20
W = [None]*(len(shapes))
b = [None]*(len(shapes))

np.random.seed(0)
# Dummy
W[0] = np.array([0])
b[0] = np.array([0])

for i in range(1, len(shapes)):
    W[i] = np.random.randn(shapes[i], shapes[i-1])
    b[i] = np.random.randn(shapes[i], 1)

Z = [None]*(len(W))
Z[0] = np.array([0])

A = [X]
A.extend([None]*(len(W)-1))

del_Z = [None]*(len(W)+1)
del_A = [None]*(len(W)+1)
del_W = [None]*(len(W))
del_b = [None]*(len(W))
for iteration in range(epochs):
    # Forward pass
    for i in range(1, len(W)):
        Z[i] = A[i-1]@(W[i].T) + b[i].T
        A[i] = activation_func[activations[i]](Z[i])

    y_hat = A[2]
    loss = (-y.T@np.log(y_hat) - (1-y).T@np.log(1-y_hat)).squeeze()
    losses[iteration] = loss
    if iteration % print_every == 0:
        print(iteration, loss)
        make_plot(iteration, loss, W, b, close=True)

    # Backward pass (manual gradients)
    del_A[2] = -np.multiply(y, A[2]) + np.multiply((1-y), (1-A[2]))
    del_Z[2] = A[2]-y
    del_W[2] = (A[2]-y).T@A[1]
    del_b[2] = (del_Z[2].sum(axis=0)).reshape(-1, 1)
    del_A[1] = del_Z[2]@W[2]
    del_Z[1] = np.multiply(del_A[1], sigmoid(Z[1])*(1-sigmoid(Z[1])))
    del_W[1] = del_Z[1].T@A[0]
    del_b[1] = (del_Z[1].sum(axis=0)).reshape(-1, 1)

    # Gradient-descent update
    for i in range(1, len(shapes)):
        W[i] = W[i] - alpha*del_W[i]
        b[i] = b[i] - alpha*del_b[i]
0 2.9991464995409807
20 2.850067543754094
40 2.5045921819726082
60 1.5756597251036364
80 0.5779054501565161
100 0.3097308274202594
120 0.2028529568023768
<Figure size 432x288 with 0 Axes>
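The stored losses can also be plotted to see the optimisation progress; a minimal sketch using the losses array filled above (not part of the original notebook):

plt.plot(losses)
plt.xlabel("Iteration")
plt.ylabel("Binary cross-entropy loss")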
make_plot(iteration, loss, W, b, close=False)
make_plot(0, 2.9, W, b, close=False)
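The cell that produced the output below seems to have been lost in conversion; a plausible reconstruction, assuming it simply thresholds the final activations A[2] at 0.5 to recover the XOR labels, is:

(A[2] > 0.5).astype(int)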
array([[0],
[1],
[1],
[0]])
!convert -delay 20 -loop 0 /home/nipunbatra-pc/Desktop/xor/*.png xor-own.gif