import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')
# Global plot styling used by every figure in this notebook.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)
print("Ready to build neural networks! 🧠")
Lecture 5: Neural Networks Foundation
Interactive Demo Notebook
In this notebook, we'll build neural networks from scratch: 1. The XOR Problem - Why we need neural networks 2. The Perceptron - A single neuron 3. Activation Functions - Adding non-linearity 4. Multi-Layer Networks - Deep learning begins 5. PyTorch Basics - The real tool
Part 1: The XOR Problem
Why linear models aren't enough
# The XOR function: output is 1 exactly when the two inputs differ.
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])  # Output 1 if inputs are DIFFERENT

# Visualize the four XOR points, colored by their label.
plt.figure(figsize=(8, 6))
colors = ['#e85a4f' if y == 0 else '#2a9d8f' for y in y_xor]
plt.scatter(X_xor[:, 0], X_xor[:, 1], c=colors, s=300, edgecolors='white', linewidth=3)

# Annotate each point with its XOR value, offset above the marker.
for i, (x, y, label) in enumerate(zip(X_xor[:, 0], X_xor[:, 1], y_xor)):
    plt.annotate(f'XOR={label}', (x, y), textcoords="offset points",
                 xytext=(0, 15), ha='center', fontsize=12)

plt.xlabel('Input A', fontsize=14)
plt.ylabel('Input B', fontsize=14)
plt.title('XOR Problem: Can you draw ONE line to separate colors?', fontsize=14)
plt.xlim(-0.5, 1.5)
plt.ylim(-0.5, 1.5)
plt.grid(True, alpha=0.3)
plt.show()

print("🔴 Red = 0 (same inputs)")
print("🟢 Green = 1 (different inputs)")
print("\n💡 No single straight line can separate them!")
# Try logistic regression (will fail!)
# Logistic regression is a linear classifier, so it cannot fit XOR.
from sklearn.linear_model import LogisticRegression

log_model = LogisticRegression()
log_model.fit(X_xor, y_xor)
predictions = log_model.predict(X_xor)

print("Logistic Regression Results:")
print(f"Predictions: {predictions}")
print(f"Actual: {y_xor}")
print(f"\n❌ Accuracy: {(predictions == y_xor).mean():.0%} - It can't solve XOR!")
# Part 2: The Perceptron - A Single Neuron
\[output = activation(w_1 \cdot x_1 + w_2 \cdot x_2 + b)\]
def perceptron(x, weights, bias):
    """A single artificial neuron with a step activation.

    Computes the weighted sum of the inputs plus the bias, then fires
    (returns 1) when that sum is strictly positive, otherwise returns 0.
    """
    pre_activation = np.dot(x, weights) + bias
    return int(pre_activation > 0)
# AND gate with a perceptron: fires only when BOTH inputs are 1,
# so the threshold (-bias) sits between 1 and 2.
and_weights = np.array([1, 1])
and_bias = -1.5  # Threshold

print("AND Gate with Perceptron:")
print("-" * 30)
for x1, x2 in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    result = perceptron([x1, x2], and_weights, and_bias)
    expected = x1 and x2
    status = "✓" if result == expected else "✗"
    print(f"{x1} AND {x2} = {result} {status}")

# OR gate with a perceptron: fires when AT LEAST one input is 1,
# so the threshold sits between 0 and 1.
or_weights = np.array([1, 1])
or_bias = -0.5  # Lower threshold

print("\nOR Gate with Perceptron:")
print("-" * 30)
for x1, x2 in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    result = perceptron([x1, x2], or_weights, or_bias)
    expected = x1 or x2
    status = "✓" if result == expected else "✗"
    print(f"{x1} OR {x2} = {result} {status}")

print("\n💡 Single perceptrons can do AND and OR, but NOT XOR!")
# Part 3: Activation Functions
The 'switch' that decides if a neuron fires
# Common activation functions

def sigmoid(x):
    """Logistic sigmoid: squashes any real input into (0, 1)."""
    return 1.0 / (1.0 + np.exp(-x))


def relu(x):
    """Rectified linear unit: passes positives through, zeroes negatives."""
    return np.maximum(0, x)


def tanh_act(x):
    """Hyperbolic tangent: squashes any real input into (-1, 1)."""
    return np.tanh(x)
# Plot the three activation functions side by side.
x = np.linspace(-5, 5, 100)
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Sigmoid: S-shaped curve centered at (0, 0.5).
axes[0].plot(x, sigmoid(x), 'b-', linewidth=3)
axes[0].axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
axes[0].axvline(x=0, color='gray', linestyle='--', alpha=0.5)
axes[0].set_title('Sigmoid: σ(x) = 1/(1+e⁻ˣ)', fontsize=12)
axes[0].set_xlabel('Input')
axes[0].set_ylabel('Output')
axes[0].set_ylim(-0.1, 1.1)
axes[0].text(2, 0.3, 'Squashes to\n[0, 1]', fontsize=10)

# ReLU: identity for positives, zero for negatives.
axes[1].plot(x, relu(x), 'g-', linewidth=3)
axes[1].axhline(y=0, color='gray', linestyle='--', alpha=0.5)
axes[1].axvline(x=0, color='gray', linestyle='--', alpha=0.5)
axes[1].set_title('ReLU: max(0, x)', fontsize=12)
axes[1].set_xlabel('Input')
axes[1].set_ylabel('Output')
axes[1].text(1, 3, 'Simple & fast!\nMost popular', fontsize=10)

# Tanh: like sigmoid but zero-centered.
axes[2].plot(x, tanh_act(x), 'r-', linewidth=3)
axes[2].axhline(y=0, color='gray', linestyle='--', alpha=0.5)
axes[2].axvline(x=0, color='gray', linestyle='--', alpha=0.5)
axes[2].set_title('Tanh: (eˣ - e⁻ˣ)/(eˣ + e⁻ˣ)', fontsize=12)
axes[2].set_xlabel('Input')
axes[2].set_ylabel('Output')
axes[2].set_ylim(-1.1, 1.1)
axes[2].text(1, -0.5, 'Squashes to\n[-1, 1]', fontsize=10)

plt.tight_layout()
plt.show()

print("💡 ReLU is used most in modern deep learning - simple and effective!")
# Part 4: Multi-Layer Network - Solving XOR!
class SimpleNeuralNetwork:
    """A 2-layer neural network to solve XOR.

    The weights are hand-picked rather than learned: the hidden layer
    builds OR-like and AND-like features, and the output layer combines
    them so only the "exactly one input is on" case fires.
    """

    def __init__(self):
        # Layer 1: 2 inputs → 2 hidden neurons.
        # These weights are hand-picked to solve XOR!
        self.W1 = np.array([[1, 1], [1, 1]])  # hidden layer weights
        self.b1 = np.array([-0.5, -1.5])      # hidden layer biases
        # Layer 2: 2 hidden → 1 output.
        self.W2 = np.array([[1], [-2]])       # output weights
        self.b2 = np.array([-0.5])            # output bias

    def forward(self, x):
        """Run a forward pass; intermediate values are cached on self."""
        # Hidden layer
        self.z1 = np.dot(x, self.W1) + self.b1
        self.h = relu(self.z1)                # ReLU activation
        # Output layer
        self.z2 = np.dot(self.h, self.W2) + self.b2
        self.out = sigmoid(self.z2)           # Sigmoid for probability
        return self.out

    def predict(self, x):
        """Threshold the sigmoid output at 0.5 to get a 0/1 class label."""
        return (self.forward(x) > 0.5).astype(int)
# Test our hand-crafted network on all four XOR inputs.
nn = SimpleNeuralNetwork()

print("Neural Network solving XOR:")
print("-" * 40)
print(f"{'Input':<10} {'Hidden':<20} {'Output':<10} {'Prediction':<10}")
print("-" * 40)
for x, expected in zip(X_xor, y_xor):
    output = nn.forward(x)
    prediction = nn.predict(x)[0]
    hidden = nn.h  # hidden activations cached by forward()
    status = "✓" if prediction == expected else "✗"
    print(f"{str(x):<10} {str(hidden):<20} {output[0]:.3f} {prediction} {status}")

print("\n✅ The neural network solved XOR!")
# Part 5: PyTorch - The Real Tool
import torch
import torch.nn as nn  # NOTE: rebinds `nn`, shadowing the SimpleNeuralNetwork instance above
import torch.optim as optim

print(f"PyTorch version: {torch.__version__}")
print("Ready to use real deep learning! 🔥")
# Define a neural network in PyTorch
class XORNet(nn.Module):
    """A minimal feed-forward network for XOR: 2 → 4 → 1, ReLU then Sigmoid."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 4)  # 2 inputs → 4 hidden
        self.layer2 = nn.Linear(4, 1)  # 4 hidden → 1 output
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        hidden = self.relu(self.layer1(x))
        return self.sigmoid(self.layer2(hidden))
# Instantiate the network and inspect its structure and size.
model = XORNet()
print(model)
print(f"\nTotal parameters: {sum(p.numel() for p in model.parameters())}")

# Training data: all four XOR input/output pairs as float tensors.
X_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y_train = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

# Loss function and optimizer.
criterion = nn.BCELoss()  # Binary Cross Entropy
optimizer = optim.Adam(model.parameters(), lr=0.1)

# Standard training loop: forward pass, loss, backward pass, step.
losses = []
for epoch in range(1000):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
    # Log progress every 200 epochs.
    if epoch % 200 == 0:
        print(f"Epoch {epoch:4d}, Loss: {loss.item():.4f}")

print("\nTraining complete!")
# Plot training loss
# Plot the loss curve recorded during training.
plt.figure(figsize=(10, 4))
plt.plot(losses, color='#1e3a5f', linewidth=2)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.title('Training Loss Over Time', fontsize=14)
plt.grid(True, alpha=0.3)
plt.show()

# Test the trained model on all four XOR inputs (no gradients needed).
print("\nTrained Model Predictions:")
print("-" * 30)
with torch.no_grad():
    for x, expected in zip(X_train, y_train):
        output = model(x.unsqueeze(0))
        prediction = (output > 0.5).int().item()
        status = "✓" if prediction == expected.item() else "✗"
        print(f"{x.numpy()} → {output.item():.3f} → {prediction} (expected: {int(expected.item())}) {status}")
# 🎯 Exercises
# Exercise 1: Build a neural network for digit classification (MNIST)
# Hint: Use sklearn's load_digits and torch.nn.Linear
# Exercise 2: Try different learning rates (0.01, 0.1, 1.0)
# What happens to the loss curve?
Summary
| Concept | What We Learned |
|---|---|
| XOR Problem | Linear models can't solve non-linear problems |
| Perceptron | Single neuron = weighted sum + activation |
| Activation | ReLU is popular: max(0, x) |
| Multi-layer | Hidden layers = power to solve complex problems |
| PyTorch | Define model β Loss β Optimizer β Train loop |