Lecture 5: Neural Networks Foundation

Interactive Demo Notebook

In this notebook, we’ll build neural networks from scratch:

1. The XOR Problem - Why we need neural networks
2. The Perceptron - A single neuron
3. Activation Functions - Adding non-linearity
4. Multi-Layer Networks - Deep learning begins
5. PyTorch Basics - The real tool

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)
print("Ready to build neural networks! 🧠")

Part 1: The XOR Problem

Why linear models aren’t enough

# The XOR function
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])  # Output 1 if inputs are DIFFERENT

# Visualize
plt.figure(figsize=(8, 6))
colors = ['#e85a4f' if y == 0 else '#2a9d8f' for y in y_xor]
plt.scatter(X_xor[:, 0], X_xor[:, 1], c=colors, s=300, edgecolors='white', linewidth=3)

# Add labels
for i, (x, y, label) in enumerate(zip(X_xor[:, 0], X_xor[:, 1], y_xor)):
    plt.annotate(f'XOR={label}', (x, y), textcoords="offset points", 
                 xytext=(0, 15), ha='center', fontsize=12)

plt.xlabel('Input A', fontsize=14)
plt.ylabel('Input B', fontsize=14)
plt.title('XOR Problem: Can you draw ONE line to separate colors?', fontsize=14)
plt.xlim(-0.5, 1.5)
plt.ylim(-0.5, 1.5)
plt.grid(True, alpha=0.3)
plt.show()

print("πŸ”΄ Red = 0 (same inputs)")
print("🟒 Green = 1 (different inputs)")
print("\nπŸ’‘ No single straight line can separate them!")
# Try logistic regression (will fail!)
from sklearn.linear_model import LogisticRegression

log_model = LogisticRegression()
log_model.fit(X_xor, y_xor)
predictions = log_model.predict(X_xor)

print("Logistic Regression Results:")
print(f"Predictions: {predictions}")
print(f"Actual:      {y_xor}")
print(f"\n❌ Accuracy: {(predictions == y_xor).mean():.0%} - It can't solve XOR!")

Part 2: The Perceptron - A Single Neuron

\[\text{output} = \text{activation}(w_1 \cdot x_1 + w_2 \cdot x_2 + b)\]

def perceptron(x, weights, bias):
    """A single neuron with step activation"""
    weighted_sum = np.dot(x, weights) + bias
    return 1 if weighted_sum > 0 else 0

# AND gate with a perceptron
and_weights = np.array([1, 1])
and_bias = -1.5  # Threshold

print("AND Gate with Perceptron:")
print("-" * 30)
for x1, x2 in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    result = perceptron([x1, x2], and_weights, and_bias)
    expected = x1 and x2
    status = "βœ“" if result == expected else "βœ—"
    print(f"{x1} AND {x2} = {result} {status}")
# OR gate with a perceptron
or_weights = np.array([1, 1])
or_bias = -0.5  # Lower threshold

print("\nOR Gate with Perceptron:")
print("-" * 30)
for x1, x2 in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    result = perceptron([x1, x2], or_weights, or_bias)
    expected = x1 or x2
    status = "βœ“" if result == expected else "βœ—"
    print(f"{x1} OR {x2} = {result} {status}")

print("\nπŸ’‘ Single perceptrons can do AND and OR, but NOT XOR!")

Part 3: Activation Functions

The “switch” that decides if a neuron fires

# Common activation functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def tanh_act(x):
    return np.tanh(x)

x = np.linspace(-5, 5, 100)

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Sigmoid
axes[0].plot(x, sigmoid(x), 'b-', linewidth=3)
axes[0].axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
axes[0].axvline(x=0, color='gray', linestyle='--', alpha=0.5)
axes[0].set_title('Sigmoid: σ(x) = 1/(1+e⁻ˣ)', fontsize=12)
axes[0].set_xlabel('Input')
axes[0].set_ylabel('Output')
axes[0].set_ylim(-0.1, 1.1)
axes[0].text(2, 0.3, 'Squashes to\n[0, 1]', fontsize=10)

# ReLU
axes[1].plot(x, relu(x), 'g-', linewidth=3)
axes[1].axhline(y=0, color='gray', linestyle='--', alpha=0.5)
axes[1].axvline(x=0, color='gray', linestyle='--', alpha=0.5)
axes[1].set_title('ReLU: max(0, x)', fontsize=12)
axes[1].set_xlabel('Input')
axes[1].set_ylabel('Output')
axes[1].text(1, 3, 'Simple & fast!\nMost popular', fontsize=10)

# Tanh
axes[2].plot(x, tanh_act(x), 'r-', linewidth=3)
axes[2].axhline(y=0, color='gray', linestyle='--', alpha=0.5)
axes[2].axvline(x=0, color='gray', linestyle='--', alpha=0.5)
axes[2].set_title('Tanh: (eˣ - e⁻ˣ)/(eˣ + e⁻ˣ)', fontsize=12)
axes[2].set_xlabel('Input')
axes[2].set_ylabel('Output')
axes[2].set_ylim(-1.1, 1.1)
axes[2].text(1, -0.5, 'Squashes to\n[-1, 1]', fontsize=10)

plt.tight_layout()
plt.show()

print("πŸ’‘ ReLU is used most in modern deep learning - simple and effective!")

Part 4: Multi-Layer Network - Solving XOR!

class SimpleNeuralNetwork:
    """A 2-layer neural network to solve XOR"""
    
    def __init__(self):
        # Layer 1: 2 inputs → 2 hidden neurons
        # These weights are hand-picked to solve XOR!
        self.W1 = np.array([[1, 1], [1, 1]])  # hidden layer weights
        self.b1 = np.array([0.0, -1.0])       # hidden layer biases
        
        # Layer 2: 2 hidden → 1 output
        self.W2 = np.array([[1], [-2]])       # output weights
        self.b2 = np.array([-0.5])            # output bias
    
    def forward(self, x):
        # Hidden layer
        self.z1 = np.dot(x, self.W1) + self.b1
        self.h = relu(self.z1)  # ReLU activation
        
        # Output layer
        self.z2 = np.dot(self.h, self.W2) + self.b2
        self.out = sigmoid(self.z2)  # Sigmoid for probability
        
        return self.out
    
    def predict(self, x):
        return (self.forward(x) > 0.5).astype(int)

# Test our network!
nn = SimpleNeuralNetwork()

print("Neural Network solving XOR:")
print("-" * 40)
print(f"{'Input':<10} {'Hidden':<20} {'Output':<10} {'Prediction':<10}")
print("-" * 40)

for x, expected in zip(X_xor, y_xor):
    output = nn.forward(x)
    prediction = nn.predict(x)[0]
    hidden = nn.h
    status = "βœ“" if prediction == expected else "βœ—"
    print(f"{str(x):<10} {str(hidden):<20} {output[0]:.3f}      {prediction} {status}")

print("\nβœ… The neural network solved XOR!")

Part 5: PyTorch - The Real Tool

import torch
import torch.nn as nn
import torch.optim as optim

print(f"PyTorch version: {torch.__version__}")
print("Ready to use real deep learning! πŸ”₯")
# Define a neural network in PyTorch
class XORNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 4)   # 2 inputs → 4 hidden
        self.layer2 = nn.Linear(4, 1)   # 4 hidden → 1 output
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.sigmoid(self.layer2(x))
        return x

# Create model
model = XORNet()
print(model)
print(f"\nTotal parameters: {sum(p.numel() for p in model.parameters())}")
# Training data
X_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y_train = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

# Loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy
optimizer = optim.Adam(model.parameters(), lr=0.1)

# Training loop
losses = []
for epoch in range(1000):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    
    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    losses.append(loss.item())
    
    if epoch % 200 == 0:
        print(f"Epoch {epoch:4d}, Loss: {loss.item():.4f}")

print("\nTraining complete!")
# Plot training loss
plt.figure(figsize=(10, 4))
plt.plot(losses, color='#1e3a5f', linewidth=2)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('Loss', fontsize=12)
plt.title('Training Loss Over Time', fontsize=14)
plt.grid(True, alpha=0.3)
plt.show()

# Test the trained model
print("\nTrained Model Predictions:")
print("-" * 30)
with torch.no_grad():
    for x, expected in zip(X_train, y_train):
        output = model(x.unsqueeze(0))
        prediction = (output > 0.5).int().item()
        status = "βœ“" if prediction == expected.item() else "βœ—"
        print(f"{x.numpy()} β†’ {output.item():.3f} β†’ {prediction} (expected: {int(expected.item())}) {status}")

🎯 Exercises

# Exercise 1: Build a neural network for digit classification (MNIST)
# Hint: Use sklearn's load_digits and torch.nn.Linear
# Exercise 2: Try different learning rates (0.01, 0.1, 1.0)
# What happens to the loss curve?
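
A possible starting point for Exercise 2 (assuming the XORNet class and the X_train / y_train tensors from above): retrain from scratch once per learning rate and overlay the loss curves.

# Starter sketch for Exercise 2: compare learning rates
for lr in [0.01, 0.1, 1.0]:
    m = XORNet()                                 # fresh model for each run
    opt = optim.Adam(m.parameters(), lr=lr)
    crit = nn.BCELoss()
    history = []
    for _ in range(1000):
        opt.zero_grad()
        loss_val = crit(m(X_train), y_train)
        loss_val.backward()
        opt.step()
        history.append(loss_val.item())
    plt.plot(history, label=f'lr={lr}')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Effect of learning rate on training')
plt.legend()
plt.show()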

Summary

Concept     | What We Learned
----------- | ---------------
XOR Problem | Linear models can’t solve non-linear problems
Perceptron  | Single neuron = weighted sum + activation
Activation  | ReLU is popular: max(0, x)
Multi-layer | Hidden layers = power to solve complex problems
PyTorch     | Define model → Loss → Optimizer → Train loop