import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
import seaborn as sns

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Set random seed for reproducibility
torch.manual_seed(0)

# Torch version
torch.__version__
### Example to illustrate accumulation of gradients

theta = torch.tensor(1.0, requires_grad=True)
x1 = torch.tensor(1.0)
x2 = torch.tensor(2.0)

L1 = theta * x1
L2 = theta * x2
L = L1 + L2
L.backward()
theta.grad
tensor(3.)
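Since L = theta*x1 + theta*x2, we have dL/dtheta = x1 + x2 = 3: the gradient contributions from L1 and L2 are accumulated (summed) into theta.grad rather than overwriting each other.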
Why do we need to use torch.no_grad() in the test phase?
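As a reminder of the mechanics, here is a minimal sketch of an evaluation pass (model and test_inputs are placeholder names, not defined in this section). Inside torch.no_grad() no computation graph is recorded, so intermediate activations are not kept around for a backward pass that never happens, saving memory and compute.

# Sketch: evaluation without tracking gradients (model/test_inputs are assumed names)
model.eval()                       # put layers such as dropout/batchnorm in eval mode
with torch.no_grad():              # no computation graph is built inside this block
    test_outputs = model(test_inputs)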
Why do we need to zero out the gradients in the training phase after each update?
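Building on the accumulation example above, a small sketch (a new loop, not from the original notebook) of what happens when backward() is called repeatedly without resetting the gradient:

# Sketch: repeated backward passes without zeroing accumulate gradients
theta = torch.tensor(1.0, requires_grad=True)
for step in range(3):
    loss = theta * 3.0             # dloss/dtheta = 3
    loss.backward()
    print(theta.grad)              # prints 3., 6., 9. -- gradients add up across steps
    # theta.grad.zero_()           # uncomment to reset the gradient after each step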
model = SimpleModel()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Dummy data
inputs = torch.randn((100, 10))
targets = torch.randn((100, 1))

# Training loop
for epoch in range(100):
    # Forward pass
    outputs = model(inputs)

    # Compute the loss
    loss = criterion(outputs, targets)

    # Zero the gradients
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    # Update the weights
    optimizer.step()

    # Print the loss every 10 epochs
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')
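The loop above assumes SimpleModel was defined in an earlier cell. For a self-contained run, one plausible definition consistent with the (100, 10) inputs and (100, 1) targets is sketched below; this is an assumption, not the notebook's original definition.

# Assumed definition (not from this section): a simple linear module
# mapping 10 input features to 1 output, matching the dummy data shapes.
class SimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(10, 1)

    def forward(self, x):
        return self.linear(x)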