import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, reduce, repeat
import time
# sine dataset
X_whole = torch.linspace(0, 100, 10000)
y_whole = torch.sin(X_whole)
# split into train and test
X_train = X_whole[:8000]
y_train = y_whole[:8000]
X_test = X_whole[8000:]
y_test = y_whole[8000:]
# plot
plt.figure(figsize=(6, 4))
plt.plot(X_train, y_train, label='train')
plt.plot(X_test, y_test, label='test')
y_train.shape
torch.Size([8000])
context_length = 100      # history
prediction_length = 10    # future to predict
class MLP(nn.Module):
    def __init__(self, input_dim, prediction_length):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, prediction_length)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
i = 0
y_train[i:i+context_length].shape, y_train[i+context_length:i+context_length+prediction_length].shape
(torch.Size([100]), torch.Size([10]))
# create training data
def create_train_data(X, y, context_length, prediction_length):
    Xs, ys = [], []
    for i in range(len(X) - context_length - prediction_length):
        Xs.append(y[i:i+context_length])
        ys.append(y[i+context_length:i+context_length+prediction_length])
    return torch.stack(Xs), torch.stack(ys)

Xs, ys = create_train_data(X_train, y_train, context_length, prediction_length)
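As a quick sanity check (added here, not part of the original notebook), each window produced by create_train_data should be a contiguous slice of the training series:

# Sanity check: the first context window and its target come straight from y_train.
assert torch.equal(Xs[0], y_train[:context_length])
assert torch.equal(ys[0], y_train[context_length:context_length + prediction_length])
Xs.shape, ys.shape  # expected: (torch.Size([7890, 100]), torch.Size([7890, 10]))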
# Plot the first training data example
plt.figure(figsize=(6, 4))
plt.plot(Xs[0], label='context (history)')
plt.plot(range(context_length, context_length+prediction_length), ys[0], label='prediction (future)')
mlp = MLP(context_length, prediction_length)
print("Number of parameters:", sum(p.numel() for p in mlp.parameters()))

optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-3)
criterion = nn.MSELoss()
# training loop
n_epochs = 100
start_time = time.time()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    y_pred = mlp(Xs)
    loss = criterion(y_pred, ys)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch {epoch} - Loss {loss.item():.4f} Time (s) {time.time() - start_time: .2f}')
Number of parameters: 47242
Epoch 0 - Loss 0.4968 Time (s) 0.02
Epoch 10 - Loss 0.1697 Time (s) 0.15
Epoch 20 - Loss 0.0666 Time (s) 0.28
Epoch 30 - Loss 0.0046 Time (s) 0.41
Epoch 40 - Loss 0.0038 Time (s) 0.54
Epoch 50 - Loss 0.0019 Time (s) 0.64
Epoch 60 - Loss 0.0004 Time (s) 0.74
Epoch 70 - Loss 0.0002 Time (s) 0.84
Epoch 80 - Loss 0.0001 Time (s) 0.97
Epoch 90 - Loss 0.0001 Time (s) 1.12
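As an aside (added here, not from the original notebook), the printed parameter count can be verified by tallying the four linear layers by hand:

# Hand tally of MLP parameters (weights + biases per layer); should equal 47242.
n_fc1 = 100 * 128 + 128
n_fc2 = 128 * 128 + 128
n_fc3 = 128 * 128 + 128
n_fc4 = 128 * 10 + 10
print(n_fc1 + n_fc2 + n_fc3 + n_fc4)  # 47242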
X_test.shape, y_test.shape
(torch.Size([2000]), torch.Size([2000]))
# Test data
Xs_test, ys_test = create_train_data(X_test, y_test, context_length, prediction_length)
Xs_test.shape, ys_test.shape
(torch.Size([1890, 100]), torch.Size([1890, 10]))
y_hat = mlp(Xs_test)

# compute the loss
with torch.no_grad():
    loss = criterion(y_hat, ys_test)
print(f'Test Loss: {loss.item()}')
Test Loss: 5.9333124227123335e-05
Xs_test.shape, y_hat.shape
(torch.Size([1890, 100]), torch.Size([1890, 10]))
# Plot the first test data example
def plot_prediction(i):
    plt.figure(figsize=(6, 4))
    plt.plot(Xs_test[i], label='context (history)')
    plt.plot(range(context_length, context_length+prediction_length), ys_test[i], label='ground truth (future)')
    plt.plot(range(context_length, context_length+prediction_length), y_hat[i].detach().numpy(), label='prediction (future)')
    plt.legend()

plot_prediction(0)
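The MLP only predicts prediction_length steps at a time. A rolling (autoregressive) forecast, sketched below and not part of the original notebook, feeds its own predictions back in to reach further into the future; the names window, chunks and forecast are illustrative.

# Rolling forecast sketch: repeatedly predict 10 steps and slide the context window.
with torch.no_grad():
    window = Xs_test[0].clone()     # start from the first test context
    chunks = []
    for _ in range(5):              # 5 * prediction_length = 50 future steps
        step = mlp(window)          # predict the next prediction_length values
        chunks.append(step)
        window = torch.cat([window[prediction_length:], step])  # slide the window
    forecast = torch.cat(chunks)
forecast.shape  # expected: torch.Size([50])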
# Simple Transformer
class Transformer(nn.Module):
    def __init__(self, input_dim, prediction_length, num_heads=8, num_layers=3):
        super(Transformer, self).__init__()
        self.prediction_length = prediction_length
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.emb = nn.Linear(input_dim, 32)
        encoder_layer = nn.TransformerEncoderLayer(d_model=32, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(32, prediction_length)

    def forward(self, x):
        x = self.emb(x)
        x = self.transformer_encoder(x)
        x = self.fc(x)
        return x
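One subtlety worth noting (this probe is added, not from the original notebook): emb maps each whole 100-step window to a single 32-dimensional vector, and because the result is a 2-D tensor, nn.TransformerEncoder treats it as one unbatched sequence, so self-attention runs across the windows in the batch rather than across time steps. That likely explains the long epoch times below.

# Shape probe (illustrative): a batch of 16 windows becomes a 16-token "sequence".
probe = Transformer(context_length, prediction_length, num_heads=4, num_layers=3)
with torch.no_grad():
    out = probe(Xs[:16])   # (16, 100) -> emb -> (16, 32) -> encoder -> (16, 32) -> fc -> (16, 10)
out.shape  # expected: torch.Size([16, 10])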
transormer = Transformer(context_length, prediction_length, num_heads=4, num_layers=3)
print("Number of parameters:", sum(p.numel() for p in transormer.parameters()))

optimizer = torch.optim.Adam(transormer.parameters(), lr=1e-3)
criterion = nn.MSELoss()
# training loop
n_epochs = 100

start_time = time.time()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    y_pred = transormer(Xs)
    loss = criterion(y_pred, ys)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch {epoch} - Loss {loss.item():.4f} Time {time.time() - start_time: .2f}')
Number of parameters: 416074
Epoch 0 - Loss 1.0997 Time 22.90
Epoch 10 - Loss 0.0648 Time 181.78
Epoch 20 - Loss 0.0348 Time 292.99
Epoch 30 - Loss 0.0191 Time 410.26
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[110], line 14
     12 y_pred = transormer(Xs)
     13 loss = criterion(y_pred, ys)
---> 14 loss.backward()
     15 optimizer.step()
     16 if epoch % 10 == 0:
...
KeyboardInterrupt:
# Prediction
with torch.no_grad():
    y_hat = transormer(Xs_test)
    loss = criterion(y_hat, ys_test)
print(f'Test Loss: {loss.item()}')
Test Loss: 0.017046289518475533
# Plot the first test data example
plot_prediction(0)
from transformers import InformerForPrediction, InformerConfig
config = InformerConfig(
    input_dim=context_length,
    prediction_length=prediction_length,
    num_heads=4,
    encoder_layers=2,
    decoder_layers=2,
    use_mask=True,
    forecast=True
)
informer = InformerForPrediction(config)
sum(p.numel() for p in informer.parameters())
134531
# training loop
n_epochs = 100

start_time = time.time()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    y_pred = informer(Xs)
    loss = criterion(y_pred, ys)
    loss.backward()
    optimizer.step()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[117], line 7
      5 for epoch in range(n_epochs):
      6     optimizer.zero_grad()
----> 7 y_pred = informer(Xs)
      8 loss = criterion(y_pred, ys)
      9 loss.backward()
...
TypeError: forward() missing 2 required positional arguments: 'past_time_features' and 'past_observed_mask'
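The TypeError shows that InformerForPrediction does not take a bare tensor: the Hugging Face API expects past values together with time features and an observed-value mask (and future values plus future time features during training). The sketch below is not from the original notebook; config_hf, informer_hf and the placeholder batch are illustrative, and it assumes the required history length is context_length + max(lags_sequence).

# Minimal sketch of calling InformerForPrediction on the sine series.
from transformers import InformerConfig, InformerForPrediction

config_hf = InformerConfig(
    prediction_length=prediction_length,
    context_length=context_length,
    input_size=1,          # univariate series
    num_time_features=1,   # one time feature: a simple time index
    lags_sequence=[1],     # keep the extra history requirement small
)
informer_hf = InformerForPrediction(config_hf)

past_length = config_hf.context_length + max(config_hf.lags_sequence)  # 101 here
batch_size = 32

# Placeholder batch built from the training series, just to show the expected shapes.
past_values = torch.stack([y_train[i:i + past_length] for i in range(batch_size)])
future_values = torch.stack([y_train[i + past_length:i + past_length + prediction_length]
                             for i in range(batch_size)])
time_index = torch.arange(past_length + prediction_length, dtype=torch.float32)
past_time_features = time_index[:past_length].reshape(1, -1, 1).repeat(batch_size, 1, 1)
future_time_features = time_index[past_length:].reshape(1, -1, 1).repeat(batch_size, 1, 1)
past_observed_mask = torch.ones(batch_size, past_length)

outputs = informer_hf(
    past_values=past_values,
    past_time_features=past_time_features,
    past_observed_mask=past_observed_mask,
    future_values=future_values,
    future_time_features=future_time_features,
)
outputs.loss  # negative log-likelihood of the forecast distribution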