import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, reduce, repeat
import time
# sine dataset
X_whole = torch.linspace(0, 100, 10000)
y_whole = torch.sin(X_whole)

# split into train and test
X_train = X_whole[:8000]
y_train = y_whole[:8000]
X_test = X_whole[8000:]
y_test = y_whole[8000:]
# plot
plt.figure(figsize=(6, 4))
plt.plot(X_train, y_train, label='train')
plt.plot(X_test, y_test, label='test')
y_train.shape
torch.Size([8000])
context_length = 100     # history
prediction_length = 10   # future to predict
class MLP(nn.Module):
    def __init__(self, input_dim, prediction_length):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, prediction_length)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
i = 0
y_train[i:i+context_length].shape, y_train[i+context_length:i+context_length+prediction_length].shape
(torch.Size([100]), torch.Size([10]))
# create training data
def create_train_data(X, y, context_length, prediction_length):
    Xs, ys = [], []
    for i in range(len(X) - context_length - prediction_length):
        Xs.append(y[i:i+context_length])
        ys.append(y[i+context_length:i+context_length+prediction_length])
    return torch.stack(Xs), torch.stack(ys)

Xs, ys = create_train_data(X_train, y_train, context_length, prediction_length)
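As an aside, the same sliding windows can be built without a Python loop. A minimal sketch using Tensor.unfold (not part of the original notebook; note it yields one extra trailing window compared to the loop above):

# vectorized windowing sketch: every length-(context+prediction) slice of y_train
windows = y_train.unfold(0, context_length + prediction_length, 1)   # (7891, 110)
Xs_alt = windows[:, :context_length]                                 # history part
ys_alt = windows[:, context_length:]                                 # future part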
# Plot the first training data example
plt.figure(figsize=(6, 4))
plt.plot(Xs[0], label='context (history)')
plt.plot(range(context_length, context_length+prediction_length), ys[0], label='prediction (future)')
mlp = MLP(context_length, prediction_length)
print("Number of parameters:", sum(p.numel() for p in mlp.parameters()))

optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-3)
criterion = nn.MSELoss()
# training loop
n_epochs = 100
start_time = time.time()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    y_pred = mlp(Xs)
    loss = criterion(y_pred, ys)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch {epoch} - Loss {loss.item():.4f} Time (s) {time.time() - start_time: .2f}')
Number of parameters: 47242
Epoch 0 - Loss 0.4968 Time (s) 0.02
Epoch 10 - Loss 0.1697 Time (s) 0.15
Epoch 20 - Loss 0.0666 Time (s) 0.28
Epoch 30 - Loss 0.0046 Time (s) 0.41
Epoch 40 - Loss 0.0038 Time (s) 0.54
Epoch 50 - Loss 0.0019 Time (s) 0.64
Epoch 60 - Loss 0.0004 Time (s) 0.74
Epoch 70 - Loss 0.0002 Time (s) 0.84
Epoch 80 - Loss 0.0001 Time (s) 0.97
Epoch 90 - Loss 0.0001 Time (s) 1.12
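The loop above takes full-batch gradient steps on all ~7,890 windows at once, which is fine for a dataset this small. A mini-batch variant is sketched below; the batch size is an arbitrary choice, not something used in this notebook.

from torch.utils.data import TensorDataset, DataLoader

# mini-batch training sketch (hypothetical batch size)
loader = DataLoader(TensorDataset(Xs, ys), batch_size=256, shuffle=True)
for epoch in range(10):
    for xb, yb in loader:
        optimizer.zero_grad()
        batch_loss = criterion(mlp(xb), yb)
        batch_loss.backward()
        optimizer.step()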
X_test.shape, y_test.shape
(torch.Size([2000]), torch.Size([2000]))
# Test data
Xs_test, ys_test = create_train_data(X_test, y_test, context_length, prediction_length)
Xs_test.shape, ys_test.shape
(torch.Size([1890, 100]), torch.Size([1890, 10]))
y_hat = mlp(Xs_test)
# compute the loss
with torch.no_grad():
    loss = criterion(y_hat, ys_test)
print(f'Test Loss: {loss.item()}')
Test Loss: 5.9333124227123335e-05
Xs_test.shape, y_hat.shape
(torch.Size([1890, 100]), torch.Size([1890, 10]))
# Plot the first test data example
def plot_prediction(i):
    plt.figure(figsize=(6, 4))
    plt.plot(Xs_test[i], label='context (history)')
    plt.plot(range(context_length, context_length+prediction_length), ys_test[i], label='ground truth (future)')
    plt.plot(range(context_length, context_length+prediction_length), y_hat[i].detach().numpy(), label='prediction (future)')
    plt.legend()

plot_prediction(0)
# Simple Transformer
class Transformer(nn.Module):
    def __init__(self, input_dim, prediction_length, num_heads=8, num_layers=3):
        super(Transformer, self).__init__()
        self.prediction_length = prediction_length
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.emb = nn.Linear(input_dim, 32)
        encoder_layer = nn.TransformerEncoderLayer(d_model=32, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(32, prediction_length)

    def forward(self, x):
        x = self.emb(x)
        x = self.transformer_encoder(x)
        x = self.fc(x)
        return x
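Note that this model embeds the entire 100-step context window into a single 32-dimensional vector before the encoder. A common alternative, sketched below but not used in this notebook, is to treat each time step as its own token so self-attention runs over the 100 positions (class name and pooling choice are assumptions):

class TokenwiseTransformer(nn.Module):
    # sketch: one token per time step instead of one embedding per window
    def __init__(self, context_length, prediction_length, d_model=32, num_heads=4, num_layers=3):
        super().__init__()
        self.emb = nn.Linear(1, d_model)  # each scalar value becomes a d_model-dim token
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, prediction_length)

    def forward(self, x):                  # x: (batch, context_length)
        x = self.emb(x.unsqueeze(-1))      # (batch, context_length, d_model)
        x = self.encoder(x)                # self-attention over time steps
        return self.fc(x.mean(dim=1))      # mean-pool over time, predict the future window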
transformer = Transformer(context_length, prediction_length, num_heads=4, num_layers=3)
print("Number of parameters:", sum(p.numel() for p in transformer.parameters()))

optimizer = torch.optim.Adam(transformer.parameters(), lr=1e-3)
criterion = nn.MSELoss()
# training loop
n_epochs = 100

start_time = time.time()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    y_pred = transformer(Xs)
    loss = criterion(y_pred, ys)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch {epoch} - Loss {loss.item():.4f} Time {time.time() - start_time: .2f}')
Number of parameters: 416074
Epoch 0 - Loss 1.0997 Time 22.90
Epoch 10 - Loss 0.0648 Time 181.78
Epoch 20 - Loss 0.0348 Time 292.99
Epoch 30 - Loss 0.0191 Time 410.26
KeyboardInterrupt:
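Training was interrupted here; going by the timings above, each epoch takes on the order of 10-20 seconds on CPU. If a GPU is available (an assumption, this notebook trains on CPU), the same loop can be run on it without touching the variables above:

# GPU training sketch; falls back to CPU if no CUDA device is present
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Transformer(context_length, prediction_length, num_heads=4, num_layers=3).to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
Xs_dev, ys_dev = Xs.to(device), ys.to(device)
for epoch in range(100):
    opt.zero_grad()
    batch_loss = criterion(model(Xs_dev), ys_dev)
    batch_loss.backward()
    opt.step()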
# Prediction
with torch.no_grad():
    y_hat = transformer(Xs_test)
    loss = criterion(y_hat, ys_test)
print(f'Test Loss: {loss.item()}')
Test Loss: 0.017046289518475533
# Plot the first test data example
plot_prediction(0)
from transformers import InformerForPrediction, InformerConfig
config = InformerConfig(
    input_dim=context_length,
    prediction_length=prediction_length,
    num_heads=4,
    encoder_layers=2,
    decoder_layers=2,
    use_mask=True,
    forecast=True
)
informer = InformerForPrediction(config)
sum(p.numel() for p in informer.parameters())
134531
# training loop
n_epochs = 100

start_time = time.time()
for epoch in range(n_epochs):
    optimizer.zero_grad()
    y_pred = informer(Xs, past
TypeError: forward() missing 2 required positional arguments: 'past_time_features' and 'past_observed_mask'
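The error shows that InformerForPrediction.forward needs more than a bare values tensor; at minimum it also wants past_time_features and past_observed_mask. One way to see the full set of expected arguments before wiring up the training loop is to inspect the signature directly:

import inspect

# list the arguments InformerForPrediction.forward actually expects
print(inspect.signature(informer.forward))

From there the past/future tensors can be assembled to match; their exact shapes depend on the config (e.g. lags_sequence), so that part is left as a sketch here.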