How to do it...

Execute the following steps to train a multilayer perceptron in PyTorch.

Import the libraries:

import yfinance as yf
import numpy as np

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import (Dataset, TensorDataset, 
                             DataLoader, Subset)

from sklearn.metrics import mean_squared_error

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

Define the parameters:

# data
TICKER = 'ANF'  # ticker symbol passed to yf.download
START_DATE = '2010-01-02'  # start of the download window
END_DATE = '2019-12-31'  # end of the download window
N_LAGS = 3  # number of consecutive past observations used as features

# neural network
VALID_SIZE = 12  # number of most recent samples held out for validation
BATCH_SIZE = 5  # samples per batch in both data loaders
N_EPOCHS = 1000  # number of passes over the training set

Download the stock prices of Abercrombie and Fitch and process the data:

# Download the quotes for TICKER and keep the last observation of every month.
df = yf.download(
    TICKER, start=START_DATE, end=END_DATE, progress=False
).resample("M").last()

# Monthly adjusted close prices as a NumPy array.
prices = df['Adj Close'].values

Define a function for transforming time series into a dataset for the MLP:

def create_input_data(series, n_lags=1):
    """Turn a univariate series into lagged features and next-step targets.

    Each sample consists of ``n_lags`` consecutive observations, and its
    target is the observation that immediately follows them.

    Args:
        series: Sequence of observations supporting ``len``, slicing and
            integer indexing (for example a 1-D NumPy array or a list).
        n_lags: Number of consecutive past observations used as features.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` holds one window of
        ``n_lags`` values per row and ``y`` the value following each window.
        Both are empty when the series has no more than ``n_lags`` elements.
    """
    X, y = [], []
    # Every window must leave one later observation to serve as its target,
    # hence only len(series) - n_lags windows are available.
    for step in range(len(series) - n_lags):
        end_step = step + n_lags
        X.append(series[step:end_step])
        y.append(series[end_step])
    return np.array(X), np.array(y)

Transform the considered time series into input for the MLP:

# Build the lagged feature matrix and the matching next-step targets.
X, y = create_input_data(prices, n_lags=N_LAGS)

# Convert both arrays to float32 tensors; the targets get an extra
# dimension at position 1.
X_tensor = torch.from_numpy(X).to(torch.float32)
y_tensor = torch.from_numpy(y).to(torch.float32).unsqueeze(1)

Create training and validation sets:

# Index of the first validation sample: the last VALID_SIZE samples are
# held out, everything before them is used for training.
valid_ind = len(X) - VALID_SIZE

dataset = TensorDataset(X_tensor, y_tensor)

# Split by position so that the validation samples follow the training
# samples in the original ordering.
train_dataset = Subset(dataset, list(range(valid_ind)))
valid_dataset = Subset(dataset, list(range(valid_ind, len(X))))

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE)
valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=BATCH_SIZE)

Inspect the features from the first batch:

# Take the first batch from the training loader and show its element 0
# (the features; element 1 holds the targets).
next(iter(train_loader))[0]

Running the code results in the following output:

tensor([[23.1900, 26.9100, 33.7300],
        [26.9100, 33.7300, 32.3100],
        [33.7300, 32.3100, 26.6100],
        [32.3100, 26.6100, 22.7900],
        [26.6100, 22.7900, 27.4300]])

To inspect the target, we should use next(iter(train_loader))[1].

Use a naïve forecast as a benchmark and evaluate the performance:

# The naive forecast predicts every validation observation with the value
# observed one step earlier, so the prediction window is the validation
# window shifted back by one position.
naive_pred = prices[len(prices) - VALID_SIZE - 1:-1]
y_valid = prices[len(prices) - VALID_SIZE:]

naive_mse = mean_squared_error(y_valid, naive_pred)
naive_rmse = np.sqrt(naive_mse)
print(f"Naive forecast – MSE: {naive_mse:.2f}, RMSE: {naive_rmse:.2f}")

Running the code prints the following line:

Naive forecast – MSE: 17.87, RMSE: 4.23

Define the network's architecture:

class MLP(nn.Module):
    """Feed-forward network with two hidden layers and a single output.

    The input of width ``input_size`` passes through linear layers with 8
    and 4 units, each followed by a ReLU activation and dropout (p=0.2),
    and finally through a linear layer producing one value per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, 8)
        self.linear2 = nn.Linear(8, 4)
        self.linear3 = nn.Linear(4, 1)
        # One dropout module is shared by both hidden layers.
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the network output for the batch of inputs ``x``."""
        hidden = self.dropout(F.relu(self.linear1(x)))
        hidden = self.dropout(F.relu(self.linear2(hidden)))
        return self.linear3(hidden)

Instantiate the model, the loss function, and the optimizer:

# Seed PyTorch's random number generator so that runs are repeatable.
torch.manual_seed(42)

model = MLP(input_size=N_LAGS)
model.to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Inspecting the model object results in the following:

MLP(
  (linear1): Linear(in_features=3, out_features=8, bias=True)
  (linear2): Linear(in_features=8, out_features=4, bias=True)
  (linear3): Linear(in_features=4, out_features=1, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

Train the network:

PRINT_EVERY = 50
train_losses, valid_losses = [], []

# Track the best validation loss explicitly. Starting from infinity means
# the very first epoch can be the best one, so best_epoch is always bound
# and a checkpoint always exists once at least one epoch has run.
best_epoch = None
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    running_loss_train = 0
    running_loss_valid = 0

    # Training pass: dropout is active.
    model.train()

    for x_batch, y_batch in train_loader:

        optimizer.zero_grad()

        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        y_hat = model(x_batch)
        # Loss modules take (input, target): prediction first, target second.
        loss = loss_fn(y_hat, y_batch)
        loss.backward()
        optimizer.step()
        # The loss is a batch mean; weight it by the batch size so that a
        # smaller final batch does not distort the epoch average.
        running_loss_train += loss.item() * x_batch.size(0)

    epoch_loss_train = running_loss_train / len(train_loader.dataset)
    train_losses.append(epoch_loss_train)

    # Validation pass: dropout is disabled and no gradients are tracked.
    model.eval()

    with torch.no_grad():

        for x_val, y_val in valid_loader:
            x_val = x_val.to(device)
            y_val = y_val.to(device)
            y_hat = model(x_val)
            loss = loss_fn(y_hat, y_val)
            running_loss_valid += loss.item() * x_val.size(0)

    epoch_loss_valid = running_loss_valid / len(valid_loader.dataset)
    valid_losses.append(epoch_loss_valid)

    # Checkpoint the weights whenever the validation loss improves.
    if epoch_loss_valid < best_valid_loss:
        best_valid_loss = epoch_loss_valid
        best_epoch = epoch
        torch.save(model.state_dict(), './mlp_checkpoint.pth')

    if epoch % PRINT_EVERY == 0:
        print(f"<{epoch}> – Train. loss: {epoch_loss_train:.2f} 	 Valid. loss: {epoch_loss_valid:.2f}")

print(f'Lowest loss recorded in epoch: {best_epoch}')

Running the code results in the following output:

Lowest loss recorded in epoch: 961

Plot the losses over epochs:

# Convert the per-epoch loss histories to arrays before plotting.
train_losses = np.array(train_losses)
valid_losses = np.array(valid_losses)

fig, ax = plt.subplots()

# Draw the two loss curves on the same axes.
ax.plot(train_losses, color='blue', label='Training loss')
ax.plot(valid_losses, color='red', label='Validation loss')

ax.set_title("Loss over epochs")
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()

Running the code results in the following plot:

In the plot, we can see that after the initial drop (caused by randomly initialized weights), the training loss visibly decreases over epochs, while it is hard to unambiguously say the same about the validation loss, as it is much smaller.

Load the best model (with the lowest validation loss):

# Read the checkpoint written during training and copy its weights into
# the existing model instance.
state_dict = torch.load('mlp_checkpoint.pth')
model.load_state_dict(state_dict)

Obtain the predictions:

y_pred, y_valid = [], []

# Inference only: disable dropout and gradient tracking.
model.eval()

with torch.no_grad():

    for x_val, y_val in valid_loader:
        x_val = x_val.to(device)
        # Move the predictions back to the CPU: tensors living on a CUDA
        # device cannot be converted to NumPy arrays directly.
        y_pred.append(model(x_val).cpu())
        y_valid.append(y_val)

# Concatenate the per-batch results into flat NumPy arrays.
y_pred = torch.cat(y_pred).numpy().flatten()
y_valid = torch.cat(y_valid).numpy().flatten()

Evaluate the predictions:

# Compare the predictions with the validation targets collected in the
# previous step (y_valid).
mlp_mse = mean_squared_error(y_valid, y_pred)
mlp_rmse = np.sqrt(mlp_mse)
print(f"MLP's forecast – MSE: {mlp_mse:.2f}, RMSE: {mlp_rmse:.2f}")

fig, ax = plt.subplots()

ax.plot(y_valid, color='blue', label='true')
ax.plot(y_pred, color='red', label='prediction')

ax.set(title="Multilayer Perceptron's Forecasts",
       xlabel='Time',
       ylabel='Price ($)')
ax.legend()

Running the code generates the following plot:

We see that the network is not able to pick up the patterns in time, hence, the predicted line is shifted to the right of the actual one.

And the evaluation is as follows:

MLP's forecast – MSE: 15.47, RMSE: 3.93

Using the multilayer perceptron, we obtained results better than with the naïve forecast.

Table of Contents for How to do it...

Create new playlist

Sign In

Sign Up

Table of Contents for
How to do it...