"""Implementation of Multimodel LSTM"""
import torch
from traja.models.utils import TimeDistributed
device = "cuda" if torch.cuda.is_available() else "cpu"
class LSTM(torch.nn.Module):
"""Deep LSTM network. This implementation
returns output_size outputs.
Args:
input_size: The number of expected features in the input ``x``
hidden_size: The number of features in the hidden state ``h``
output_size: The number of output dimensions
batch_size: Size of batch. Default is 8
sequence_length: The number of in each sample
num_layers: Number of recurrent layers. E.g., setting ``num_layers=2``
would mean stacking two LSTMs together to form a `stacked LSTM`,
with the second LSTM taking in outputs of the first LSTM and
computing the final results. Default: 1
reset_state: If ``True``, will reset the hidden and cell state for each batch of data
dropout: If non-zero, introduces a `Dropout` layer on the outputs of each
LSTM layer except the last layer, with dropout probability equal to
:attr:`dropout`. Default: 0
bidirectional: If ``True``, becomes a bidirectional LSTM. Default: ``False``
"""
def __init__(
self,
input_size: int,
hidden_size: int,
output_size: int,
num_future: int = 8,
batch_size: int = 8,
num_layers: int = 1,
reset_state: bool = True,
bidirectional: bool = False,
dropout: float = 0,
batch_first: bool = True,
):
super(LSTM, self).__init__()
self.batch_size = batch_size
self.input_size = input_size
self.num_past = num_future # num_past and num_future are equal
self.num_future = num_future
self.hidden_size = hidden_size
self.num_layers = num_layers
self.output_size = output_size
self.dropout = dropout
self.batch_first = batch_first
self.reset_state = reset_state
self.bidirectional = bidirectional
# Let the trainer know what kind of model this is
self.model_type = "lstm"
        # Recurrent forecaster network
        self.lstm = torch.nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            bidirectional=self.bidirectional,
            batch_first=self.batch_first,
        )
        # Time-distributed linear layer: maps every hidden state to output_size
        # features. A bidirectional LSTM doubles the feature dimension of its output.
        self.output = TimeDistributed(
            torch.nn.Linear(
                self.hidden_size * (2 if self.bidirectional else 1), self.output_size
            )
        )
    def _init_hidden(self):
        # Zero-initialised hidden and cell states with shape
        # (num_layers * num_directions, batch_size, hidden_size)
        num_directions = 2 if self.bidirectional else 1
        return (
            torch.zeros(
                self.num_layers * num_directions, self.batch_size, self.hidden_size
            )
            .requires_grad_()
            .to(device),
            torch.zeros(
                self.num_layers * num_directions, self.batch_size, self.hidden_size
            )
            .requires_grad_()
            .to(device),
        )
    def forward(self, x, training=True, classify=False, regress=False, latent=False):
assert not classify, "LSTM forecaster cannot classify!"
assert not regress, "LSTM forecaster cannot regress!"
assert not latent, "LSTM forecaster does not have a latent space!"
        # Initialise fresh (zero) hidden and cell states for this batch
        (h0, c0) = self._init_hidden()
        # x shape: (batch_size, num_future, input_size) when batch_first is True
        out, (hidden, cell) = self.lstm(x, (h0.detach(), c0.detach()))
        # Map the LSTM output, shape (batch_size, sequence_len, hidden_dim),
        # through the time-distributed linear layer
        out = self.output(out)
        return out
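

# A minimal smoke-test sketch (not part of the library): the hyperparameters,
# tensor shapes, and random data below are illustrative assumptions. The input
# follows the batch_first convention, (batch_size, sequence_length, input_size).
if __name__ == "__main__":
    model = LSTM(
        input_size=2, hidden_size=32, output_size=2, num_future=8, batch_size=8
    ).to(device)
    x = torch.randn(8, 8, 2, device=device)  # (batch_size, num_future, input_size)
    out = model(x)  # (batch_size, num_future, output_size)
    assert out.shape == (8, 8, 2)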