# Source code for traja.models.predictive_models.lstm

"""Implementation of Multimodel LSTM"""
import torch

from traja.models.utils import TimeDistributed

device = "cuda" if torch.cuda.is_available() else "cpu"

class LSTM(torch.nn.Module):
    """Deep LSTM forecaster.

    The input sequence is run through a (optionally stacked and/or
    bidirectional) ``torch.nn.LSTM`` and every time step of the recurrent
    output is projected to ``output_size`` features by a linear layer wrapped
    in ``TimeDistributed``.

    Args:
        input_size: The number of expected features in the input ``x``
        hidden_size: The number of features in the hidden state ``h``
        output_size: The number of output dimensions
        num_future: Number of future time steps. It is stored as both
            ``num_future`` and ``num_past`` (the two are kept equal).
            Default: 8
        batch_size: Nominal batch size. Only used as the fallback size when
            ``_init_hidden`` is called without an explicit batch size;
            ``forward`` sizes the state from the actual input. Default: 8
        num_layers: Number of recurrent layers. E.g., setting
            ``num_layers=2`` would mean stacking two LSTMs together to form a
            `stacked LSTM`, with the second LSTM taking in outputs of the
            first LSTM and computing the final results. Default: 1
        reset_state: Stored on the instance. ``forward`` does not consult it:
            a fresh zero state is created on every call. Default: ``True``
        bidirectional: If ``True``, becomes a bidirectional LSTM.
            Default: ``False``
        dropout: If non-zero, introduces a `Dropout` layer on the outputs of
            each LSTM layer except the last layer, with dropout probability
            equal to :attr:`dropout`. Default: 0
        batch_first: Stored on the instance. The underlying LSTM is always
            built with ``batch_first=True`` regardless of this value.
            Default: ``True``
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        output_size: int,
        num_future: int = 8,
        batch_size: int = 8,
        num_layers: int = 1,
        reset_state: bool = True,
        bidirectional: bool = False,
        dropout: float = 0,
        batch_first: bool = True,
    ):
        super().__init__()

        self.batch_size = batch_size
        self.input_size = input_size
        self.num_past = num_future  # num_past and num_future are equal
        self.num_future = num_future
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.dropout = dropout
        self.batch_first = batch_first
        self.reset_state = reset_state
        self.bidirectional = bidirectional

        # Let the trainer know what kind of model this is
        self.model_type = "lstm"

        # RNN decoder
        self.lstm = torch.nn.LSTM(
            input_size=self.input_size,
            hidden_size=self.hidden_size,
            num_layers=self.num_layers,
            dropout=self.dropout,
            bidirectional=self.bidirectional,
            batch_first=True,
        )

        # A bidirectional LSTM concatenates the forward and backward hidden
        # states, so the projection must accept twice as many features.
        num_directions = 2 if self.bidirectional else 1
        self.output = TimeDistributed(
            torch.nn.Linear(self.hidden_size * num_directions, self.output_size)
        )

    def _init_hidden(self, batch_size=None, device=None, dtype=None):
        """Create zero-filled initial hidden and cell states.

        Args:
            batch_size: Size of the batch dimension of the state. Defaults to
                ``self.batch_size``.
            device: Device for the state tensors. Defaults to the device of
                the LSTM weights.
            dtype: Dtype for the state tensors. Defaults to the dtype of the
                LSTM weights.

        Returns:
            Tuple ``(h0, c0)``, each of shape
            ``(num_layers * num_directions, batch_size, hidden_size)``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        if device is None or dtype is None:
            # Follow the module's own weights rather than a process-wide
            # default, so a CPU-resident model works on a CUDA-capable host.
            reference = next(self.lstm.parameters())
            if device is None:
                device = reference.device
            if dtype is None:
                dtype = reference.dtype

        num_directions = 2 if self.bidirectional else 1
        shape = (self.num_layers * num_directions, batch_size, self.hidden_size)
        # No requires_grad: the initial state is a constant, not a parameter.
        h0 = torch.zeros(shape, device=device, dtype=dtype)
        c0 = torch.zeros(shape, device=device, dtype=dtype)
        return h0, c0

    def forward(self, x, training=True, classify=False, regress=False, latent=False):
        """Run the LSTM over ``x`` and project every time step.

        Args:
            x: Input tensor. It is treated as batch-first, i.e.
                ``(batch, sequence, input_size)``, because the underlying
                LSTM is built with ``batch_first=True``.
            training: Accepted for interface compatibility; not used here.
            classify: Must be falsy; this model cannot classify.
            regress: Must be falsy; this model cannot regress.
            latent: Must be falsy; this model has no latent space.

        Returns:
            The projected LSTM output, with ``output_size`` features per time
            step.

        Raises:
            AssertionError: If ``classify``, ``regress`` or ``latent`` is
                truthy.
        """
        assert not classify, "LSTM forecaster cannot classify!"
        assert not regress, "LSTM forecaster cannot regress!"
        assert not latent, "LSTM forecaster does not have a latent space!"

        # Size the state from the actual input so that batches whose size
        # differs from ``self.batch_size`` (e.g. a short final batch) work,
        # and keep it on the same device/dtype as the input.
        h0, c0 = self._init_hidden(
            batch_size=x.size(0), device=x.device, dtype=x.dtype
        )

        # Recurrent output: (batch, sequence, num_directions * hidden_size)
        out, _ = self.lstm(x, (h0, c0))

        # Map every time step of the recurrent output through the linear layer
        out = self.output(out)
        return out