# Source code for ice.remaining_useful_life_estimation.models.lstm

from torch import nn
from ice.remaining_useful_life_estimation.models.base import BaseRemainingUsefulLifeEstimation
from pandas import DataFrame, Series
import torch


class LSTM_model(nn.Module):
    """
    Recurrent feature extractor built on ``torch.nn.LSTM``.

    Long short-term memory (LSTM) is a recurrent neural network type, see the
    PyTorch implementation:
    https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html

    The module runs a batch-first LSTM stack over the input sequence and
    returns the output of the top layer at the last time step.
    """
    def __init__(
            self,
            input_dim,
            hidden_size=512,
            device="cpu",
            num_layers=2,):
        """
        Args:
            input_dim (int): The dimension size of input data, related to the sensor amount in industry problems.
            hidden_size (int): The number of features in the hidden state of the model.
            device (str): The name of a device to train the model. `cpu` and `cuda` are possible.
                The value is stored on the instance; the initial LSTM states in
                ``forward`` are allocated on the device of the input tensor.
            num_layers (int): The number of stacked recurrent layers of the classic LSTM architecture.
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = device

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

    def forward(self, x):
        """
        Run the LSTM stack and keep only the last time step.

        Args:
            x (torch.Tensor): Batched input; the LSTM is created with
                ``batch_first=True``, so dimension 0 is the batch and the
                last dimension must equal ``input_dim``.

        Returns:
            torch.Tensor: ``output[:, -1, :]`` of the LSTM, i.e. one vector of
            size ``hidden_size`` per batch element.
        """
        # Allocate the zero initial states from `x` so that they always share
        # its device and dtype. Building them with `torch.zeros(...)` and the
        # device string stored at construction breaks as soon as the module is
        # moved with `.to(...)` or cast with `.double()` / `.half()`.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = x.new_zeros(state_shape)  # hidden state
        c_0 = x.new_zeros(state_shape)  # internal (cell) state

        output, _ = self.lstm(x, (h_0, c_0))
        return output[:, -1, :]

class LSTM(BaseRemainingUsefulLifeEstimation):
    """
    Long short-term memory (LSTM) model consists of the classical LSTM architecture stack and
    two-layer MLP with SiLU nonlinearity and dropout to make the final prediction.

    Dropout is applied to the input window, then the window is passed through
    the LSTM stack and only the last time step is kept:
    (B, L, C) -> (B, hidden_size).
    The MLP head maps (B, hidden_size) -> (B, hidden_dim) -> (B, 1), and the
    result is flattened to a vector of shape (B,).
    """

    def __init__(
        self,
        window_size: int = 32,
        stride: int = 1,
        hidden_dim: int = 512,
        hidden_size: int = 256,
        num_layers: int = 2,
        dropout_value: float = 0.5,
        batch_size: int = 64,
        lr: float = 1e-4,
        num_epochs: int = 35,
        device: str = "cpu",
        verbose: bool = True,
        name: str = "mlp_cmapss_rul",
        random_seed: int = 42,
        val_ratio: float = 0.15,
        save_checkpoints: bool = False
    ):
        """
        Args:
            window_size (int): The window size to train the model.
            stride (int): The time interval between first points of consecutive sliding windows.
            hidden_dim (int): The dimensionality of the hidden layer in MLP.
            hidden_size (int): The number of features in the hidden state of the model.
            num_layers (int): The number of stacked recurrent layers of the classic LSTM architecture.
            dropout_value (float): The dropout probability used both on the
                input window and between the two MLP layers.
            batch_size (int): The batch size to train the model.
            lr (float): The learning rate to train the model.
            num_epochs (int): The number of epochs to train the model.
            device (str): The name of a device to train the model. `cpu` and
                `cuda` are possible.
            verbose (bool): If true, show the progress bar in training.
            name (str): The name of the model for artifact storing.
            random_seed (int): Seed for random number generation to ensure reproducible results.
            val_ratio (float): Proportion of the dataset used for validation, between 0 and 1.
            save_checkpoints (bool): If true, store checkpoints.
        """
        super().__init__(
            window_size, stride, batch_size, lr, num_epochs, device, verbose, name, random_seed, val_ratio, save_checkpoints
        )
        # NOTE(review): consumed outside this class (presumably by the base
        # class to enable validation metrics) - confirm against the base.
        self.val_metrics = True

        # Architecture hyperparameters read later by `_create_model`.
        self.hidden_dim = hidden_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = device
        self.dropout_value = dropout_value

        # NOTE(review): not used in this class; presumably filled with loss
        # values during training elsewhere - confirm.
        self.loss_array = []

    # Extends the base-class mapping from constructor parameter names to
    # [section, key] pairs with the LSTM-specific hyperparameters.
    _param_conf_map = dict(
        BaseRemainingUsefulLifeEstimation._param_conf_map,
        **{"hidden_dim": ["MODEL", "HIDDEN_DIM"],
        "hidden_size": ["MODEL", "HIDDEN_SIZE"],
        "num_layers": ["MODEL", "NUM_LAYERS"],
        "dropout_value": ["MODEL", "DROPOUT"],
        }
    )

    def _create_model(self, input_dim: int, output_dim: int) -> None:
        """
        Build the network and store it in ``self.model``.

        Args:
            input_dim (int): The number of input features per time step,
                passed to the LSTM stack as its input size.
            output_dim (int): Not used here; the head always ends with a
                single output unit.
        """
        self.model = nn.Sequential(
            # Dropout on the raw input window.
            nn.Dropout(self.dropout_value),
            # (B, L, C) -> (B, hidden_size): last time step of the LSTM output.
            LSTM_model(input_dim, self.hidden_size, self.device, self.num_layers),
            # No-op on a (B, hidden_size) tensor; kept so that the layer
            # indices (and hence state_dict keys) stay unchanged.
            nn.Flatten(),
            nn.Linear(self.hidden_size, self.hidden_dim),
            nn.SiLU(),
            nn.Dropout(self.dropout_value),
            nn.Linear(self.hidden_dim, 1),
            # (B, 1) -> (B,): one scalar prediction per sample.
            nn.Flatten(start_dim=0),
        )