Source code for ice.health_index_estimation.models.IE_SBiGRU

from torch import nn
import torch
from ice.health_index_estimation.models.base import BaseHealthIndexEstimation
import math


class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadAttentionLayer, self).__init__()
        # batch_first=True so the layer accepts (batch, seq, embed) inputs,
        # matching the batch-first layout used everywhere else in this model.
        self.multihead_attn = nn.MultiheadAttention(
            embed_dim, num_heads, batch_first=True
        )
        self.layer_norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Self-attention: the same tensor serves as query, key, and value.
        attn_output, _ = self.multihead_attn(x, x, x)
        x = self.layer_norm(attn_output + x)  # Add & Norm
        return x


class PositionwiseFeedForward(nn.Module):
    def __init__(self, embed_dim, ff_dim):
        super(PositionwiseFeedForward, self).__init__()
        # Position-wise feed-forward implemented with kernel-size-1 convolutions.
        self.conv1 = nn.Conv1d(embed_dim, ff_dim, kernel_size=1)
        self.conv2 = nn.Conv1d(ff_dim, embed_dim, kernel_size=1)
        self.gelu = nn.GELU()
        self.drop = nn.Dropout(0.1)

    def forward(self, x):
        # The residual Add & Norm is applied by the caller (Basic_block).
        x = x.permute(0, 2, 1)  # (batch, seq, embed) -> (batch, embed, seq)
        x = self.conv1(x)
        x = self.gelu(x)
        x = self.conv2(x)
        x = x.permute(0, 2, 1)  # back to (batch, seq, embed)
        return self.drop(x)
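

# A kernel-size-1 Conv1d over (batch, channels, time) applies the same affine
# map at every time step, i.e. it is equivalent to nn.Linear applied over
# (batch, time, channels). A minimal sketch of this equivalence, with assumed
# toy sizes:
def _demo_pointwise_conv_equals_linear() -> bool:
    conv = nn.Conv1d(4, 8, kernel_size=1)
    lin = nn.Linear(4, 8)
    with torch.no_grad():
        # Share weights so both layers compute the same affine map.
        lin.weight.copy_(conv.weight.squeeze(-1))
        lin.bias.copy_(conv.bias)
        x = torch.randn(2, 10, 4)  # (batch, time, channels)
        out_conv = conv(x.permute(0, 2, 1)).permute(0, 2, 1)
        return torch.allclose(out_conv, lin(x), atol=1e-6)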


class DistillationLayer(nn.Module):
    def __init__(self, input_dim, distilled_dim=18, kernel_size1=3, kernel_size2=3):
        super(DistillationLayer, self).__init__()
        self.conv1 = nn.Conv1d(
            input_dim, distilled_dim, kernel_size=kernel_size1, stride=1, padding=1
        )
        self.batch_norm = nn.BatchNorm1d(distilled_dim)
        self.elu = nn.ELU()
        # The strided convolution halves the sequence length.
        self.conv2 = nn.Conv1d(
            distilled_dim, distilled_dim, kernel_size=kernel_size2, stride=2, padding=1
        )
        # Max pooling halves it again, for a total reduction of roughly 4x.
        self.max_pool = nn.MaxPool1d(kernel_size=2)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, seq, channels) -> (batch, channels, seq)
        x = self.conv1(x)
        x = self.batch_norm(x)
        x = self.elu(x)
        x = self.conv2(x)
        x = self.max_pool(x)
        x = x.permute(0, 2, 1)  # back to (batch, seq, channels)
        return x
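

# Illustrative length check with assumed toy dimensions (Basic_block passes
# embed_dim for distilled_dim, overriding the default of 18): one distillation
# pass maps T time steps to roughly T // 4, e.g. 1024 -> 256.
def _demo_distillation_length() -> torch.Size:
    layer = DistillationLayer(input_dim=64, distilled_dim=64)
    x = torch.randn(2, 1024, 64)  # (batch, time, channels)
    return layer(x).shape  # torch.Size([2, 256, 64])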


class Basic_block(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout):
        super(Basic_block, self).__init__()
        self.multihead_attn = MultiHeadAttentionLayer(embed_dim, num_heads)
        self.ffn = PositionwiseFeedForward(embed_dim, ff_dim)
        self.dropout = nn.Dropout(dropout)
        # A single LayerNorm instance is shared by both Add & Norm steps.
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.dist = DistillationLayer(embed_dim, embed_dim)

    def forward(self, x):
        x = self.multihead_attn(x)
        x = self.dropout(x) + x
        x = self.layer_norm(x)

        # Position-wise feed-forward with residual connection.
        x = self.dropout(self.ffn(x)) + x
        x = self.layer_norm(x)

        # Distillation shrinks the sequence before the next block.
        x = self.dist(x)
        return x


class InformerEncoder(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout):
        super(InformerEncoder, self).__init__()
        self.Basic_block_1 = Basic_block(embed_dim, num_heads, ff_dim, dropout)
        self.Basic_block_2 = Basic_block(embed_dim, num_heads, ff_dim, dropout)

    def forward(self, x):
        x = self.Basic_block_1(x)
        x = self.Basic_block_2(x)
        return x
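

# Illustrative shape check with assumed toy dimensions: each Basic_block
# reduces the sequence length by a factor of about 4, so two stacked blocks
# reduce it by about 16 while keeping the embedding dimension.
def _demo_encoder_length() -> torch.Size:
    encoder = InformerEncoder(embed_dim=64, num_heads=8, ff_dim=256, dropout=0.1)
    x = torch.randn(2, 1024, 64)  # (batch, time, embed)
    return encoder(x).shape  # torch.Size([2, 64, 64])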


class PositionalEncoding(nn.Module):
    def __init__(self, feature_size, max_len=1500):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, feature_size)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, feature_size, 2).float()
            * (-math.log(10000.0) / feature_size)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(
            position * div_term[: feature_size // 2]
        )
        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.pe[:, : x.size(1), : x.size(2)]
        return x
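

# The ``pe`` buffer above implements the standard sinusoidal encoding,
#     PE(pos, 2i)   = sin(pos / 10000^(2i / d)),
#     PE(pos, 2i+1) = cos(pos / 10000^(2i / d)),
# with d = feature_size, so any (batch, seq, d) input with seq <= max_len is
# shifted by its positional code via a single broadcast addition.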


class StackedBiGRU(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(StackedBiGRU, self).__init__()
        self.bigru = nn.GRU(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
        )

    def forward(self, x):
        x, _ = self.bigru(x)
        return x
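

# Because the GRU is bidirectional, the output feature dimension doubles:
# (batch, seq, input_dim) -> (batch, seq, 2 * hidden_dim). Illustrative check
# with assumed toy dimensions:
def _demo_bigru_shape() -> torch.Size:
    gru = StackedBiGRU(input_dim=64, hidden_dim=128, num_layers=3)
    x = torch.randn(2, 64, 64)  # (batch, seq, features)
    return gru(x).shape  # torch.Size([2, 64, 256])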


class IE_SBiGRU_Model(nn.Module):
    def __init__(
        self,
        input_dim,
        embed_dim=64,
        num_heads=8,
        ff_dim=256,
        hidden_dim=128,
        num_layers=3,
        dropout=0.1,
    ):
        super(IE_SBiGRU_Model, self).__init__()
        self.input_proj = nn.Linear(
            input_dim, embed_dim
        )  # Projection to embedding dimension

        self.pos_encoder = PositionalEncoding(embed_dim)

        self.encoder = InformerEncoder(embed_dim, num_heads, ff_dim, dropout)
        self.bigru = StackedBiGRU(embed_dim, hidden_dim, num_layers)
        self.fc = nn.Linear(hidden_dim * 2, 1)  # Output Layer
        self.tanh = nn.Tanh()
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)  # Global Max Pooling

    def forward(self, x):
        x = self.input_proj(x)  # Project input to embedding dimension
        x = self.pos_encoder(x)
        x = self.encoder(x)
        x = self.bigru(x)
        x = self.global_max_pool(x.permute(0, 2, 1)).squeeze(-1)  # Permute for pooling
        x = self.tanh(x)
        x = self.fc(x)
        return torch.squeeze(x, dim=1)
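

# End-to-end shape sketch with assumed toy dimensions (input_dim=14 is
# arbitrary here): a (batch, 1024, input_dim) window is projected to 64-dim
# embeddings, distilled to 1024 / 16 = 64 steps by the Informer encoder,
# expanded to 256 features by the BiGRU, pooled over time, and mapped to one
# health-index value per window.
def _demo_model_shapes(input_dim: int = 14) -> torch.Size:
    model = IE_SBiGRU_Model(input_dim=input_dim)
    x = torch.randn(2, 1024, input_dim)  # (batch, window, sensors)
    with torch.no_grad():
        out = model(x)
    return out.shape  # torch.Size([2])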


class IE_SBiGRU(BaseHealthIndexEstimation):
    """
    Wear condition monitoring model from the SOTA paper
    https://doi.org/10.1016/j.rcim.2022.102368.
    """

    def __init__(
        self,
        window_size: int = 1024,
        num_layers: int = 3,
        hidden_dim: int = 128,
        ff_dim: int = 256,
        stride: int = 300,
        batch_size: int = 64,
        lr: float = 0.0031789041005068647,
        num_epochs: int = 55,
        device: str = "cpu",
        verbose: bool = True,
        name: str = "IE_SBiGRU_hi_estimation",
        random_seed: int = 42,
        val_ratio: float = 0.15,
        save_checkpoints: bool = False,
    ):
        """
        Args:
            window_size (int): The window size to train the model.
            stride (int): The time interval between the first points of
                consecutive sliding windows.
            num_layers (int): The number of BiGRU layers in the model.
            hidden_dim (int): The regression head hidden linear layer size.
            ff_dim (int): The CNN projection dim in the feedforward module of
                the Informer encoder.
            batch_size (int): The batch size to train the model.
            lr (float): The learning rate to train the model.
            num_epochs (int): The number of epochs to train the model.
            device (str): The name of the device to train the model on. `cpu`
                and `cuda` are possible.
            verbose (bool): If true, show the progress bar in training.
            name (str): The name of the model for artifact storing.
            random_seed (int): Seed for random number generation to ensure
                reproducible results.
            val_ratio (float): Proportion of the dataset used for validation,
                between 0 and 1.
            save_checkpoints (bool): If true, store checkpoints.
        """
        super().__init__(
            window_size,
            stride,
            batch_size,
            lr,
            num_epochs,
            device,
            verbose,
            name,
            random_seed,
            val_ratio,
            save_checkpoints,
        )
        self.val_metrics = True

        self.window_size = window_size
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.ff_dim = ff_dim

    _param_conf_map = dict(
        BaseHealthIndexEstimation._param_conf_map,
        **{"num_layers": ["MODEL", "NUM_LAYERS"]},
        **{"hidden_dim": ["MODEL", "HIDDEN_DIM"]},
        **{"ff_dim": ["MODEL", "FF_DIM"]},
    )

    def _create_model(self, input_dim: int, output_dim: int):
        self.model = nn.Sequential(
            IE_SBiGRU_Model(
                input_dim=input_dim,
                num_layers=self.num_layers,
                hidden_dim=self.hidden_dim,
                ff_dim=self.ff_dim,
            )
        )
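

# Hypothetical usage sketch (the ``fit`` call and its DataFrame/Series inputs
# are assumptions about the BaseHealthIndexEstimation interface imported
# above, not definitions from this file):
#
#     model = IE_SBiGRU(num_epochs=5, device="cpu", verbose=True)
#     model.fit(df, target)  # df: sensor DataFrame, target: health-index Series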