Source code for ice.health_index_estimation.models.IE_SBiGRU

from torch import nn
import torch
from ice.health_index_estimation.models.base import BaseHealthIndexEstimation
import math


class MultiHeadAttentionLayer(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadAttentionLayer, self).__init__()
        # batch_first=True so the layer accepts (batch, seq, embed) inputs,
        # matching the batch-first layout used everywhere else in this model.
        self.multihead_attn = nn.MultiheadAttention(
            embed_dim, num_heads, batch_first=True
        )
        self.layer_norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # Self-attention: the same tensor serves as query, key, and value.
        attn_output, _ = self.multihead_attn(x, x, x)
        x = self.layer_norm(attn_output + x)  # Add & Norm
        return x


class PositionwiseFeedForward(nn.Module):
    def __init__(self, embed_dim, ff_dim):
        super(PositionwiseFeedForward, self).__init__()
        # Position-wise feed-forward implemented with kernel-size-1 convolutions.
        self.conv1 = nn.Conv1d(embed_dim, ff_dim, kernel_size=1)
        self.conv2 = nn.Conv1d(ff_dim, embed_dim, kernel_size=1)
        self.gelu = nn.GELU()
        self.drop = nn.Dropout(0.1)

    def forward(self, x):
        # The residual Add & Norm is applied by the caller (Basic_block).
        x = x.permute(0, 2, 1)  # (batch, seq, embed) -> (batch, embed, seq)
        x = self.conv1(x)
        x = self.gelu(x)
        x = self.conv2(x)
        x = x.permute(0, 2, 1)  # back to (batch, seq, embed)
        return self.drop(x)
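

# A kernel-size-1 Conv1d over (batch, channels, time) applies the same affine
# map at every time step, i.e. it is equivalent to nn.Linear applied over
# (batch, time, channels). A minimal sketch of this equivalence, with assumed
# toy sizes:
def _demo_pointwise_conv_equals_linear() -> bool:
    conv = nn.Conv1d(4, 8, kernel_size=1)
    lin = nn.Linear(4, 8)
    with torch.no_grad():
        # Share weights so both layers compute the same affine map.
        lin.weight.copy_(conv.weight.squeeze(-1))
        lin.bias.copy_(conv.bias)
        x = torch.randn(2, 10, 4)  # (batch, time, channels)
        out_conv = conv(x.permute(0, 2, 1)).permute(0, 2, 1)
        return torch.allclose(out_conv, lin(x), atol=1e-6)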


class DistillationLayer(nn.Module):
    def __init__(self, input_dim, distilled_dim=18, kernel_size1=3, kernel_size2=3):
        super(DistillationLayer, self).__init__()
        self.conv1 = nn.Conv1d(
            input_dim, distilled_dim, kernel_size=kernel_size1, stride=1, padding=1
        )
        self.batch_norm = nn.BatchNorm1d(distilled_dim)
        self.elu = nn.ELU()
        # The strided convolution halves the sequence length.
        self.conv2 = nn.Conv1d(
            distilled_dim, distilled_dim, kernel_size=kernel_size2, stride=2, padding=1
        )
        # Max pooling halves it again, for a total reduction of roughly 4x.
        self.max_pool = nn.MaxPool1d(kernel_size=2)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, seq, channels) -> (batch, channels, seq)
        x = self.conv1(x)
        x = self.batch_norm(x)
        x = self.elu(x)
        x = self.conv2(x)
        x = self.max_pool(x)
        x = x.permute(0, 2, 1)  # back to (batch, seq, channels)
        return x
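

# Illustrative length check with assumed toy dimensions (Basic_block passes
# embed_dim for distilled_dim, overriding the default of 18): one distillation
# pass maps T time steps to roughly T // 4, e.g. 1024 -> 256.
def _demo_distillation_length() -> torch.Size:
    layer = DistillationLayer(input_dim=64, distilled_dim=64)
    x = torch.randn(2, 1024, 64)  # (batch, time, channels)
    return layer(x).shape  # torch.Size([2, 256, 64])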


class Basic_block(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout):
        super(Basic_block, self).__init__()
        self.multihead_attn = MultiHeadAttentionLayer(embed_dim, num_heads)
        self.ffn = PositionwiseFeedForward(embed_dim, ff_dim)
        self.dropout = nn.Dropout(dropout)
        # A single LayerNorm instance is shared by both Add & Norm steps.
        self.layer_norm = nn.LayerNorm(embed_dim)
        self.dist = DistillationLayer(embed_dim, embed_dim)

    def forward(self, x):
        x = self.multihead_attn(x)
        x = self.dropout(x) + x
        x = self.layer_norm(x)

        # Position-wise feed-forward with residual connection.
        x = self.dropout(self.ffn(x)) + x
        x = self.layer_norm(x)

        # Distillation shrinks the sequence before the next block.
        x = self.dist(x)
        return x


class InformerEncoder(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout):
        super(InformerEncoder, self).__init__()
        self.Basic_block_1 = Basic_block(embed_dim, num_heads, ff_dim, dropout)
        self.Basic_block_2 = Basic_block(embed_dim, num_heads, ff_dim, dropout)

    def forward(self, x):
        x = self.Basic_block_1(x)
        x = self.Basic_block_2(x)
        return x
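

# Illustrative shape check with assumed toy dimensions: each Basic_block
# reduces the sequence length by a factor of about 4, so two stacked blocks
# reduce it by about 16 while keeping the embedding dimension.
def _demo_encoder_length() -> torch.Size:
    encoder = InformerEncoder(embed_dim=64, num_heads=8, ff_dim=256, dropout=0.1)
    x = torch.randn(2, 1024, 64)  # (batch, time, embed)
    return encoder(x).shape  # torch.Size([2, 64, 64])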


class PositionalEncoding(nn.Module):
    def __init__(self, feature_size, max_len=1500):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, feature_size)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, feature_size, 2).float()
            * (-math.log(10000.0) / feature_size)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(
            position * div_term[: feature_size // 2]
        )
        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x + self.pe[:, : x.size(1), : x.size(2)]
        return x
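

# The ``pe`` buffer above implements the standard sinusoidal encoding,
#     PE(pos, 2i)   = sin(pos / 10000^(2i / d)),
#     PE(pos, 2i+1) = cos(pos / 10000^(2i / d)),
# with d = feature_size, so any (batch, seq, d) input with seq <= max_len is
# shifted by its positional code via a single broadcast addition.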


class StackedBiGRU(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(StackedBiGRU, self).__init__()
        self.bigru = nn.GRU(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
        )

    def forward(self, x):
        x, _ = self.bigru(x)
        return x
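

# Because the GRU is bidirectional, the output feature dimension doubles:
# (batch, seq, input_dim) -> (batch, seq, 2 * hidden_dim). Illustrative check
# with assumed toy dimensions:
def _demo_bigru_shape() -> torch.Size:
    gru = StackedBiGRU(input_dim=64, hidden_dim=128, num_layers=3)
    x = torch.randn(2, 64, 64)  # (batch, seq, features)
    return gru(x).shape  # torch.Size([2, 64, 256])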


class IE_SBiGRU_Model(nn.Module):
    def __init__(
        self,
        input_dim,
        embed_dim=64,
        num_heads=8,
        ff_dim=256,
        hidden_dim=128,
        num_layers=3,
        dropout=0.1,
    ):
        super(IE_SBiGRU_Model, self).__init__()
        self.input_proj = nn.Linear(
            input_dim, embed_dim
        )  # Projection to embedding dimension

        self.pos_encoder = PositionalEncoding(embed_dim)

        self.encoder = InformerEncoder(embed_dim, num_heads, ff_dim, dropout)
        self.bigru = StackedBiGRU(embed_dim, hidden_dim, num_layers)
        self.fc = nn.Linear(hidden_dim * 2, 1)  # Output Layer
        self.tanh = nn.Tanh()
        self.global_max_pool = nn.AdaptiveMaxPool1d(1)  # Global Max Pooling

    def forward(self, x):
        x = self.input_proj(x)  # Project input to embedding dimension
        x = self.pos_encoder(x)
        x = self.encoder(x)
        x = self.bigru(x)
        x = self.global_max_pool(x.permute(0, 2, 1)).squeeze(-1)  # Permute for pooling
        x = self.tanh(x)
        x = self.fc(x)
        return torch.squeeze(x, dim=1)
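

# End-to-end shape sketch with assumed toy dimensions (input_dim=14 is
# arbitrary here): a (batch, 1024, input_dim) window is projected to 64-dim
# embeddings, distilled to 1024 / 16 = 64 steps by the Informer encoder,
# expanded to 256 features by the BiGRU, pooled over time, and mapped to one
# health-index value per window.
def _demo_model_shapes(input_dim: int = 14) -> torch.Size:
    model = IE_SBiGRU_Model(input_dim=input_dim)
    x = torch.randn(2, 1024, input_dim)  # (batch, window, sensors)
    with torch.no_grad():
        out = model(x)
    return out.shape  # torch.Size([2])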


class IE_SBiGRU(BaseHealthIndexEstimation):
    """
    Wear condition monitoring model from the SOTA paper
    https://doi.org/10.1016/j.rcim.2022.102368.
    """

    def __init__(
        self,
        window_size: int = 1024,
        num_layers: int = 3,
        hidden_dim: int = 128,
        ff_dim: int = 256,
        stride: int = 300,
        batch_size: int = 64,
        lr: float = 0.0031789041005068647,
        num_epochs: int = 55,
        device: str = "cpu",
        verbose: bool = True,
        name: str = "IE_SBiGRU_hi_estimation",
        random_seed: int = 42,
        val_ratio: float = 0.15,
        save_checkpoints: bool = False,
    ):
        """
        Args:
            window_size (int): The window size to train the model.
            stride (int): The time interval between the first points of
                consecutive sliding windows.
            num_layers (int): The number of BiGRU layers in the model.
            hidden_dim (int): The regression head hidden linear layer size.
            ff_dim (int): The CNN projection dim in the feedforward module of
                the Informer encoder.
            batch_size (int): The batch size to train the model.
            lr (float): The learning rate to train the model.
            num_epochs (int): The number of epochs to train the model.
            device (str): The name of the device to train the model on. `cpu`
                and `cuda` are possible.
            verbose (bool): If true, show the progress bar in training.
            name (str): The name of the model for artifact storing.
            random_seed (int): Seed for random number generation to ensure
                reproducible results.
            val_ratio (float): Proportion of the dataset used for validation,
                between 0 and 1.
            save_checkpoints (bool): If true, store checkpoints.
        """
        super().__init__(
            window_size,
            stride,
            batch_size,
            lr,
            num_epochs,
            device,
            verbose,
            name,
            random_seed,
            val_ratio,
            save_checkpoints,
        )
        self.val_metrics = True

        self.window_size = window_size
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.ff_dim = ff_dim

    _param_conf_map = dict(
        BaseHealthIndexEstimation._param_conf_map,
        **{"num_layers": ["MODEL", "NUM_LAYERS"]},
        **{"hidden_dim": ["MODEL", "HIDDEN_DIM"]},
        **{"ff_dim": ["MODEL", "FF_DIM"]},
    )

    def _create_model(self, input_dim: int, output_dim: int):
        self.model = nn.Sequential(
            IE_SBiGRU_Model(
                input_dim=input_dim,
                num_layers=self.num_layers,
                hidden_dim=self.hidden_dim,
                ff_dim=self.ff_dim,
            )
        )
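

# Hypothetical usage sketch (the ``fit`` call and its DataFrame/Series inputs
# are assumptions about the BaseHealthIndexEstimation interface imported
# above, not definitions from this file):
#
#     model = IE_SBiGRU(num_epochs=5, device="cpu", verbose=True)
#     model.fit(df, target)  # df: sensor DataFrame, target: health-index Series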