Source code for ice.anomaly_detection.models.autoencoder

import pandas as pd
from torch import nn

from ice.anomaly_detection.models.base import BaseAnomalyDetection


class MLP(nn.Module):
    """
    Fully connected encoder or decoder working on flattened windows.

    In encoder mode (``decoder=False``) the layer widths are
    ``[input_dim * window_size] + hidden_dims`` and every linear layer is
    followed by a ReLU. In decoder mode (``decoder=True``) the widths are
    ``hidden_dims + [input_dim * window_size]``, the last linear layer has
    no activation, and the output is reshaped to
    ``(-1, window_size, input_dim)``.
    """
    def __init__(
            self,
            input_dim: int,
            window_size: int,
            hidden_dims: list,
            decoder: bool = False,
            ):
        """
        Args:
            input_dim (int): Size of the last axis of a window; used together
                with `window_size` to compute the flattened width.
            window_size (int): Number of time steps in a window.
            hidden_dims (list): Widths of the hidden layers, in the order
                they are applied. Any sequence of ints (e.g. a tuple) is
                accepted.
            decoder (bool): If true, build the decoder variant described in
                the class docstring.
        """
        super().__init__()
        self.input_dim = input_dim
        self.window_size = window_size
        self.decoder = decoder

        # Copy into a fresh list: tuples and other sequences then concatenate
        # cleanly, and the caller's object is never aliased.
        hidden = list(hidden_dims)
        flat_dim = input_dim * window_size
        if decoder:
            self.hidden_dims = hidden + [flat_dim]
        else:
            self.hidden_dims = [flat_dim] + hidden

        # Flatten keeps the batch axis and merges the remaining ones; for
        # the decoder, whose input is already 2-D, it is a no-op.
        layers = [nn.Flatten()]
        last_idx = len(hidden) - 1
        widths = zip(self.hidden_dims, self.hidden_dims[1:])
        for idx, (in_features, out_features) in enumerate(widths):
            layers.append(nn.Linear(in_features, out_features))
            # The decoder's final projection stays linear so the
            # reconstruction is not clipped to non-negative values.
            if not (decoder and idx == last_idx):
                layers.append(nn.ReLU())
        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        """
        Apply the stack of layers to `x`.

        Returns:
            The output of the last layer; in decoder mode it is reshaped to
            ``(-1, window_size, input_dim)``.
        """
        output = self.mlp(x)
        if self.decoder:
            return output.view(-1, self.window_size, self.input_dim)

        return output


class AutoEncoderMLP(BaseAnomalyDetection):
    """
    MLP autoencoder consists of MLP encoder and MLP decoder parts. Each
    sample is reshaped to a vector (B, L, C) -> (B, L * C) for calculations
    and to a vector (B, L * C) -> (B, L, C) for the output. Where B is the
    batch size, L is the sequence length, C is the number of sensors.
    """
    def __init__(
            self,
            window_size: int,
            stride: int = 1,
            batch_size: int = 128,
            lr: float = 0.001,
            num_epochs: int = 10,
            device: str = 'cpu',
            verbose: bool = False,
            name: str = 'ae_anomaly_detection',
            random_seed: int = 42,
            val_ratio: float = 0.15,
            save_checkpoints: bool = False,
            threshold_level: float = 0.95,
            hidden_dims: list = [256, 128, 64]
            ):
        """
        Args:
            window_size (int): The window size to train the model.
            stride (int): The time interval between first points of
                consecutive sliding windows in training.
            batch_size (int): The batch size to train the model.
            lr (float): The learning rate to train the model.
            num_epochs (int): The number of epochs to train the model.
            device (str): The name of a device to train the model. `cpu` and
                `cuda` are possible.
            verbose (bool): If true, show the progress bar in training.
            name (str): The name of the model for artifact storing.
            random_seed (int): Seed for random number generation to ensure
                reproducible results.
            val_ratio (float): Proportion of the dataset used for validation,
                between 0 and 1.
            save_checkpoints (bool): If true, store checkpoints.
            threshold_level (float): Takes a value from 0 to 1. It specifies
                the quantile in the distribution of errors on the training
                dataset at which the threshold value is set.
            hidden_dims (list): Dimensions of hidden layers in the encoder;
                the decoder uses the same dimensions in reverse order.
        """
        super().__init__(
            window_size, stride, batch_size, lr, num_epochs, device, verbose,
            name, random_seed, val_ratio, save_checkpoints, threshold_level
        )
        # The default list object is created once and shared by every call,
        # so store a copy: mutating one model's `hidden_dims` must not leak
        # into the default (or into the caller's list).
        self.hidden_dims = list(hidden_dims)

    # Extends the base class mapping with the entry for `hidden_dims`.
    _param_conf_map = dict(BaseAnomalyDetection._param_conf_map,
            **{
                "hidden_dims": ["MODEL", "HIDDEN_DIMS"]
            }
        )

    def _create_model(self, input_dim: int, output_dim: int):
        """
        Build the encoder/decoder pair and store it in `self.model`.

        Args:
            input_dim (int): Number of features per time step; passed to
                both the encoder and the decoder.
            output_dim (int): Not used by this implementation.
        """
        encoder = MLP(
            input_dim,
            self.window_size,
            hidden_dims=self.hidden_dims,
        )
        # The decoder mirrors the encoder: same widths, reversed order.
        decoder = MLP(
            input_dim,
            self.window_size,
            hidden_dims=self.hidden_dims[::-1],
            decoder=True
        )
        self.model = nn.Sequential(encoder, decoder)