# Source code for ice.anomaly_detection.models.autoencoder

import pandas as pd
from torch import nn

from ice.anomaly_detection.models.base import BaseAnomalyDetection


class MLP(nn.Module):
    """Fully connected encoder or decoder working on flattened windows.

    In encoder mode (``decoder=False``) the layer widths are
    ``[input_dim * window_size, *hidden_dims]`` and every linear layer is
    followed by a ReLU. In decoder mode (``decoder=True``) the widths are
    ``[*hidden_dims, input_dim * window_size]``, the last linear layer has
    no activation, and the output is reshaped to
    ``(-1, window_size, input_dim)``.
    """

    def __init__(
            self,
            input_dim: int,
            window_size: int,
            hidden_dims: list,
            decoder: bool = False,
            ):
        """
        Args:
            input_dim (int): Size of the last dimension of a window.
            window_size (int): Number of time steps in a window.
            hidden_dims (list): Widths of the hidden layers. Any sequence
                of ints (e.g. a tuple) is accepted; it is copied into a
                list internally.
            decoder (bool): If true, build the decoder variant described
                in the class docstring.
        """
        super().__init__()
        self.input_dim = input_dim
        self.window_size = window_size
        self.decoder = decoder

        flat_dim = input_dim * window_size
        # Copy into a list so tuples (or other sequences) can be
        # concatenated and the caller's object is never shared.
        widths = list(hidden_dims)
        if self.decoder:
            self.hidden_dims = widths + [flat_dim]
        else:
            self.hidden_dims = [flat_dim] + widths

        # Flatten everything after the batch dimension before the first
        # linear layer.
        layers = [nn.Flatten()]
        last_idx = len(self.hidden_dims) - 2
        width_pairs = zip(self.hidden_dims[:-1], self.hidden_dims[1:])
        for idx, (in_features, out_features) in enumerate(width_pairs):
            layers.append(nn.Linear(in_features, out_features))
            # The decoder's final layer is left without an activation.
            if self.decoder and idx == last_idx:
                continue
            layers.append(nn.ReLU())
        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x``.

        Returns:
            The output of the stack; in decoder mode it is reshaped to
            ``(-1, window_size, input_dim)``.
        """
        output = self.mlp(x)
        if self.decoder:
            return output.view(-1, self.window_size, self.input_dim)

        return output


class AutoEncoderMLP(BaseAnomalyDetection):
    """
    MLP autoencoder consisting of an MLP encoder and an MLP decoder.

    Each sample is flattened (B, L, C) -> (B, L * C) for the calculations
    and reshaped back (B, L * C) -> (B, L, C) for the output, where B is
    the batch size, L is the sequence length and C is the number of
    sensors.
    """

    def __init__(
            self,
            window_size: int,
            stride: int = 1,
            batch_size: int = 128,
            lr: float = 0.001,
            num_epochs: int = 10,
            device: str = 'cpu',
            verbose: bool = False,
            name: str = 'ae_anomaly_detection',
            random_seed: int = 42,
            val_ratio: float = 0.15,
            save_checkpoints: bool = False,
            threshold_level: float = 0.95,
            hidden_dims: list = [256, 128, 64]
            ):
        """
        Args:
            window_size (int): The window size to train the model.
            stride (int): The time interval between first points of
                consecutive sliding windows in training.
            batch_size (int): The batch size to train the model.
            lr (float): The learning rate to train the model.
            num_epochs (int): The number of epochs to train the model.
            device (str): The name of a device to train the model. `cpu`
                and `cuda` are possible.
            verbose (bool): If true, show the progress bar in training.
            name (str): The name of the model for artifact storing.
            random_seed (int): Seed for random number generation to ensure
                reproducible results.
            val_ratio (float): Proportion of the dataset used for
                validation, between 0 and 1.
            save_checkpoints (bool): If true, store checkpoints.
            threshold_level (float): Takes a value from 0 to 1. It
                specifies the quantile in the distribution of errors on
                the training dataset at which the threshold value is set.
            hidden_dims (list): Dimensions of hidden layers in the
                encoder; the decoder uses them in reverse order.
        """
        super().__init__(
            window_size, stride, batch_size, lr, num_epochs, device, verbose,
            name, random_seed, val_ratio, save_checkpoints, threshold_level
        )
        # Store a copy: the default list object is shared by every call,
        # so keeping a reference would let an in-place edit on one
        # instance leak into all instances created afterwards.
        self.hidden_dims = list(hidden_dims)

    # Extends the base mapping with the location of `hidden_dims`.
    _param_conf_map = dict(
        BaseAnomalyDetection._param_conf_map,
        **{
            "hidden_dims": ["MODEL", "HIDDEN_DIMS"]
        }
    )

    def _create_model(self, input_dim: int, output_dim: int):
        """Build the encoder/decoder stack and store it in ``self.model``.

        Args:
            input_dim (int): Number of input channels per time step.
            output_dim (int): Not used by this model.
        """
        encoder = MLP(
            input_dim,
            self.window_size,
            hidden_dims=self.hidden_dims,
        )
        # The decoder mirrors the encoder, so its widths are reversed.
        decoder = MLP(
            input_dim,
            self.window_size,
            hidden_dims=self.hidden_dims[::-1],
            decoder=True
        )
        self.model = nn.Sequential(encoder, decoder)