Source code for skchange.new_api.penalties._linear

"""Linear penalties for change and anomaly detection."""

import numbers

import numpy as np

from skchange.new_api.penalties._constant import chi2_penalty
from skchange.new_api.utils._param_validation import Interval, validate_params


@validate_params(
    {
        "n_features": [Interval(numbers.Integral, 1, None, closed="left")],
        "intercept": [Interval(numbers.Real, 0, None, closed="left")],
        "slope": [Interval(numbers.Real, 0, None, closed="left")],
    },
    prefer_skip_nested_validation=True,
)
def linear_penalty(n_features: int, intercept: float, slope: float) -> np.ndarray:
    """Build a penalty that grows linearly with the number of affected features.

    The ``k``-th value (``k = 1, ..., n_features``) equals
    ``intercept + slope * k``. Because both `intercept` and `slope` are
    validated to be non-negative, the resulting sequence never decreases.

    Parameters
    ----------
    n_features : int
        Number of features/columns in the data being analysed. Must be at
        least 1.
    intercept : float
        Non-negative constant added to every penalty value.
    slope : float
        Non-negative increment in the penalty per additional affected feature.

    Returns
    -------
    np.ndarray of shape (n_features,)
        The non-decreasing linear penalty values. Element ``i`` is the penalty
        for ``i+1`` features being affected by a change or anomaly.

    Examples
    --------
    >>> linear_penalty(3, 1.0, 2.0)
    array([3., 5., 7.])
    """
    # Number of affected features associated with each output position: 1..n_features.
    affected_counts = np.arange(1, n_features + 1)
    return intercept + slope * affected_counts
@validate_params(
    {
        "n_samples": [Interval(numbers.Integral, 1, None, closed="left")],
        "n_features": [Interval(numbers.Integral, 1, None, closed="left")],
        "n_params_per_feature": [Interval(numbers.Integral, 1, None, closed="left")],
    },
    prefer_skip_nested_validation=True,
)
def linear_chi2_penalty(
    n_samples: int, n_features: int, n_params_per_feature: int = 1
) -> np.ndarray:
    """Build the linear chi-square penalty used for sparse anomalies.

    This is one component of the default penalty of the `MVCAPA` algorithm,
    referred to as "penalty regime 2" in the MVCAPA article [1]_. It targets
    sparse anomalies, i.e. anomalies that only affect a few features.

    For ``n_features > 1`` the ``k``-th value (``k = 1, ..., n_features``) is
    computed as ``2 * log(n_samples) + 2 * k * c``, where
    ``c = 2 * log(n_params_per_feature * n_features)``. For
    ``n_features == 1`` the function instead returns a length-one array
    holding ``chi2_penalty(n_samples, n_params_per_feature)``.

    Parameters
    ----------
    n_samples : int
        Sample size. Must be at least 1.
    n_features : int
        Number of features/columns in the data being analysed. Must be at
        least 1.
    n_params_per_feature : int, default=1
        Number of model parameters per feature and segment. Must be at least 1.

    Returns
    -------
    np.ndarray of shape (n_features,)
        The non-decreasing linear chi-square penalty values. Element ``i`` is
        the penalty for ``i+1`` features being affected by a change or anomaly.

    References
    ----------
    .. [1] Fisch, A. T., Eckley, I. A., & Fearnhead, P. (2022). Subset
       multivariate segment and point anomaly detection. Journal of
       Computational and Graphical Statistics, 31(2), 574-585.

    Examples
    --------
    >>> linear_chi2_penalty(100, 3)
    array([...])
    """
    if n_features == 1:
        # The linear regime is not valid for a single feature, so the constant
        # chi-square penalty is used instead.
        constant_penalty = chi2_penalty(n_samples, n_params_per_feature)
        return np.array([constant_penalty])

    log_sample_size = np.log(n_samples)
    per_feature_cost = 2 * np.log(n_params_per_feature * n_features)

    # Running total of the per-feature cost: position k holds (k + 1) * cost.
    per_feature_steps = np.full(n_features, per_feature_cost)
    accumulated_cost = np.cumsum(per_feature_steps)

    return 2 * log_sample_size + 2 * accumulated_cost