# Source code for skchange.new_api.penalties._constant
"""Constant penalties for change and anomaly detection."""
import numbers
import numpy as np
from skchange.new_api.utils._param_validation import Interval, validate_params
# [docs]
@validate_params(
{
"n_samples": [Interval(numbers.Integral, 1, None, closed="left")],
"n_params": [Interval(numbers.Integral, 1, None, closed="left")],
"additional_cpts": [Interval(numbers.Integral, 0, None, closed="left")],
},
prefer_skip_nested_validation=True,
)
def bic_penalty(n_samples: int, n_params: int = 1, additional_cpts: int = 1) -> float:
    """Create a Bayesian Information Criterion (BIC) penalty.

    The BIC penalty is a constant penalty given by
    ``(n_params + additional_cpts) * log(n_samples)``, where `n_samples` is the
    sample size, `n_params` is the number of parameters per segment in the model
    across all features, and `additional_cpts` is the number of additional change
    point parameters per segment. For change detection, this is 1.

    Parameters
    ----------
    n_samples : int
        Sample size.
    n_params : int, default=1
        Number of model parameters per segment.
    additional_cpts : int, default=1
        Number of additional change point parameters per segment. For change
        detection, this is 1.

    Returns
    -------
    float
        The BIC penalty value, as a built-in Python ``float``.

    Examples
    --------
    >>> bic_penalty(100)
    9.210340371976184
    """
    # np.log yields a NumPy scalar; convert it so the result matches the
    # annotated return type and prints as a plain float (as in the example
    # above) regardless of how the installed NumPy version renders scalars.
    return float((n_params + additional_cpts) * np.log(n_samples))
# [docs]
@validate_params(
{
"n_samples": [Interval(numbers.Integral, 1, None, closed="left")],
"n_params": [Interval(numbers.Integral, 1, None, closed="left")],
},
prefer_skip_nested_validation=True,
)
def chi2_penalty(n_samples: int, n_params: int = 1) -> float:
    """Create a chi-square penalty.

    The penalty is the default penalty for the `CAPA` algorithm. It is described as
    "penalty regime 1" in the MVCAPA article [1]_. The penalty is based on a
    probability bound on the chi-squared distribution.

    The penalty is given by
    ``n_params + 2 * sqrt(n_params * log(n_samples)) + 2 * log(n_samples)``,
    where `n_samples` is the sample size and `n_params` is the total number of
    parameters per segment in the model across all features.

    Parameters
    ----------
    n_samples : int
        Sample size.
    n_params : int, default=1
        Number of model parameters per segment.

    Returns
    -------
    float
        The chi-square penalty value, as a built-in Python ``float``.

    References
    ----------
    .. [1] Fisch, A. T., Eckley, I. A., & Fearnhead, P. (2022). Subset multivariate
       segment and point anomaly detection. Journal of Computational and Graphical
       Statistics, 31(2), 574-585.

    Examples
    --------
    >>> chi2_penalty(100, n_params=2)
    17.280...
    """
    # psi = log(n_samples) appears in both the square-root and linear terms.
    psi = np.log(n_samples)
    # Convert the NumPy scalar so the result matches the annotated return type
    # and prints as a plain float regardless of the installed NumPy version.
    return float(n_params + 2 * np.sqrt(n_params * psi) + 2 * psi)