Getting started#
This section will help you get started with Skchange by providing a variety of examples that demonstrate the library’s main capabilities. For a deeper understanding of Skchange’s core concepts, please refer to the Concepts section.
Installation#
# Install the core Skchange package.
pip install skchange
To make full use of the library, you can install the optional Numba dependency. It speeds up the algorithms in Skchange considerably, often by a factor of 10 to 100.
# Quote the requirement so shells that glob on [...] (e.g. zsh) pass it to pip intact.
pip install "skchange[numba]"
Change detection#
Detect changes in the mean#
[1]:
# Make "notebook" plotly's default renderer (presumably so the figures displayed
# by the .show() calls further down render inline in the notebook).
import plotly.io as pio
pio.renderers.default = "notebook"
[2]:
from skchange.change_detectors import SeededBinarySegmentation
from skchange.change_scores import CUSUM
from skchange.datasets import generate_piecewise_normal_data
from skchange.utils.plotting import plot_detections
# Simulate data whose mean changes between six consecutive segments.
segment_means = [0, 10, 0, -3, 5, 1]
segment_lengths = [30, 5, 15, 50, 60, 40]
x = generate_piecewise_normal_data(
    means=segment_means, lengths=segment_lengths, seed=0
)

# Seeded binary segmentation with the CUSUM change score and a penalty of 5.
cusum_score = CUSUM()
detector = SeededBinarySegmentation(cusum_score, penalty=5)
change_points = detector.fit_predict(x)

# Draw the detections together with the data, then print them.
detection_fig = plot_detections(x, change_points)
detection_fig.show()
print(change_points)
ilocs
0 30
1 35
2 50
3 100
4 160
Detect changes in a continuous piecewise linear trend#
[3]:
from skchange.change_scores import ContinuousLinearTrendScore
from skchange.datasets import generate_continuous_piecewise_linear_data
# Generate a continuous piecewise linear series: one slope per segment.
segment_slopes = [0, 1, -0.5, 0.5, 0.1]
segment_lengths = [30, 20, 50, 60, 40]
x = generate_continuous_piecewise_linear_data(
    slopes=segment_slopes, lengths=segment_lengths, seed=1
)

# Seeded binary segmentation with a continuous linear trend score; the
# "narrowest" selection method is requested explicitly.
detector_options = {"penalty": 20, "selection_method": "narrowest"}
detector = SeededBinarySegmentation(
    ContinuousLinearTrendScore(), **detector_options
)
change_points = detector.fit_predict(x)

# Draw the detections together with the data, then print them.
detection_fig = plot_detections(x, change_points)
detection_fig.show()
print(change_points)
ilocs
0 27
1 49
2 98
3 163
Detect sparse changes in a high-dimensional mean vector#
[4]:
from skchange.change_detectors import MovingWindow
from skchange.change_scores import ESACScore
# Three segments of 50 observations each, observed over 100 variables.
# `proportion_affected` falls from 1.0 to 0.01 across the segments
# (presumably the share of variables affected per segment -- TODO confirm
# against the data generator's documentation).
data_config = {
    "means": [0, 5, 10],
    "lengths": 50,
    "n_segments": 3,
    "n_variables": 100,
    "proportion_affected": [1.0, 0.1, 0.01],
    "randomise_affected_variables": True,
    "seed": 3,
}
x = generate_piecewise_normal_data(**data_config)

# Moving-window detector using the ESAC score with several bandwidths.
window_sizes = [10, 20, 30, 40]
detector = MovingWindow(ESACScore(), bandwidth=window_sizes)
change_points = detector.fit_predict(x)

# Draw the detections together with the data, then print them.
detection_fig = plot_detections(x, change_points)
detection_fig.show()
print(change_points)
ilocs
0 50
1 100
Detect changes in a linear regression model#
[5]:
from skchange.change_detectors import PELT
from skchange.costs import LinearRegressionCost
from skchange.datasets import generate_piecewise_regression_data
from skchange.utils.plotting import plot_scatter_segmentation
# Generate three segments of 50 observations with one feature and one target.
x, feature_cols, target_cols = generate_piecewise_regression_data(
    lengths=50, n_segments=3, n_features=1, n_targets=1, seed=2
)
target, feature = target_cols[0], feature_cols[0]

# PELT with a linear regression cost built from the first target column and
# the feature columns.
regression_cost = LinearRegressionCost(target, feature_cols)
detector = PELT(regression_cost, penalty=10)
change_points = detector.fit_predict(x)

# Plot the target against the feature with the detected segmentation, then
# print the change points.
segmentation_fig = plot_scatter_segmentation(
    x, change_points, x_var=feature, y_var=target
)
segmentation_fig.show()
print(change_points)
ilocs
0 50
1 100
Segment anomaly detection#
Detect segment anomalies in the mean#
[6]:
from skchange.anomaly_detectors import CAPA
from skchange.anomaly_scores import L2Saving
# Mean-0 data interrupted by three segments with means 4, 10 and -3.
segment_means = [0, 4, 0, 10, 0, -3]
segment_lengths = [100, 30, 50, 5, 50, 40]
x = generate_piecewise_normal_data(
    means=segment_means, lengths=segment_lengths, seed=5
)

# CAPA with the L2 saving as its anomaly score.
saving = L2Saving()
detector = CAPA(saving)
anomalies = detector.fit_predict(x)

# Draw the detected anomalies together with the data, then print them.
anomaly_fig = plot_detections(x, anomalies)
anomaly_fig.show()
print(anomalies)
ilocs labels
0 [100, 130) 1
1 [180, 185) 2
2 [235, 275) 3
Detect segment anomalies in multivariate data and identify the anomalous variables#
[7]:
from skchange.compose.penalised_score import PenalisedScore
# Three-variable data: only the first variable's mean shifts in the second
# segment, while all three means shift in the last segment.
segment_means = [0, [8.0, 0.0, 0.0], 0, [2.0, 3.0, 5.0]]
x = generate_piecewise_normal_data(
    means=segment_means, lengths=[100, 20, 130, 50], seed=1
)

# One penalty per number of affected variables.
penalty = [15, 20, 25]
score = L2Saving()
penalised_score = PenalisedScore(score, penalty)

# Ask CAPA to also report which components each anomaly affects.
detector = CAPA(penalised_score, find_affected_components=True)
anomalies = detector.fit_predict(x)

# Plot with the "subplot-line" option, then print the anomalies.
anomaly_fig = plot_detections(x, anomalies, "subplot-line")
anomaly_fig.show()
print(anomalies)
ilocs labels icolumns
0 [100, 120) 1 [0]
1 [250, 300) 2 [2, 1, 0]
Detect segment anomalies in the covariance matrix#
[8]:
import numpy as np
from skchange.anomaly_detectors import CAPA
from skchange.costs import MultivariateGaussianCost
# Baseline: two variables with off-diagonal covariance 0.9.
# Anomaly: identity covariance of the same dimension.
baseline_cov = np.array([[1, 0.9], [0.9, 1]])
n_variables = baseline_cov.shape[0]
anomalous_cov = np.eye(n_variables)

# Mean-0 data in three segments of 100 observations; only the middle segment
# uses the anomalous covariance.
segment_covs = [baseline_cov, anomalous_cov, baseline_cov]
x = generate_piecewise_normal_data(
    means=0, variances=segment_covs, lengths=[100, 100, 100], seed=8
)

# NOTE(review): the (0, baseline_cov) tuple presumably fixes the baseline mean
# and covariance used by the cost -- confirm against the cost's documentation.
baseline_param = (0, baseline_cov)
gaussian_cost = MultivariateGaussianCost(baseline_param)
detector = CAPA(gaussian_cost, min_segment_length=10)
anomalies = detector.fit_predict(x)

# Scatter plot of the data with the detected segmentation, then print it.
segmentation_fig = plot_scatter_segmentation(x, anomalies)
segmentation_fig.show()
print(anomalies)
ilocs labels
0 [98, 204) 1