# coding=utf-8
"""
© 2014 LinkedIn Corp. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
"""
import numpy

from luminol import utils
from luminol.algorithms.anomaly_detector_algorithms import AnomalyDetectorAlgorithm
from luminol.constants import *
from luminol.modules.time_series import TimeSeries


class ExpAvgDetector(AnomalyDetectorAlgorithm):

    """
    Exponential Moving Average.
    This method uses a data point's deviation from the exponential moving average of a lagging window
    to determine its anomaly score.
    """
    def __init__(self, time_series, baseline_time_series=None, smoothing_factor=None, use_lag_window=False, lag_window_size=None):
        """
        Initializer
        :param TimeSeries time_series: a TimeSeries object.
        :param TimeSeries baseline_time_series: baseline TimeSeries.
        :param float smoothing_factor: smoothing factor for computing exponential moving average.
            Missing (None) or non-positive values fall back to DEFAULT_EMA_SMOOTHING_FACTOR.
        :param bool use_lag_window: if True, score each point against the EMA of a bounded lagging
            window; otherwise score against the EMA computed over the whole series.
        :param int lag_window_size: lagging window size. When falsy, it is derived from the series
            length and DEFAULT_EMA_WINDOW_SIZE_PCT.
        """
        super(ExpAvgDetector, self).__init__(self.__class__.__name__, time_series, baseline_time_series)
        self.use_lag_window = use_lag_window
        # Check for None explicitly: on Python 3 `None > 0` raises TypeError, which made the
        # default argument value unusable.
        if smoothing_factor is not None and smoothing_factor > 0:
            self.smoothing_factor = smoothing_factor
        else:
            self.smoothing_factor = DEFAULT_EMA_SMOOTHING_FACTOR
        # NOTE(review): self.time_series_length is not set in this file; presumably the base
        # class initializer provides it -- confirm.
        if lag_window_size:
            self.lag_window_size = lag_window_size
        else:
            self.lag_window_size = int(self.time_series_length * DEFAULT_EMA_WINDOW_SIZE_PCT)
        self.time_series_items = self.time_series.items()

    def _compute_anom_score(self, lag_window_points, point):
        """
        Compute anomaly score for a single data point.
        Anomaly score for a single data point(t,v) equals: abs(v - ema(lagging window)).
        :param list lag_window_points: values in the lagging window.
        :param float point: data point value.
        :return float: the anomaly score.
        """
        # Only the last element of the EMA sequence is compared against the point.
        ema = utils.compute_ema(self.smoothing_factor, lag_window_points)[-1]
        return abs(point - ema)

    def _compute_anom_data_using_window(self):
        """
        Compute anomaly scores using a lagging window.
        Each point is scored against the EMA of the slice made of up to lag_window_size preceding
        values plus the point itself. Scores are divided by the standard deviation of the whole
        series when it is non-zero, then stored in self.anom_scores after self._denoise_scores.
        """
        anom_scores = {}
        values = self.time_series.values
        stdev = numpy.std(values)
        for i, (timestamp, value) in enumerate(self.time_series_items):
            # Clamp the window start at 0 so the first points use all values seen so far.
            window_start = max(0, i - self.lag_window_size)
            anom_score = self._compute_anom_score(values[window_start:i + 1], value)
            # A zero standard deviation would divide by zero; keep the raw deviation then.
            anom_scores[timestamp] = anom_score / stdev if stdev else anom_score
        self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))

    def _compute_anom_data_decay_all(self):
        """
        Compute anomaly scores using a lagging window covering all the data points before.
        The EMA is computed once over the whole series and each point is scored by its absolute
        deviation from the EMA value at the same index, divided by the standard deviation of the
        series when it is non-zero. The result is stored in self.anom_scores after
        self._denoise_scores.
        """
        anom_scores = {}
        values = self.time_series.values
        ema = utils.compute_ema(self.smoothing_factor, values)
        stdev = numpy.std(values)
        for i, (timestamp, value) in enumerate(self.time_series_items):
            # Take the absolute deviation first so the score is non-negative on both branches.
            anom_score = abs(value - ema[i])
            anom_scores[timestamp] = anom_score / stdev if stdev else anom_score
        self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))

    def _set_scores(self):
        """
        Compute anomaly scores for the time series.
        Uses the bounded lagging window when use_lag_window is set; otherwise uses a lagging
        window covering all the data points before.
        """
        if self.use_lag_window:
            self._compute_anom_data_using_window()
        else:
            # Without the else, the windowed scores were always overwritten by this call.
            self._compute_anom_data_decay_all()