1# coding=utf-8
2"""
3© 2014 LinkedIn Corp. All rights reserved.
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at  http://www.apache.org/licenses/LICENSE-2.0
7
8Unless required by applicable law or agreed to in writing, software
9distributed under the License is distributed on an "AS IS" BASIS,
10WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11"""
12import numpy
13
14from luminol import utils
15from luminol.algorithms.anomaly_detector_algorithms import AnomalyDetectorAlgorithm
16from luminol.constants import *
17from luminol.modules.time_series import TimeSeries
18
19
class ExpAvgDetector(AnomalyDetectorAlgorithm):

  """
  Exponential Moving Average.
  This method uses a data point's deviation from the exponential moving average of a lagging window
  to determine its anomaly score. Scores are divided by the standard deviation of the whole series
  whenever that standard deviation is non-zero.
  """
  def __init__(self, time_series, baseline_time_series=None, smoothing_factor=None, use_lag_window=False, lag_window_size=None):
    """
    Initializer
    :param TimeSeries time_series: a TimeSeries object.
    :param TimeSeries baseline_time_series: baseline TimeSeries.
    :param float smoothing_factor: smoothing factor for computing exponential moving average;
      DEFAULT_EMA_SMOOTHING_FACTOR is used when it is None or not positive.
    :param bool use_lag_window: if True, score each point against the EMA of a bounded lagging window;
      otherwise score it against the EMA computed over the whole series.
    :param int lag_window_size: lagging window size; when falsy it is derived from the series length
      and DEFAULT_EMA_WINDOW_SIZE_PCT.
    """
    super(ExpAvgDetector, self).__init__(self.__class__.__name__, time_series, baseline_time_series)
    self.use_lag_window = use_lag_window
    # Test for None explicitly: `None > 0` raises TypeError on Python 3, and None is the default.
    if smoothing_factor is not None and smoothing_factor > 0:
      self.smoothing_factor = smoothing_factor
    else:
      self.smoothing_factor = DEFAULT_EMA_SMOOTHING_FACTOR
    # NOTE(review): time_series_length is not set in this class; presumably provided by the base class.
    if lag_window_size:
      self.lag_window_size = lag_window_size
    else:
      self.lag_window_size = int(self.time_series_length * DEFAULT_EMA_WINDOW_SIZE_PCT)
    self.time_series_items = self.time_series.items()

  def _compute_anom_score(self, lag_window_points, point):
    """
    Compute anomaly score for a single data point.
    Anomaly score for a single data point(t,v) equals: abs(v - ema(lagging window)).
    :param list lag_window_points: values in the lagging window.
    :param float point: data point value.
    :return float: the anomaly score.
    """
    # Only the last element of the EMA series is used as the window's EMA.
    ema = utils.compute_ema(self.smoothing_factor, lag_window_points)[-1]
    return abs(point - ema)

  def _compute_anom_data_using_window(self):
    """
    Compute anomaly scores using a lagging window.
    The window for point i ends at i (inclusive) and reaches back at most lag_window_size points.
    The result is stored in self.anom_scores.
    """
    anom_scores = {}
    values = self.time_series.values
    stdev = numpy.std(values)
    for i, (timestamp, value) in enumerate(self.time_series_items):
      # Near the start of the series there are fewer than lag_window_size earlier points,
      # so the window simply begins at index 0.
      window_start = max(0, i - self.lag_window_size)
      anom_score = self._compute_anom_score(values[window_start: i + 1], value)
      # A zero standard deviation (constant series) would divide by zero; keep the raw score then.
      anom_scores[timestamp] = anom_score / stdev if stdev else anom_score
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))

  def _compute_anom_data_decay_all(self):
    """
    Compute anomaly scores using a lagging window covering all the data points before.
    The result is stored in self.anom_scores.
    """
    anom_scores = {}
    values = self.time_series.values
    ema = utils.compute_ema(self.smoothing_factor, values)
    stdev = numpy.std(values)
    for i, (timestamp, value) in enumerate(self.time_series_items):
      # Always score on the absolute deviation so that both branches agree with
      # _compute_anom_score; only the normalisation depends on stdev being non-zero.
      deviation = abs(value - ema[i])
      anom_scores[timestamp] = deviation / stdev if stdev else deviation
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))

  def _set_scores(self):
    """
    Compute anomaly scores for the time series.
    Uses a bounded lagging window when use_lag_window is set; otherwise uses a lagging window
    covering all the data points before.
    """
    if self.use_lag_window:
      self._compute_anom_data_using_window()
    else:
      # Must be exclusive: running this unconditionally would overwrite the windowed scores.
      self._compute_anom_data_decay_all()