# coding=utf-8
"""
© 2014 LinkedIn Corp. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
"""

from luminol import exceptions
from luminol.algorithms.anomaly_detector_algorithms import AnomalyDetectorAlgorithm
from luminol.constants import *
from luminol.modules.time_series import TimeSeries


class DiffPercentThreshold(AnomalyDetectorAlgorithm):
    """
    Flag data points whose percentage difference from a baseline crosses a threshold.

    This algorithm assumes that time_series and baseline_time_series are perfectly aligned, meaning that:
     a) every timestamp that exists in time_series also exists in baseline_time_series
     b) lengths of both time series are same
    """

    def __init__(self, time_series, baseline_time_series, percent_threshold_upper=None, percent_threshold_lower=None):
        """
        Store the thresholds and make sure at least one of them is usable.

        :param time_series: current time series
        :param baseline_time_series: baseline time series
        :param percent_threshold_upper: If time_series is larger than baseline_time_series by more than this
          percent, then it's an anomaly.
        :param percent_threshold_lower: If the percentage difference is negative and below this value, then it's
          an anomaly. The value is compared directly against the (negative) percentage difference, so it has to be
          negative to be selective; a positive value flags every decrease.
        :raises exceptions.RequiredParametersNotPassed: if both thresholds are falsy (None or 0).
        """
        super(DiffPercentThreshold, self).__init__(self.__class__.__name__, time_series, baseline_time_series)
        self.percent_threshold_upper = percent_threshold_upper
        self.percent_threshold_lower = percent_threshold_lower
        # Truthiness is used on purpose: a threshold of 0 counts as "not provided", which is the same rule
        # _set_scores applies when deciding whether a threshold is active.
        if not self.percent_threshold_upper and not self.percent_threshold_lower:
            # Adjacent literals are concatenated by the parser; a backslash continuation inside the literal
            # would leak the next line's indentation into the message.
            raise exceptions.RequiredParametersNotPassed(
                'luminol.algorithms.anomaly_detector_algorithms.diff_percent_threshold: '
                'Either percent_threshold_upper or percent_threshold_lower needed')

    def _set_scores(self):
        """
        Compute anomaly scores for the time series and store them in self.anom_scores.

        For every point the percentage difference against the baseline is computed. The score is the magnitude
        of that difference when it crosses an active threshold, and 0.0 otherwise.
        """
        anom_scores = {}
        for i, (timestamp, value) in enumerate(self.time_series.items()):

            # NOTE(review): the baseline is looked up by enumeration position, not by timestamp. Whether
            # TimeSeries.__getitem__ expects a position or a timestamp is not visible here -- confirm.
            baseline_value = self.baseline_time_series[i]

            if baseline_value > 0:
                # 100.0 forces float arithmetic so integer inputs are not floor-divided on Python 2.
                diff_percent = 100.0 * (value - baseline_value) / baseline_value
            elif value > 0:
                # Non-positive baseline with a positive value: no ratio is computed, report a 100% increase.
                diff_percent = 100.0
            else:
                # Neither the baseline nor the value is positive: treated as no change.
                diff_percent = 0.0

            anom_scores[timestamp] = 0.0
            if self.percent_threshold_upper and diff_percent > 0 and diff_percent > self.percent_threshold_upper:
                anom_scores[timestamp] = diff_percent
            if self.percent_threshold_lower and diff_percent < 0 and diff_percent < self.percent_threshold_lower:
                # Negate so that the stored score is a positive magnitude.
                anom_scores[timestamp] = -1 * diff_percent

        # _denoise_scores is not defined in this class; presumably inherited from AnomalyDetectorAlgorithm.
        self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))