1%%%
2%%% Copyright 2011, Boundary
3%%%
4%%% Licensed under the Apache License, Version 2.0 (the "License");
5%%% you may not use this file except in compliance with the License.
6%%% You may obtain a copy of the License at
7%%%
8%%%     http://www.apache.org/licenses/LICENSE-2.0
9%%%
10%%% Unless required by applicable law or agreed to in writing, software
11%%% distributed under the License is distributed on an "AS IS" BASIS,
12%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13%%% See the License for the specific language governing permissions and
14%%% limitations under the License.
15%%%
16
17
18%%%-------------------------------------------------------------------
19%%% File:      folsom_sample_exdec.erl
20%%% @author    joe williams <j@boundary.com>
21%%% @doc
22%%% erlang implementation of a exponentially-decaying random sample
23%%% based on a java implementation by coda hale, which can be found at:
24%%%
25%%% https://github.com/codahale/metrics/blob/development/src/main/java/com/yammer/metrics/core/ExponentiallyDecayingSample.java
26%%%
27%%% that implementation is based on:
28%%%
29%%% http://dimacs.rutgers.edu/~graham/pubs/papers/fwddecay.pdf
30%%% @end
31%%%------------------------------------------------------------------
32
33-module(folsom_sample_exdec).
34
35-export([
36         new/2,
37         update/2,
38         get_values/1
39        ]).
40
41-define(HOURSECS, 3600).
42
43-include("folsom.hrl").
44
45new(Size, Alpha) ->
46    Now = folsom_utils:now_epoch(),
47    #exdec{start = Now, next = Now + ?HOURSECS, alpha = Alpha, size = Size}.
48
49update(#exdec{reservoir = Reservoir, alpha = Alpha, start = Start, next = Next} = Sample, Value) ->
50    Timestamp = folsom_utils:now_epoch(),
51
52    % immediately see if we need to rescale
53    {NewRes, NewStart, NewNext} = rescale(Reservoir, Timestamp, Next, Start, Alpha),
54
55    % now lets update the sample if the new value is worthy
56    update(Sample#exdec{reservoir = NewRes, next = NewNext, start = NewStart}, Value, Timestamp).
57
58get_values(#exdec{reservoir = Reservoir}) ->
59    {_, Values} = lists:unzip(ets:tab2list(Reservoir)),
60    Values.
61
62% internal api
63
64update(#exdec{reservoir = Reservoir, alpha = Alpha, start = Start, n = N, size = Size, seed = Seed} = Sample, Value, Timestamp) when N =< Size ->
65    % since N is =< Size we can just add the new value to the sample
66
67    {Rand, New_seed} = rand:uniform_s(N, Seed),
68    Priority = priority(Alpha, Timestamp, Start, Rand),
69    true = ets:insert(Reservoir, {Priority, Value}),
70
71    Sample#exdec{n = folsom_utils:get_ets_size(Reservoir), seed = New_seed};
72update(#exdec{reservoir = Reservoir, alpha = Alpha, start = Start, n = N, seed = Seed} = Sample, Value, Timestamp) ->
73    % when N is not =< Size we need to check to see if the priority of
74    % the new value is greater than the first (smallest) existing priority
75
76    {Rand, NewSeed} = rand:uniform_s(N, Seed),
77    Priority = priority(Alpha, Timestamp, Start, Rand),
78    First = ets:first(Reservoir),
79
80    update_on_priority(Sample, First, Priority, NewSeed, Value).
81
82update_on_priority(#exdec{reservoir = Reservoir} = Sample, First, Priority, NewSeed, Value) when First < Priority ->
83    true = case ets:insert_new(Reservoir, {Priority, Value}) of
84        true ->
85            % priority didnt already exist, so we created it and need to delete the first one
86            ets:delete(Reservoir, First);
87        false ->
88            % priority existed, we dont need to do anything
89            true
90    end,
91    Sample#exdec{n = folsom_utils:get_ets_size(Reservoir), seed = NewSeed};
92update_on_priority(Sample, _, _, _, _) ->
93    Sample.
94
95% gaurd against a possible bug, T should always be =< ?HOURSECS
96% also to prevent overflow issues make sure alpha is always =<
97% math:log(1.79769313486231570815e+308) / 3599 = 0.19721664709457737
98weight(Alpha, T) when T =< ?HOURSECS, Alpha =< 0.19721664709457737 ->
99    math:exp(Alpha * T).
100
101priority(Alpha, Time, Start, Rand) ->
102    weight(Alpha, Time - Start) / Rand.
103
104rescale(Reservoir, Now, Next, OldStart, Alpha) when Now >= Next ->
105    NewStart = Now + ?HOURSECS,
106    NewRes = delete_and_rescale(Reservoir, NewStart, OldStart, Alpha),
107    {NewRes, Now, NewStart};
108rescale(Reservoir, _, Next, Start, _) ->
109    {Reservoir, Start, Next}.
110
111delete_and_rescale(Reservoir, NewStart, OldStart, Alpha) ->
112    % get the existing reservoir
113    ResList = ets:tab2list(Reservoir),
114
115    % create a new ets table to use
116    NewRes = folsom_metrics_histogram_ets:new(folsom_exdec,[ordered_set, {write_concurrency, true}, public]),
117
118    % populate it with new priorities and the existing values
119    [true = ets:insert(NewRes, {recalc_priority(Priority, Alpha, NewStart, OldStart) ,Value}) || {Priority, Value} <- ResList],
120
121    % delete the old ets table
122    true = ets:delete(Reservoir),
123
124    % return the new ets table
125    NewRes.
126
127recalc_priority(Priority, Alpha, Start, OldStart) ->
128    Priority * math:exp(-Alpha * (Start - OldStart)).
129