1%%% 2%%% Copyright 2011, Boundary 3%%% 4%%% Licensed under the Apache License, Version 2.0 (the "License"); 5%%% you may not use this file except in compliance with the License. 6%%% You may obtain a copy of the License at 7%%% 8%%% http://www.apache.org/licenses/LICENSE-2.0 9%%% 10%%% Unless required by applicable law or agreed to in writing, software 11%%% distributed under the License is distributed on an "AS IS" BASIS, 12%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13%%% See the License for the specific language governing permissions and 14%%% limitations under the License. 15%%% 16 17 18%%%------------------------------------------------------------------- 19%%% File: folsom_sample_exdec.erl 20%%% @author joe williams <j@boundary.com> 21%%% @doc 22%%% erlang implementation of a exponentially-decaying random sample 23%%% based on a java implementation by coda hale, which can be found at: 24%%% 25%%% https://github.com/codahale/metrics/blob/development/src/main/java/com/yammer/metrics/core/ExponentiallyDecayingSample.java 26%%% 27%%% that implementation is based on: 28%%% 29%%% http://dimacs.rutgers.edu/~graham/pubs/papers/fwddecay.pdf 30%%% @end 31%%%------------------------------------------------------------------ 32 33-module(folsom_sample_exdec). 34 35-export([ 36 new/2, 37 update/2, 38 get_values/1 39 ]). 40 41-define(HOURSECS, 3600). 42 43-include("folsom.hrl"). 44 45new(Size, Alpha) -> 46 Now = folsom_utils:now_epoch(), 47 #exdec{start = Now, next = Now + ?HOURSECS, alpha = Alpha, size = Size}. 48 49update(#exdec{reservoir = Reservoir, alpha = Alpha, start = Start, next = Next} = Sample, Value) -> 50 Timestamp = folsom_utils:now_epoch(), 51 52 % immediately see if we need to rescale 53 {NewRes, NewStart, NewNext} = rescale(Reservoir, Timestamp, Next, Start, Alpha), 54 55 % now lets update the sample if the new value is worthy 56 update(Sample#exdec{reservoir = NewRes, next = NewNext, start = NewStart}, Value, Timestamp). 57 58get_values(#exdec{reservoir = Reservoir}) -> 59 {_, Values} = lists:unzip(ets:tab2list(Reservoir)), 60 Values. 61 62% internal api 63 64update(#exdec{reservoir = Reservoir, alpha = Alpha, start = Start, n = N, size = Size, seed = Seed} = Sample, Value, Timestamp) when N =< Size -> 65 % since N is =< Size we can just add the new value to the sample 66 67 {Rand, New_seed} = rand:uniform_s(N, Seed), 68 Priority = priority(Alpha, Timestamp, Start, Rand), 69 true = ets:insert(Reservoir, {Priority, Value}), 70 71 Sample#exdec{n = folsom_utils:get_ets_size(Reservoir), seed = New_seed}; 72update(#exdec{reservoir = Reservoir, alpha = Alpha, start = Start, n = N, seed = Seed} = Sample, Value, Timestamp) -> 73 % when N is not =< Size we need to check to see if the priority of 74 % the new value is greater than the first (smallest) existing priority 75 76 {Rand, NewSeed} = rand:uniform_s(N, Seed), 77 Priority = priority(Alpha, Timestamp, Start, Rand), 78 First = ets:first(Reservoir), 79 80 update_on_priority(Sample, First, Priority, NewSeed, Value). 81 82update_on_priority(#exdec{reservoir = Reservoir} = Sample, First, Priority, NewSeed, Value) when First < Priority -> 83 true = case ets:insert_new(Reservoir, {Priority, Value}) of 84 true -> 85 % priority didnt already exist, so we created it and need to delete the first one 86 ets:delete(Reservoir, First); 87 false -> 88 % priority existed, we dont need to do anything 89 true 90 end, 91 Sample#exdec{n = folsom_utils:get_ets_size(Reservoir), seed = NewSeed}; 92update_on_priority(Sample, _, _, _, _) -> 93 Sample. 94 95% gaurd against a possible bug, T should always be =< ?HOURSECS 96% also to prevent overflow issues make sure alpha is always =< 97% math:log(1.79769313486231570815e+308) / 3599 = 0.19721664709457737 98weight(Alpha, T) when T =< ?HOURSECS, Alpha =< 0.19721664709457737 -> 99 math:exp(Alpha * T). 100 101priority(Alpha, Time, Start, Rand) -> 102 weight(Alpha, Time - Start) / Rand. 103 104rescale(Reservoir, Now, Next, OldStart, Alpha) when Now >= Next -> 105 NewStart = Now + ?HOURSECS, 106 NewRes = delete_and_rescale(Reservoir, NewStart, OldStart, Alpha), 107 {NewRes, Now, NewStart}; 108rescale(Reservoir, _, Next, Start, _) -> 109 {Reservoir, Start, Next}. 110 111delete_and_rescale(Reservoir, NewStart, OldStart, Alpha) -> 112 % get the existing reservoir 113 ResList = ets:tab2list(Reservoir), 114 115 % create a new ets table to use 116 NewRes = folsom_metrics_histogram_ets:new(folsom_exdec,[ordered_set, {write_concurrency, true}, public]), 117 118 % populate it with new priorities and the existing values 119 [true = ets:insert(NewRes, {recalc_priority(Priority, Alpha, NewStart, OldStart) ,Value}) || {Priority, Value} <- ResList], 120 121 % delete the old ets table 122 true = ets:delete(Reservoir), 123 124 % return the new ets table 125 NewRes. 126 127recalc_priority(Priority, Alpha, Start, OldStart) -> 128 Priority * math:exp(-Alpha * (Start - OldStart)). 129