1%%%
2%%% Copyright 2013, Rodolphe Quiedeville <rodolphe@quiedeville.org>
3%%%
4%%% Licensed under the Apache License, Version 2.0 (the "License");
5%%% you may not use this file except in compliance with the License.
6%%% You may obtain a copy of the License at
7%%%
8%%%     http://www.apache.org/licenses/LICENSE-2.0
9%%%
10%%% Unless required by applicable law or agreed to in writing, software
11%%% distributed under the License is distributed on an "AS IS" BASIS,
12%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13%%% See the License for the specific language governing permissions and
14%%% limitations under the License.
15%%%
16
17%%% ====================================================================
18%%% file : bear_test.erl
19%%% @author : Rodolphe Quiedeville <rodolphe@quiedeville.org>
20%%% @doc
21%%% Unit test for functions defined in bear.erl
22%%% @end
23%%% ====================================================================
24-module(bear_test).
25
26-compile(export_all).
27
28-record(scan_result, {n=0, sumX=0, sumXX=0, sumInv=0, sumLog, max, min}).
29-record(scan_result2, {x2=0, x3=0, x4=0}).
30
31-include_lib("eunit/include/eunit.hrl").
32
33-define(PRECISION_DIGIT, 6).
34
%% get_statistics/1 applied to an empty list of values.
%% Every scalar statistic is expected to be 0.0, every percentile 0.0,
%% the histogram a single {0,0} bin and the sample count 0.
get_statistics_1_empty_test() ->
    Stats = bear:get_statistics([]),
    ZeroKeys = [min, max, arithmetic_mean, geometric_mean, harmonic_mean,
                median, variance, standard_deviation, skewness, kurtosis],
    ZeroPercentiles = [{P, 0.0} || P <- [50, 75, 90, 95, 99, 999]],
    Expected = [{Key, 0.0} || Key <- ZeroKeys]
        ++ [{percentile, ZeroPercentiles},
            {histogram, [{0,0}]},
            {n, 0}],
    lists:foreach(
      fun({Key, _} = Entry) ->
              ?assertEqual(Entry, lists:keyfind(Key, 1, Stats))
      end,
      Expected).
53
%% get_statistics/1 applied to the 50-value sample1() data set.
%% Integer-valued statistics are compared exactly; floating point
%% statistics are compared through approx/2.
get_statistics_1_regular_test() ->
    Stats = bear:get_statistics(sample1()),
    Find = fun(Key) -> lists:keyfind(Key, 1, Stats) end,
    Value = fun(Key) -> {Key, V} = Find(Key), V end,

    %% Extract every float statistic up front so that a missing key
    %% fails with a badmatch before any assertion runs.
    [Geometric, Harmonic, Variance, StandardDeviation, Kurtosis, Skewness] =
        [Value(Key) || Key <- [geometric_mean, harmonic_mean, variance,
                               standard_deviation, kurtosis, skewness]],

    ExpectedPercentile = [{50, -10},{75, 23},{90, 43},{95, 46},{99, 50},{999, 50}],
    ExpectedHistogram = [{-20,16},{11,16},{41,12},{71,6}],

    ?assertEqual({min, -49}, Find(min)),
    ?assertEqual({max, 50}, Find(max)),
    ?assertEqual({arithmetic_mean, -1.66}, Find(arithmetic_mean)),
    ?assertEqual(true, approx(4.08326, Geometric)),
    ?assertEqual(true, approx(54.255629738, Harmonic)),
    ?assertEqual({median, -10}, Find(median)),
    ?assertEqual(true, approx(921.0453061, Variance)),
    ?assertEqual(true, approx(30.348728, StandardDeviation)),
    ?assertEqual(true, approx(0.148722, Skewness)),
    ?assertEqual(true, approx(-1.2651687, Kurtosis)),
    ?assertEqual({percentile, ExpectedPercentile}, Find(percentile)),
    ?assertEqual({histogram, ExpectedHistogram}, Find(histogram)),
    ?assertEqual({n, 50}, Find(n)).
80
%% get_statistics/2 where the SECOND list of values is empty:
%% the call is expected to return 0.0.
get_statistics_2_1_test() ->
    Xs = lists:seq(1,10),
    Ys = [],
    ?assertEqual(0.0, bear:get_statistics(Xs, Ys)).
86
%% get_statistics/2 where the FIRST list of values is empty:
%% the call is expected to return 0.0.
get_statistics_3_test() ->
    Xs = [],
    Ys = lists:seq(1,10),
    ?assertEqual(0.0, bear:get_statistics(Xs, Ys)).
92
%% get_statistics/2 with two lists of different lengths (10 vs 20 values):
%% the call is expected to return 0.0.
get_statistics_4_test() ->
    Short = lists:seq(1,10),
    Long = lists:seq(1,20),
    ?assertEqual(0.0, bear:get_statistics(Short, Long)).
98
%% get_statistics/2 with two valid lists of equal length (Y = 2*X + 4):
%% checks the covariance and the three correlation coefficients.
get_statistics_5_test() ->
    Stats = bear:get_statistics(lists:seq(0,10), lists:seq(4,24,2)),
    Expected = [{covariance, 20.0}, {tau, 1.0}, {rho, 1.0}, {r, 1.0}],
    lists:foreach(
      fun({Key, _} = Entry) ->
              ?assertEqual(Entry, lists:keyfind(Key, 1, Stats))
      end,
      Expected).
107
%% scan_values/1,2: an empty list returns the accumulator as given;
%% non-empty lists produce the expected #scan_result{} sums and bounds.
scan_values_test() ->
    Seed = #scan_result{n=8},
    ?assertEqual(Seed, bear:scan_values([], Seed)),

    OneValue = #scan_result{n=1,sumX=1,sumXX=1,sumInv=1.0,sumLog=0.0,max=1,min=1},
    ?assertEqual(OneValue, bear:scan_values([1])),

    FourValues = #scan_result{n=4,sumX=10,sumXX=30,
                              sumInv=2.083333333333333,
                              sumLog=3.1780538303479453,
                              max=4,min=1},
    ?assertEqual(FourValues, bear:scan_values([1,3,2,4])).
113
%% scan_values2/2,3: an empty list returns the accumulator as given;
%% a non-empty list yields the expected #scan_result2{} fields.
scan_values2_test() ->
    Untouched = #scan_result{n=8},
    ?assertEqual(Untouched, bear:scan_values2([], 3, Untouched)),

    Expected = #scan_result2{x2=6.6875,x3=-13.359375,x4=28.07421875},
    Scan = #scan_result{n=8,sumX=42},
    ?assertEqual(Expected, bear:scan_values2([4,3,5], Scan)).
117
%% revsort/1 returns its input sorted in descending order.
revsort_test() ->
    Empty = bear:revsort([]),
    ?assertEqual([], Empty),
    Sorted = bear:revsort([3,2,4]),
    ?assertEqual([4,3,2], Sorted).
121
%% arithmetic_mean/1 for a scan with n=4 and sumX=40 is 10.0.
arithmetic_mean_test() ->
    Scan = #scan_result{n=4, sumX=40},
    ?assertEqual(10.0, bear:arithmetic_mean(Scan)).
124
%% geometric_mean/1 for a scan with n=4 and sumLog=13.
geometric_mean_test() ->
    Scan = #scan_result{n=4, sumLog=13},
    ?assertEqual(25.790339917193062, bear:geometric_mean(Scan)).
127
%% harmonic_mean/1: a zero sum of inverses yields 0, otherwise n/sumInv
%% (100/10 = 10.0 here).
harmonic_mean_test() ->
    ZeroInv = #scan_result{n=100, sumInv=0},
    ?assertEqual(0, bear:harmonic_mean(ZeroInv)),
    TenInv = #scan_result{n=100, sumInv=10},
    ?assertEqual(10.0, bear:harmonic_mean(TenInv)).
131
%% percentile/3 on the sorted list 1..5: the 50th percentile is 3 and
%% the 95th percentile is 5.
percentile_test() ->
    Sorted = [1,2,3,4,5],
    Scan = #scan_result{n=5},
    ?assertEqual(3, bear:percentile(Sorted, Scan, 0.5)),
    ?assertEqual(5, bear:percentile(Sorted, Scan, 0.95)).
135
%% variance/2 with n=7 and x2=42 is expected to be 7.0.
variance_test() ->
    Scan = #scan_result{n=7},
    Scan2 = #scan_result2{x2=42},
    ?assertEqual(7.0, bear:variance(Scan, Scan2)).
138
%% std_deviation/2 with n=10 and x2=81 is expected to be 3.0.
std_deviation_test() ->
    Scan = #scan_result{n=10},
    Scan2 = #scan_result2{x2=81},
    ?assertEqual(3.0, bear:std_deviation(Scan, Scan2)).
141
%% skewness/2: expected to be 0.0 when x2 is zero, and 3.0 for
%% n=10, x2=81, x3=810.
skewness_test() ->
    Scan = #scan_result{n=10},
    NoSpread = #scan_result2{x2=0,x3=81},
    ?assertEqual(0.0, bear:skewness(Scan, NoSpread)),
    Spread = #scan_result2{x2=81,x3=810},
    ?assertEqual(3.0, bear:skewness(Scan, Spread)).
145
%% kurtosis/2: expected to be 0.0 when x2 is zero, and -2.0 for
%% n=10, x2=81, x4=810.
kurtosis_test() ->
    Scan = #scan_result{n=10},
    NoSpread = #scan_result2{x2=0,x4=81},
    ?assertEqual(0.0, bear:kurtosis(Scan, NoSpread)),
    Spread = #scan_result2{x2=81,x4=810},
    ?assertEqual(-2.0, bear:kurtosis(Scan, Spread)).
149
%% update_bin/3 starting from an empty dict: after the call the dict is
%% expected to hold the value 1 under key 4.
update_bin_1_test() ->
    Updated = bear:update_bin(4, [4], dict:new()),
    ?assertEqual(1, dict:fetch(4, Updated)).
155
%% get_covariance/2 is expected to return 0.0 for unusable input.
get_covariance_exceptions_test() ->
    Cases = [
             %% first list is empty
             {[], [2,1,2,3,4,5,6]},
             %% second list is empty
             {[1,2,3,4,5,6], []},
             %% lists of different lengths
             {[1,2,3,4,5,6], [1,2,3,4,5,6,7]}
            ],
    lists:foreach(
      fun({Xs, Ys}) ->
              ?assertEqual(0.0, bear:get_covariance(Xs, Ys))
      end,
      Cases).
163
%% get_covariance/2 on the two sample data sets.
%% The expected value differs from what R computes, because R uses an
%% unbiased estimate; see
%% http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Covariance
get_covariance_regular_test() ->
    Covariance = bear:get_covariance(sample1(), sample2()),
    ?assertEqual(true, approx(170.813599, Covariance)).
169
%% ranks_of/1 assigns rank 1.0 to the largest value of the input list.
ranks_of_test() ->
    Ranks = bear:ranks_of([3,4,15,6]),
    ?assertEqual([4.0,3.0,1.0,2.0], Ranks).
172
%% get_pearson_correlation/2 edge cases: an empty argument or lists of
%% different lengths are expected to give 0.0; perfectly linearly related
%% sequences are expected to give 1.0.
get_pearson_correlation_exceptions_test() ->
    Cases = [{0.0, [], 42},
             {0.0, 42, []},
             {0.0, lists:seq(1,10), lists:seq(1,11)},
             {1.0, lists:seq(1,10), lists:seq(1,10)},
             {1.0, lists:seq(0,10), lists:seq(5,15)}],
    lists:foreach(
      fun({Expected, Xs, Ys}) ->
              ?assertEqual(Expected, bear:get_pearson_correlation(Xs, Ys))
      end,
      Cases).
179
%% get_pearson_correlation/2 on the two sample data sets.
%% The target value was calculated with R.
get_pearson_correlation_regular_test() ->
    Pearson = bear:get_pearson_correlation(sample1(), sample2()),
    ?assertEqual(true, approx(0.2068785, Pearson)).
183
%% get_pearson_correlation/2 for two series that do not correlate
%% (B is the square of A): the expected result is exactly 0.0.
get_pearson_correlation_nullresult_test() ->
    Xs = [-1,-0.5,0,0.5,1],
    Ys = [1,0.25,0,0.25,1],
    Pearson = bear:get_pearson_correlation(Xs, Ys),
    ?assertEqual(0.0, Pearson).
189
%% round_bin/1 and round_bin/2 with a handful of value/base pairs.
round_bin_test() ->
    ?assertEqual(10, bear:round_bin(10)),
    TwoArgCases = [{10, 10, 5},
                   {42, 15, 42},
                   {45, 42, 15}],
    lists:foreach(
      fun({Expected, Value, Base}) ->
              ?assertEqual(Expected, bear:round_bin(Value, Base))
      end,
      TwoArgCases).
195
%% get_bin_width/2 for integer and float arguments.
get_bin_width_test() ->
    IntWidth = bear:get_bin_width(0, 10),
    ?assertEqual(1, IntWidth),
    FloatWidth = bear:get_bin_width(10.0, 4.0),
    ?assertEqual(22, FloatWidth).
199
%% get_bin_count/3 for integer and float bounds.
get_bin_count_test() ->
    IntCount = bear:get_bin_count(9, 15, 3),
    ?assertEqual(3, IntCount),
    FloatCount = bear:get_bin_count(10.2, 20.2, 4),
    ?assertEqual(4, FloatCount).
203
%% get_kendall_correlation/2 is expected to return 0.0 when either list
%% is empty or when the lists have different lengths.
get_kendall_correlation_exceptions_test() ->
    Cases = [{[], []},
             {[], [1,2,3,4,5,6,7]},
             {[1,2,3,4,5,6,7], []},
             {lists:seq(1,10), lists:seq(1,11)}],
    lists:foreach(
      fun({Xs, Ys}) ->
              ?assertEqual(0.0, bear:get_kendall_correlation(Xs, Ys))
      end,
      Cases).
209
%% get_kendall_correlation/2 on the two sorted sample data sets.
get_kendall_correlation_regular_test() ->
    Xs = sample1(order),
    Ys = sample2(order),
    ?assertEqual(true, approx(0.9787755, bear:get_kendall_correlation(Xs, Ys))).
213
%% kendall_correlation/2 (called directly, not through the get_ wrapper)
%% on the two sorted sample data sets.
kendall_correlation_test() ->
    Xs = sample1(order),
    Ys = sample2(order),
    ?assertEqual(true, approx(0.9787755, bear:kendall_correlation(Xs, Ys))).
217
%% get_spearman_correlation/2 is expected to return 0.0 when either list
%% is empty or when the lists have different lengths.
get_spearman_correlation_exceptions_test() ->
    Cases = [{[], []},
             {[], [1,2,3,4,5,6,7]},
             {[1,2,3,4,5,6,7], []},
             {lists:seq(1,10), lists:seq(1,11)}],
    lists:foreach(
      fun({Xs, Ys}) ->
              ?assertEqual(0.0, bear:get_spearman_correlation(Xs, Ys))
      end,
      Cases).
223
%% get_spearman_correlation/2 on the two sorted sample data sets.
get_spearman_correlation_regular_test() ->
    Spearman = bear:get_spearman_correlation(sample1(order), sample2(order)),
    ?assertEqual(true, approx(0.997888, Spearman)).
226
%% math_log/1: zero (integer or float) is expected to map to 1 / 1.0
%% instead of raising; any other value to its natural logarithm.
math_log_test() ->
    IntZero = bear:math_log(0),
    ?assertEqual(1, IntZero),
    FloatZero = bear:math_log(0.0),
    ?assertEqual(1.0, FloatZero),
    Log42 = bear:math_log(42),
    ?assertEqual(true, approx(3.737669618283368, Log42)).
231
%% inverse/1: zero (integer or float) is expected to be returned as is;
%% any other value as its reciprocal.
inverse_test() ->
    IntZero = bear:inverse(0),
    ?assertEqual(0, IntZero),
    FloatZero = bear:inverse(0.0),
    ?assertEqual(0.0, FloatZero),
    Half = bear:inverse(2),
    ?assertEqual(0.5, Half).
236
%% get_hist_bins/4 with arguments (1, 4, 5, 10) is expected to yield
%% the single bin [4].
get_hist_bins_test() ->
    Bins = bear:get_hist_bins(1, 4, 5, 10),
    ?assertEqual([4], Bins).
239
%% tied_ordered_ranking/3 with two empty lists is expected to return the
%% third argument reversed.
tied_ordered_ranking_test() ->
    Ranking = bear:tied_ordered_ranking([], [], [1,2,3]),
    ?assertEqual([3,2,1], Ranking).
242
%% kendall_right_of/2 with an empty first list is expected to return the
%% second argument reversed.
kendall_right_off_test() ->
    Reversed = bear:kendall_right_of([], "123456"),
    ?assertEqual("654321", Reversed).
246
%% tied_add_prev/2: four pending ranks [1,2,3,4] with value 5 are
%% expected to be prepended to the work list as four {2.5,5} entries.
tied_add_prev_test() ->
    Work = [{2, 3}],
    Pending = {[1,2,3,4], 5},
    Expected = [{2.5,5},{2.5,5},{2.5,5},{2.5,5},{2,3}],
    ?assertEqual(Expected, bear:tied_add_prev(Work, Pending)).
249
%% tied_rank_worker/3 with an empty and a non-empty first argument.
tied_rank_worker_test() ->
    Work = [{2.0,5}],
    Pending = {[1,2,3], 5},

    FromEmpty = bear:tied_rank_worker([], Work, Pending),
    ?assertEqual([{2.0,5},{2.0,5},{2.0,5},{2.0,5}], FromEmpty),

    FromTwo = bear:tied_rank_worker([{2.0,5},{2.0,5}], Work, Pending),
    ?assertEqual([{2.0,5},{2.0,5},{2.0,5},{2.0,5},{2.0,5},{2.0,5}], FromTwo).
254
%% perc/2 with integer and float percentile arguments.
perc_test() ->
    Cases = [{14, 36, 40},
             {5, 900, 5},
             {5, 0.9, 5}],
    lists:foreach(
      fun({Expected, P, N}) ->
              ?assertEqual(Expected, bear:perc(P, N))
      end,
      Cases).
259
%% get_statistics_subset/2 with only two values and no requested items
%% ("not enough values" case) is expected to return [].
get_statistics_subset_nev_test() ->
    Subset = bear:get_statistics_subset([1,2], []),
    ?assertEqual([], Subset).
263
%% get_statistics_subset/2 asked for max and min of sample1(): the result
%% is expected to list the items in the order they were requested.
get_statistics_subset_regular_test() ->
    Subset = bear:get_statistics_subset(sample1(), [max,min]),
    ?assertEqual([{max, 50},{min, -49}], Subset).
267
%% Each statistic of the full get_statistics/1 result must be reproducible
%% by requesting it alone via get_statistics_subset/2 (see match_values/1).
subset_test() ->
    match_values(bear:get_statistics(test_values())).
271
%% Requesting every statistic at once via get_statistics_subset/2 must
%% give the same result as get_statistics/1 (see match_values2/1).
full_subset_test() ->
    match_values2(bear:get_statistics(test_values())).
275
%% Make sure things don't blow up when the input mixes positive and
%% negative values. Uses ?assertEqual (like the other tests in this
%% module) so that a failure reports expected and actual values instead
%% of a bare badmatch.
negative_test() ->
    Values = [1,-1,-2,3,3,4,5,6,7],
    ?assertEqual([{min, -2}], bear:get_statistics_subset(Values, [min])).
280
%% Make sure things don't blow up when every input value is negative.
%% Uses ?assertEqual (like the other tests in this module) so that a
%% failure reports expected and actual values instead of a bare badmatch.
negative2_test() ->
    Values = [-1,-1,-2,-2,-3,-5,-6,-10],
    ?assertEqual([{min, -10}], bear:get_statistics_subset(Values, [min])).
285
%% For every {Key, Value} statistic in Stats, request that single item
%% for test_values() through get_statistics_subset/2 and check that the
%% one-element result equals the statistic.
%% Uses ?assertEqual rather than a bare match so that a mismatch reports
%% both the expected and the actual value. Returns ok.
match_values(Stats) ->
    lists:foreach(
      fun(Stat) ->
              Item = mk_item(Stat),
              ?assertEqual([Stat],
                           bear:get_statistics_subset(test_values(), [Item]))
      end,
      Stats).
292
%% Turn one {Key, Value} statistic into the item specification used to
%% request it from get_statistics_subset/2.
%% Percentiles keep their tag and carry the list of requested thresholds;
%% every other statistic is requested by its bare key.
mk_item({percentile, Pairs}) ->
    Thresholds = [Threshold || {Threshold, _Value} <- Pairs],
    {percentile, Thresholds};
mk_item({Key, _Value}) ->
    Key.
297
%% Request all statistics of Stats at once for test_values() through
%% get_statistics_subset/2 and check that the result equals Stats.
%% Uses ?assertEqual rather than a bare match so that a mismatch reports
%% both the expected and the actual value. Returns ok.
match_values2(Stats) ->
    Items = lists:map(fun mk_item/1, Stats),
    ?assertEqual(Stats, bear:get_statistics_subset(test_values(), Items)),
    ok.
302
%% Fixed, ascending data set of 126 integers in the range 1..9.
%% Described as {Value, Occurrences} pairs and expanded in order.
test_values() ->
    Runs = [{1, 7}, {2, 7},
            {3, 14}, {4, 14}, {5, 14},
            {6, 21}, {7, 21}, {8, 21},
            {9, 7}],
    lists:append([lists:duplicate(Count, Value) || {Value, Count} <- Runs]).
313
%% Fixed data set of 126 integers, all negative, in the range -9..-1,
%% listed from -1 down to -9.
%% Described as {Value, Occurrences} pairs and expanded in order.
negative_values() ->
    Runs = [{-1, 7}, {-2, 7},
            {-3, 14}, {-4, 14}, {-5, 14},
            {-6, 21}, {-7, 21}, {-8, 21},
            {-9, 7}],
    lists:flatmap(fun({Value, Count}) -> lists:duplicate(Count, Value) end,
                  Runs).
325
%% Return true when Low =< Value =< High (both bounds inclusive),
%% false otherwise.
between(Value, Low, High) ->
    Low =< Value andalso Value =< High.
328
%% Compare Value against Target with a tolerance of
%% 10^-?PRECISION_DIGIT (bounds excluded).
%% Returns true when Value lies strictly inside the tolerance window.
%% Otherwise it returns Value itself rather than false, so that
%% ?assertEqual(true, approx(T, V)) shows the offending value on failure.
approx(Target, Value) ->
    Epsilon = math:pow(10, - ?PRECISION_DIGIT),
    Low = Target - Epsilon,
    High = Target + Epsilon,
    if
        Low < Value, Value < High -> true;
        true -> Value
    end.
336
%% Sanity check on the fixtures: both sample data sets, sorted or not,
%% must contain exactly 50 values.
check_sample_test() ->
    Samples = [sample1(), sample1(order), sample2(), sample2(order)],
    lists:foreach(
      fun(Sample) ->
              ?assertEqual(50, length(Sample))
      end,
      Samples).
342
%% sample1(order): the sample1/0 data set sorted in ascending order.
sample1(order) ->
    Unsorted = sample1(),
    lists:sort(Unsorted).
345
%% sample2(order): the sample2/0 data set sorted in ascending order.
sample2(order) ->
    Unsorted = sample2(),
    lists:sort(Unsorted).
348
%% First sample data set: 50 integers.
%% Data from file bear/samples/data.csv, first column (X).
sample1() ->
    lists:append(
      [[-16, -18, -47,  22, -18,  36,  25,  49, -24,  15],
       [ 36, -10, -21,  43, -35,   1, -24,  10,  33, -21],
       [-18, -36, -36, -43, -37, -10,  23,  50,  31, -49],
       [ 43,  46,  22, -43,  12, -47,  15, -14,   6, -31],
       [ 46,  -8,   0, -46, -16, -22,   6,  10,  38, -11]]).
353
%% Second sample data set: 50 integers.
%% Data from file bear/samples/data.csv, second column (Y).
sample2() ->
    lists:append(
      [[ 33,  20, -35,  16, -19,   8,  25,   3,   4,  10],
       [ 36, -20, -41,  43,  28,  39, -30,   3, -47, -23],
       [ 17,  -6, -50,  16, -26, -49,   8, -31,  24,  16],
       [ 32,  27, -19, -32, -17,   1, -37,  25, -50, -32],
       [-42, -22,  25,  18, -34, -37,   7, -13,  16,  10]]).
358