%%%
%%% Copyright 2013, Rodolphe Quiedeville <rodolphe@quiedeville.org>
%%%
%%% Licensed under the Apache License, Version 2.0 (the "License");
%%% you may not use this file except in compliance with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%%

%%% ====================================================================
%%% file : bear_test.erl
%%% @author : Rodolphe Quiedeville <rodolphe@quiedeville.org>
%%% @doc
%%% Unit test for functions defined in bear.erl
%%% @end
%%% ====================================================================
-module(bear_test).

%% Every function of this module is exported, helpers included
%% (sample1/0, sample2/0, approx/2, between/3, ...).
-compile(export_all).

%% Records used to build the arguments and the expected results of the
%% bear:* calls made by the tests.
%% NOTE(review): presumably these mirror the record definitions found in
%% bear.erl -- confirm that both stay in sync.
-record(scan_result, {n=0, sumX=0, sumXX=0, sumInv=0, sumLog, max, min}).
-record(scan_result2, {x2=0, x3=0, x4=0}).

-include_lib("eunit/include/eunit.hrl").

%% Number of decimal digits used by approx/2: a value is accepted when it
%% lies strictly within 10^-PRECISION_DIGIT of the target.
-define(PRECISION_DIGIT, 6).
%% get_statistics/1 called with an empty list of values.
%% Each {Key, Value} pair listed below must be present, exactly as
%% written, in the result returned by bear.
get_statistics_1_empty_test() ->
    Result = bear:get_statistics([]),
    ZeroPercentiles = [{50, 0.0},{75, 0.0},{90, 0.0},{95, 0.0},{99, 0.0},{999, 0.0}],
    Expected = [{min, 0.0},
                {max, 0.0},
                {arithmetic_mean, 0.0},
                {geometric_mean, 0.0},
                {harmonic_mean, 0.0},
                {median, 0.0},
                {variance, 0.0},
                {standard_deviation, 0.0},
                {skewness, 0.0},
                {kurtosis, 0.0},
                {percentile, ZeroPercentiles},
                {histogram, [{0,0}]},
                {n, 0}],
    lists:foreach(
      fun({Key, _} = Entry) ->
              ?assertEqual(Entry, lists:keyfind(Key, 1, Result))
      end,
      Expected).
%% get_statistics/1 called with the non-empty data set sample1/0.
%% Statistics listed in Exact are compared as {Key, Value} tuples; the
%% ones listed in Approximate are compared to their target with approx/2.
get_statistics_1_regular_test() ->
    Result = bear:get_statistics(sample1()),
    Exact = [{min, -49},
             {max, 50},
             {arithmetic_mean, -1.66},
             {median, -10},
             {percentile, [{50, -10},{75, 23},{90, 43},{95, 46},{99, 50},{999, 50}]},
             {histogram, [{-20,16},{11,16},{41,12},{71,6}]},
             {n, 50}],
    Approximate = [{geometric_mean, 4.08326},
                   {harmonic_mean, 54.255629738},
                   {variance, 921.0453061},
                   {standard_deviation, 30.348728},
                   {skewness, 0.148722},
                   {kurtosis, -1.2651687}],
    lists:foreach(
      fun({Key, _} = Entry) ->
              ?assertEqual(Entry, lists:keyfind(Key, 1, Result))
      end,
      Exact),
    lists:foreach(
      fun({Key, Target}) ->
              %% the key must be present, otherwise this match fails
              {Key, Actual} = lists:keyfind(Key, 1, Result),
              %% approx/2 returns the offending value when it is out of
              %% range, so a failure report shows that value
              ?assertEqual(true, approx(Target, Actual))
      end,
      Approximate).

%% get_statistics/2
%% The second list of values is empty
get_statistics_2_1_test() ->
    ?assertEqual(0.0, bear:get_statistics(lists:seq(1,10), [])).

%% get_statistics/2
%% The first list of values is empty
get_statistics_3_test() ->
    ?assertEqual(0.0, bear:get_statistics([], lists:seq(1,10))).
%% get_statistics/2
%% The two lists of values have different lengths (10 and 20)
get_statistics_4_test() ->
    Ten = lists:seq(1,10),
    Twenty = lists:seq(1,20),
    ?assertEqual(0.0, bear:get_statistics(Ten, Twenty)).

%% get_statistics/2
%% Both lists are valid and have the same length
get_statistics_5_test() ->
    Result = bear:get_statistics(lists:seq(0,10), lists:seq(4,24,2)),
    Expected = [{covariance, 20.0}, {tau, 1.0}, {rho, 1.0}, {r, 1.0}],
    lists:foreach(
      fun({Key, _} = Entry) ->
              ?assertEqual(Entry, lists:keyfind(Key, 1, Result))
      end,
      Expected).

%% scan_values/2 with an empty list must give back the accumulator it
%% received; scan_values/1 is checked on one and on four values.
scan_values_test() ->
    Seed = #scan_result{n=8},
    ?assertEqual(Seed, bear:scan_values([], Seed)),
    OneValue = #scan_result{n=1,sumX=1,sumXX=1,sumInv=1.0,sumLog=0.0,max=1,min=1},
    ?assertEqual(OneValue, bear:scan_values([1])),
    FourValues = #scan_result{n=4,
                              sumX=10,
                              sumXX=30,
                              sumInv=2.083333333333333,
                              sumLog=3.1780538303479453,
                              max=4,
                              min=1},
    ?assertEqual(FourValues, bear:scan_values([1,3,2,4])).

%% scan_values2/3 with an empty list must give back the accumulator it
%% received; scan_values2/2 is checked on three values.
scan_values2_test() ->
    Seed = #scan_result{n=8},
    ?assertEqual(Seed, bear:scan_values2([], 3, Seed)),
    Scan = #scan_result{n=8,sumX=42},
    Moments = #scan_result2{x2=6.6875,x3=-13.359375,x4=28.07421875},
    ?assertEqual(Moments, bear:scan_values2([4,3,5], Scan)).

%% revsort/1 on an empty list and on three unsorted integers
revsort_test() ->
    Empty = bear:revsort([]),
    ?assertEqual([], Empty),
    Reversed = bear:revsort([3,2,4]),
    ?assertEqual([4,3,2], Reversed).

arithmetic_mean_test() ->
    Scan = #scan_result{n=4, sumX=40},
    ?assertEqual(10.0, bear:arithmetic_mean(Scan)).

geometric_mean_test() ->
    Scan = #scan_result{n=4, sumLog=13},
    ?assertEqual(25.790339917193062, bear:geometric_mean(Scan)).

%% harmonic_mean/1 with a null and a non null sum of inverses
harmonic_mean_test() ->
    NullSum = #scan_result{n=100, sumInv=0},
    ?assertEqual(0, bear:harmonic_mean(NullSum)),
    TenSum = #scan_result{n=100, sumInv=10},
    ?assertEqual(10.0, bear:harmonic_mean(TenSum)).

%% percentile/3 on five values, for the 0.5 and 0.95 percentiles
percentile_test() ->
    Values = [1,2,3,4,5],
    Scan = #scan_result{n=5},
    ?assertEqual(3, bear:percentile(Values, Scan, 0.5)),
    ?assertEqual(5, bear:percentile(Values, Scan, 0.95)).
variance_test() ->
    Scan = #scan_result{n=7},
    Moments = #scan_result2{x2=42},
    ?assertEqual(7.0, bear:variance(Scan, Moments)).

std_deviation_test() ->
    Scan = #scan_result{n=10},
    Moments = #scan_result2{x2=81},
    ?assertEqual(3.0, bear:std_deviation(Scan, Moments)).

%% skewness/2 with x2 equal to zero, then with a non null x2
skewness_test() ->
    Scan = #scan_result{n=10},
    ?assertEqual(0.0, bear:skewness(Scan, #scan_result2{x2=0,x3=81})),
    ?assertEqual(3.0, bear:skewness(Scan, #scan_result2{x2=81,x3=810})).

%% kurtosis/2 with x2 equal to zero, then with a non null x2
kurtosis_test() ->
    Scan = #scan_result{n=10},
    ?assertEqual(0.0, bear:kurtosis(Scan, #scan_result2{x2=0,x4=81})),
    ?assertEqual(-2.0, bear:kurtosis(Scan, #scan_result2{x2=81,x4=810})).

%% update_bin/3 starting from an empty dict
update_bin_1_test() ->
    Updated = bear:update_bin(4, [4], dict:new()),
    ?assertEqual(1, dict:fetch(4, Updated)).

%% get_covariance/2 must return 0.0 when the first list is empty, when
%% the second list is empty and when the lists have different lengths.
get_covariance_exceptions_test() ->
    Cases = [{[], [2,1,2,3,4,5,6]},
             {[1,2,3,4,5,6], []},
             {[1,2,3,4,5,6], [1,2,3,4,5,6,7]}],
    lists:foreach(
      fun({Xs, Ys}) ->
              ?assertEqual(0.0, bear:get_covariance(Xs, Ys))
      end,
      Cases).

%% Usual case
%% The result differs from the one computed by R, R uses an unbiased estimate
%% http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Covariance
get_covariance_regular_test() ->
    Covariance = bear:get_covariance(sample1(), sample2()),
    ?assertEqual(true, approx(170.813599, Covariance)).

ranks_of_test() ->
    Ranks = bear:ranks_of([3,4,15,6]),
    ?assertEqual([4.0,3.0,1.0,2.0], Ranks).

%% get_pearson_correlation/2 on invalid arguments (0.0 expected) and on
%% two pairs of sequences for which 1.0 is expected.
get_pearson_correlation_exceptions_test() ->
    Cases = [{0.0, [], 42},
             {0.0, 42, []},
             {0.0, lists:seq(1,10), lists:seq(1,11)},
             {1.0, lists:seq(1,10), lists:seq(1,10)},
             {1.0, lists:seq(0,10), lists:seq(5,15)}],
    lists:foreach(
      fun({Expected, Xs, Ys}) ->
              ?assertEqual(Expected, bear:get_pearson_correlation(Xs, Ys))
      end,
      Cases).
%% The target value was calculated with R
get_pearson_correlation_regular_test() ->
    Correlation = bear:get_pearson_correlation(sample1(), sample2()),
    ?assertEqual(true, approx(0.2068785, Correlation)).

%% The two series do not correlate
get_pearson_correlation_nullresult_test() ->
    ?assertEqual(0.0,
                 bear:get_pearson_correlation([-1,-0.5,0,0.5,1],
                                              [1,0.25,0,0.25,1])).

%% round_bin/1, then round_bin/2 on three {Expected, Arg1, Arg2} cases
round_bin_test() ->
    ?assertEqual(10, bear:round_bin(10)),
    lists:foreach(
      fun({Expected, A, B}) ->
              ?assertEqual(Expected, bear:round_bin(A, B))
      end,
      [{10, 10, 5},
       {42, 15, 42},
       {45, 42, 15}]).

get_bin_width_test() ->
    Narrow = bear:get_bin_width(0, 10),
    ?assertEqual(1, Narrow),
    Wide = bear:get_bin_width(10.0, 4.0),
    ?assertEqual(22, Wide).

get_bin_count_test() ->
    FromIntegers = bear:get_bin_count(9, 15, 3),
    ?assertEqual(3, FromIntegers),
    FromFloats = bear:get_bin_count(10.2, 20.2, 4),
    ?assertEqual(4, FromFloats).

%% get_kendall_correlation/2 must return 0.0 for empty lists and for
%% lists of different lengths.
get_kendall_correlation_exceptions_test()->
    Cases = [{[], []},
             {[], [1,2,3,4,5,6,7]},
             {[1,2,3,4,5,6,7], []},
             {lists:seq(1,10), lists:seq(1,11)}],
    lists:foreach(
      fun({Xs, Ys}) ->
              ?assertEqual(0.0, bear:get_kendall_correlation(Xs, Ys))
      end,
      Cases).

%% get_kendall_correlation/2 on the two sorted samples
get_kendall_correlation_regular_test()->
    ?assertEqual(true,
                 approx(0.9787755,
                        bear:get_kendall_correlation(sample1(order), sample2(order)))).

%% kendall_correlation/2 on the two sorted samples
kendall_correlation_test()->
    ?assertEqual(true,
                 approx(0.9787755,
                        bear:kendall_correlation(sample1(order), sample2(order)))).

%% get_spearman_correlation/2 must return 0.0 for empty lists and for
%% lists of different lengths.
get_spearman_correlation_exceptions_test()->
    Cases = [{[], []},
             {[], [1,2,3,4,5,6,7]},
             {[1,2,3,4,5,6,7], []},
             {lists:seq(1,10), lists:seq(1,11)}],
    lists:foreach(
      fun({Xs, Ys}) ->
              ?assertEqual(0.0, bear:get_spearman_correlation(Xs, Ys))
      end,
      Cases).
%% get_spearman_correlation/2 on the two sorted samples
get_spearman_correlation_regular_test()->
    Rho = bear:get_spearman_correlation(sample1(order), sample2(order)),
    ?assertEqual(true, approx(0.997888, Rho)).

%% math_log/1 on integer zero, float zero and a positive integer
math_log_test() ->
    ?assertEqual(1, bear:math_log(0)),
    ?assertEqual(1.0, bear:math_log(0.0)),
    Log42 = bear:math_log(42),
    ?assertEqual(true, approx(3.737669618283368, Log42)).

%% inverse/1 on integer zero, float zero and a non null integer
inverse_test() ->
    lists:foreach(
      fun({Expected, Value}) ->
              ?assertEqual(Expected, bear:inverse(Value))
      end,
      [{0, 0},
       {0.0, 0.0},
       {0.5, 2}]).

get_hist_bins_test() ->
    Bins = bear:get_hist_bins(1, 4, 5, 10),
    ?assertEqual([4], Bins).

tied_ordered_ranking_test() ->
    Ranking = bear:tied_ordered_ranking([], [], [1,2,3]),
    ?assertEqual([3,2,1], Ranking).

%% kendall_right_of/2 with an empty first argument
kendall_right_off_test() ->
    Result = bear:kendall_right_of([],"123456"),
    ?assertEqual("654321", Result).

tied_add_prev_test() ->
    Expected = lists:duplicate(4, {2.5,5}) ++ [{2,3}],
    ?assertEqual(Expected, bear:tied_add_prev([{2, 3}], {[1,2,3,4], 5})).

%% tied_rank_worker/3 with an empty and with a two-element first argument
tied_rank_worker_test() ->
    Entry = {2.0,5},
    Last = {[1,2,3], 5},
    ?assertEqual(lists:duplicate(4, Entry),
                 bear:tied_rank_worker([], [Entry], Last)),
    ?assertEqual(lists:duplicate(6, Entry),
                 bear:tied_rank_worker([Entry, Entry], [Entry], Last)).

%% perc/2 on three {Expected, Arg1, Arg2} cases
perc_test() ->
    lists:foreach(
      fun({Expected, A, B}) ->
              ?assertEqual(Expected, bear:perc(A, B))
      end,
      [{14, 36, 40},
       {5, 900, 5},
       {5, 0.9, 5}]).

%% Not enough values case
get_statistics_subset_nev_test() ->
    Subset = bear:get_statistics_subset([1,2], []),
    ?assertEqual([], Subset).

%% Regular case
get_statistics_subset_regular_test() ->
    Subset = bear:get_statistics_subset(sample1(), [max,min]),
    ?assertEqual([{max, 50},{min, -49}], Subset).

%% Every statistic of test_values/0 is requested one at a time, see
%% match_values/1.
subset_test() ->
    match_values(bear:get_statistics(test_values())).

%% Every statistic of test_values/0 is requested in a single call, see
%% match_values2/1.
full_subset_test() ->
    match_values2(bear:get_statistics(test_values())).
%% make sure things don't blow up with a negative value
negative_test() ->
    [{min, -2}] = bear:get_statistics_subset([1,-1,-2,3,3,4,5,6,7], [min]).

%% make sure things don't blow up when every value is negative
negative2_test() ->
    [{min, -10}] = bear:get_statistics_subset([-1,-1,-2,-2,-3,-5,-6,-10], [min]).

%% For each statistic of the list, request that single item with
%% get_statistics_subset/2 on test_values/0 and require the answer to be
%% exactly that statistic. Returns ok.
match_values(Stats) ->
    lists:foreach(
      fun(Stat) ->
              Subset = bear:get_statistics_subset(test_values(), [mk_item(Stat)]),
              Subset = [Stat]
      end,
      Stats).

%% Turn a {Key, Value} statistic into the item used to request it:
%% the key alone, except for percentile where the list of requested
%% percentiles is kept.
mk_item({percentile, Pairs}) ->
    {percentile, [element(1, Pair) || {_, _} = Pair <- Pairs]};
mk_item({Key, _Value}) ->
    Key.

%% Request all the statistics in one call to get_statistics_subset/2 on
%% test_values/0 and require the answer to be the given list.
match_values2(Stats) ->
    Stats = bear:get_statistics_subset(test_values(), lists:map(fun mk_item/1, Stats)),
    ok.

%% Values 1 to 9 in increasing order, each one repeated 7, 14 or 21 times
test_values() ->
    Runs = [{1, 7}, {2, 7},
            {3, 14}, {4, 14}, {5, 14},
            {6, 21}, {7, 21}, {8, 21},
            {9, 7}],
    lists:append([lists:duplicate(Count, Value) || {Value, Count} <- Runs]).

%% All values are negative
%% Values -1 down to -9, each one repeated 7, 14 or 21 times
negative_values() ->
    Runs = [{-1, 7}, {-2, 7},
            {-3, 14}, {-4, 14}, {-5, 14},
            {-6, 21}, {-7, 21}, {-8, 21},
            {-9, 7}],
    lists:append([lists:duplicate(Count, Value) || {Value, Count} <- Runs]).

%% true when Low =< Value =< High, false otherwise
between(Value, Low, High) when Value >= Low, Value =< High ->
    true;
between(_Value, _Low, _High) ->
    false.

%% Return true when Value lies strictly within 10^-PRECISION_DIGIT of
%% Target. Otherwise Value itself is returned, so that
%% ?assertEqual(true, approx(T, V)) reports the offending value.
approx(Target, Value) ->
    High = Target + math:pow(10, - ?PRECISION_DIGIT),
    Low = Target - math:pow(10, - ?PRECISION_DIGIT),
    if
        Value > Low, Value < High -> true;
        true -> Value
    end.
%% Both samples, sorted or not, must hold 50 values
check_sample_test() ->
    Samples = [sample1(), sample1(order), sample2(), sample2(order)],
    lists:foreach(
      fun(Sample) ->
              ?assertEqual(50, length(Sample))
      end,
      Samples).

%% sample1(order): the values of sample1/0 sorted in ascending order
sample1(order) ->
    lists:sort(sample1()).

%% sample2(order): the values of sample2/0 sorted in ascending order
sample2(order) ->
    lists:sort(sample2()).

%% data from file bear/samples/data.csv
%% first column X
sample1() ->
    [-16,-18,-47,22,-18,36,25,49,-24,15,
     36,-10,-21,43,-35,1,-24,10,33,-21,
     -18,-36,-36,-43,-37,-10,23,50,31,-49,
     43,46,22,-43,12,-47,15,-14,6,-31,
     46,-8,0,-46,-16,-22,6,10,38,-11].

%% data from file bear/samples/data.csv
%% second column Y
sample2() ->
    [33,20,-35,16,-19,8,25,3,4,10,
     36,-20,-41,43,28,39,-30,3,-47,-23,
     17,-6,-50,16,-26,-49,8,-31,24,16,
     32,27,-19,-32,-17,1,-37,25,-50,-32,
     -42,-22,25,18,-34,-37,7,-13,16,10].