1#!./perl 2 3# Test that setting PERL_HASH_SEED and PERL_PERTURB_KEYS in different 4# combinations works as expected, and that changing the values provided 5# produces the expected results 6# 7# We do this by first executing Perl with a given PERL_PERTURB_KEYS 8# mode, and then extract the randomly chosen PERL_HASH_SEED it ran under 9# from its debug output which was printed to STDERR, and then use it for 10# further tests. This allows the tests to be robust to the choice of hash 11# function and seed sizes that might be in use in the perl being tested. 12# We do not ask perl to output any keys on this run, as our subsequent 13# runs will use different environment variables (specifically 14# PERL_HASH_SEED) which will change any key order results we see. 15# 16# We then execute perl a further three times and ask perl to build a 17# hash with a specific number of buckets and a specific set of keys. We 18# then have perl print the raw keys to STDOUT. 19# 20# For two of these three runs we supply the same seed, and both of those 21# times we supply the same perturb mode, but in different ways, once as 22# a name and once as a digit. The debug output should be identical in 23# both cases regardless of mode. For PERL_PERTURB_KEYS mode 0=NO, and 24# 2=DETERMINISTIC the key order should match. For mode 1=RANDOM the key 25# order should differ the vast majority of the time, however the test is 26# probabilistic and occasionally may result in the same key order. 27# 28# The third run we supply a different seed, with a 1 bit difference, but 29# with the same PERL_PERTURB_KEYS mode. In this case we expect the key 30# order to differ for all three modes, but again the test is 31# probabilistic and we may get the same key order in a small percentage 32# of the times we try this. 33# 34# To address the probabilistic nature of these tests we run them 35# multiple times and count how many times we get the same key order. 36# Most times this should be zero, but occasionally it might be higher. 37# Therefore we use a threshold $allowed_fails to determine how many 38# times the key order may be unchanged before we consider the tests 39# actually failed. We also use a largish number of keys in a hash with 40# a large number of buckets, which means we produce a lot a large temp 41# files as we test, so we aggressively clean them up as we go. 42 43 44BEGIN { 45 chdir 't' if -d 't'; 46 @INC = '../lib'; 47 require './test.pl'; 48 require Config; 49 Config->import; 50} 51 52skip_all_without_config('d_fork'); 53skip_all("NO_PERL_HASH_ENV or NO_PERL_HASH_SEED_DEBUG set") 54 if $Config{ccflags} =~ /-DNO_PERL_HASH_ENV\b/ 55 || $Config{ccflags} =~ /-DNO_PERL_HASH_SEED_DEBUG\b/; 56use strict; 57use warnings; 58 59# enable DEBUG_RUNENV if you want to see what is being returned 60# by the executed perl. 61sub my_runperl { 62 my ($cmd_array, $perturb, $set_seed) = @_; 63 my $opts_hash= { 64 PERL_HASH_SEED_DEBUG => 1, 65 PERL_PERTURB_KEYS => $perturb 66 }; 67 $opts_hash->{PERL_HASH_SEED}= $set_seed if $set_seed; 68 69 my ( $out, $err ) 70 = runperl_and_capture( $opts_hash, $cmd_array ); 71 my @err= split /\n/, $err; 72 73 my $seed; 74 my $mode_name; 75 my $mode_digit; 76 my @err_got_data; 77 my @rand_bits; 78 foreach my $line (@err) { 79 if ($line=~/^Got.*/) { 80 push @err_got_data, $line; 81 } 82 elsif ($line=~/^PL_hash_rand_bits=.*/) { 83 push @rand_bits, $line; 84 } 85 elsif ($line=~/HASH_SEED = (0x[a-f0-9]+)/) { 86 $seed= $1; 87 $line =~ /PERTURB_KEYS = (\d) \((\w+)\)/ 88 or die "Failed to extract perturb mode: $err"; 89 $mode_digit = $1; 90 $mode_name = $2; 91 92 } 93 } 94 if (!$seed){ 95 die "Failed to extract seed: $err"; 96 } 97 my $err_got_data= join("\n",@err_got_data); 98 return ($seed, $mode_digit, $mode_name, $out, $err_got_data, \@rand_bits); 99} 100 101my @mode_names = ( 102 'NO', # 0 103 'RANDOM', # 1 104 'DETERMINISTIC', # 2 105); 106 107my $repeat = 50; # if this changes adjust the comments below. 108my $min_buckets = 100_000; 109my $actual_buckets = 1; 110$actual_buckets *= 2 while $actual_buckets <= $min_buckets; 111my $key_expr = '0..999, "aa".."zz", map { $_ x 30 } "a".."z"'; #1702 keys 112my @keys = eval $key_expr 113 or die "bad '$key_expr': $@"; 114my $allowed_fails = 2; # Adjust this up to make the test tolerate 115 # more "errors". Maybe one day we will compute 116 # it from the value of $repeat, and $actual_buckets 117 # and the number of @keys. 118 119plan tests => (4 * $repeat) # DETERMINISTIC 120 + (1 * $repeat) # NO 121 + 1 # RANDOM mode 122 + (8 * @mode_names) # validation per mode 123 + @mode_names; # all modes 124 125 126# Note the keys(%h) = $n will cause perl to allocate the power of 2 larger 127# than $n buckets, so if $n = 100_000, then $actual_buckets will be 131072. 128 129my @perl_args = ( 130 '-I../lib', 131 (is_miniperl() ? () # no Hash::Util here! 132 : '-MHash::Util=hash_traversal_mask,num_buckets'), 133 '-e', 134 'my %h; keys(%h)=' . $min_buckets . '; ' . 135 '@h{' . $key_expr . '}=(); @k=keys %h; ' . 136 'print join ":", 0+@k, ' . 137 (is_miniperl() ? '' : # no Hash::Util here! 138 'num_buckets(%h),hash_traversal_mask(\\%h), ') . 139 'join ",", @k;' 140 ); 141 142for my $test_mode_digit (0 .. $#mode_names) { 143 my $test_mode_name = $mode_names[$test_mode_digit]; 144 my $descr_mode = "mode = $test_mode_name"; 145 146 my $print_keys= [ ($test_mode_name eq "DETERMINISTIC") 147 ? "-Dh" : (), # enable hash diags 148 @perl_args ]; 149 150 my $validated_mode= 0; 151 my $random_same = 0; 152 my $seed_change_same = 0; 153 for my $try (1 .. $repeat) { 154 155 my $descr = sprintf "%s, try %2d:", $descr_mode, $try; 156 157 # First let perl choose the seed. We only use the $seed and $err 158 # output here. We extract the seed that perl chose, which 159 # hardens us against the use of different hash functions with 160 # different seed sizes. Also the act of adding the PERL_HASH_SEED 161 # to the environment later on will likely change the $out. 162 my ( $seed, $digit, $mode ) 163 = my_runperl( ['-e1'], $test_mode_name ); 164 165 # Now we have to run it again. 166 my ( $seed1, $digit1, $mode1, $out1, $err_got_data1, $rand_bits1 ) 167 = my_runperl( $print_keys, $test_mode_name, $seed ); 168 169 # And once more, these two should do the same thing for 170 # DETERMINISTIC and NO, and be different for RANDOM. 171 # We set the mode via the digit not the name here. 172 my ( $seed2, $digit2, $mode2, $out2, $err_got_data2, $rand_bits2 ) 173 = my_runperl( $print_keys, $test_mode_digit, $seed ); 174 175 if (!$validated_mode++) { 176 is($digit, $test_mode_digit, 177 "$descr base run set the mode digit as expected"); 178 179 is($mode, $test_mode_name, 180 "$descr base run set the mode name as expected"); 181 182 is( $seed1, $seed, 183 "$descr retry 1 set the seed as expected"); 184 185 is( $mode1, $test_mode_name, 186 "$descr retry 1 set the mode by name as expected"); 187 188 is( $digit2, $test_mode_digit, 189 "$descr retry 2 set the mode by digit as expected"); 190 191 is( $seed1, $seed2, 192 "$descr seeds match between retries"); 193 194 is( $digit1, $digit2, 195 "$descr mode digits match between retries"); 196 197 is( $mode1, $mode2, 198 "$descr mode names match between retries"); 199 } 200 201 { 202 # We also test that a 1 bit change to the seed will 203 # actually change the output in all modes. It should 204 # most of the time. 205 my $munged_seed = $seed; 206 substr($munged_seed,-1)=~tr/0-9a-f/1-9a-f0/; 207 if ( $munged_seed eq $seed ) { 208 die "Failed to munge seed '$seed'"; 209 } 210 211 my ( $new_seed, $new_digit, $new_mode, $new_out ) 212 = my_runperl( \@perl_args, $test_mode_name, $munged_seed ); 213 if ($new_seed ne $munged_seed) { 214 die "panic: seed change didn't seem to propagate"; 215 } 216 if ( 217 $new_mode ne $test_mode_name or 218 $new_digit ne $test_mode_digit 219 ) { 220 die "panic: mode setting not as expected"; 221 } 222 223 # The result should be different most times, but there 224 # is a small chance that we got the same result, so 225 # count how many times it happens and then check if it 226 # exceeds $allowed_fails later. 227 $seed_change_same++ if $out1 eq $new_out; 228 } 229 230 if ( $test_mode_name eq 'RANDOM' ) { 231 # The result should be different most times, but there is a 232 # small chance that we get the same result, so count how 233 # many times it happens and then check if it exceeds 234 # $allowed_fails later. 235 $random_same++ if $out1 eq $out2; 236 next; 237 } 238 239 # From this point on we are testing DETERMINISTIC and NO 240 # modes only. 241 242 is( $out1, $out2, 243 "$descr results in the same key order each time" 244 ); 245 246 next if $test_mode_name eq "NO"; 247 248 # From this point on we are testing the DETERMINISTIC 249 # mode only. 250 251 SKIP: { 252 # skip these tests if we are not running in a DEBUGGING perl. 253 skip "$descr not testing rand bits, not a DEBUGGING perl", 3 254 if @$rand_bits1 + @$rand_bits2 == 0; 255 256 is ( 0+@$rand_bits1, 0+@$rand_bits2, 257 "$descr same count of rand_bits entries each time"); 258 259 my $max_i = $#$rand_bits1 > $#$rand_bits2 260 ? $#$rand_bits1 : $#$rand_bits2; 261 262 my $bad_idx; 263 for my $i (0 .. $max_i) { 264 if (($rand_bits2->[$i] // "") ne 265 ($rand_bits1->[$i] // "")) 266 { 267 $bad_idx = $i; 268 last; 269 } 270 } 271 is($bad_idx, undef, 272 "$descr bad rand bits data index should be undef"); 273 if (defined $bad_idx) { 274 # we use is() to see the differing data, but this test 275 # is expected to fail - the description seems a little 276 # odd here, but since it will always fail it makes sense 277 # in context. 278 is($rand_bits2->[$bad_idx],$rand_bits1->[$bad_idx], 279 "$descr rand bits data is the same at idx $bad_idx"); 280 } else { 281 pass("$descr rand bits data is the same"); 282 } 283 } 284 } 285 continue { 286 # We create a lot of big temp files so clean them up as we go. 287 # This is in a continue block so we can do this cleanup after 288 # each iteration even if we call next in the middle of the loop. 289 unlink_tempfiles(); 290 } 291 292 # We just finished $repeat tests, now deal with the probabilistic 293 # results and ensure that we are under the $allowed_fails threshold 294 295 if ($test_mode_name eq "RANDOM") { 296 # There is a small chance we got the same result a few times 297 # even when everything is working as expected. So allow a 298 # small number number of fails determined by $allowed_fails. 299 ok( $random_same <= $allowed_fails, 300 "$descr_mode same key order no more than $allowed_fails times") 301 or diag( 302 "Key order was the same $random_same/$repeat times in", 303 "RANDOM mode. This test is probabilistic so if the number", 304 "is low and you re-run the tests and it does not fail", 305 "again then you can ignore this test fail."); 306 307 } 308 309 # There is a small chance we got the same result a few times even 310 # when everything is working as expected. So allow a small number 311 # of fails as determined by $allowed_fails. 312 ok( $seed_change_same <= $allowed_fails, 313 "$descr_mode same key order with different seed no more " . 314 "than $allowed_fails times" ) 315 or diag( 316 "Key order was the same $random_same/$repeat times with", 317 "a different seed. This test is probabilistic so if the number", 318 "is low and you re-run the tests and it does not fail", 319 "again then you can ignore this test fail."); 320} 321