--
-- exercises for the hash join code
--

begin;

set local min_parallel_table_scan_size = 0;
set local parallel_setup_cost = 0;
set local enable_hashjoin = on;

-- Extract bucket and batch counts from an explain analyze plan.  In
-- general we can't make assertions about how many batches (or
-- buckets) will be required because it can vary, but we can in some
-- special cases and we can check for growth.
create or replace function find_hash(node json)
returns json language plpgsql
as
$$
declare
  x json;
  child json;
begin
  if node->>'Node Type' = 'Hash' then
    return node;
  else
    for child in select json_array_elements(node->'Plans')
    loop
      x := find_hash(child);
      if x is not null then
        return x;
      end if;
    end loop;
    return null;
  end if;
end;
$$;
create or replace function hash_join_batches(query text)
returns table (original int, final int) language plpgsql
as
$$
declare
  whole_plan json;
  hash_node json;
begin
  for whole_plan in
    execute 'explain (analyze, format ''json'') ' || query
  loop
    hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
    original := hash_node->>'Original Hash Batches';
    final := hash_node->>'Hash Batches';
    return next;
  end loop;
end;
$$;

-- Make a simple relation with well distributed keys and correctly
-- estimated size.
create table simple as
  select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table simple set (parallel_workers = 2);
analyze simple;

-- Make a relation whose size we will under-estimate.  We want stats
-- to say 1000 rows, but actually there are 20,000 rows.
create table bigger_than_it_looks as
  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
alter table bigger_than_it_looks set (parallel_workers = 2);
analyze bigger_than_it_looks;
update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';

-- Make a relation whose size we underestimate and that also has a
-- kind of skew that breaks our batching scheme.  We want stats to say
-- 2 rows, but actually there are 20,000 rows with the same key.
create table extremely_skewed (id int, t text);
alter table extremely_skewed set (autovacuum_enabled = 'false');
alter table extremely_skewed set (parallel_workers = 2);
analyze extremely_skewed;
insert into extremely_skewed
  select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
  from generate_series(1, 20000);
update pg_class
  set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
  where relname = 'extremely_skewed';

-- Make a relation with a couple of enormous tuples.
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
alter table wide set (parallel_workers = 2);

-- The "optimal" case: the hash table fits in memory; we plan for 1
-- batch, we stick to that number, and peak memory usage stays within
-- our work_mem budget

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '4MB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- The "good" case: batches required, but we plan the right number; we
-- plan for some number of batches, and we stick to that number, and
-- peak memory usage stays within our work_mem budget

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- The "bad" case: during execution we need to increase number of
-- batches; in this case we plan for 1 batch, and increase at least a
-- couple of times, and peak memory usage stays within our work_mem
-- budget

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- The "ugly" case: increasing the number of batches during execution
-- doesn't help, so stop trying to fit in work_mem and hope for the
-- best; in this case we plan for 1 batch, increases just once and
-- then stop increasing because that didn't help at all, so we blow
-- right through the work_mem budget and hope for the best...

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '128kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- A couple of other hash join tests unrelated to work_mem management.

-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local parallel_leader_participation = off;
select * from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- Exercise rescans.  We'll turn off parallel_leader_participation so
-- that we can check that instrumentation comes back correctly.

create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
alter table join_foo set (parallel_workers = 0);
create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t;
alter table join_bar set (parallel_workers = 2);

-- multi-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- multi-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- A full outer join where every record is matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- A full outer join where every record is not matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- exercise special code paths for huge tuples (note use of non-strict
-- expression and left join required to get the detoasted tuple into
-- the hash table)

-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
-- sts_puttuple oversized tuple cases because it's multi-batch)
savepoint settings;
set max_parallel_workers_per_gather = 2;
set enable_parallel_hash = on;
set work_mem = '128kB';
explain (costs off)
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select length(max(s.t))
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select final > 1 as multibatch
  from hash_join_batches(
$$
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
$$);
rollback to settings;

rollback;


-- Verify that hash key expressions reference the correct
-- nodes.  Hashjoin's hashkeys need to reference its outer plan, Hash's
-- need to reference Hash's outer plan (which is below HashJoin's
-- inner plan).  It's not trivial to verify that the references are
-- correct (we don't display the hashkeys themselves), but if the
-- hashkeys contain subplan references, those will be displayed.  Force
-- subplans to appear just about everywhere.
--
-- Bug report:
-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
--
BEGIN;
SET LOCAL enable_sort = OFF; -- avoid mergejoins
SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order

CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
CREATE TABLE hjtest_2 (a bool, id int, b text, c int);

INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)

INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b;

EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

ROLLBACK;