--
-- exercises for the hash join code
--

begin;

set local min_parallel_table_scan_size = 0;
set local parallel_setup_cost = 0;

-- Extract bucket and batch counts from an explain analyze plan.  In
-- general we can't make assertions about how many batches (or
-- buckets) will be required because it can vary, but we can in some
-- special cases and we can check for growth.
create or replace function find_hash(node json)
returns json language plpgsql
as
$$
declare
  x json;
  child json;
begin
  if node->>'Node Type' = 'Hash' then
    return node;
  else
    for child in select json_array_elements(node->'Plans')
    loop
      x := find_hash(child);
      if x is not null then
        return x;
      end if;
    end loop;
    return null;
  end if;
end;
$$;
create or replace function hash_join_batches(query text)
returns table (original int, final int) language plpgsql
as
$$
declare
  whole_plan json;
  hash_node json;
begin
  for whole_plan in
    execute 'explain (analyze, format ''json'') ' || query
  loop
    hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
    original := hash_node->>'Original Hash Batches';
    final := hash_node->>'Hash Batches';
    return next;
  end loop;
end;
$$;

-- Make a simple relation with well distributed keys and correctly
-- estimated size.
create table simple as
  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table simple set (parallel_workers = 2);
analyze simple;

-- Make a relation whose size we will under-estimate.  We want stats
-- to say 1000 rows, but actually there are 20,000 rows.
create table bigger_than_it_looks as
  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
alter table bigger_than_it_looks set (parallel_workers = 2);
analyze bigger_than_it_looks;
update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';

-- Make a relation whose size we underestimate and that also has a
-- kind of skew that breaks our batching scheme.  We want stats to say
-- 2 rows, but actually there are 20,000 rows with the same key.
create table extremely_skewed (id int, t text);
alter table extremely_skewed set (autovacuum_enabled = 'false');
alter table extremely_skewed set (parallel_workers = 2);
analyze extremely_skewed;
insert into extremely_skewed
  select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
  from generate_series(1, 20000);
update pg_class
  set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
  where relname = 'extremely_skewed';

-- Make a relation with a couple of enormous tuples.
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
alter table wide set (parallel_workers = 2);
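
-- (Added illustration, not part of the original exercises.)  A quick
-- smoke test of the helper functions above: with a generous work_mem,
-- a join against the well-estimated table should need no batch growth
-- at run time, so the planned and final batch counts ought to match.
-- The absolute counts depend on work_mem, so we only compare them.
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '4MB';
select original = final as batches_stable
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;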

-- The "optimal" case: the hash table fits in memory; we plan for 1
-- batch, we stick to that number, and peak memory usage stays within
-- our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '4MB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- The "good" case: batches required, but we plan the right number; we
-- plan for some number of batches, we stick to that number, and peak
-- memory usage stays within our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;
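
-- (Added illustration, not part of the original exercises.)  The
-- planned batch count scales with the ratio of the estimated hash
-- table size to work_mem, so shrinking work_mem below the "good"
-- settings above should still plan more than one batch up front.
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '64kB';
select original > 1 as planned_multibatch
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;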

-- The "bad" case: during execution we need to increase the number of
-- batches; in this case we plan for 1 batch, increase it at least a
-- couple of times, and peak memory usage stays within our work_mem
-- budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;
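
-- (Added illustration, not part of the original exercises.)  The "bad"
-- behaviour above is driven entirely by the doctored statistics: the
-- catalog claims about 1000 rows while the table really holds 20,000,
-- so the planner sizes the hash table too small and the executor has
-- to add batches at run time.
select count(*) = 20000 as actual_rows_20k,
       (select reltuples::int from pg_class
          where relname = 'bigger_than_it_looks') = 1000 as stats_claim_1k
  from bigger_than_it_looks;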

-- The "ugly" case: increasing the number of batches during execution
-- doesn't help, so we stop trying to fit in work_mem; in this case we
-- plan for 1 batch, increase just once, and then stop increasing
-- because that didn't help at all, so we blow right through the
-- work_mem budget and hope for the best...

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '128kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- A couple of other hash join tests unrelated to work_mem management.

-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate.
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local parallel_leader_participation = off;
select * from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;
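
-- (Added illustration, not part of the original exercises.)  Turning
-- leader participation off only moves the work, and with it the
-- instrumentation, to the workers; the join result itself must not
-- change.
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local parallel_leader_participation = off;
select count(*) = 20000 as count_unchanged
  from simple r join simple s using (id);
rollback to settings;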

-- Exercise rescans.  We'll turn off parallel_leader_participation so
-- that we can check that instrumentation comes back correctly.

create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
alter table join_foo set (parallel_workers = 0);
create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t;
alter table join_bar set (parallel_workers = 2);

-- multi-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;
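
-- (Added illustration, not part of the original exercises.)  For
-- integer ids the band condition used in these rescan tests,
-- id < ss.id + 1 and id > ss.id - 1, degenerates to equality, so each
-- of join_foo's three rows matches exactly once and every variant
-- should count 3.
select count(*) = 3 as expected_match_count
  from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;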

-- multi-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- A full outer join where every record is matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- A full outer join where no record is matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- Exercise special code paths for huge tuples (note: the non-strict
-- expression and the left join are required to get the detoasted tuple
-- into the hash table).

-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
-- sts_puttuple oversized tuple cases because it's multi-batch)
savepoint settings;
set max_parallel_workers_per_gather = 2;
set enable_parallel_hash = on;
set work_mem = '128kB';
explain (costs off)
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select final > 1 as multibatch
  from hash_join_batches(
$$
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
$$);
rollback to settings;

rollback;