--
-- exercises for the hash join code
--

begin;

set local min_parallel_table_scan_size = 0;
set local parallel_setup_cost = 0;
set local enable_hashjoin = on;

-- Extract bucket and batch counts from an explain analyze plan.  In
-- general we can't make assertions about how many batches (or
-- buckets) will be required because it can vary, but we can in some
-- special cases and we can check for growth.
create or replace function find_hash(node json)
returns json language plpgsql
as
$$
declare
  x json;
  child json;
begin
  if node->>'Node Type' = 'Hash' then
    return node;
  else
    for child in select json_array_elements(node->'Plans')
    loop
      x := find_hash(child);
      if x is not null then
        return x;
      end if;
    end loop;
    return null;
  end if;
end;
$$;
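
-- Quick sanity check of find_hash (illustrative only, not one of the
-- exercises): a plan fragment with no Hash node anywhere yields null.
select find_hash('{"Node Type": "Seq Scan"}') is null as no_hash_found;
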
create or replace function hash_join_batches(query text)
returns table (original int, final int) language plpgsql
as
$$
declare
  whole_plan json;
  hash_node json;
begin
  for whole_plan in
    execute 'explain (analyze, format ''json'') ' || query
  loop
    hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
    original := hash_node->>'Original Hash Batches';
    final := hash_node->>'Hash Batches';
    return next;
  end loop;
end;
$$;
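
-- Illustrative usage (not one of the exercises): hash_join_batches runs
-- the query under explain (analyze) and reports the planned and final
-- batch counts of the plan's Hash node.  This assumes the planner picks
-- a hash join for the query; if it doesn't, both columns come back null.
select * from hash_join_batches(
$$
  select count(*)
  from generate_series(1, 1000) g(i) join generate_series(1, 1000) h(i) using (i)
$$);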

-- Make a simple relation with well distributed keys and correctly
-- estimated size.
create table simple as
  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table simple set (parallel_workers = 2);
analyze simple;

-- Make a relation whose size we will under-estimate.  We want stats
-- to say 1000 rows, but actually there are 20,000 rows.
create table bigger_than_it_looks as
  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
alter table bigger_than_it_looks set (parallel_workers = 2);
analyze bigger_than_it_looks;
update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';
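
-- Sanity check (illustrative, not an assertion): the stats fudge above
-- should leave the planner believing the table holds about 1000 rows.
select reltuples::int as fudged_reltuples
  from pg_class where relname = 'bigger_than_it_looks';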

-- Make a relation whose size we under-estimate and that also has a
-- kind of skew that breaks our batching scheme.  We want stats to say
-- 2 rows, but actually there are 20,000 rows with the same key.
create table extremely_skewed (id int, t text);
alter table extremely_skewed set (autovacuum_enabled = 'false');
alter table extremely_skewed set (parallel_workers = 2);
analyze extremely_skewed;
insert into extremely_skewed
  select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
  from generate_series(1, 20000);
update pg_class
  set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
  where relname = 'extremely_skewed';
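
-- Sanity check (illustrative, not an assertion): the stats now claim 2
-- rows, although there are really 20,000 rows all sharing the key 42.
select reltuples::int as fudged_reltuples
  from pg_class where relname = 'extremely_skewed';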

-- Make a relation with a couple of enormous tuples.
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
alter table wide set (parallel_workers = 2);
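
-- Sanity check (illustrative): each of the two tuples carries a ~320kB
-- text value, far larger than the small work_mem settings used below.
select id, length(t) as t_length from wide order by id;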

-- The "optimal" case: the hash table fits in memory; we plan for 1
-- batch, we stick to that number, and peak memory usage stays within
-- our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '4MB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- The "good" case: batches are required, but we plan the right number
-- up front; we stick to that number during execution, and peak memory
-- usage stays within our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- The "bad" case: during execution we need to increase the number of
-- batches; here we plan for 1 batch, increase the batch count at least
-- a couple of times during execution, and peak memory usage still
-- stays within our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- The "ugly" case: increasing the number of batches during execution
-- doesn't help, so we stop trying to fit in work_mem and hope for the
-- best; here we plan for 1 batch, increase just once, and then stop
-- increasing because that didn't help at all, blowing right through
-- the work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '128kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- A couple of other hash join tests unrelated to work_mem management.

-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate.
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local parallel_leader_participation = off;
select * from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- Exercise rescans.  We'll turn off parallel_leader_participation so
-- that we can check that instrumentation comes back correctly.

create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
alter table join_foo set (parallel_workers = 0);
create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t;
alter table join_bar set (parallel_workers = 2);

-- multi-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- multi-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- A full outer join where every record is matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- A full outer join where no record is matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- Exercise special code paths for huge tuples.  (Note: a non-strict
-- expression and a left join are required to get the detoasted tuple
-- into the hash table.)

-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
-- sts_puttuple oversized-tuple cases because it's multi-batch)
savepoint settings;
set max_parallel_workers_per_gather = 2;
set enable_parallel_hash = on;
set work_mem = '128kB';
explain (costs off)
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select length(max(s.t))
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select final > 1 as multibatch
  from hash_join_batches(
$$
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
$$);
rollback to settings;

rollback;


-- Verify that hash key expressions reference the correct
-- nodes.  HashJoin's hashkeys need to reference its outer plan, Hash's
-- need to reference Hash's outer plan (which is below HashJoin's
-- inner plan).  It's not trivial to verify that the references are
-- correct (we don't display the hashkeys themselves), but if the
-- hashkeys contain subplan references, those will be displayed.  Force
-- subplans to appear just about everywhere.
--
-- Bug report:
-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com
--
BEGIN;
SET LOCAL enable_sort = OFF; -- avoid mergejoins
SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order

CREATE TABLE hjtest_1 (a text, b int, id int, c bool);
CREATE TABLE hjtest_2 (a bool, id int, b text, c int);

INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50
INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c * 5)

INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c * 5)
INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b

EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2, hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c * 5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

SELECT hjtest_1.a a1, hjtest_2.a a2, hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_1, hjtest_2
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c * 5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

EXPLAIN (COSTS OFF, VERBOSE)
SELECT hjtest_1.a a1, hjtest_2.a a2, hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c * 5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

SELECT hjtest_1.a a1, hjtest_2.a a2, hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2
FROM hjtest_2, hjtest_1
WHERE
    hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1)
    AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c * 5)
    AND (SELECT hjtest_1.b * 5) < 50
    AND (SELECT hjtest_2.c * 5) < 55
    AND hjtest_1.a <> hjtest_2.b;

ROLLBACK;