--
-- exercises for the hash join code
--

begin;

set local min_parallel_table_scan_size = 0;
set local parallel_setup_cost = 0;

-- Extract bucket and batch counts from an explain analyze plan.  In
-- general we can't make assertions about how many batches (or
-- buckets) will be required because it can vary, but we can in some
-- special cases and we can check for growth.
create or replace function find_hash(node json)
returns json language plpgsql
as
$$
declare
  x json;
  child json;
begin
  if node->>'Node Type' = 'Hash' then
    return node;
  else
    for child in select json_array_elements(node->'Plans')
    loop
      x := find_hash(child);
      if x is not null then
        return x;
      end if;
    end loop;
    return null;
  end if;
end;
$$;
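
-- A tiny, hand-written illustration of find_hash (a sketch only; real
-- EXPLAIN output carries many more fields than shown here).  It should
-- descend through the Aggregate and Hash Join nodes, return the Hash
-- node, and the "Hash Batches" field is then extracted from it.
select find_hash(
  '{"Node Type": "Aggregate",
    "Plans": [{"Node Type": "Hash Join",
               "Plans": [{"Node Type": "Seq Scan"},
                         {"Node Type": "Hash", "Hash Batches": 1}]}]}'::json
) ->> 'Hash Batches' as batches;
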
create or replace function hash_join_batches(query text)
returns table (original int, final int) language plpgsql
as
$$
declare
  whole_plan json;
  hash_node json;
begin
  for whole_plan in
    execute 'explain (analyze, format ''json'') ' || query
  loop
    hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan'));
    original := hash_node->>'Original Hash Batches';
    final := hash_node->>'Hash Batches';
    return next;
  end loop;
end;
$$;
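
-- A minimal usage sketch of hash_join_batches (an illustration, not an
-- assertion: the exact batch counts depend on work_mem and on catalog
-- sizes, so only properties that always hold are checked).  Nested loop
-- and merge joins are disabled so that the planner is pushed towards a
-- hash join for this catalog equi-join.
savepoint settings;
set local enable_nestloop = off;
set local enable_mergejoin = off;
set local max_parallel_workers_per_gather = 0;
select original >= 1 as planned_at_least_one_batch,
       final >= original as batches_did_not_shrink
  from hash_join_batches(
$$
  select count(*) from pg_class c join pg_attribute a on c.oid = a.attrelid;
$$);
rollback to settings;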

-- Make a simple relation with well distributed keys and correctly
-- estimated size.
create table simple as
  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table simple set (parallel_workers = 2);
analyze simple;

-- Make a relation whose size we will under-estimate.  We want stats
-- to say 1000 rows, but actually there are 20,000 rows.
create table bigger_than_it_looks as
  select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
alter table bigger_than_it_looks set (autovacuum_enabled = 'false');
alter table bigger_than_it_looks set (parallel_workers = 2);
analyze bigger_than_it_looks;
update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks';

-- Make a relation whose size we underestimate and that also has a
-- kind of skew that breaks our batching scheme.  We want stats to say
-- 2 rows, but actually there are 20,000 rows with the same key.
create table extremely_skewed (id int, t text);
alter table extremely_skewed set (autovacuum_enabled = 'false');
alter table extremely_skewed set (parallel_workers = 2);
analyze extremely_skewed;
insert into extremely_skewed
  select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
  from generate_series(1, 20000);
update pg_class
  set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
  where relname = 'extremely_skewed';
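
-- Sanity check of the statistics the planner will see (illustrative
-- only): simple is estimated accurately, while the other two tables
-- claim to be far smaller than the 20,000 rows they each contain.
select relname, reltuples::int as estimated_rows
  from pg_class
  where relname in ('simple', 'bigger_than_it_looks', 'extremely_skewed')
  order by relname;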

-- Make a relation with a couple of enormous tuples.
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
alter table wide set (parallel_workers = 2);
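
-- Illustrative look at the wide table (not an assertion the tests rely
-- on): each t value is 320000 characters, so a single tuple dwarfs the
-- small work_mem budgets used later in this file.
select id, length(t) as t_length from wide order by id;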

-- The "optimal" case: the hash table fits in memory; we plan for 1
-- batch, we stick to that number, and peak memory usage stays within
-- our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '4MB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- The "good" case: batches are required, but we plan the right number:
-- we plan for some number of batches, stick to that number during
-- execution, and peak memory usage stays within our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join simple s using (id);
select count(*) from simple r join simple s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- The "bad" case: during execution we need to increase the number of
-- batches; in this case we plan for 1 batch, increase it at least a
-- couple of times during execution, and peak memory usage stays within
-- our work_mem budget.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '192kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join bigger_than_it_looks s using (id);
select count(*) from simple r join bigger_than_it_looks s using (id);
select original > 1 as initially_multibatch, final > original as increased_batches
  from hash_join_batches(
$$
  select count(*) from simple r join bigger_than_it_looks s using (id);
$$);
rollback to settings;

-- The "ugly" case: increasing the number of batches during execution
-- doesn't help, so we stop trying to fit in work_mem and hope for the
-- best; in this case we plan for 1 batch, increase just once, and then
-- stop increasing because that didn't help at all, so we blow right
-- through the work_mem budget and hope for the best.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
set local work_mem = '128kB';
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-oblivious hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '128kB';
set local enable_parallel_hash = off;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- parallel with parallel-aware hash join
savepoint settings;
set local max_parallel_workers_per_gather = 1;
set local work_mem = '128kB';
set local enable_parallel_hash = on;
explain (costs off)
  select count(*) from simple r join extremely_skewed s using (id);
select count(*) from simple r join extremely_skewed s using (id);
select * from hash_join_batches(
$$
  select count(*) from simple r join extremely_skewed s using (id);
$$);
rollback to settings;

-- A couple of other hash join tests unrelated to work_mem management.

-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate
savepoint settings;
set local max_parallel_workers_per_gather = 2;
set local work_mem = '4MB';
set local parallel_leader_participation = off;
select * from hash_join_batches(
$$
  select count(*) from simple r join simple s using (id);
$$);
rollback to settings;

-- Exercise rescans.  We'll turn off parallel_leader_participation so
-- that we can check that instrumentation comes back correctly.

create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t;
alter table join_foo set (parallel_workers = 0);
create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t;
alter table join_bar set (parallel_workers = 2);
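
-- Illustrative look at the two relations used for the rescan tests
-- (shown for reference; the tests below do not depend on this query):
-- join_foo is deliberately tiny, and join_bar is big enough that
-- hashing it under a small work_mem needs multiple batches.
select (select count(*) from join_foo) as foo_rows,
       (select count(*) from join_bar) as bar_rows;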

-- multi-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-oblivious
savepoint settings;
set enable_parallel_hash = off;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- multi-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '64kB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- single-batch with rescan, parallel-aware
savepoint settings;
set enable_parallel_hash = on;
set parallel_leader_participation = off;
set min_parallel_table_scan_size = 0;
set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
set enable_material = off;
set enable_mergejoin = off;
set work_mem = '4MB';
explain (costs off)
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select count(*) from join_foo
  left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
  on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
select final > 1 as multibatch
  from hash_join_batches(
$$
  select count(*) from join_foo
    left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss
    on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1;
$$);
rollback to settings;

-- A full outer join where every record is matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s using (id);
select count(*) from simple r full outer join simple s using (id);
rollback to settings;

-- A full outer join where no record is matched.

-- non-parallel
savepoint settings;
set local max_parallel_workers_per_gather = 0;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- parallelism not possible with parallel-oblivious outer hash join
savepoint settings;
set local max_parallel_workers_per_gather = 2;
explain (costs off)
  select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
select count(*) from simple r full outer join simple s on (r.id = 0 - s.id);
rollback to settings;

-- Exercise special code paths for huge tuples (note: the non-strict
-- expression and the left join are required to get the detoasted tuple
-- into the hash table).

-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and
-- sts_puttuple oversized tuple cases because it's multi-batch)
savepoint settings;
set max_parallel_workers_per_gather = 2;
set enable_parallel_hash = on;
set work_mem = '128kB';
explain (costs off)
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
select final > 1 as multibatch
  from hash_join_batches(
$$
  select length(max(s.t))
  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
$$);
rollback to settings;

rollback;