1-- Generic extended statistics support
2-- We will be checking execution plans without/with statistics, so
3-- let's make sure we get simple non-parallel plans. Also set the
4-- work_mem low so that we can use small amounts of data.
5SET max_parallel_workers = 0;
6SET max_parallel_workers_per_gather = 0;
7SET work_mem = '128kB';
8-- Verify failures
9CREATE TABLE ext_stats_test (x int, y int, z int);
10CREATE STATISTICS tst;
11ERROR:  syntax error at or near ";"
12LINE 1: CREATE STATISTICS tst;
13                             ^
14CREATE STATISTICS tst ON a, b;
15ERROR:  syntax error at or near ";"
16LINE 1: CREATE STATISTICS tst ON a, b;
17                                     ^
18CREATE STATISTICS tst FROM sometab;
19ERROR:  syntax error at or near "FROM"
20LINE 1: CREATE STATISTICS tst FROM sometab;
21                              ^
22CREATE STATISTICS tst ON a, b FROM nonexistant;
23ERROR:  relation "nonexistant" does not exist
24CREATE STATISTICS tst ON a, b FROM ext_stats_test;
25ERROR:  column "a" does not exist
26CREATE STATISTICS tst ON x, x, y FROM ext_stats_test;
27ERROR:  duplicate column name in statistics definition
28CREATE STATISTICS tst ON x + y FROM ext_stats_test;
29ERROR:  only simple column references are allowed in CREATE STATISTICS
30CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
31ERROR:  only simple column references are allowed in CREATE STATISTICS
32CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
33ERROR:  unrecognized statistics kind "unrecognized"
34DROP TABLE ext_stats_test;
35-- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
36CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
37CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
38COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment';
39CREATE ROLE regress_stats_ext;
40SET SESSION AUTHORIZATION regress_stats_ext;
41COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment';
42ERROR:  must be owner of statistics object ab1_a_b_stats
43DROP STATISTICS ab1_a_b_stats;
44ERROR:  must be owner of statistics object ab1_a_b_stats
45ALTER STATISTICS ab1_a_b_stats RENAME TO ab1_a_b_stats_new;
46ERROR:  must be owner of statistics object ab1_a_b_stats
47RESET SESSION AUTHORIZATION;
48DROP ROLE regress_stats_ext;
49CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
50NOTICE:  statistics object "ab1_a_b_stats" already exists, skipping
51DROP STATISTICS ab1_a_b_stats;
52CREATE SCHEMA regress_schema_2;
53CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1;
54-- Let's also verify the pg_get_statisticsobjdef output looks sane.
55SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats';
56                      pg_get_statisticsobjdef
57-------------------------------------------------------------------
58 CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1
59(1 row)
60
61DROP STATISTICS regress_schema_2.ab1_a_b_stats;
62-- Ensure statistics are dropped when columns are
63CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1;
64CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1;
65CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1;
66ALTER TABLE ab1 DROP COLUMN a;
67\d ab1
68                Table "public.ab1"
69 Column |  Type   | Collation | Nullable | Default
70--------+---------+-----------+----------+---------
71 b      | integer |           |          |
72 c      | integer |           |          |
73Statistics objects:
74    "public"."ab1_b_c_stats" (ndistinct, dependencies) ON b, c FROM ab1
75
76-- Ensure statistics are dropped when table is
77SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
78    stxname
79---------------
80 ab1_b_c_stats
81(1 row)
82
83DROP TABLE ab1;
84SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
85 stxname
86---------
87(0 rows)
88
89-- Ensure things work sanely with SET STATISTICS 0
90CREATE TABLE ab1 (a INTEGER, b INTEGER);
91ALTER TABLE ab1 ALTER a SET STATISTICS 0;
92INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
93CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
94ANALYZE ab1;
95WARNING:  statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1"
96ALTER TABLE ab1 ALTER a SET STATISTICS -1;
97-- partial analyze doesn't build stats either
98ANALYZE ab1 (a);
99WARNING:  statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1"
100ANALYZE ab1;
101DROP TABLE ab1;
102-- Ensure we can build statistics for tables with inheritance.
103CREATE TABLE ab1 (a INTEGER, b INTEGER);
104CREATE TABLE ab1c () INHERITS (ab1);
105INSERT INTO ab1 VALUES (1,1);
106CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
107ANALYZE ab1;
108DROP TABLE ab1 CASCADE;
109NOTICE:  drop cascades to table ab1c
110-- Verify supported object types for extended statistics
111CREATE schema tststats;
112CREATE TABLE tststats.t (a int, b int, c text);
113CREATE INDEX ti ON tststats.t (a, b);
114CREATE SEQUENCE tststats.s;
115CREATE VIEW tststats.v AS SELECT * FROM tststats.t;
116CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t;
117CREATE TYPE tststats.ty AS (a int, b int, c text);
118CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw;
119CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw;
120CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv;
121CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b);
122CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10);
123CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t;
124CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti;
125ERROR:  relation "ti" is not a table, foreign table, or materialized view
126CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s;
127ERROR:  relation "s" is not a table, foreign table, or materialized view
128CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v;
129ERROR:  relation "v" is not a table, foreign table, or materialized view
130CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv;
131CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty;
132ERROR:  relation "ty" is not a table, foreign table, or materialized view
133CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f;
134CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt;
135CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1;
136DO $$
137DECLARE
138	relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass;
139BEGIN
140	EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname;
141EXCEPTION WHEN wrong_object_type THEN
142	RAISE NOTICE 'stats on toast table not created';
143END;
144$$;
145NOTICE:  stats on toast table not created
146\set VERBOSITY terse \\ -- suppress cascade details
147DROP SCHEMA tststats CASCADE;
148NOTICE:  drop cascades to 7 other objects
149DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE;
150NOTICE:  drop cascades to server extstats_dummy_srv
151\set VERBOSITY default
152-- n-distinct tests
153CREATE TABLE ndistinct (
154    filler1 TEXT,
155    filler2 NUMERIC,
156    a INT,
157    b INT,
158    filler3 DATE,
159    c INT,
160    d INT
161);
162-- over-estimates when using only per-column statistics
163INSERT INTO ndistinct (a, b, c, filler1)
164     SELECT i/100, i/100, i/100, cash_words((i/100)::money)
165       FROM generate_series(1,30000) s(i);
166ANALYZE ndistinct;
167-- Group Aggregate, due to over-estimate of the number of groups
168EXPLAIN (COSTS off)
169 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
170            QUERY PLAN
171-----------------------------------
172 GroupAggregate
173   Group Key: a, b
174   ->  Sort
175         Sort Key: a, b
176         ->  Seq Scan on ndistinct
177(5 rows)
178
179EXPLAIN (COSTS off)
180 SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
181            QUERY PLAN
182-----------------------------------
183 GroupAggregate
184   Group Key: b, c
185   ->  Sort
186         Sort Key: b, c
187         ->  Seq Scan on ndistinct
188(5 rows)
189
190EXPLAIN (COSTS off)
191 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
192            QUERY PLAN
193-----------------------------------
194 GroupAggregate
195   Group Key: a, b, c
196   ->  Sort
197         Sort Key: a, b, c
198         ->  Seq Scan on ndistinct
199(5 rows)
200
201EXPLAIN (COSTS off)
202 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
203            QUERY PLAN
204-----------------------------------
205 GroupAggregate
206   Group Key: a, b, c, d
207   ->  Sort
208         Sort Key: a, b, c, d
209         ->  Seq Scan on ndistinct
210(5 rows)
211
212EXPLAIN (COSTS off)
213 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
214            QUERY PLAN
215-----------------------------------
216 GroupAggregate
217   Group Key: b, c, d
218   ->  Sort
219         Sort Key: b, c, d
220         ->  Seq Scan on ndistinct
221(5 rows)
222
223-- correct command
224CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
225ANALYZE ndistinct;
226SELECT stxkind, stxndistinct
227  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
228 stxkind |                      stxndistinct
229---------+---------------------------------------------------------
230 {d,f}   | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301}
231(1 row)
232
233-- minor improvement, make sure the ctid does not break the matching
234EXPLAIN (COSTS off)
235SELECT COUNT(*) FROM ndistinct GROUP BY ctid, a, b;
236            QUERY PLAN
237-----------------------------------
238 GroupAggregate
239   Group Key: ctid, a, b
240   ->  Sort
241         Sort Key: ctid, a, b
242         ->  Seq Scan on ndistinct
243(5 rows)
244
245-- Hash Aggregate, thanks to estimates improved by the statistic
246EXPLAIN (COSTS off)
247 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
248         QUERY PLAN
249-----------------------------
250 HashAggregate
251   Group Key: a, b
252   ->  Seq Scan on ndistinct
253(3 rows)
254
255EXPLAIN (COSTS off)
256 SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
257         QUERY PLAN
258-----------------------------
259 HashAggregate
260   Group Key: b, c
261   ->  Seq Scan on ndistinct
262(3 rows)
263
264EXPLAIN (COSTS off)
265 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
266         QUERY PLAN
267-----------------------------
268 HashAggregate
269   Group Key: a, b, c
270   ->  Seq Scan on ndistinct
271(3 rows)
272
273-- last two plans keep using Group Aggregate, because 'd' is not covered
274-- by the statistic and while it's NULL-only we assume 200 values for it
275EXPLAIN (COSTS off)
276 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
277            QUERY PLAN
278-----------------------------------
279 GroupAggregate
280   Group Key: a, b, c, d
281   ->  Sort
282         Sort Key: a, b, c, d
283         ->  Seq Scan on ndistinct
284(5 rows)
285
286EXPLAIN (COSTS off)
287 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
288            QUERY PLAN
289-----------------------------------
290 GroupAggregate
291   Group Key: b, c, d
292   ->  Sort
293         Sort Key: b, c, d
294         ->  Seq Scan on ndistinct
295(5 rows)
296
297TRUNCATE TABLE ndistinct;
298-- under-estimates when using only per-column statistics
299INSERT INTO ndistinct (a, b, c, filler1)
300     SELECT mod(i,50), mod(i,51), mod(i,32),
301            cash_words(mod(i,33)::int::money)
302       FROM generate_series(1,10000) s(i);
303ANALYZE ndistinct;
304SELECT stxkind, stxndistinct
305  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
306 stxkind |                        stxndistinct
307---------+-------------------------------------------------------------
308 {d,f}   | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000}
309(1 row)
310
311-- plans using Group Aggregate, thanks to using correct esimates
312EXPLAIN (COSTS off)
313 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
314            QUERY PLAN
315-----------------------------------
316 GroupAggregate
317   Group Key: a, b
318   ->  Sort
319         Sort Key: a, b
320         ->  Seq Scan on ndistinct
321(5 rows)
322
323EXPLAIN (COSTS off)
324 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
325            QUERY PLAN
326-----------------------------------
327 GroupAggregate
328   Group Key: a, b, c
329   ->  Sort
330         Sort Key: a, b, c
331         ->  Seq Scan on ndistinct
332(5 rows)
333
334EXPLAIN (COSTS off)
335 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
336            QUERY PLAN
337-----------------------------------
338 GroupAggregate
339   Group Key: a, b, c, d
340   ->  Sort
341         Sort Key: a, b, c, d
342         ->  Seq Scan on ndistinct
343(5 rows)
344
345EXPLAIN (COSTS off)
346 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
347         QUERY PLAN
348-----------------------------
349 HashAggregate
350   Group Key: b, c, d
351   ->  Seq Scan on ndistinct
352(3 rows)
353
354EXPLAIN (COSTS off)
355 SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
356         QUERY PLAN
357-----------------------------
358 HashAggregate
359   Group Key: a, d
360   ->  Seq Scan on ndistinct
361(3 rows)
362
363DROP STATISTICS s10;
364SELECT stxkind, stxndistinct
365  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
366 stxkind | stxndistinct
367---------+--------------
368(0 rows)
369
370-- dropping the statistics switches the plans to Hash Aggregate,
371-- due to under-estimates
372EXPLAIN (COSTS off)
373 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
374         QUERY PLAN
375-----------------------------
376 HashAggregate
377   Group Key: a, b
378   ->  Seq Scan on ndistinct
379(3 rows)
380
381EXPLAIN (COSTS off)
382 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
383         QUERY PLAN
384-----------------------------
385 HashAggregate
386   Group Key: a, b, c
387   ->  Seq Scan on ndistinct
388(3 rows)
389
390EXPLAIN (COSTS off)
391 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
392         QUERY PLAN
393-----------------------------
394 HashAggregate
395   Group Key: a, b, c, d
396   ->  Seq Scan on ndistinct
397(3 rows)
398
399EXPLAIN (COSTS off)
400 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
401         QUERY PLAN
402-----------------------------
403 HashAggregate
404   Group Key: b, c, d
405   ->  Seq Scan on ndistinct
406(3 rows)
407
408EXPLAIN (COSTS off)
409 SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
410         QUERY PLAN
411-----------------------------
412 HashAggregate
413   Group Key: a, d
414   ->  Seq Scan on ndistinct
415(3 rows)
416
417-- functional dependencies tests
418CREATE TABLE functional_dependencies (
419    filler1 TEXT,
420    filler2 NUMERIC,
421    a INT,
422    b TEXT,
423    filler3 DATE,
424    c INT,
425    d TEXT
426);
427SET random_page_cost = 1.2;
428CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
429CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
430-- random data (no functional dependencies)
431INSERT INTO functional_dependencies (a, b, c, filler1)
432     SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
433ANALYZE functional_dependencies;
434EXPLAIN (COSTS OFF)
435 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
436                    QUERY PLAN
437---------------------------------------------------
438 Bitmap Heap Scan on functional_dependencies
439   Recheck Cond: ((a = 1) AND (b = '1'::text))
440   ->  Bitmap Index Scan on fdeps_abc_idx
441         Index Cond: ((a = 1) AND (b = '1'::text))
442(4 rows)
443
444EXPLAIN (COSTS OFF)
445 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
446                        QUERY PLAN
447-----------------------------------------------------------
448 Index Scan using fdeps_abc_idx on functional_dependencies
449   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
450(2 rows)
451
452-- create statistics
453CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
454ANALYZE functional_dependencies;
455EXPLAIN (COSTS OFF)
456 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
457                    QUERY PLAN
458---------------------------------------------------
459 Bitmap Heap Scan on functional_dependencies
460   Recheck Cond: ((a = 1) AND (b = '1'::text))
461   ->  Bitmap Index Scan on fdeps_abc_idx
462         Index Cond: ((a = 1) AND (b = '1'::text))
463(4 rows)
464
465EXPLAIN (COSTS OFF)
466 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
467                        QUERY PLAN
468-----------------------------------------------------------
469 Index Scan using fdeps_abc_idx on functional_dependencies
470   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
471(2 rows)
472
473-- a => b, a => c, b => c
474TRUNCATE functional_dependencies;
475DROP STATISTICS func_deps_stat;
476INSERT INTO functional_dependencies (a, b, c, filler1)
477     SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
478ANALYZE functional_dependencies;
479EXPLAIN (COSTS OFF)
480 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
481                        QUERY PLAN
482-----------------------------------------------------------
483 Index Scan using fdeps_abc_idx on functional_dependencies
484   Index Cond: ((a = 1) AND (b = '1'::text))
485(2 rows)
486
487EXPLAIN (COSTS OFF)
488 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
489                        QUERY PLAN
490-----------------------------------------------------------
491 Index Scan using fdeps_abc_idx on functional_dependencies
492   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
493(2 rows)
494
495-- create statistics
496CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
497ANALYZE functional_dependencies;
498EXPLAIN (COSTS OFF)
499 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
500                    QUERY PLAN
501---------------------------------------------------
502 Bitmap Heap Scan on functional_dependencies
503   Recheck Cond: ((a = 1) AND (b = '1'::text))
504   ->  Bitmap Index Scan on fdeps_abc_idx
505         Index Cond: ((a = 1) AND (b = '1'::text))
506(4 rows)
507
508EXPLAIN (COSTS OFF)
509 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
510                    QUERY PLAN
511---------------------------------------------------
512 Bitmap Heap Scan on functional_dependencies
513   Recheck Cond: ((a = 1) AND (b = '1'::text))
514   Filter: (c = 1)
515   ->  Bitmap Index Scan on fdeps_ab_idx
516         Index Cond: ((a = 1) AND (b = '1'::text))
517(5 rows)
518
519-- check change of column type doesn't break it
520ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
521EXPLAIN (COSTS OFF)
522 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
523                    QUERY PLAN
524---------------------------------------------------
525 Bitmap Heap Scan on functional_dependencies
526   Recheck Cond: ((a = 1) AND (b = '1'::text))
527   Filter: (c = '1'::numeric)
528   ->  Bitmap Index Scan on fdeps_ab_idx
529         Index Cond: ((a = 1) AND (b = '1'::text))
530(5 rows)
531
532ANALYZE functional_dependencies;
533EXPLAIN (COSTS OFF)
534 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
535                    QUERY PLAN
536---------------------------------------------------
537 Bitmap Heap Scan on functional_dependencies
538   Recheck Cond: ((a = 1) AND (b = '1'::text))
539   Filter: (c = '1'::numeric)
540   ->  Bitmap Index Scan on fdeps_ab_idx
541         Index Cond: ((a = 1) AND (b = '1'::text))
542(5 rows)
543
544RESET random_page_cost;
545