1-- Generic extended statistics support
2
3-- We will be checking execution plans without/with statistics, so
4-- let's make sure we get simple non-parallel plans. Also set the
5-- work_mem low so that we can use small amounts of data.
6SET max_parallel_workers = 0;
7SET max_parallel_workers_per_gather = 0;
8SET work_mem = '128kB';
9
10-- Verify failures
11CREATE TABLE ext_stats_test (x int, y int, z int);
12CREATE STATISTICS tst;
13CREATE STATISTICS tst ON a, b;
14CREATE STATISTICS tst FROM sometab;
15CREATE STATISTICS tst ON a, b FROM nonexistant;
16CREATE STATISTICS tst ON a, b FROM ext_stats_test;
17CREATE STATISTICS tst ON x, x, y FROM ext_stats_test;
18CREATE STATISTICS tst ON x + y FROM ext_stats_test;
19CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
20CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test;
21DROP TABLE ext_stats_test;
22
23-- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
24CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
25CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
26COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment';
27CREATE ROLE regress_stats_ext;
28SET SESSION AUTHORIZATION regress_stats_ext;
29COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment';
30DROP STATISTICS ab1_a_b_stats;
31ALTER STATISTICS ab1_a_b_stats RENAME TO ab1_a_b_stats_new;
32RESET SESSION AUTHORIZATION;
33DROP ROLE regress_stats_ext;
34
35CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
36DROP STATISTICS ab1_a_b_stats;
37
38CREATE SCHEMA regress_schema_2;
39CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1;
40
41-- Let's also verify the pg_get_statisticsobjdef output looks sane.
42SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats';
43
44DROP STATISTICS regress_schema_2.ab1_a_b_stats;
45
46-- Ensure statistics are dropped when columns are
47CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1;
48CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1;
49CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1;
50ALTER TABLE ab1 DROP COLUMN a;
51\d ab1
52-- Ensure statistics are dropped when table is
53SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
54DROP TABLE ab1;
55SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
56
57-- Ensure things work sanely with SET STATISTICS 0
58CREATE TABLE ab1 (a INTEGER, b INTEGER);
59ALTER TABLE ab1 ALTER a SET STATISTICS 0;
60INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
61CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
62ANALYZE ab1;
63ALTER TABLE ab1 ALTER a SET STATISTICS -1;
64-- partial analyze doesn't build stats either
65ANALYZE ab1 (a);
66ANALYZE ab1;
67DROP TABLE ab1;
68
69-- Ensure we can build statistics for tables with inheritance.
70CREATE TABLE ab1 (a INTEGER, b INTEGER);
71CREATE TABLE ab1c () INHERITS (ab1);
72INSERT INTO ab1 VALUES (1,1);
73CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
74ANALYZE ab1;
75DROP TABLE ab1 CASCADE;
76
77-- Verify supported object types for extended statistics
78CREATE schema tststats;
79
80CREATE TABLE tststats.t (a int, b int, c text);
81CREATE INDEX ti ON tststats.t (a, b);
82CREATE SEQUENCE tststats.s;
83CREATE VIEW tststats.v AS SELECT * FROM tststats.t;
84CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t;
85CREATE TYPE tststats.ty AS (a int, b int, c text);
86CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw;
87CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw;
88CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv;
89CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b);
90CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10);
91
92CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t;
93CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti;
94CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s;
95CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v;
96CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv;
97CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty;
98CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f;
99CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt;
100CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1;
101DO $$
102DECLARE
103	relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass;
104BEGIN
105	EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname;
106EXCEPTION WHEN wrong_object_type THEN
107	RAISE NOTICE 'stats on toast table not created';
108END;
109$$;
110
111\set VERBOSITY terse \\ -- suppress cascade details
112DROP SCHEMA tststats CASCADE;
113DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE;
114\set VERBOSITY default
115
116-- n-distinct tests
117CREATE TABLE ndistinct (
118    filler1 TEXT,
119    filler2 NUMERIC,
120    a INT,
121    b INT,
122    filler3 DATE,
123    c INT,
124    d INT
125);
126
127-- over-estimates when using only per-column statistics
128INSERT INTO ndistinct (a, b, c, filler1)
129     SELECT i/100, i/100, i/100, cash_words((i/100)::money)
130       FROM generate_series(1,30000) s(i);
131
132ANALYZE ndistinct;
133
134-- Group Aggregate, due to over-estimate of the number of groups
135EXPLAIN (COSTS off)
136 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
137
138EXPLAIN (COSTS off)
139 SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
140
141EXPLAIN (COSTS off)
142 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
143
144EXPLAIN (COSTS off)
145 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
146
147EXPLAIN (COSTS off)
148 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
149
150-- correct command
151CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
152
153ANALYZE ndistinct;
154
155SELECT stxkind, stxndistinct
156  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
157
158-- minor improvement, make sure the ctid does not break the matching
159EXPLAIN (COSTS off)
160SELECT COUNT(*) FROM ndistinct GROUP BY ctid, a, b;
161
162-- Hash Aggregate, thanks to estimates improved by the statistic
163EXPLAIN (COSTS off)
164 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
165
166EXPLAIN (COSTS off)
167 SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
168
169EXPLAIN (COSTS off)
170 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
171
172-- last two plans keep using Group Aggregate, because 'd' is not covered
173-- by the statistic and while it's NULL-only we assume 200 values for it
174EXPLAIN (COSTS off)
175 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
176
177EXPLAIN (COSTS off)
178 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
179
180TRUNCATE TABLE ndistinct;
181
182-- under-estimates when using only per-column statistics
183INSERT INTO ndistinct (a, b, c, filler1)
184     SELECT mod(i,50), mod(i,51), mod(i,32),
185            cash_words(mod(i,33)::int::money)
186       FROM generate_series(1,10000) s(i);
187
188ANALYZE ndistinct;
189
190SELECT stxkind, stxndistinct
191  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
192
193-- plans using Group Aggregate, thanks to using correct esimates
194EXPLAIN (COSTS off)
195 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
196
197EXPLAIN (COSTS off)
198 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
199
200EXPLAIN (COSTS off)
201 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
202
203EXPLAIN (COSTS off)
204 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
205
206EXPLAIN (COSTS off)
207 SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
208
209DROP STATISTICS s10;
210
211SELECT stxkind, stxndistinct
212  FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
213
214-- dropping the statistics switches the plans to Hash Aggregate,
215-- due to under-estimates
216EXPLAIN (COSTS off)
217 SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
218
219EXPLAIN (COSTS off)
220 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
221
222EXPLAIN (COSTS off)
223 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
224
225EXPLAIN (COSTS off)
226 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
227
228EXPLAIN (COSTS off)
229 SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
230
231-- functional dependencies tests
232CREATE TABLE functional_dependencies (
233    filler1 TEXT,
234    filler2 NUMERIC,
235    a INT,
236    b TEXT,
237    filler3 DATE,
238    c INT,
239    d TEXT
240);
241
242SET random_page_cost = 1.2;
243
244CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
245CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
246
247-- random data (no functional dependencies)
248INSERT INTO functional_dependencies (a, b, c, filler1)
249     SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
250
251ANALYZE functional_dependencies;
252
253EXPLAIN (COSTS OFF)
254 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
255
256EXPLAIN (COSTS OFF)
257 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
258
259-- create statistics
260CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
261
262ANALYZE functional_dependencies;
263
264EXPLAIN (COSTS OFF)
265 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
266
267EXPLAIN (COSTS OFF)
268 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
269
270-- a => b, a => c, b => c
271TRUNCATE functional_dependencies;
272DROP STATISTICS func_deps_stat;
273
274INSERT INTO functional_dependencies (a, b, c, filler1)
275     SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
276
277ANALYZE functional_dependencies;
278
279EXPLAIN (COSTS OFF)
280 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
281
282EXPLAIN (COSTS OFF)
283 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
284
285-- create statistics
286CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
287
288ANALYZE functional_dependencies;
289
290EXPLAIN (COSTS OFF)
291 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
292
293EXPLAIN (COSTS OFF)
294 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
295
296-- check change of column type doesn't break it
297ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
298
299EXPLAIN (COSTS OFF)
300 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
301
302ANALYZE functional_dependencies;
303
304EXPLAIN (COSTS OFF)
305 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
306
307RESET random_page_cost;
308