1-- Generic extended statistics support 2 3-- We will be checking execution plans without/with statistics, so 4-- let's make sure we get simple non-parallel plans. Also set the 5-- work_mem low so that we can use small amounts of data. 6SET max_parallel_workers = 0; 7SET max_parallel_workers_per_gather = 0; 8SET work_mem = '128kB'; 9 10-- Verify failures 11CREATE TABLE ext_stats_test (x int, y int, z int); 12CREATE STATISTICS tst; 13CREATE STATISTICS tst ON a, b; 14CREATE STATISTICS tst FROM sometab; 15CREATE STATISTICS tst ON a, b FROM nonexistant; 16CREATE STATISTICS tst ON a, b FROM ext_stats_test; 17CREATE STATISTICS tst ON x, x, y FROM ext_stats_test; 18CREATE STATISTICS tst ON x + y FROM ext_stats_test; 19CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; 20CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; 21DROP TABLE ext_stats_test; 22 23-- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it 24CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); 25CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; 26COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment'; 27CREATE ROLE regress_stats_ext; 28SET SESSION AUTHORIZATION regress_stats_ext; 29COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment'; 30DROP STATISTICS ab1_a_b_stats; 31ALTER STATISTICS ab1_a_b_stats RENAME TO ab1_a_b_stats_new; 32RESET SESSION AUTHORIZATION; 33DROP ROLE regress_stats_ext; 34 35CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; 36DROP STATISTICS ab1_a_b_stats; 37 38CREATE SCHEMA regress_schema_2; 39CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1; 40 41-- Let's also verify the pg_get_statisticsobjdef output looks sane. 42SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats'; 43 44DROP STATISTICS regress_schema_2.ab1_a_b_stats; 45 46-- Ensure statistics are dropped when columns are 47CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1; 48CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1; 49CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1; 50ALTER TABLE ab1 DROP COLUMN a; 51\d ab1 52-- Ensure statistics are dropped when table is 53SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; 54DROP TABLE ab1; 55SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; 56 57-- Ensure things work sanely with SET STATISTICS 0 58CREATE TABLE ab1 (a INTEGER, b INTEGER); 59ALTER TABLE ab1 ALTER a SET STATISTICS 0; 60INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; 61CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; 62ANALYZE ab1; 63ALTER TABLE ab1 ALTER a SET STATISTICS -1; 64-- partial analyze doesn't build stats either 65ANALYZE ab1 (a); 66ANALYZE ab1; 67DROP TABLE ab1; 68 69-- Ensure we can build statistics for tables with inheritance. 70CREATE TABLE ab1 (a INTEGER, b INTEGER); 71CREATE TABLE ab1c () INHERITS (ab1); 72INSERT INTO ab1 VALUES (1,1); 73CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; 74ANALYZE ab1; 75DROP TABLE ab1 CASCADE; 76 77-- Verify supported object types for extended statistics 78CREATE schema tststats; 79 80CREATE TABLE tststats.t (a int, b int, c text); 81CREATE INDEX ti ON tststats.t (a, b); 82CREATE SEQUENCE tststats.s; 83CREATE VIEW tststats.v AS SELECT * FROM tststats.t; 84CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t; 85CREATE TYPE tststats.ty AS (a int, b int, c text); 86CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw; 87CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw; 88CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv; 89CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b); 90CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10); 91 92CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t; 93CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti; 94CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s; 95CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v; 96CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv; 97CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty; 98CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f; 99CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt; 100CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1; 101DO $$ 102DECLARE 103 relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass; 104BEGIN 105 EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname; 106EXCEPTION WHEN wrong_object_type THEN 107 RAISE NOTICE 'stats on toast table not created'; 108END; 109$$; 110 111\set VERBOSITY terse \\ -- suppress cascade details 112DROP SCHEMA tststats CASCADE; 113DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE; 114\set VERBOSITY default 115 116-- n-distinct tests 117CREATE TABLE ndistinct ( 118 filler1 TEXT, 119 filler2 NUMERIC, 120 a INT, 121 b INT, 122 filler3 DATE, 123 c INT, 124 d INT 125); 126 127-- over-estimates when using only per-column statistics 128INSERT INTO ndistinct (a, b, c, filler1) 129 SELECT i/100, i/100, i/100, cash_words((i/100)::money) 130 FROM generate_series(1,30000) s(i); 131 132ANALYZE ndistinct; 133 134-- Group Aggregate, due to over-estimate of the number of groups 135EXPLAIN (COSTS off) 136 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 137 138EXPLAIN (COSTS off) 139 SELECT COUNT(*) FROM ndistinct GROUP BY b, c; 140 141EXPLAIN (COSTS off) 142 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 143 144EXPLAIN (COSTS off) 145 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 146 147EXPLAIN (COSTS off) 148 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 149 150-- correct command 151CREATE STATISTICS s10 ON a, b, c FROM ndistinct; 152 153ANALYZE ndistinct; 154 155SELECT stxkind, stxndistinct 156 FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; 157 158-- minor improvement, make sure the ctid does not break the matching 159EXPLAIN (COSTS off) 160SELECT COUNT(*) FROM ndistinct GROUP BY ctid, a, b; 161 162-- Hash Aggregate, thanks to estimates improved by the statistic 163EXPLAIN (COSTS off) 164 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 165 166EXPLAIN (COSTS off) 167 SELECT COUNT(*) FROM ndistinct GROUP BY b, c; 168 169EXPLAIN (COSTS off) 170 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 171 172-- last two plans keep using Group Aggregate, because 'd' is not covered 173-- by the statistic and while it's NULL-only we assume 200 values for it 174EXPLAIN (COSTS off) 175 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 176 177EXPLAIN (COSTS off) 178 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 179 180TRUNCATE TABLE ndistinct; 181 182-- under-estimates when using only per-column statistics 183INSERT INTO ndistinct (a, b, c, filler1) 184 SELECT mod(i,50), mod(i,51), mod(i,32), 185 cash_words(mod(i,33)::int::money) 186 FROM generate_series(1,10000) s(i); 187 188ANALYZE ndistinct; 189 190SELECT stxkind, stxndistinct 191 FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; 192 193-- plans using Group Aggregate, thanks to using correct esimates 194EXPLAIN (COSTS off) 195 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 196 197EXPLAIN (COSTS off) 198 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 199 200EXPLAIN (COSTS off) 201 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 202 203EXPLAIN (COSTS off) 204 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 205 206EXPLAIN (COSTS off) 207 SELECT COUNT(*) FROM ndistinct GROUP BY a, d; 208 209DROP STATISTICS s10; 210 211SELECT stxkind, stxndistinct 212 FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; 213 214-- dropping the statistics switches the plans to Hash Aggregate, 215-- due to under-estimates 216EXPLAIN (COSTS off) 217 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 218 219EXPLAIN (COSTS off) 220 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 221 222EXPLAIN (COSTS off) 223 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 224 225EXPLAIN (COSTS off) 226 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 227 228EXPLAIN (COSTS off) 229 SELECT COUNT(*) FROM ndistinct GROUP BY a, d; 230 231-- functional dependencies tests 232CREATE TABLE functional_dependencies ( 233 filler1 TEXT, 234 filler2 NUMERIC, 235 a INT, 236 b TEXT, 237 filler3 DATE, 238 c INT, 239 d TEXT 240); 241 242SET random_page_cost = 1.2; 243 244CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b); 245CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c); 246 247-- random data (no functional dependencies) 248INSERT INTO functional_dependencies (a, b, c, filler1) 249 SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i); 250 251ANALYZE functional_dependencies; 252 253EXPLAIN (COSTS OFF) 254 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 255 256EXPLAIN (COSTS OFF) 257 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 258 259-- create statistics 260CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; 261 262ANALYZE functional_dependencies; 263 264EXPLAIN (COSTS OFF) 265 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 266 267EXPLAIN (COSTS OFF) 268 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 269 270-- a => b, a => c, b => c 271TRUNCATE functional_dependencies; 272DROP STATISTICS func_deps_stat; 273 274INSERT INTO functional_dependencies (a, b, c, filler1) 275 SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); 276 277ANALYZE functional_dependencies; 278 279EXPLAIN (COSTS OFF) 280 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 281 282EXPLAIN (COSTS OFF) 283 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 284 285-- create statistics 286CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; 287 288ANALYZE functional_dependencies; 289 290EXPLAIN (COSTS OFF) 291 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 292 293EXPLAIN (COSTS OFF) 294 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 295 296-- check change of column type doesn't break it 297ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric; 298 299EXPLAIN (COSTS OFF) 300 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 301 302ANALYZE functional_dependencies; 303 304EXPLAIN (COSTS OFF) 305 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 306 307RESET random_page_cost; 308