1-- Generic extended statistics support 2-- We will be checking execution plans without/with statistics, so 3-- let's make sure we get simple non-parallel plans. Also set the 4-- work_mem low so that we can use small amounts of data. 5SET max_parallel_workers = 0; 6SET max_parallel_workers_per_gather = 0; 7SET work_mem = '128kB'; 8-- Verify failures 9CREATE TABLE ext_stats_test (x int, y int, z int); 10CREATE STATISTICS tst; 11ERROR: syntax error at or near ";" 12LINE 1: CREATE STATISTICS tst; 13 ^ 14CREATE STATISTICS tst ON a, b; 15ERROR: syntax error at or near ";" 16LINE 1: CREATE STATISTICS tst ON a, b; 17 ^ 18CREATE STATISTICS tst FROM sometab; 19ERROR: syntax error at or near "FROM" 20LINE 1: CREATE STATISTICS tst FROM sometab; 21 ^ 22CREATE STATISTICS tst ON a, b FROM nonexistant; 23ERROR: relation "nonexistant" does not exist 24CREATE STATISTICS tst ON a, b FROM ext_stats_test; 25ERROR: column "a" does not exist 26CREATE STATISTICS tst ON x, x, y FROM ext_stats_test; 27ERROR: duplicate column name in statistics definition 28CREATE STATISTICS tst ON x + y FROM ext_stats_test; 29ERROR: only simple column references are allowed in CREATE STATISTICS 30CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; 31ERROR: only simple column references are allowed in CREATE STATISTICS 32CREATE STATISTICS tst (unrecognized) ON x, y FROM ext_stats_test; 33ERROR: unrecognized statistics kind "unrecognized" 34DROP TABLE ext_stats_test; 35-- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it 36CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); 37CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; 38COMMENT ON STATISTICS ab1_a_b_stats IS 'new comment'; 39CREATE ROLE regress_stats_ext; 40SET SESSION AUTHORIZATION regress_stats_ext; 41COMMENT ON STATISTICS ab1_a_b_stats IS 'changed comment'; 42ERROR: must be owner of statistics object ab1_a_b_stats 43DROP STATISTICS ab1_a_b_stats; 44ERROR: must be owner of statistics object ab1_a_b_stats 45ALTER STATISTICS ab1_a_b_stats RENAME TO ab1_a_b_stats_new; 46ERROR: must be owner of statistics object ab1_a_b_stats 47RESET SESSION AUTHORIZATION; 48DROP ROLE regress_stats_ext; 49CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1; 50NOTICE: statistics object "ab1_a_b_stats" already exists, skipping 51DROP STATISTICS ab1_a_b_stats; 52CREATE SCHEMA regress_schema_2; 53CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1; 54-- Let's also verify the pg_get_statisticsobjdef output looks sane. 55SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats'; 56 pg_get_statisticsobjdef 57------------------------------------------------------------------- 58 CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1 59(1 row) 60 61DROP STATISTICS regress_schema_2.ab1_a_b_stats; 62-- Ensure statistics are dropped when columns are 63CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1; 64CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1; 65CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1; 66ALTER TABLE ab1 DROP COLUMN a; 67\d ab1 68 Table "public.ab1" 69 Column | Type | Collation | Nullable | Default 70--------+---------+-----------+----------+--------- 71 b | integer | | | 72 c | integer | | | 73Statistics objects: 74 "public"."ab1_b_c_stats" (ndistinct, dependencies) ON b, c FROM ab1 75 76-- Ensure statistics are dropped when table is 77SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; 78 stxname 79--------------- 80 ab1_b_c_stats 81(1 row) 82 83DROP TABLE ab1; 84SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%'; 85 stxname 86--------- 87(0 rows) 88 89-- Ensure things work sanely with SET STATISTICS 0 90CREATE TABLE ab1 (a INTEGER, b INTEGER); 91ALTER TABLE ab1 ALTER a SET STATISTICS 0; 92INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a; 93CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; 94ANALYZE ab1; 95WARNING: statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1" 96ALTER TABLE ab1 ALTER a SET STATISTICS -1; 97-- partial analyze doesn't build stats either 98ANALYZE ab1 (a); 99WARNING: statistics object "public.ab1_a_b_stats" could not be computed for relation "public.ab1" 100ANALYZE ab1; 101DROP TABLE ab1; 102-- Ensure we can build statistics for tables with inheritance. 103CREATE TABLE ab1 (a INTEGER, b INTEGER); 104CREATE TABLE ab1c () INHERITS (ab1); 105INSERT INTO ab1 VALUES (1,1); 106CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1; 107ANALYZE ab1; 108DROP TABLE ab1 CASCADE; 109NOTICE: drop cascades to table ab1c 110-- Verify supported object types for extended statistics 111CREATE schema tststats; 112CREATE TABLE tststats.t (a int, b int, c text); 113CREATE INDEX ti ON tststats.t (a, b); 114CREATE SEQUENCE tststats.s; 115CREATE VIEW tststats.v AS SELECT * FROM tststats.t; 116CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t; 117CREATE TYPE tststats.ty AS (a int, b int, c text); 118CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw; 119CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw; 120CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv; 121CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b); 122CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10); 123CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t; 124CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti; 125ERROR: relation "ti" is not a table, foreign table, or materialized view 126CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s; 127ERROR: relation "s" is not a table, foreign table, or materialized view 128CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v; 129ERROR: relation "v" is not a table, foreign table, or materialized view 130CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv; 131CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty; 132ERROR: relation "ty" is not a table, foreign table, or materialized view 133CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f; 134CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt; 135CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1; 136DO $$ 137DECLARE 138 relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass; 139BEGIN 140 EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname; 141EXCEPTION WHEN wrong_object_type THEN 142 RAISE NOTICE 'stats on toast table not created'; 143END; 144$$; 145NOTICE: stats on toast table not created 146\set VERBOSITY terse \\ -- suppress cascade details 147DROP SCHEMA tststats CASCADE; 148NOTICE: drop cascades to 7 other objects 149DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE; 150NOTICE: drop cascades to server extstats_dummy_srv 151\set VERBOSITY default 152-- n-distinct tests 153CREATE TABLE ndistinct ( 154 filler1 TEXT, 155 filler2 NUMERIC, 156 a INT, 157 b INT, 158 filler3 DATE, 159 c INT, 160 d INT 161); 162-- over-estimates when using only per-column statistics 163INSERT INTO ndistinct (a, b, c, filler1) 164 SELECT i/100, i/100, i/100, cash_words((i/100)::money) 165 FROM generate_series(1,30000) s(i); 166ANALYZE ndistinct; 167-- Group Aggregate, due to over-estimate of the number of groups 168EXPLAIN (COSTS off) 169 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 170 QUERY PLAN 171----------------------------------- 172 GroupAggregate 173 Group Key: a, b 174 -> Sort 175 Sort Key: a, b 176 -> Seq Scan on ndistinct 177(5 rows) 178 179EXPLAIN (COSTS off) 180 SELECT COUNT(*) FROM ndistinct GROUP BY b, c; 181 QUERY PLAN 182----------------------------------- 183 GroupAggregate 184 Group Key: b, c 185 -> Sort 186 Sort Key: b, c 187 -> Seq Scan on ndistinct 188(5 rows) 189 190EXPLAIN (COSTS off) 191 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 192 QUERY PLAN 193----------------------------------- 194 GroupAggregate 195 Group Key: a, b, c 196 -> Sort 197 Sort Key: a, b, c 198 -> Seq Scan on ndistinct 199(5 rows) 200 201EXPLAIN (COSTS off) 202 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 203 QUERY PLAN 204----------------------------------- 205 GroupAggregate 206 Group Key: a, b, c, d 207 -> Sort 208 Sort Key: a, b, c, d 209 -> Seq Scan on ndistinct 210(5 rows) 211 212EXPLAIN (COSTS off) 213 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 214 QUERY PLAN 215----------------------------------- 216 GroupAggregate 217 Group Key: b, c, d 218 -> Sort 219 Sort Key: b, c, d 220 -> Seq Scan on ndistinct 221(5 rows) 222 223-- correct command 224CREATE STATISTICS s10 ON a, b, c FROM ndistinct; 225ANALYZE ndistinct; 226SELECT stxkind, stxndistinct 227 FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; 228 stxkind | stxndistinct 229---------+--------------------------------------------------------- 230 {d,f} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301} 231(1 row) 232 233-- minor improvement, make sure the ctid does not break the matching 234EXPLAIN (COSTS off) 235SELECT COUNT(*) FROM ndistinct GROUP BY ctid, a, b; 236 QUERY PLAN 237----------------------------------- 238 GroupAggregate 239 Group Key: ctid, a, b 240 -> Sort 241 Sort Key: ctid, a, b 242 -> Seq Scan on ndistinct 243(5 rows) 244 245-- Hash Aggregate, thanks to estimates improved by the statistic 246EXPLAIN (COSTS off) 247 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 248 QUERY PLAN 249----------------------------- 250 HashAggregate 251 Group Key: a, b 252 -> Seq Scan on ndistinct 253(3 rows) 254 255EXPLAIN (COSTS off) 256 SELECT COUNT(*) FROM ndistinct GROUP BY b, c; 257 QUERY PLAN 258----------------------------- 259 HashAggregate 260 Group Key: b, c 261 -> Seq Scan on ndistinct 262(3 rows) 263 264EXPLAIN (COSTS off) 265 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 266 QUERY PLAN 267----------------------------- 268 HashAggregate 269 Group Key: a, b, c 270 -> Seq Scan on ndistinct 271(3 rows) 272 273-- last two plans keep using Group Aggregate, because 'd' is not covered 274-- by the statistic and while it's NULL-only we assume 200 values for it 275EXPLAIN (COSTS off) 276 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 277 QUERY PLAN 278----------------------------------- 279 GroupAggregate 280 Group Key: a, b, c, d 281 -> Sort 282 Sort Key: a, b, c, d 283 -> Seq Scan on ndistinct 284(5 rows) 285 286EXPLAIN (COSTS off) 287 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 288 QUERY PLAN 289----------------------------------- 290 GroupAggregate 291 Group Key: b, c, d 292 -> Sort 293 Sort Key: b, c, d 294 -> Seq Scan on ndistinct 295(5 rows) 296 297TRUNCATE TABLE ndistinct; 298-- under-estimates when using only per-column statistics 299INSERT INTO ndistinct (a, b, c, filler1) 300 SELECT mod(i,50), mod(i,51), mod(i,32), 301 cash_words(mod(i,33)::int::money) 302 FROM generate_series(1,10000) s(i); 303ANALYZE ndistinct; 304SELECT stxkind, stxndistinct 305 FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; 306 stxkind | stxndistinct 307---------+------------------------------------------------------------- 308 {d,f} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000} 309(1 row) 310 311-- plans using Group Aggregate, thanks to using correct esimates 312EXPLAIN (COSTS off) 313 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 314 QUERY PLAN 315----------------------------------- 316 GroupAggregate 317 Group Key: a, b 318 -> Sort 319 Sort Key: a, b 320 -> Seq Scan on ndistinct 321(5 rows) 322 323EXPLAIN (COSTS off) 324 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 325 QUERY PLAN 326----------------------------------- 327 GroupAggregate 328 Group Key: a, b, c 329 -> Sort 330 Sort Key: a, b, c 331 -> Seq Scan on ndistinct 332(5 rows) 333 334EXPLAIN (COSTS off) 335 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 336 QUERY PLAN 337----------------------------------- 338 GroupAggregate 339 Group Key: a, b, c, d 340 -> Sort 341 Sort Key: a, b, c, d 342 -> Seq Scan on ndistinct 343(5 rows) 344 345EXPLAIN (COSTS off) 346 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 347 QUERY PLAN 348----------------------------- 349 HashAggregate 350 Group Key: b, c, d 351 -> Seq Scan on ndistinct 352(3 rows) 353 354EXPLAIN (COSTS off) 355 SELECT COUNT(*) FROM ndistinct GROUP BY a, d; 356 QUERY PLAN 357----------------------------- 358 HashAggregate 359 Group Key: a, d 360 -> Seq Scan on ndistinct 361(3 rows) 362 363DROP STATISTICS s10; 364SELECT stxkind, stxndistinct 365 FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass; 366 stxkind | stxndistinct 367---------+-------------- 368(0 rows) 369 370-- dropping the statistics switches the plans to Hash Aggregate, 371-- due to under-estimates 372EXPLAIN (COSTS off) 373 SELECT COUNT(*) FROM ndistinct GROUP BY a, b; 374 QUERY PLAN 375----------------------------- 376 HashAggregate 377 Group Key: a, b 378 -> Seq Scan on ndistinct 379(3 rows) 380 381EXPLAIN (COSTS off) 382 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; 383 QUERY PLAN 384----------------------------- 385 HashAggregate 386 Group Key: a, b, c 387 -> Seq Scan on ndistinct 388(3 rows) 389 390EXPLAIN (COSTS off) 391 SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; 392 QUERY PLAN 393----------------------------- 394 HashAggregate 395 Group Key: a, b, c, d 396 -> Seq Scan on ndistinct 397(3 rows) 398 399EXPLAIN (COSTS off) 400 SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; 401 QUERY PLAN 402----------------------------- 403 HashAggregate 404 Group Key: b, c, d 405 -> Seq Scan on ndistinct 406(3 rows) 407 408EXPLAIN (COSTS off) 409 SELECT COUNT(*) FROM ndistinct GROUP BY a, d; 410 QUERY PLAN 411----------------------------- 412 HashAggregate 413 Group Key: a, d 414 -> Seq Scan on ndistinct 415(3 rows) 416 417-- functional dependencies tests 418CREATE TABLE functional_dependencies ( 419 filler1 TEXT, 420 filler2 NUMERIC, 421 a INT, 422 b TEXT, 423 filler3 DATE, 424 c INT, 425 d TEXT 426); 427SET random_page_cost = 1.2; 428CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b); 429CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c); 430-- random data (no functional dependencies) 431INSERT INTO functional_dependencies (a, b, c, filler1) 432 SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i); 433ANALYZE functional_dependencies; 434EXPLAIN (COSTS OFF) 435 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 436 QUERY PLAN 437--------------------------------------------------- 438 Bitmap Heap Scan on functional_dependencies 439 Recheck Cond: ((a = 1) AND (b = '1'::text)) 440 -> Bitmap Index Scan on fdeps_abc_idx 441 Index Cond: ((a = 1) AND (b = '1'::text)) 442(4 rows) 443 444EXPLAIN (COSTS OFF) 445 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 446 QUERY PLAN 447----------------------------------------------------------- 448 Index Scan using fdeps_abc_idx on functional_dependencies 449 Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) 450(2 rows) 451 452-- create statistics 453CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; 454ANALYZE functional_dependencies; 455EXPLAIN (COSTS OFF) 456 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 457 QUERY PLAN 458--------------------------------------------------- 459 Bitmap Heap Scan on functional_dependencies 460 Recheck Cond: ((a = 1) AND (b = '1'::text)) 461 -> Bitmap Index Scan on fdeps_abc_idx 462 Index Cond: ((a = 1) AND (b = '1'::text)) 463(4 rows) 464 465EXPLAIN (COSTS OFF) 466 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 467 QUERY PLAN 468----------------------------------------------------------- 469 Index Scan using fdeps_abc_idx on functional_dependencies 470 Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) 471(2 rows) 472 473-- a => b, a => c, b => c 474TRUNCATE functional_dependencies; 475DROP STATISTICS func_deps_stat; 476INSERT INTO functional_dependencies (a, b, c, filler1) 477 SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i); 478ANALYZE functional_dependencies; 479EXPLAIN (COSTS OFF) 480 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 481 QUERY PLAN 482----------------------------------------------------------- 483 Index Scan using fdeps_abc_idx on functional_dependencies 484 Index Cond: ((a = 1) AND (b = '1'::text)) 485(2 rows) 486 487EXPLAIN (COSTS OFF) 488 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 489 QUERY PLAN 490----------------------------------------------------------- 491 Index Scan using fdeps_abc_idx on functional_dependencies 492 Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1)) 493(2 rows) 494 495-- create statistics 496CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; 497ANALYZE functional_dependencies; 498EXPLAIN (COSTS OFF) 499 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1'; 500 QUERY PLAN 501--------------------------------------------------- 502 Bitmap Heap Scan on functional_dependencies 503 Recheck Cond: ((a = 1) AND (b = '1'::text)) 504 -> Bitmap Index Scan on fdeps_abc_idx 505 Index Cond: ((a = 1) AND (b = '1'::text)) 506(4 rows) 507 508EXPLAIN (COSTS OFF) 509 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 510 QUERY PLAN 511--------------------------------------------------- 512 Bitmap Heap Scan on functional_dependencies 513 Recheck Cond: ((a = 1) AND (b = '1'::text)) 514 Filter: (c = 1) 515 -> Bitmap Index Scan on fdeps_ab_idx 516 Index Cond: ((a = 1) AND (b = '1'::text)) 517(5 rows) 518 519-- check change of column type doesn't break it 520ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric; 521EXPLAIN (COSTS OFF) 522 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 523 QUERY PLAN 524--------------------------------------------------- 525 Bitmap Heap Scan on functional_dependencies 526 Recheck Cond: ((a = 1) AND (b = '1'::text)) 527 Filter: (c = '1'::numeric) 528 -> Bitmap Index Scan on fdeps_ab_idx 529 Index Cond: ((a = 1) AND (b = '1'::text)) 530(5 rows) 531 532ANALYZE functional_dependencies; 533EXPLAIN (COSTS OFF) 534 SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1; 535 QUERY PLAN 536--------------------------------------------------- 537 Bitmap Heap Scan on functional_dependencies 538 Recheck Cond: ((a = 1) AND (b = '1'::text)) 539 Filter: (c = '1'::numeric) 540 -> Bitmap Index Scan on fdeps_ab_idx 541 Index Cond: ((a = 1) AND (b = '1'::text)) 542(5 rows) 543 544RESET random_page_cost; 545