1 /*------------------------------------------------------------------------- 2 * 3 * selfuncs.c 4 * Selectivity functions and index cost estimation functions for 5 * standard operators and index access methods. 6 * 7 * Selectivity routines are registered in the pg_operator catalog 8 * in the "oprrest" and "oprjoin" attributes. 9 * 10 * Index cost functions are located via the index AM's API struct, 11 * which is obtained from the handler function registered in pg_am. 12 * 13 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group 14 * Portions Copyright (c) 1994, Regents of the University of California 15 * 16 * 17 * IDENTIFICATION 18 * src/backend/utils/adt/selfuncs.c 19 * 20 *------------------------------------------------------------------------- 21 */ 22 23 /*---------- 24 * Operator selectivity estimation functions are called to estimate the 25 * selectivity of WHERE clauses whose top-level operator is their operator. 26 * We divide the problem into two cases: 27 * Restriction clause estimation: the clause involves vars of just 28 * one relation. 29 * Join clause estimation: the clause involves vars of multiple rels. 30 * Join selectivity estimation is far more difficult and usually less accurate 31 * than restriction estimation. 32 * 33 * When dealing with the inner scan of a nestloop join, we consider the 34 * join's joinclauses as restriction clauses for the inner relation, and 35 * treat vars of the outer relation as parameters (a/k/a constants of unknown 36 * values). So, restriction estimators need to be able to accept an argument 37 * telling which relation is to be treated as the variable. 38 * 39 * The call convention for a restriction estimator (oprrest function) is 40 * 41 * Selectivity oprrest (PlannerInfo *root, 42 * Oid operator, 43 * List *args, 44 * int varRelid); 45 * 46 * root: general information about the query (rtable and RelOptInfo lists 47 * are particularly important for the estimator). 
48 * operator: OID of the specific operator in question. 49 * args: argument list from the operator clause. 50 * varRelid: if not zero, the relid (rtable index) of the relation to 51 * be treated as the variable relation. May be zero if the args list 52 * is known to contain vars of only one relation. 53 * 54 * This is represented at the SQL level (in pg_proc) as 55 * 56 * float8 oprrest (internal, oid, internal, int4); 57 * 58 * The result is a selectivity, that is, a fraction (0 to 1) of the rows 59 * of the relation that are expected to produce a TRUE result for the 60 * given operator. 61 * 62 * The call convention for a join estimator (oprjoin function) is similar 63 * except that varRelid is not needed, and instead join information is 64 * supplied: 65 * 66 * Selectivity oprjoin (PlannerInfo *root, 67 * Oid operator, 68 * List *args, 69 * JoinType jointype, 70 * SpecialJoinInfo *sjinfo); 71 * 72 * float8 oprjoin (internal, oid, internal, int2, internal); 73 * 74 * (Before Postgres 8.4, join estimators had only the first four of these 75 * parameters. That signature is still allowed, but deprecated.) The 76 * relationship between jointype and sjinfo is explained in the comments for 77 * clause_selectivity() --- the short version is that jointype is usually 78 * best ignored in favor of examining sjinfo. 79 * 80 * Join selectivity for regular inner and outer joins is defined as the 81 * fraction (0 to 1) of the cross product of the relations that is expected 82 * to produce a TRUE result for the given operator. For both semi and anti 83 * joins, however, the selectivity is defined as the fraction of the left-hand 84 * side relation's rows that are expected to have a match (ie, at least one 85 * row with a TRUE result) in the right-hand side. 86 * 87 * For both oprrest and oprjoin functions, the operator's input collation OID 88 * (if any) is passed using the standard fmgr mechanism, so that the estimator 89 * function can fetch it with PG_GET_COLLATION(). 
Note, however, that all 90 * statistics in pg_statistic are currently built using the relevant column's 91 * collation. 92 *---------- 93 */ 94 95 #include "postgres.h" 96 97 #include <ctype.h> 98 #include <math.h> 99 100 #include "access/brin.h" 101 #include "access/brin_page.h" 102 #include "access/gin.h" 103 #include "access/table.h" 104 #include "access/tableam.h" 105 #include "access/visibilitymap.h" 106 #include "catalog/pg_am.h" 107 #include "catalog/pg_collation.h" 108 #include "catalog/pg_operator.h" 109 #include "catalog/pg_statistic.h" 110 #include "catalog/pg_statistic_ext.h" 111 #include "executor/nodeAgg.h" 112 #include "miscadmin.h" 113 #include "nodes/makefuncs.h" 114 #include "nodes/nodeFuncs.h" 115 #include "optimizer/clauses.h" 116 #include "optimizer/cost.h" 117 #include "optimizer/optimizer.h" 118 #include "optimizer/pathnode.h" 119 #include "optimizer/paths.h" 120 #include "optimizer/plancat.h" 121 #include "parser/parse_clause.h" 122 #include "parser/parsetree.h" 123 #include "statistics/statistics.h" 124 #include "storage/bufmgr.h" 125 #include "utils/acl.h" 126 #include "utils/builtins.h" 127 #include "utils/date.h" 128 #include "utils/datum.h" 129 #include "utils/fmgroids.h" 130 #include "utils/index_selfuncs.h" 131 #include "utils/lsyscache.h" 132 #include "utils/memutils.h" 133 #include "utils/pg_locale.h" 134 #include "utils/rel.h" 135 #include "utils/selfuncs.h" 136 #include "utils/snapmgr.h" 137 #include "utils/spccache.h" 138 #include "utils/syscache.h" 139 #include "utils/timestamp.h" 140 #include "utils/typcache.h" 141 142 143 /* source-code-compatibility hacks for pull_varnos() API change */ 144 #define pull_varnos(a,b) pull_varnos_new(a,b) 145 #define NumRelids(a,b) NumRelids_new(a,b) 146 147 /* Hooks for plugins to get control when we ask for stats */ 148 get_relation_stats_hook_type get_relation_stats_hook = NULL; 149 get_index_stats_hook_type get_index_stats_hook = NULL; 150 151 static double eqsel_internal(PG_FUNCTION_ARGS, 
bool negate); 152 static double eqjoinsel_inner(Oid opfuncoid, Oid collation, 153 VariableStatData *vardata1, VariableStatData *vardata2, 154 double nd1, double nd2, 155 bool isdefault1, bool isdefault2, 156 AttStatsSlot *sslot1, AttStatsSlot *sslot2, 157 Form_pg_statistic stats1, Form_pg_statistic stats2, 158 bool have_mcvs1, bool have_mcvs2); 159 static double eqjoinsel_semi(Oid opfuncoid, Oid collation, 160 VariableStatData *vardata1, VariableStatData *vardata2, 161 double nd1, double nd2, 162 bool isdefault1, bool isdefault2, 163 AttStatsSlot *sslot1, AttStatsSlot *sslot2, 164 Form_pg_statistic stats1, Form_pg_statistic stats2, 165 bool have_mcvs1, bool have_mcvs2, 166 RelOptInfo *inner_rel); 167 static bool estimate_multivariate_ndistinct(PlannerInfo *root, 168 RelOptInfo *rel, List **varinfos, double *ndistinct); 169 static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid, 170 double *scaledvalue, 171 Datum lobound, Datum hibound, Oid boundstypid, 172 double *scaledlobound, double *scaledhibound); 173 static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure); 174 static void convert_string_to_scalar(char *value, 175 double *scaledvalue, 176 char *lobound, 177 double *scaledlobound, 178 char *hibound, 179 double *scaledhibound); 180 static void convert_bytea_to_scalar(Datum value, 181 double *scaledvalue, 182 Datum lobound, 183 double *scaledlobound, 184 Datum hibound, 185 double *scaledhibound); 186 static double convert_one_string_to_scalar(char *value, 187 int rangelo, int rangehi); 188 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, 189 int rangelo, int rangehi); 190 static char *convert_string_datum(Datum value, Oid typid, Oid collid, 191 bool *failure); 192 static double convert_timevalue_to_scalar(Datum value, Oid typid, 193 bool *failure); 194 static void examine_simple_variable(PlannerInfo *root, Var *var, 195 VariableStatData *vardata); 196 static bool get_variable_range(PlannerInfo 
 *root, VariableStatData *vardata,
                                   Oid sortop, Oid collation,
                                   Datum *min, Datum *max);
static void get_stats_slot_range(AttStatsSlot *sslot,
                                 Oid opfuncoid, FmgrInfo *opproc,
                                 Oid collation, int16 typLen, bool typByVal,
                                 Datum *min, Datum *max, bool *p_have_data);
static bool get_actual_variable_range(PlannerInfo *root,
                                      VariableStatData *vardata,
                                      Oid sortop, Oid collation,
                                      Datum *min, Datum *max);
static bool get_actual_variable_endpoint(Relation heapRel,
                                         Relation indexRel,
                                         ScanDirection indexscandir,
                                         ScanKey scankeys,
                                         int16 typLen,
                                         bool typByVal,
                                         TupleTableSlot *tableslot,
                                         MemoryContext outercontext,
                                         Datum *endpointDatum);
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);


/*
 * eqsel            - Selectivity of "=" for any data types.
 *
 * Note: this routine is also used to estimate selectivity for some
 * operators that are not "=" but have comparable selectivity behavior,
 * such as "~=" (geometric approximate-match).  Even for "=", we must
 * keep in mind that the left and right datatypes may differ.
 */
Datum
eqsel(PG_FUNCTION_ARGS)
{
    PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
}

/*
 * Common code for eqsel() and neqsel()
 *
 * Returns a selectivity, ie, a fraction (0 to 1) of the relation's rows
 * expected to satisfy the clause.  The "negate" flag selects the <>
 * interpretation.
 */
static double
eqsel_internal(PG_FUNCTION_ARGS, bool negate)
{
    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
    Oid         operator = PG_GETARG_OID(1);
    List       *args = (List *) PG_GETARG_POINTER(2);
    int         varRelid = PG_GETARG_INT32(3);
    Oid         collation = PG_GET_COLLATION();
    VariableStatData vardata;
    Node       *other;
    bool        varonleft;
    double      selec;

    /*
     * When asked about <>, we do the estimation using the corresponding =
     * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
     */
    if (negate)
    {
        operator = get_negator(operator);
        if (!OidIsValid(operator))
        {
            /* Use default selectivity (should we raise an error instead?) */
            return 1.0 - DEFAULT_EQ_SEL;
        }
    }

    /*
     * If expression is not variable = something or something = variable, then
     * punt and return a default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;

    /*
     * We can do a lot better if the something is a constant.  (Note: the
     * Const might result from estimation rather than being a simple constant
     * in the query.)
     */
    if (IsA(other, Const))
        selec = var_eq_const(&vardata, operator, collation,
                             ((Const *) other)->constvalue,
                             ((Const *) other)->constisnull,
                             varonleft, negate);
    else
        selec = var_eq_non_const(&vardata, operator, collation, other,
                                 varonleft, negate);

    ReleaseVariableStats(vardata);

    return selec;
}

/*
 * var_eq_const --- eqsel for var = const case
 *
 * This is exported so that some other estimation functions can use it.
 */
double
var_eq_const(VariableStatData *vardata, Oid operator, Oid collation,
             Datum constval, bool constisnull,
             bool varonleft, bool negate)
{
    double      selec;
    double      nullfrac = 0.0;
    bool        isdefault;
    Oid         opfuncoid;

    /*
     * If the constant is NULL, assume operator is strict and return zero, ie,
     * operator will never return TRUE.  (It's zero even for a negator op.)
     */
    if (constisnull)
        return 0.0;

    /*
     * Grab the nullfrac for use below.  Note we allow use of nullfrac
     * regardless of security check.
     */
    if (HeapTupleIsValid(vardata->statsTuple))
    {
        Form_pg_statistic stats;

        stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
        nullfrac = stats->stanullfrac;
    }

    /*
     * If we matched the var to a unique index or DISTINCT clause, assume
     * there is exactly one match regardless of anything else.  (This is
     * slightly bogus, since the index or clause's equality operator might be
     * different from ours, but it's much more likely to be right than
     * ignoring the information.)
     */
    if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
    {
        selec = 1.0 / vardata->rel->tuples;
    }
    else if (HeapTupleIsValid(vardata->statsTuple) &&
             statistic_proc_security_check(vardata,
                                           (opfuncoid = get_opcode(operator))))
    {
        /* NB: the embedded assignment to opfuncoid above is intentional */
        AttStatsSlot sslot;
        bool        match = false;
        int         i;

        /*
         * Is the constant "=" to any of the column's most common values?
         * (Although the given operator may not really be "=", we will assume
         * that seeing whether it returns TRUE is an appropriate test.  If you
         * don't like this, maybe you shouldn't be using eqsel for your
         * operator...)
         */
        if (get_attstatsslot(&sslot, vardata->statsTuple,
                             STATISTIC_KIND_MCV, InvalidOid,
                             ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
        {
            LOCAL_FCINFO(fcinfo, 2);
            FmgrInfo    eqproc;

            fmgr_info(opfuncoid, &eqproc);

            /*
             * Save a few cycles by setting up the fcinfo struct just once.
             * Using FunctionCallInvoke directly also avoids failure if the
             * eqproc returns NULL, though really equality functions should
             * never do that.
             */
            InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
                                     NULL, NULL);
            fcinfo->args[0].isnull = false;
            fcinfo->args[1].isnull = false;
            /* be careful to apply operator right way 'round */
            if (varonleft)
                fcinfo->args[1].value = constval;
            else
                fcinfo->args[0].value = constval;

            for (i = 0; i < sslot.nvalues; i++)
            {
                Datum       fresult;

                if (varonleft)
                    fcinfo->args[0].value = sslot.values[i];
                else
                    fcinfo->args[1].value = sslot.values[i];
                fcinfo->isnull = false;
                fresult = FunctionCallInvoke(fcinfo);
                if (!fcinfo->isnull && DatumGetBool(fresult))
                {
                    /* on exit, i indexes the matched MCV entry */
                    match = true;
                    break;
                }
            }
        }
        else
        {
            /* no most-common-value info available */
            i = 0;              /* keep compiler quiet */
        }

        if (match)
        {
            /*
             * Constant is "=" to this common value.  We know selectivity
             * exactly (or as exactly as ANALYZE could calculate it, anyway).
             */
            selec = sslot.numbers[i];
        }
        else
        {
            /*
             * Comparison is against a constant that is neither NULL nor any
             * of the common values.  Its selectivity cannot be more than
             * this:
             */
            double      sumcommon = 0.0;
            double      otherdistinct;

            for (i = 0; i < sslot.nnumbers; i++)
                sumcommon += sslot.numbers[i];
            selec = 1.0 - sumcommon - nullfrac;
            CLAMP_PROBABILITY(selec);

            /*
             * and in fact it's probably a good deal less. We approximate that
             * all the not-common values share this remaining fraction
             * equally, so we divide by the number of other distinct values.
             */
            otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
                sslot.nnumbers;
            if (otherdistinct > 1)
                selec /= otherdistinct;

            /*
             * Another cross-check: selectivity shouldn't be estimated as more
             * than the least common "most common value".
             */
            if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
                selec = sslot.numbers[sslot.nnumbers - 1];
        }

        free_attstatsslot(&sslot);
    }
    else
    {
        /*
         * No ANALYZE stats available, so make a guess using estimated number
         * of distinct values and assuming they are equally common. (The guess
         * is unlikely to be very good, but we do know a few special cases.)
         */
        selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
    }

    /* now adjust if we wanted <> rather than = */
    if (negate)
        selec = 1.0 - selec - nullfrac;

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}

/*
 * var_eq_non_const --- eqsel for var = something-other-than-const case
 *
 * This is exported so that some other estimation functions can use it.
 */
double
var_eq_non_const(VariableStatData *vardata, Oid operator, Oid collation,
                 Node *other,
                 bool varonleft, bool negate)
{
    double      selec;
    double      nullfrac = 0.0;
    bool        isdefault;

    /*
     * Grab the nullfrac for use below.
     */
    if (HeapTupleIsValid(vardata->statsTuple))
    {
        Form_pg_statistic stats;

        stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
        nullfrac = stats->stanullfrac;
    }

    /*
     * If we matched the var to a unique index or DISTINCT clause, assume
     * there is exactly one match regardless of anything else.  (This is
     * slightly bogus, since the index or clause's equality operator might be
     * different from ours, but it's much more likely to be right than
     * ignoring the information.)
     */
    if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
    {
        selec = 1.0 / vardata->rel->tuples;
    }
    else if (HeapTupleIsValid(vardata->statsTuple))
    {
        double      ndistinct;
        AttStatsSlot sslot;

        /*
         * Search is for a value that we do not know a priori, but we will
         * assume it is not NULL.  Estimate the selectivity as non-null
         * fraction divided by number of distinct values, so that we get a
         * result averaged over all possible values whether common or
         * uncommon.  (Essentially, we are assuming that the not-yet-known
         * comparison value is equally likely to be any of the possible
         * values, regardless of their frequency in the table.  Is that a good
         * idea?)
         */
        selec = 1.0 - nullfrac;
        ndistinct = get_variable_numdistinct(vardata, &isdefault);
        if (ndistinct > 1)
            selec /= ndistinct;

        /*
         * Cross-check: selectivity should never be estimated as more than the
         * most common value's.  (numbers[0] is the highest MCV frequency.)
         */
        if (get_attstatsslot(&sslot, vardata->statsTuple,
                             STATISTIC_KIND_MCV, InvalidOid,
                             ATTSTATSSLOT_NUMBERS))
        {
            if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
                selec = sslot.numbers[0];
            free_attstatsslot(&sslot);
        }
    }
    else
    {
        /*
         * No ANALYZE stats available, so make a guess using estimated number
         * of distinct values and assuming they are equally common. (The guess
         * is unlikely to be very good, but we do know a few special cases.)
         */
        selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
    }

    /* now adjust if we wanted <> rather than = */
    if (negate)
        selec = 1.0 - selec - nullfrac;

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}

/*
 * neqsel           - Selectivity of "!=" for any data types.
 *
 * This routine is also used for some operators that are not "!="
 * but have comparable selectivity behavior.
 * See above comments for eqsel().
 */
Datum
neqsel(PG_FUNCTION_ARGS)
{
    PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
}

/*
 * scalarineqsel        - Selectivity of "<", "<=", ">", ">=" for scalars.
 *
 * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
 * The isgt and iseq flags distinguish which of the four cases apply.
 *
 * The caller has commuted the clause, if necessary, so that we can treat
 * the variable as being on the left.  The caller must also make sure that
 * the other side of the clause is a non-null Const, and dissect that into
 * a value and datatype.  (This definition simplifies some callers that
 * want to estimate against a computed value instead of a Const node.)
 *
 * This routine works for any datatype (or pair of datatypes) known to
 * convert_to_scalar().  If it is applied to some other datatype,
 * it will return an approximate estimate based on assuming that the constant
 * value falls in the middle of the bin identified by binary search.
 */
static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
              Oid collation,
              VariableStatData *vardata, Datum constval, Oid consttype)
{
    Form_pg_statistic stats;
    FmgrInfo    opproc;
    double      mcv_selec,
                hist_selec,
                sumcommon;
    double      selec;

    if (!HeapTupleIsValid(vardata->statsTuple))
    {
        /*
         * No stats are available.  Typically this means we have to fall back
         * on the default estimate; but if the variable is CTID then we can
         * make an estimate based on comparing the constant to the table size.
         */
        if (vardata->var && IsA(vardata->var, Var) &&
            ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
        {
            /* constval is an ItemPointer here, per the CTID comparison */
            ItemPointer itemptr;
            double      block;
            double      density;

            /*
             * If the relation's empty, we're going to include all of it.
             * (This is mostly to avoid divide-by-zero below.)
             */
            if (vardata->rel->pages == 0)
                return 1.0;

            itemptr = (ItemPointer) DatumGetPointer(constval);
            block = ItemPointerGetBlockNumberNoCheck(itemptr);

            /*
             * Determine the average number of tuples per page (density).
             *
             * Since the last page will, on average, be only half full, we can
             * estimate it to have half as many tuples as earlier pages.  So
             * give it half the weight of a regular page.
             */
            density = vardata->rel->tuples / (vardata->rel->pages - 0.5);

            /* If target is the last page, use half the density. */
            if (block >= vardata->rel->pages - 1)
                density *= 0.5;

            /*
             * Using the average tuples per page, calculate how far into the
             * page the itemptr is likely to be and adjust block accordingly,
             * by adding that fraction of a whole block (but never more than a
             * whole block, no matter how high the itemptr's offset is).  Here
             * we are ignoring the possibility of dead-tuple line pointers,
             * which is fairly bogus, but we lack the info to do better.
             */
            if (density > 0.0)
            {
                OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr);

                block += Min(offset / density, 1.0);
            }

            /*
             * Convert relative block number to selectivity.  Again, the last
             * page has only half weight.
             */
            selec = block / (vardata->rel->pages - 0.5);

            /*
             * The calculation so far gave us a selectivity for the "<=" case.
             * We'll have one less tuple for "<" and one additional tuple for
             * ">=", the latter of which we'll reverse the selectivity for
             * below, so we can simply subtract one tuple for both cases.  The
             * cases that need this adjustment can be identified by iseq being
             * equal to isgt.
             */
            if (iseq == isgt && vardata->rel->tuples >= 1.0)
                selec -= (1.0 / vardata->rel->tuples);

            /* Finally, reverse the selectivity for the ">", ">=" cases. */
            if (isgt)
                selec = 1.0 - selec;

            CLAMP_PROBABILITY(selec);
            return selec;
        }

        /* no stats available, so default result */
        return DEFAULT_INEQ_SEL;
    }
    stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

    fmgr_info(get_opcode(operator), &opproc);

    /*
     * If we have most-common-values info, add up the fractions of the MCV
     * entries that satisfy MCV OP CONST.  These fractions contribute directly
     * to the result selectivity.  Also add up the total fraction represented
     * by MCV entries.
     */
    mcv_selec = mcv_selectivity(vardata, &opproc, collation, constval, true,
                                &sumcommon);

    /*
     * If there is a histogram, determine which bin the constant falls in, and
     * compute the resulting contribution to selectivity.
     */
    hist_selec = ineq_histogram_selectivity(root, vardata,
                                            operator, &opproc, isgt, iseq,
                                            collation,
                                            constval, consttype);

    /*
     * Now merge the results from the MCV and histogram calculations,
     * realizing that the histogram covers only the non-null values that are
     * not listed in MCV.  (A negative hist_selec means no histogram.)
     */
    selec = 1.0 - stats->stanullfrac - sumcommon;

    if (hist_selec >= 0.0)
        selec *= hist_selec;
    else
    {
        /*
         * If no histogram but there are values not accounted for by MCV,
         * arbitrarily assume half of them will match.
         */
        selec *= 0.5;
    }

    selec += mcv_selec;

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}

/*
 * mcv_selectivity          - Examine the MCV list for selectivity estimates
 *
 * Determine the fraction of the variable's MCV population that satisfies
 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
 * compute the fraction of the total column population represented by the MCV
 * list.  This code will work for any boolean-returning predicate operator.
 *
 * The function result is the MCV selectivity, and the fraction of the
 * total population is returned into *sumcommonp.  Zeroes are returned
 * if there is no MCV list.
 */
double
mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
                Datum constval, bool varonleft,
                double *sumcommonp)
{
    double      mcv_selec,
                sumcommon;
    AttStatsSlot sslot;
    int         i;

    mcv_selec = 0.0;
    sumcommon = 0.0;

    /*
     * We only consult the MCV list if stats exist and the current user is
     * allowed to see them (statistic_proc_security_check).
     */
    if (HeapTupleIsValid(vardata->statsTuple) &&
        statistic_proc_security_check(vardata, opproc->fn_oid) &&
        get_attstatsslot(&sslot, vardata->statsTuple,
                         STATISTIC_KIND_MCV, InvalidOid,
                         ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
    {
        LOCAL_FCINFO(fcinfo, 2);

        /*
         * We invoke the opproc "by hand" so that we won't fail on NULL
         * results.  Such cases won't arise for normal comparison functions,
         * but generic_restriction_selectivity could perhaps be used with
         * operators that can return NULL.  A small side benefit is to not
         * need to re-initialize the fcinfo struct from scratch each time.
         */
        InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
                                 NULL, NULL);
        fcinfo->args[0].isnull = false;
        fcinfo->args[1].isnull = false;
        /* be careful to apply operator right way 'round */
        if (varonleft)
            fcinfo->args[1].value = constval;
        else
            fcinfo->args[0].value = constval;

        for (i = 0; i < sslot.nvalues; i++)
        {
            Datum       fresult;

            if (varonleft)
                fcinfo->args[0].value = sslot.values[i];
            else
                fcinfo->args[1].value = sslot.values[i];
            fcinfo->isnull = false;
            fresult = FunctionCallInvoke(fcinfo);
            if (!fcinfo->isnull && DatumGetBool(fresult))
                mcv_selec += sslot.numbers[i];
            sumcommon += sslot.numbers[i];
        }
        free_attstatsslot(&sslot);
    }

    *sumcommonp = sumcommon;
    return mcv_selec;
}

/*
 * histogram_selectivity        - Examine the histogram for selectivity estimates
 *
 * Determine the fraction of the variable's histogram entries that satisfy
 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
 *
 * This code will work for any boolean-returning predicate operator, whether
 * or not it has anything to do with the histogram sort operator.  We are
 * essentially using the histogram just as a representative sample.  However,
 * small histograms are unlikely to be all that representative, so the caller
 * should be prepared to fall back on some other estimation approach when the
 * histogram is missing or very small.  It may also be prudent to combine this
 * approach with another one when the histogram is small.
 *
 * If the actual histogram size is not at least min_hist_size, we won't bother
 * to do the calculation at all.  Also, if the n_skip parameter is > 0, we
 * ignore the first and last n_skip histogram elements, on the grounds that
 * they are outliers and hence not very representative.  Typical values for
 * these parameters are 10 and 1.
 *
 * The function result is the selectivity, or -1 if there is no histogram
 * or it's smaller than min_hist_size.
 *
 * The output parameter *hist_size receives the actual histogram size,
 * or zero if no histogram.  Callers may use this number to decide how
 * much faith to put in the function result.
 *
 * Note that the result disregards both the most-common-values (if any) and
 * null entries.  The caller is expected to combine this result with
 * statistics for those portions of the column population.  It may also be
 * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
 */
double
histogram_selectivity(VariableStatData *vardata,
                      FmgrInfo *opproc, Oid collation,
                      Datum constval, bool varonleft,
                      int min_hist_size, int n_skip,
                      int *hist_size)
{
    double      result;
    AttStatsSlot sslot;

    /* check sanity of parameters */
    Assert(n_skip >= 0);
    Assert(min_hist_size > 2 * n_skip);

    if (HeapTupleIsValid(vardata->statsTuple) &&
        statistic_proc_security_check(vardata, opproc->fn_oid) &&
        get_attstatsslot(&sslot, vardata->statsTuple,
                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
                         ATTSTATSSLOT_VALUES))
    {
        *hist_size = sslot.nvalues;
        if (sslot.nvalues >= min_hist_size)
        {
            LOCAL_FCINFO(fcinfo, 2);
            int         nmatch = 0;
            int         i;

            /*
             * We invoke the opproc "by hand" so that we won't fail on NULL
             * results.  Such cases won't arise for normal comparison
             * functions, but generic_restriction_selectivity could perhaps be
             * used with operators that can return NULL.  A small side benefit
             * is to not need to re-initialize the fcinfo struct from scratch
             * each time.
             */
            InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
                                     NULL, NULL);
            fcinfo->args[0].isnull = false;
            fcinfo->args[1].isnull = false;
            /* be careful to apply operator right way 'round */
            if (varonleft)
                fcinfo->args[1].value = constval;
            else
                fcinfo->args[0].value = constval;

            /* skip n_skip entries at each end as presumed outliers */
            for (i = n_skip; i < sslot.nvalues - n_skip; i++)
            {
                Datum       fresult;

                if (varonleft)
                    fcinfo->args[0].value = sslot.values[i];
                else
                    fcinfo->args[1].value = sslot.values[i];
                fcinfo->isnull = false;
                fresult = FunctionCallInvoke(fcinfo);
                if (!fcinfo->isnull && DatumGetBool(fresult))
                    nmatch++;
            }
            result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
        }
        else
            result = -1;
        free_attstatsslot(&sslot);
    }
    else
    {
        *hist_size = 0;
        result = -1;
    }

    return result;
}

/*
 * generic_restriction_selectivity      - Selectivity for almost anything
 *
 * This function estimates selectivity for operators that we don't have any
 * special knowledge about, but are on data types that we collect standard
 * MCV and/or histogram statistics for.  (Additional assumptions are that
 * the operator is strict and immutable, or at least stable.)
 *
 * If we have "VAR OP CONST" or "CONST OP VAR", selectivity is estimated by
 * applying the operator to each element of the column's MCV and/or histogram
 * stats, and merging the results using the assumption that the histogram is
 * a reasonable random sample of the column's non-MCV population.  Note that
 * if the operator's semantics are related to the histogram ordering, this
 * might not be such a great assumption; other functions such as
 * scalarineqsel() are probably a better match in such cases.
 *
 * Otherwise, fall back to the default selectivity provided by the caller.
 */
double
generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
                                List *args, int varRelid,
                                double default_selectivity)
{
    double      selec;
    VariableStatData vardata;
    Node       *other;
    bool        varonleft;

    /*
     * If expression is not variable OP something or something OP variable,
     * then punt and return the default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        return default_selectivity;

    /*
     * If the something is a NULL constant, assume operator is strict and
     * return zero, ie, operator will never return TRUE.
     */
    if (IsA(other, Const) &&
        ((Const *) other)->constisnull)
    {
        ReleaseVariableStats(vardata);
        return 0.0;
    }

    if (IsA(other, Const))
    {
        /* Variable is being compared to a known non-null constant */
        Datum       constval = ((Const *) other)->constvalue;
        FmgrInfo    opproc;
        double      mcvsum;
        double      mcvsel;
        double      nullfrac;
        int         hist_size;

        fmgr_info(get_opcode(oproid), &opproc);

        /*
         * Calculate the selectivity for the column's most common values.
         */
        mcvsel = mcv_selectivity(&vardata, &opproc, collation,
                                 constval, varonleft,
                                 &mcvsum);

        /*
         * If the histogram is large enough, see what fraction of it matches
         * the query, and assume that's representative of the non-MCV
         * population.  Otherwise use the default selectivity for the non-MCV
         * population.  (histogram_selectivity returns -1 when the histogram
         * is missing or smaller than the min_hist_size of 10.)
         */
        selec = histogram_selectivity(&vardata, &opproc, collation,
                                      constval, varonleft,
                                      10, 1, &hist_size);
        if (selec < 0)
        {
            /* Nope, fall back on default */
            selec = default_selectivity;
        }
        else if (hist_size < 100)
        {
            /*
             * For histogram sizes from 10 to 100, we combine the histogram
             * and default selectivities, putting increasingly more trust in
             * the histogram for larger sizes.
             */
            double      hist_weight = hist_size / 100.0;

            selec = selec * hist_weight +
                default_selectivity * (1.0 - hist_weight);
        }

        /* In any case, don't believe extremely small or large estimates. */
        if (selec < 0.0001)
            selec = 0.0001;
        else if (selec > 0.9999)
            selec = 0.9999;

        /* Don't forget to account for nulls. */
        if (HeapTupleIsValid(vardata.statsTuple))
            nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
        else
            nullfrac = 0.0;

        /*
         * Now merge the results from the MCV and histogram calculations,
         * realizing that the histogram covers only the non-null values that
         * are not listed in MCV.
         */
        selec *= 1.0 - nullfrac - mcvsum;
        selec += mcvsel;
    }
    else
    {
        /* Comparison value is not constant, so we can't do anything */
        selec = default_selectivity;
    }

    ReleaseVariableStats(vardata);

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}

/*
 * ineq_histogram_selectivity   - Examine the histogram for scalarineqsel
 *
 * Determine the fraction of the variable's histogram population that
 * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
 * The isgt and iseq flags distinguish which of the four cases apply.
 *
 * While opproc could be looked up from the operator OID, common callers
 * also need to call it separately, so we make the caller pass both.
 *
 * Returns -1 if there is no histogram (valid results will always be >= 0).
 *
 * Note that the result disregards both the most-common-values (if any) and
 * null entries.  The caller is expected to combine this result with
 * statistics for those portions of the column population.
 *
 * This is exported so that some other estimation functions can use it.
 */
double
ineq_histogram_selectivity(PlannerInfo *root,
						   VariableStatData *vardata,
						   Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq,
						   Oid collation,
						   Datum constval, Oid consttype)
{
	double		hist_selec;
	AttStatsSlot sslot;

	/* -1 is the "no histogram available" result */
	hist_selec = -1.0;

	/*
	 * Someday, ANALYZE might store more than one histogram per rel/att,
	 * corresponding to more than one possible sort ordering defined for the
	 * column type.  Right now, we know there is only one, so just grab it and
	 * see if it matches the query.
	 *
	 * Note that we can't use opoid as search argument; the staop appearing in
	 * pg_statistic will be for the relevant '<' operator, but what we have
	 * might be some other inequality operator such as '>='.  (Even if opoid
	 * is a '<' operator, it could be cross-type.)  Hence we must use
	 * comparison_ops_are_compatible() to see if the operators match.
	 */
	if (HeapTupleIsValid(vardata->statsTuple) &&
		statistic_proc_security_check(vardata, opproc->fn_oid) &&
		get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 ATTSTATSSLOT_VALUES))
	{
		if (sslot.nvalues > 1 &&
			sslot.stacoll == collation &&
			comparison_ops_are_compatible(sslot.staop, opoid))
		{
			/*
			 * Use binary search to find the desired location, namely the
			 * right end of the histogram bin containing the comparison value,
			 * which is the leftmost entry for which the comparison operator
			 * succeeds (if isgt) or fails (if !isgt).
			 *
			 * In this loop, we pay no attention to whether the operator iseq
			 * or not; that detail will be mopped up below.  (We cannot tell,
			 * anyway, whether the operator thinks the values are equal.)
			 *
			 * If the binary search accesses the first or last histogram
			 * entry, we try to replace that endpoint with the true column min
			 * or max as found by get_actual_variable_range().  This
			 * ameliorates misestimates when the min or max is moving as a
			 * result of changes since the last ANALYZE.  Note that this could
			 * result in effectively including MCVs into the histogram that
			 * weren't there before, but we don't try to correct for that.
			 */
			double		histfrac;
			int			lobound = 0;	/* first possible slot to search */
			int			hibound = sslot.nvalues;	/* last+1 slot to search */
			bool		have_end = false;

			/*
			 * If there are only two histogram entries, we'll want up-to-date
			 * values for both.  (If there are more than two, we need at most
			 * one of them to be updated, so we deal with that within the
			 * loop.)
			 */
			if (sslot.nvalues == 2)
				have_end = get_actual_variable_range(root,
													 vardata,
													 sslot.staop,
													 collation,
													 &sslot.values[0],
													 &sslot.values[1]);

			while (lobound < hibound)
			{
				int			probe = (lobound + hibound) / 2;
				bool		ltcmp;

				/*
				 * If we find ourselves about to compare to the first or last
				 * histogram entry, first try to replace it with the actual
				 * current min or max (unless we already did so above).
				 */
				if (probe == 0 && sslot.nvalues > 2)
					have_end = get_actual_variable_range(root,
														 vardata,
														 sslot.staop,
														 collation,
														 &sslot.values[0],
														 NULL);
				else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
					have_end = get_actual_variable_range(root,
														 vardata,
														 sslot.staop,
														 collation,
														 NULL,
														 &sslot.values[probe]);

				/* ltcmp is true if values[probe] OP constval succeeds */
				ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
													   collation,
													   sslot.values[probe],
													   constval));
				if (isgt)
					ltcmp = !ltcmp;
				if (ltcmp)
					lobound = probe + 1;
				else
					hibound = probe;
			}

			if (lobound <= 0)
			{
				/*
				 * Constant is below lower histogram boundary.  More
				 * precisely, we have found that no entry in the histogram
				 * satisfies the inequality clause (if !isgt) or they all do
				 * (if isgt).  We estimate that that's true of the entire
				 * table, so set histfrac to 0.0 (which we'll flip to 1.0
				 * below, if isgt).
				 */
				histfrac = 0.0;
			}
			else if (lobound >= sslot.nvalues)
			{
				/*
				 * Inverse case: constant is above upper histogram boundary.
				 */
				histfrac = 1.0;
			}
			else
			{
				/* We have values[i-1] <= constant <= values[i]. */
				int			i = lobound;
				double		eq_selec = 0;
				double		val,
							high,
							low;
				double		binfrac;

				/*
				 * In the cases where we'll need it below, obtain an estimate
				 * of the selectivity of "x = constval".  We use a calculation
				 * similar to what var_eq_const() does for a non-MCV constant,
				 * ie, estimate that all distinct non-MCV values occur equally
				 * often.  But multiplication by "1.0 - sumcommon - nullfrac"
				 * will be done by our caller, so we shouldn't do that here.
				 * Therefore we can't try to clamp the estimate by reference
				 * to the least common MCV; the result would be too small.
				 *
				 * Note: since this is effectively assuming that constval
				 * isn't an MCV, it's logically dubious if constval in fact is
				 * one.  But we have to apply *some* correction for equality,
				 * and anyway we cannot tell if constval is an MCV, since we
				 * don't have a suitable equality operator at hand.
				 */
				if (i == 1 || isgt == iseq)
				{
					double		otherdistinct;
					bool		isdefault;
					AttStatsSlot mcvslot;

					/* Get estimated number of distinct values */
					otherdistinct = get_variable_numdistinct(vardata,
															 &isdefault);

					/* Subtract off the number of known MCVs */
					if (get_attstatsslot(&mcvslot, vardata->statsTuple,
										 STATISTIC_KIND_MCV, InvalidOid,
										 ATTSTATSSLOT_NUMBERS))
					{
						otherdistinct -= mcvslot.nnumbers;
						free_attstatsslot(&mcvslot);
					}

					/* If result doesn't seem sane, leave eq_selec at 0 */
					if (otherdistinct > 1)
						eq_selec = 1.0 / otherdistinct;
				}

				/*
				 * Convert the constant and the two nearest bin boundary
				 * values to a uniform comparison scale, and do a linear
				 * interpolation within this bin.
				 */
				if (convert_to_scalar(constval, consttype, collation,
									  &val,
									  sslot.values[i - 1], sslot.values[i],
									  vardata->vartype,
									  &low, &high))
				{
					if (high <= low)
					{
						/* cope if bin boundaries appear identical */
						binfrac = 0.5;
					}
					else if (val <= low)
						binfrac = 0.0;
					else if (val >= high)
						binfrac = 1.0;
					else
					{
						binfrac = (val - low) / (high - low);

						/*
						 * Watch out for the possibility that we got a NaN or
						 * Infinity from the division.  This can happen
						 * despite the previous checks, if for example "low"
						 * is -Infinity.
						 */
						if (isnan(binfrac) ||
							binfrac < 0.0 || binfrac > 1.0)
							binfrac = 0.5;
					}
				}
				else
				{
					/*
					 * Ideally we'd produce an error here, on the grounds that
					 * the given operator shouldn't have scalarXXsel
					 * registered as its selectivity func unless we can deal
					 * with its operand types.  But currently, all manner of
					 * stuff is invoking scalarXXsel, so give a default
					 * estimate until that can be fixed.
					 */
					binfrac = 0.5;
				}

				/*
				 * Now, compute the overall selectivity across the values
				 * represented by the histogram.  We have i-1 full bins and
				 * binfrac partial bin below the constant.
				 */
				histfrac = (double) (i - 1) + binfrac;
				histfrac /= (double) (sslot.nvalues - 1);

				/*
				 * At this point, histfrac is an estimate of the fraction of
				 * the population represented by the histogram that satisfies
				 * "x <= constval".  Somewhat remarkably, this statement is
				 * true regardless of which operator we were doing the probes
				 * with, so long as convert_to_scalar() delivers reasonable
				 * results.  If the probe constant is equal to some histogram
				 * entry, we would have considered the bin to the left of that
				 * entry if probing with "<" or ">=", or the bin to the right
				 * if probing with "<=" or ">"; but binfrac would have come
				 * out as 1.0 in the first case and 0.0 in the second, leading
				 * to the same histfrac in either case.  For probe constants
				 * between histogram entries, we find the same bin and get the
				 * same estimate with any operator.
				 *
				 * The fact that the estimate corresponds to "x <= constval"
				 * and not "x < constval" is because of the way that ANALYZE
				 * constructs the histogram: each entry is, effectively, the
				 * rightmost value in its sample bucket.  So selectivity
				 * values that are exact multiples of 1/(histogram_size-1)
				 * should be understood as estimates including a histogram
				 * entry plus everything to its left.
				 *
				 * However, that breaks down for the first histogram entry,
				 * which necessarily is the leftmost value in its sample
				 * bucket.  That means the first histogram bin is slightly
				 * narrower than the rest, by an amount equal to eq_selec.
				 * Another way to say that is that we want "x <= leftmost" to
				 * be estimated as eq_selec not zero.  So, if we're dealing
				 * with the first bin (i==1), rescale to make that true while
				 * adjusting the rest of that bin linearly.
				 */
				if (i == 1)
					histfrac += eq_selec * (1.0 - binfrac);

				/*
				 * "x <= constval" is good if we want an estimate for "<=" or
				 * ">", but if we are estimating for "<" or ">=", we now need
				 * to decrease the estimate by eq_selec.
				 */
				if (isgt == iseq)
					histfrac -= eq_selec;
			}

			/*
			 * Now the estimate is finished for "<" and "<=" cases.  If we are
			 * estimating for ">" or ">=", flip it.
			 */
			hist_selec = isgt ? (1.0 - histfrac) : histfrac;

			/*
			 * The histogram boundaries are only approximate to begin with,
			 * and may well be out of date anyway.  Therefore, don't believe
			 * extremely small or large selectivity estimates --- unless we
			 * got actual current endpoint values from the table, in which
			 * case just do the usual sanity clamp.  Somewhat arbitrarily, we
			 * set the cutoff for other cases at a hundredth of the histogram
			 * resolution.
			 */
			if (have_end)
				CLAMP_PROBABILITY(hist_selec);
			else
			{
				double		cutoff = 0.01 / (double) (sslot.nvalues - 1);

				if (hist_selec < cutoff)
					hist_selec = cutoff;
				else if (hist_selec > 1.0 - cutoff)
					hist_selec = 1.0 - cutoff;
			}
		}
		else if (sslot.nvalues > 1)
		{
			/*
			 * If we get here, we have a histogram but it's not sorted the way
			 * we want.  Do a brute-force search to see how many of the
			 * entries satisfy the comparison condition, and take that
			 * fraction as our estimate.  (This is identical to the inner loop
			 * of histogram_selectivity; maybe share code?)
			 */
			LOCAL_FCINFO(fcinfo, 2);
			int			nmatch = 0;

			InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
									 NULL, NULL);
			fcinfo->args[0].isnull = false;
			fcinfo->args[1].isnull = false;
			/* histogram entry is always the left (first) operand here */
			fcinfo->args[1].value = constval;
			for (int i = 0; i < sslot.nvalues; i++)
			{
				Datum		fresult;

				fcinfo->args[0].value = sslot.values[i];
				fcinfo->isnull = false;
				fresult = FunctionCallInvoke(fcinfo);
				if (!fcinfo->isnull && DatumGetBool(fresult))
					nmatch++;
			}
			hist_selec = ((double) nmatch) / ((double) sslot.nvalues);

			/*
			 * As above, clamp to a hundredth of the histogram resolution.
			 * This case is surely even less trustworthy than the normal one,
			 * so we shouldn't believe exact 0 or 1 selectivity.  (Maybe the
			 * clamp should be more restrictive in this case?)
			 */
			{
				double		cutoff = 0.01 / (double) (sslot.nvalues - 1);

				if (hist_selec < cutoff)
					hist_selec = cutoff;
				else if (hist_selec > 1.0 - cutoff)
					hist_selec = 1.0 - cutoff;
			}
		}

		free_attstatsslot(&sslot);
	}

	return hist_selec;
}

/*
 * Common wrapper function for the selectivity estimators that simply
 * invoke scalarineqsel().
 */
static Datum
scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	Oid			collation = PG_GET_COLLATION();
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Datum		constval;
	Oid			consttype;
	double		selec;

	/*
	 * If expression is not variable op something or something op variable,
	 * then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
	}

	/*
	 * If the constant is NULL, assume operator is strict and return zero, ie,
	 * operator will never return TRUE.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constval = ((Const *) other)->constvalue;
	consttype = ((Const *) other)->consttype;

	/*
	 * Force the var to be on the left to simplify logic in scalarineqsel.
	 * Commuting the operator flips the direction, so isgt must flip too.
	 */
	if (!varonleft)
	{
		operator = get_commutator(operator);
		if (!operator)
		{
			/* Use default selectivity (should we raise an error instead?) */
			ReleaseVariableStats(vardata);
			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
		}
		isgt = !isgt;
	}

	/* The rest of the work is done by scalarineqsel(). */
	selec = scalarineqsel(root, operator, isgt, iseq, collation,
						  &vardata, constval, consttype);

	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 *		scalarltsel		- Selectivity of "<" for scalars.
 */
Datum
scalarltsel(PG_FUNCTION_ARGS)
{
	return scalarineqsel_wrapper(fcinfo, false, false);
}

/*
 *		scalarlesel		- Selectivity of "<=" for scalars.
 */
Datum
scalarlesel(PG_FUNCTION_ARGS)
{
	return scalarineqsel_wrapper(fcinfo, false, true);
}

/*
 *		scalargtsel		- Selectivity of ">" for scalars.
 */
Datum
scalargtsel(PG_FUNCTION_ARGS)
{
	return scalarineqsel_wrapper(fcinfo, true, false);
}

/*
 *		scalargesel		- Selectivity of ">=" for scalars.
 */
Datum
scalargesel(PG_FUNCTION_ARGS)
{
	return scalarineqsel_wrapper(fcinfo, true, true);
}

/*
 *		boolvarsel		- Selectivity of Boolean variable.
 *
 * This can actually be called on any boolean-valued expression.  If it
 * involves only Vars of the specified relation, and if there are statistics
 * about the Var or expression (the latter is possible if it's indexed) then
 * we'll produce a real estimate; otherwise it's just a default.
 */
Selectivity
boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
{
	VariableStatData vardata;
	double		selec;

	examine_variable(root, arg, varRelid, &vardata);
	if (HeapTupleIsValid(vardata.statsTuple))
	{
		/*
		 * A boolean variable V is equivalent to the clause V = 't', so we
		 * compute the selectivity as if that is what we have.
		 */
		selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
							 BoolGetDatum(true), false, true, false);
	}
	else
	{
		/* Otherwise, the default estimate is 0.5 */
		selec = 0.5;
	}
	ReleaseVariableStats(vardata);
	return selec;
}

/*
 *		booltestsel		- Selectivity of BooleanTest Node.
 */
Selectivity
booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
			int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
{
	VariableStatData vardata;
	double		selec;

	examine_variable(root, arg, varRelid, &vardata);

	if (HeapTupleIsValid(vardata.statsTuple))
	{
		Form_pg_statistic stats;
		double		freq_null;
		AttStatsSlot sslot;

		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
		freq_null = stats->stanullfrac;

		if (get_attstatsslot(&sslot, vardata.statsTuple,
							 STATISTIC_KIND_MCV, InvalidOid,
							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
			&& sslot.nnumbers > 0)
		{
			double		freq_true;
			double		freq_false;

			/*
			 * Get first MCV frequency and derive frequency for true.
			 * (A boolean column has at most two non-null values, so the
			 * first MCV plus the null fraction determines everything.)
			 */
			if (DatumGetBool(sslot.values[0]))
				freq_true = sslot.numbers[0];
			else
				freq_true = 1.0 - sslot.numbers[0] - freq_null;

			/*
			 * Next derive frequency for false. Then use these as appropriate
			 * to derive frequency for each case.
			 */
			freq_false = 1.0 - freq_true - freq_null;

			switch (booltesttype)
			{
				case IS_UNKNOWN:
					/* select only NULL values */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:
					/* select non-NULL values */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
					/* select only TRUE values */
					selec = freq_true;
					break;
				case IS_NOT_TRUE:
					/* select non-TRUE values */
					selec = 1.0 - freq_true;
					break;
				case IS_FALSE:
					/* select only FALSE values */
					selec = freq_false;
					break;
				case IS_NOT_FALSE:
					/* select non-FALSE values */
					selec = 1.0 - freq_false;
					break;
				default:
					elog(ERROR, "unrecognized booltesttype: %d",
						 (int) booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}

			free_attstatsslot(&sslot);
		}
		else
		{
			/*
			 * No most-common-value info available.  Still have null fraction
			 * information, so use it for IS [NOT] UNKNOWN.  Otherwise adjust
			 * for null fraction and assume a 50-50 split of TRUE and FALSE.
			 */
			switch (booltesttype)
			{
				case IS_UNKNOWN:
					/* select only NULL values */
					selec = freq_null;
					break;
				case IS_NOT_UNKNOWN:
					/* select non-NULL values */
					selec = 1.0 - freq_null;
					break;
				case IS_TRUE:
				case IS_FALSE:
					/* Assume we select half of the non-NULL values */
					selec = (1.0 - freq_null) / 2.0;
					break;
				case IS_NOT_TRUE:
				case IS_NOT_FALSE:
					/* Assume we select NULLs plus half of the non-NULLs */
					/* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
					selec = (freq_null + 1.0) / 2.0;
					break;
				default:
					elog(ERROR, "unrecognized booltesttype: %d",
						 (int) booltesttype);
					selec = 0.0;	/* Keep compiler quiet */
					break;
			}
		}
	}
	else
	{
		/*
		 * If we can't get variable statistics for the argument, perhaps
		 * clause_selectivity can do something with it.  We ignore the
		 * possibility of a NULL value when using clause_selectivity, and just
		 * assume the value is either TRUE or FALSE.
		 */
		switch (booltesttype)
		{
			case IS_UNKNOWN:
				selec = DEFAULT_UNK_SEL;
				break;
			case IS_NOT_UNKNOWN:
				selec = DEFAULT_NOT_UNK_SEL;
				break;
			case IS_TRUE:
			case IS_NOT_FALSE:
				selec = (double) clause_selectivity(root, arg,
													varRelid,
													jointype, sjinfo);
				break;
			case IS_FALSE:
			case IS_NOT_TRUE:
				selec = 1.0 - (double) clause_selectivity(root, arg,
														  varRelid,
														  jointype, sjinfo);
				break;
			default:
				elog(ERROR, "unrecognized booltesttype: %d",
					 (int) booltesttype);
				selec = 0.0;	/* Keep compiler quiet */
				break;
		}
	}

	ReleaseVariableStats(vardata);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return (Selectivity) selec;
}

/*
 *		nulltestsel		- Selectivity of NullTest Node.
1697 */ 1698 Selectivity 1699 nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, 1700 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo) 1701 { 1702 VariableStatData vardata; 1703 double selec; 1704 1705 examine_variable(root, arg, varRelid, &vardata); 1706 1707 if (HeapTupleIsValid(vardata.statsTuple)) 1708 { 1709 Form_pg_statistic stats; 1710 double freq_null; 1711 1712 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); 1713 freq_null = stats->stanullfrac; 1714 1715 switch (nulltesttype) 1716 { 1717 case IS_NULL: 1718 1719 /* 1720 * Use freq_null directly. 1721 */ 1722 selec = freq_null; 1723 break; 1724 case IS_NOT_NULL: 1725 1726 /* 1727 * Select not unknown (not null) values. Calculate from 1728 * freq_null. 1729 */ 1730 selec = 1.0 - freq_null; 1731 break; 1732 default: 1733 elog(ERROR, "unrecognized nulltesttype: %d", 1734 (int) nulltesttype); 1735 return (Selectivity) 0; /* keep compiler quiet */ 1736 } 1737 } 1738 else if (vardata.var && IsA(vardata.var, Var) && 1739 ((Var *) vardata.var)->varattno < 0) 1740 { 1741 /* 1742 * There are no stats for system columns, but we know they are never 1743 * NULL. 1744 */ 1745 selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0; 1746 } 1747 else 1748 { 1749 /* 1750 * No ANALYZE stats available, so make a guess 1751 */ 1752 switch (nulltesttype) 1753 { 1754 case IS_NULL: 1755 selec = DEFAULT_UNK_SEL; 1756 break; 1757 case IS_NOT_NULL: 1758 selec = DEFAULT_NOT_UNK_SEL; 1759 break; 1760 default: 1761 elog(ERROR, "unrecognized nulltesttype: %d", 1762 (int) nulltesttype); 1763 return (Selectivity) 0; /* keep compiler quiet */ 1764 } 1765 } 1766 1767 ReleaseVariableStats(vardata); 1768 1769 /* result should be in range, but make sure... 
*/ 1770 CLAMP_PROBABILITY(selec); 1771 1772 return (Selectivity) selec; 1773 } 1774 1775 /* 1776 * strip_array_coercion - strip binary-compatible relabeling from an array expr 1777 * 1778 * For array values, the parser normally generates ArrayCoerceExpr conversions, 1779 * but it seems possible that RelabelType might show up. Also, the planner 1780 * is not currently tense about collapsing stacked ArrayCoerceExpr nodes, 1781 * so we need to be ready to deal with more than one level. 1782 */ 1783 static Node * 1784 strip_array_coercion(Node *node) 1785 { 1786 for (;;) 1787 { 1788 if (node && IsA(node, ArrayCoerceExpr)) 1789 { 1790 ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node; 1791 1792 /* 1793 * If the per-element expression is just a RelabelType on top of 1794 * CaseTestExpr, then we know it's a binary-compatible relabeling. 1795 */ 1796 if (IsA(acoerce->elemexpr, RelabelType) && 1797 IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr)) 1798 node = (Node *) acoerce->arg; 1799 else 1800 break; 1801 } 1802 else if (node && IsA(node, RelabelType)) 1803 { 1804 /* We don't really expect this case, but may as well cope */ 1805 node = (Node *) ((RelabelType *) node)->arg; 1806 } 1807 else 1808 break; 1809 } 1810 return node; 1811 } 1812 1813 /* 1814 * scalararraysel - Selectivity of ScalarArrayOpExpr Node. 
 */
Selectivity
scalararraysel(PlannerInfo *root,
			   ScalarArrayOpExpr *clause,
			   bool is_join_clause,
			   int varRelid,
			   JoinType jointype,
			   SpecialJoinInfo *sjinfo)
{
	Oid			operator = clause->opno;
	bool		useOr = clause->useOr;
	bool		isEquality = false;
	bool		isInequality = false;
	Node	   *leftop;
	Node	   *rightop;
	Oid			nominal_element_type;
	Oid			nominal_element_collation;
	TypeCacheEntry *typentry;
	RegProcedure oprsel;
	FmgrInfo	oprselproc;
	Selectivity s1;
	Selectivity s1disjoint;

	/* First, deconstruct the expression */
	Assert(list_length(clause->args) == 2);
	leftop = (Node *) linitial(clause->args);
	rightop = (Node *) lsecond(clause->args);

	/* aggressively reduce both sides to constants */
	leftop = estimate_expression_value(root, leftop);
	rightop = estimate_expression_value(root, rightop);

	/* get nominal (after relabeling) element type of rightop */
	nominal_element_type = get_base_element_type(exprType(rightop));
	if (!OidIsValid(nominal_element_type))
		return (Selectivity) 0.5;	/* probably shouldn't happen */
	/* get nominal collation, too, for generating constants */
	nominal_element_collation = exprCollation(rightop);

	/* look through any binary-compatible relabeling of rightop */
	rightop = strip_array_coercion(rightop);

	/*
	 * Detect whether the operator is the default equality or inequality
	 * operator of the array element type.
	 */
	typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
	if (OidIsValid(typentry->eq_opr))
	{
		if (operator == typentry->eq_opr)
			isEquality = true;
		else if (get_negator(operator) == typentry->eq_opr)
			isInequality = true;
	}

	/*
	 * If it is equality or inequality, we might be able to estimate this as a
	 * form of array containment; for instance "const = ANY(column)" can be
	 * treated as "ARRAY[const] <@ column".  scalararraysel_containment tries
	 * that, and returns the selectivity estimate if successful, or -1 if not.
	 */
	if ((isEquality || isInequality) && !is_join_clause)
	{
		s1 = scalararraysel_containment(root, leftop, rightop,
										nominal_element_type,
										isEquality, useOr, varRelid);
		if (s1 >= 0.0)
			return s1;
	}

	/*
	 * Look up the underlying operator's selectivity estimator. Punt if it
	 * hasn't got one.
	 */
	if (is_join_clause)
		oprsel = get_oprjoin(operator);
	else
		oprsel = get_oprrest(operator);
	if (!oprsel)
		return (Selectivity) 0.5;
	fmgr_info(oprsel, &oprselproc);

	/*
	 * In the array-containment check above, we must only believe that an
	 * operator is equality or inequality if it is the default btree equality
	 * operator (or its negator) for the element type, since those are the
	 * operators that array containment will use.  But in what follows, we can
	 * be a little laxer, and also believe that any operators using eqsel() or
	 * neqsel() as selectivity estimator act like equality or inequality.
	 */
	if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
		isEquality = true;
	else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
		isInequality = true;

	/*
	 * We consider three cases:
	 *
	 * 1. rightop is an Array constant: deconstruct the array, apply the
	 * operator's selectivity function for each array element, and merge the
	 * results in the same way that clausesel.c does for AND/OR combinations.
	 *
	 * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
	 * function for each element of the ARRAY[] construct, and merge.
	 *
	 * 3. otherwise, make a guess ...
	 */
	if (rightop && IsA(rightop, Const))
	{
		Datum		arraydatum = ((Const *) rightop)->constvalue;
		bool		arrayisnull = ((Const *) rightop)->constisnull;
		ArrayType  *arrayval;
		int16		elmlen;
		bool		elmbyval;
		char		elmalign;
		int			num_elems;
		Datum	   *elem_values;
		bool	   *elem_nulls;
		int			i;

		if (arrayisnull)		/* qual can't succeed if null array */
			return (Selectivity) 0.0;
		arrayval = DatumGetArrayTypeP(arraydatum);
		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
							 &elmlen, &elmbyval, &elmalign);
		deconstruct_array(arrayval,
						  ARR_ELEMTYPE(arrayval),
						  elmlen, elmbyval, elmalign,
						  &elem_values, &elem_nulls, &num_elems);

		/*
		 * For generic operators, we assume the probability of success is
		 * independent for each array element.  But for "= ANY" or "<> ALL",
		 * if the array elements are distinct (which'd typically be the case)
		 * then the probabilities are disjoint, and we should just sum them.
		 *
		 * If we were being really tense we would try to confirm that the
		 * elements are all distinct, but that would be expensive and it
		 * doesn't seem to be worth the cycles; it would amount to penalizing
		 * well-written queries in favor of poorly-written ones.  However, we
		 * do protect ourselves a little bit by checking whether the
		 * disjointness assumption leads to an impossible (out of range)
		 * probability; if so, we fall back to the normal calculation.
		 */
		s1 = s1disjoint = (useOr ? 0.0 : 1.0);

		for (i = 0; i < num_elems; i++)
		{
			List	   *args;
			Selectivity s2;

			/* build "leftop OP element[i]" with the element as a Const */
			args = list_make2(leftop,
							  makeConst(nominal_element_type,
										-1,
										nominal_element_collation,
										elmlen,
										elem_values[i],
										elem_nulls[i],
										elmbyval));
			if (is_join_clause)
				s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
													  clause->inputcollid,
													  PointerGetDatum(root),
													  ObjectIdGetDatum(operator),
													  PointerGetDatum(args),
													  Int16GetDatum(jointype),
													  PointerGetDatum(sjinfo)));
			else
				s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
													  clause->inputcollid,
													  PointerGetDatum(root),
													  ObjectIdGetDatum(operator),
													  PointerGetDatum(args),
													  Int32GetDatum(varRelid)));

			if (useOr)
			{
				/* OR-combine per clausesel.c independence assumption */
				s1 = s1 + s2 - s1 * s2;
				if (isEquality)
					s1disjoint += s2;
			}
			else
			{
				/* AND-combine per clausesel.c independence assumption */
				s1 = s1 * s2;
				if (isInequality)
					s1disjoint += s2 - 1.0;
			}
		}

		/* accept disjoint-probability estimate if in range */
		if ((useOr ? isEquality : isInequality) &&
			s1disjoint >= 0.0 && s1disjoint <= 1.0)
			s1 = s1disjoint;
	}
	else if (rightop && IsA(rightop, ArrayExpr) &&
			 !((ArrayExpr *) rightop)->multidims)
	{
		ArrayExpr  *arrayexpr = (ArrayExpr *) rightop;
		int16		elmlen;
		bool		elmbyval;
		ListCell   *l;

		get_typlenbyval(arrayexpr->element_typeid,
						&elmlen, &elmbyval);

		/*
		 * We use the assumption of disjoint probabilities here too, although
		 * the odds of equal array elements are rather higher if the elements
		 * are not all constants (which they won't be, else constant folding
		 * would have reduced the ArrayExpr to a Const).  In this path it's
		 * critical to have the sanity check on the s1disjoint estimate.
		 */
		s1 = s1disjoint = (useOr ? 0.0 : 1.0);

		foreach(l, arrayexpr->elements)
		{
			Node	   *elem = (Node *) lfirst(l);
			List	   *args;
			Selectivity s2;

			/*
			 * Theoretically, if elem isn't of nominal_element_type we should
			 * insert a RelabelType, but it seems unlikely that any operator
			 * estimation function would really care ...
			 */
			args = list_make2(leftop, elem);
			if (is_join_clause)
				s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
													  clause->inputcollid,
													  PointerGetDatum(root),
													  ObjectIdGetDatum(operator),
													  PointerGetDatum(args),
													  Int16GetDatum(jointype),
													  PointerGetDatum(sjinfo)));
			else
				s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
													  clause->inputcollid,
													  PointerGetDatum(root),
													  ObjectIdGetDatum(operator),
													  PointerGetDatum(args),
													  Int32GetDatum(varRelid)));

			if (useOr)
			{
				s1 = s1 + s2 - s1 * s2;
				if (isEquality)
					s1disjoint += s2;
			}
			else
			{
				s1 = s1 * s2;
				if (isInequality)
					s1disjoint += s2 - 1.0;
			}
		}

		/* accept disjoint-probability estimate if in range */
		if ((useOr ? isEquality : isInequality) &&
			s1disjoint >= 0.0 && s1disjoint <= 1.0)
			s1 = s1disjoint;
	}
	else
	{
		CaseTestExpr *dummyexpr;
		List	   *args;
		Selectivity s2;
		int			i;

		/*
		 * We need a dummy rightop to pass to the operator selectivity
		 * routine.  It can be pretty much anything that doesn't look like a
		 * constant; CaseTestExpr is a convenient choice.
		 */
		dummyexpr = makeNode(CaseTestExpr);
		dummyexpr->typeId = nominal_element_type;
		dummyexpr->typeMod = -1;
		dummyexpr->collation = clause->inputcollid;
		args = list_make2(leftop, dummyexpr);
		if (is_join_clause)
			s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
												  clause->inputcollid,
												  PointerGetDatum(root),
												  ObjectIdGetDatum(operator),
												  PointerGetDatum(args),
												  Int16GetDatum(jointype),
												  PointerGetDatum(sjinfo)));
		else
			s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
												  clause->inputcollid,
												  PointerGetDatum(root),
												  ObjectIdGetDatum(operator),
												  PointerGetDatum(args),
												  Int32GetDatum(varRelid)));
		s1 = useOr ? 0.0 : 1.0;

		/*
		 * Arbitrarily assume 10 elements in the eventual array value (see
		 * also estimate_array_length).  We don't risk an assumption of
		 * disjoint probabilities here.
		 */
		for (i = 0; i < 10; i++)
		{
			if (useOr)
				s1 = s1 + s2 - s1 * s2;
			else
				s1 = s1 * s2;
		}
	}

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(s1);

	return s1;
}

/*
 * Estimate number of elements in the array yielded by an expression.
 *
 * It's important that this agree with scalararraysel.
 */
int
estimate_array_length(Node *arrayexpr)
{
	/* look through any binary-compatible relabeling of arrayexpr */
	arrayexpr = strip_array_coercion(arrayexpr);

	if (arrayexpr && IsA(arrayexpr, Const))
	{
		Datum		arraydatum = ((Const *) arrayexpr)->constvalue;
		bool		arrayisnull = ((Const *) arrayexpr)->constisnull;
		ArrayType  *arrayval;

		if (arrayisnull)
			return 0;			/* a NULL array constant yields no elements */
		arrayval = DatumGetArrayTypeP(arraydatum);
		return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
	}
	else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
			 !((ArrayExpr *) arrayexpr)->multidims)
	{
		return list_length(((ArrayExpr *) arrayexpr)->elements);
	}
	else
	{
		/*
		 * default guess --- see also scalararraysel.  Multi-dimensional
		 * ARRAY[] constructs and non-constant expressions land here too.
		 */
		return 10;
	}
}

/*
 * rowcomparesel		- Selectivity of RowCompareExpr Node.
 *
 * We estimate RowCompare selectivity by considering just the first (high
 * order) columns, which makes it equivalent to an ordinary OpExpr.  While
 * this estimate could be refined by considering additional columns, it
 * seems unlikely that we could do a lot better without multi-column
 * statistics.
 */
Selectivity
rowcomparesel(PlannerInfo *root,
			  RowCompareExpr *clause,
			  int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
{
	Selectivity s1;
	Oid			opno = linitial_oid(clause->opnos);
	Oid			inputcollid = linitial_oid(clause->inputcollids);
	List	   *opargs;
	bool		is_join_clause;

	/*
	 * Build equivalent arg list for single operator.  NOTE(review): assumes
	 * largs/rargs are nonempty, which holds for any valid RowCompareExpr.
	 */
	opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));

	/*
	 * Decide if it's a join clause.  This should match clausesel.c's
	 * treat_as_join_clause(), except that we intentionally consider only the
	 * leading columns and not the rest of the clause.
	 */
	if (varRelid != 0)
	{
		/*
		 * Caller is forcing restriction mode (eg, because we are examining an
		 * inner indexscan qual).
		 */
		is_join_clause = false;
	}
	else if (sjinfo == NULL)
	{
		/*
		 * It must be a restriction clause, since it's being evaluated at a
		 * scan node.
		 */
		is_join_clause = false;
	}
	else
	{
		/*
		 * Otherwise, it's a join if there's more than one relation used.
		 */
		is_join_clause = (NumRelids(root, (Node *) opargs) > 1);
	}

	if (is_join_clause)
	{
		/* Estimate selectivity for a join clause. */
		s1 = join_selectivity(root, opno,
							  opargs,
							  inputcollid,
							  jointype,
							  sjinfo);
	}
	else
	{
		/* Estimate selectivity for a restriction clause. */
		s1 = restriction_selectivity(root, opno,
									 opargs,
									 inputcollid,
									 varRelid);
	}

	return s1;
}

/*
 * eqjoinsel		- Join selectivity of "="
 */
Datum
eqjoinsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);

#ifdef NOT_USED
	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
#endif
	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
	Oid			collation = PG_GET_COLLATION();
	double		selec;
	double		selec_inner;
	VariableStatData vardata1;
	VariableStatData vardata2;
	double		nd1;
	double		nd2;
	bool		isdefault1;
	bool		isdefault2;
	Oid			opfuncoid;
	AttStatsSlot sslot1;
	AttStatsSlot sslot2;
	Form_pg_statistic stats1 = NULL;
	Form_pg_statistic stats2 = NULL;
	bool		have_mcvs1 = false;
	bool		have_mcvs2 = false;
	bool		join_is_reversed;
	RelOptInfo *inner_rel;

	get_join_variables(root, args, sjinfo,
					   &vardata1, &vardata2, &join_is_reversed);

	/* Estimate number of distinct values on each side of the join */
	nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
	nd2 = get_variable_numdistinct(&vardata2, &isdefault2);

	opfuncoid = get_opcode(operator);

	memset(&sslot1, 0, sizeof(sslot1));
	memset(&sslot2, 0, sizeof(sslot2));

	if (HeapTupleIsValid(vardata1.statsTuple))
	{
		/* note we allow use of nullfrac regardless of security check */
		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
		if (statistic_proc_security_check(&vardata1, opfuncoid))
			have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
										  STATISTIC_KIND_MCV, InvalidOid,
										  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
	}

	if (HeapTupleIsValid(vardata2.statsTuple))
	{
		/* note we allow use of nullfrac regardless of security check */
		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
		if (statistic_proc_security_check(&vardata2, opfuncoid))
			have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
										  STATISTIC_KIND_MCV, InvalidOid,
										  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
	}

	/*
	 * We need to compute the inner-join selectivity in all cases: even for
	 * SEMI/ANTI joins it serves as an upper-bound clamp below.
	 */
	selec_inner = eqjoinsel_inner(opfuncoid, collation,
								  &vardata1, &vardata2,
								  nd1, nd2,
								  isdefault1, isdefault2,
								  &sslot1, &sslot2,
								  stats1, stats2,
								  have_mcvs1, have_mcvs2);

	switch (sjinfo->jointype)
	{
		case JOIN_INNER:
		case JOIN_LEFT:
		case JOIN_FULL:
			selec = selec_inner;
			break;
		case JOIN_SEMI:
		case JOIN_ANTI:

			/*
			 * Look up the join's inner relation.  min_righthand is sufficient
			 * information because neither SEMI nor ANTI joins permit any
			 * reassociation into or out of their RHS, so the righthand will
			 * always be exactly that set of rels.
			 */
			inner_rel = find_join_input_rel(root, sjinfo->min_righthand);

			if (!join_is_reversed)
				selec = eqjoinsel_semi(opfuncoid, collation,
									   &vardata1, &vardata2,
									   nd1, nd2,
									   isdefault1, isdefault2,
									   &sslot1, &sslot2,
									   stats1, stats2,
									   have_mcvs1, have_mcvs2,
									   inner_rel);
			else
			{
				/*
				 * vardata1 is on the join's RHS here, so swap all the
				 * per-side inputs and use the commutator operator's
				 * implementation function (if one exists).
				 */
				Oid			commop = get_commutator(operator);
				Oid			commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;

				selec = eqjoinsel_semi(commopfuncoid, collation,
									   &vardata2, &vardata1,
									   nd2, nd1,
									   isdefault2, isdefault1,
									   &sslot2, &sslot1,
									   stats2, stats1,
									   have_mcvs2, have_mcvs1,
									   inner_rel);
			}

			/*
			 * We should never estimate the output of a semijoin to be more
			 * rows than we estimate for an inner join with the same input
			 * rels and join condition; it's obviously impossible for that to
			 * happen.  The former estimate is N1 * Ssemi while the latter is
			 * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner.  Doing
			 * this is worthwhile because of the shakier estimation rules we
			 * use in eqjoinsel_semi, particularly in cases where it has to
			 * punt entirely.
			 */
			selec = Min(selec, inner_rel->rows * selec_inner);
			break;
		default:
			/* other values not expected here */
			elog(ERROR, "unrecognized join type: %d",
				 (int) sjinfo->jointype);
			selec = 0;			/* keep compiler quiet */
			break;
	}

	free_attstatsslot(&sslot1);
	free_attstatsslot(&sslot2);

	ReleaseVariableStats(vardata1);
	ReleaseVariableStats(vardata2);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 * eqjoinsel_inner --- eqjoinsel for normal inner join
 *
 * We also use this for LEFT/FULL outer joins; it's not presently clear
 * that it's worth trying to distinguish them here.
 */
static double
eqjoinsel_inner(Oid opfuncoid, Oid collation,
				VariableStatData *vardata1, VariableStatData *vardata2,
				double nd1, double nd2,
				bool isdefault1, bool isdefault2,
				AttStatsSlot *sslot1, AttStatsSlot *sslot2,
				Form_pg_statistic stats1, Form_pg_statistic stats2,
				bool have_mcvs1, bool have_mcvs2)
{
	double		selec;

	if (have_mcvs1 && have_mcvs2)
	{
		/*
		 * We have most-common-value lists for both relations.  Run through
		 * the lists to see which MCVs actually join to each other with the
		 * given operator.  This allows us to determine the exact join
		 * selectivity for the portion of the relations represented by the MCV
		 * lists.  We still have to estimate for the remaining population, but
		 * in a skewed distribution this gives us a big leg up in accuracy.
		 * For motivation see the analysis in Y. Ioannidis and S.
		 * Christodoulakis, "On the propagation of errors in the size of join
		 * results", Technical Report 1018, Computer Science Dept., University
		 * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
		 */
		LOCAL_FCINFO(fcinfo, 2);
		FmgrInfo	eqproc;
		bool	   *hasmatch1;
		bool	   *hasmatch2;
		double		nullfrac1 = stats1->stanullfrac;
		double		nullfrac2 = stats2->stanullfrac;
		double		matchprodfreq,
					matchfreq1,
					matchfreq2,
					unmatchfreq1,
					unmatchfreq2,
					otherfreq1,
					otherfreq2,
					totalsel1,
					totalsel2;
		int			i,
					nmatches;

		fmgr_info(opfuncoid, &eqproc);

		/*
		 * Save a few cycles by setting up the fcinfo struct just once.  Using
		 * FunctionCallInvoke directly also avoids failure if the eqproc
		 * returns NULL, though really equality functions should never do
		 * that.
		 */
		InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
								 NULL, NULL);
		fcinfo->args[0].isnull = false;
		fcinfo->args[1].isnull = false;

		/* palloc0 gives all-false initial match flags */
		hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
		hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));

		/*
		 * Note we assume that each MCV will match at most one member of the
		 * other MCV list.  If the operator isn't really equality, there could
		 * be multiple matches --- but we don't look for them, both for speed
		 * and because the math wouldn't add up...
		 */
		matchprodfreq = 0.0;
		nmatches = 0;
		for (i = 0; i < sslot1->nvalues; i++)
		{
			int			j;

			fcinfo->args[0].value = sslot1->values[i];

			for (j = 0; j < sslot2->nvalues; j++)
			{
				Datum		fresult;

				if (hasmatch2[j])
					continue;
				fcinfo->args[1].value = sslot2->values[j];
				fcinfo->isnull = false;
				fresult = FunctionCallInvoke(fcinfo);
				if (!fcinfo->isnull && DatumGetBool(fresult))
				{
					/*
					 * Each MCV frequency is a per-row probability, so the
					 * product is the joint frequency of this matched pair.
					 */
					hasmatch1[i] = hasmatch2[j] = true;
					matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
					nmatches++;
					break;
				}
			}
		}
		CLAMP_PROBABILITY(matchprodfreq);
		/* Sum up frequencies of matched and unmatched MCVs */
		matchfreq1 = unmatchfreq1 = 0.0;
		for (i = 0; i < sslot1->nvalues; i++)
		{
			if (hasmatch1[i])
				matchfreq1 += sslot1->numbers[i];
			else
				unmatchfreq1 += sslot1->numbers[i];
		}
		CLAMP_PROBABILITY(matchfreq1);
		CLAMP_PROBABILITY(unmatchfreq1);
		matchfreq2 = unmatchfreq2 = 0.0;
		for (i = 0; i < sslot2->nvalues; i++)
		{
			if (hasmatch2[i])
				matchfreq2 += sslot2->numbers[i];
			else
				unmatchfreq2 += sslot2->numbers[i];
		}
		CLAMP_PROBABILITY(matchfreq2);
		CLAMP_PROBABILITY(unmatchfreq2);
		pfree(hasmatch1);
		pfree(hasmatch2);

		/*
		 * Compute total frequency of non-null values that are not in the MCV
		 * lists.
		 */
		otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
		otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
		CLAMP_PROBABILITY(otherfreq1);
		CLAMP_PROBABILITY(otherfreq2);

		/*
		 * We can estimate the total selectivity from the point of view of
		 * relation 1 as: the known selectivity for matched MCVs, plus
		 * unmatched MCVs that are assumed to match against random members of
		 * relation 2's non-MCV population, plus non-MCV values that are
		 * assumed to match against random members of relation 2's unmatched
		 * MCVs plus non-MCV values.
		 */
		totalsel1 = matchprodfreq;
		if (nd2 > sslot2->nvalues)
			totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
		if (nd2 > nmatches)
			totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
				(nd2 - nmatches);
		/* Same estimate from the point of view of relation 2. */
		totalsel2 = matchprodfreq;
		if (nd1 > sslot1->nvalues)
			totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
		if (nd1 > nmatches)
			totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
				(nd1 - nmatches);

		/*
		 * Use the smaller of the two estimates.  This can be justified in
		 * essentially the same terms as given below for the no-stats case: to
		 * a first approximation, we are estimating from the point of view of
		 * the relation with smaller nd.
		 */
		selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
	}
	else
	{
		/*
		 * We do not have MCV lists for both sides.  Estimate the join
		 * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2).  This
		 * is plausible if we assume that the join operator is strict and the
		 * non-null values are about equally distributed: a given non-null
		 * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
		 * of rel2, so total join rows are at most
		 * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
		 * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
		 * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
		 * with MIN() is an upper bound.  Using the MIN() means we estimate
		 * from the point of view of the relation with smaller nd (since the
		 * larger nd is determining the MIN).  It is reasonable to assume that
		 * most tuples in this rel will have join partners, so the bound is
		 * probably reasonably tight and should be taken as-is.
		 *
		 * XXX Can we be smarter if we have an MCV list for just one side? It
		 * seems that if we assume equal distribution for the other side, we
		 * end up with the same answer anyway.
		 */
		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
		double		nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;

		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
		if (nd1 > nd2)
			selec /= nd1;
		else
			selec /= nd2;
	}

	return selec;
}

/*
 * eqjoinsel_semi --- eqjoinsel for semi join
 *
 * (Also used for anti join, which we are supposed to estimate the same way.)
 * Caller has ensured that vardata1 is the LHS variable.
 * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
 */
static double
eqjoinsel_semi(Oid opfuncoid, Oid collation,
			   VariableStatData *vardata1, VariableStatData *vardata2,
			   double nd1, double nd2,
			   bool isdefault1, bool isdefault2,
			   AttStatsSlot *sslot1, AttStatsSlot *sslot2,
			   Form_pg_statistic stats1, Form_pg_statistic stats2,
			   bool have_mcvs1, bool have_mcvs2,
			   RelOptInfo *inner_rel)
{
	double		selec;

	/*
	 * We clamp nd2 to be not more than what we estimate the inner relation's
	 * size to be.  This is intuitively somewhat reasonable since obviously
	 * there can't be more than that many distinct values coming from the
	 * inner rel.  The reason for the asymmetry (ie, that we don't clamp nd1
	 * likewise) is that this is the only pathway by which restriction clauses
	 * applied to the inner rel will affect the join result size estimate,
	 * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
	 * only the outer rel's size.  If we clamped nd1 we'd be double-counting
	 * the selectivity of outer-rel restrictions.
	 *
	 * We can apply this clamping both with respect to the base relation from
	 * which the join variable comes (if there is just one), and to the
	 * immediate inner input relation of the current join.
	 *
	 * If we clamp, we can treat nd2 as being a non-default estimate; it's not
	 * great, maybe, but it didn't come out of nowhere either.  This is most
	 * helpful when the inner relation is empty and consequently has no stats.
	 */
	if (vardata2->rel)
	{
		if (nd2 >= vardata2->rel->rows)
		{
			nd2 = vardata2->rel->rows;
			isdefault2 = false;
		}
	}
	if (nd2 >= inner_rel->rows)
	{
		nd2 = inner_rel->rows;
		isdefault2 = false;
	}

	if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
	{
		/*
		 * We have most-common-value lists for both relations.  Run through
		 * the lists to see which MCVs actually join to each other with the
		 * given operator.  This allows us to determine the exact join
		 * selectivity for the portion of the relations represented by the MCV
		 * lists.  We still have to estimate for the remaining population, but
		 * in a skewed distribution this gives us a big leg up in accuracy.
		 */
		LOCAL_FCINFO(fcinfo, 2);
		FmgrInfo	eqproc;
		bool	   *hasmatch1;
		bool	   *hasmatch2;
		double		nullfrac1 = stats1->stanullfrac;
		double		matchfreq1,
					uncertainfrac,
					uncertain;
		int			i,
					nmatches,
					clamped_nvalues2;

		/*
		 * The clamping above could have resulted in nd2 being less than
		 * sslot2->nvalues; in which case, we assume that precisely the nd2
		 * most common values in the relation will appear in the join input,
		 * and so compare to only the first nd2 members of the MCV list.  Of
		 * course this is frequently wrong, but it's the best bet we can make.
		 */
		clamped_nvalues2 = Min(sslot2->nvalues, nd2);

		fmgr_info(opfuncoid, &eqproc);

		/*
		 * Save a few cycles by setting up the fcinfo struct just once.  Using
		 * FunctionCallInvoke directly also avoids failure if the eqproc
		 * returns NULL, though really equality functions should never do
		 * that.
		 */
		InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
								 NULL, NULL);
		fcinfo->args[0].isnull = false;
		fcinfo->args[1].isnull = false;

		/* palloc0 gives all-false initial match flags */
		hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
		hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));

		/*
		 * Note we assume that each MCV will match at most one member of the
		 * other MCV list.  If the operator isn't really equality, there could
		 * be multiple matches --- but we don't look for them, both for speed
		 * and because the math wouldn't add up...
		 */
		nmatches = 0;
		for (i = 0; i < sslot1->nvalues; i++)
		{
			int			j;

			fcinfo->args[0].value = sslot1->values[i];

			for (j = 0; j < clamped_nvalues2; j++)
			{
				Datum		fresult;

				if (hasmatch2[j])
					continue;
				fcinfo->args[1].value = sslot2->values[j];
				fcinfo->isnull = false;
				fresult = FunctionCallInvoke(fcinfo);
				if (!fcinfo->isnull && DatumGetBool(fresult))
				{
					hasmatch1[i] = hasmatch2[j] = true;
					nmatches++;
					break;
				}
			}
		}
		/* Sum up frequencies of matched MCVs */
		matchfreq1 = 0.0;
		for (i = 0; i < sslot1->nvalues; i++)
		{
			if (hasmatch1[i])
				matchfreq1 += sslot1->numbers[i];
		}
		CLAMP_PROBABILITY(matchfreq1);
		pfree(hasmatch1);
		pfree(hasmatch2);

		/*
		 * Now we need to estimate the fraction of relation 1 that has at
		 * least one join partner.  We know for certain that the matched MCVs
		 * do, so that gives us a lower bound, but we're really in the dark
		 * about everything else.  Our crude approach is: if nd1 <= nd2 then
		 * assume all non-null rel1 rows have join partners, else assume for
		 * the uncertain rows that a fraction nd2/nd1 have join partners. We
		 * can discount the known-matched MCVs from the distinct-values counts
		 * before doing the division.
		 *
		 * Crude as the above is, it's completely useless if we don't have
		 * reliable ndistinct values for both sides.  Hence, if either nd1 or
		 * nd2 is default, punt and assume half of the uncertain rows have
		 * join partners.
		 */
		if (!isdefault1 && !isdefault2)
		{
			/* discount known-matched MCVs before comparing distinct counts */
			nd1 -= nmatches;
			nd2 -= nmatches;
			/* nd2 can go negative if nmatches exceeded it; treat as all-match */
			if (nd1 <= nd2 || nd2 < 0)
				uncertainfrac = 1.0;
			else
				uncertainfrac = nd2 / nd1;
		}
		else
			uncertainfrac = 0.5;
		uncertain = 1.0 - matchfreq1 - nullfrac1;
		CLAMP_PROBABILITY(uncertain);
		selec = matchfreq1 + uncertainfrac * uncertain;
	}
	else
	{
		/*
		 * Without MCV lists for both sides, we can only use the heuristic
		 * about nd1 vs nd2.
		 */
		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;

		if (!isdefault1 && !isdefault2)
		{
			if (nd1 <= nd2 || nd2 < 0)
				selec = 1.0 - nullfrac1;
			else
				selec = (nd2 / nd1) * (1.0 - nullfrac1);
		}
		else
			selec = 0.5 * (1.0 - nullfrac1);
	}

	return selec;
}

/*
 * neqjoinsel		- Join selectivity of "!="
 */
Datum
neqjoinsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
	Oid			collation = PG_GET_COLLATION();
	float8		result;

	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
	{
		/*
		 * For semi-joins, if there is more than one distinct value in the RHS
		 * relation then every non-null LHS row must find a row to join since
		 * it can only be equal to one of them.  We'll assume that there is
		 * always more than one distinct RHS value for the sake of stability,
		 * though in theory we could have special cases for empty RHS
		 * (selectivity = 0) and single-distinct-value RHS (selectivity =
		 * fraction of LHS that has the same value as the single RHS value).
		 *
		 * For anti-joins, if we use the same assumption that there is more
		 * than one distinct key in the RHS relation, then every non-null LHS
		 * row must be suppressed by the anti-join.
		 *
		 * So either way, the selectivity estimate should be 1 - nullfrac.
		 */
		VariableStatData leftvar;
		VariableStatData rightvar;
		bool		reversed;
		HeapTuple	statsTuple;
		double		nullfrac;

		get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
		statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
		if (HeapTupleIsValid(statsTuple))
			nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
		else
			nullfrac = 0.0;
		ReleaseVariableStats(leftvar);
		ReleaseVariableStats(rightvar);

		result = 1.0 - nullfrac;
	}
	else
	{
		/*
		 * We want 1 - eqjoinsel() where the equality operator is the one
		 * associated with this != operator, that is, its negator.
		 */
		Oid			eqop = get_negator(operator);

		if (eqop)
		{
			result =
				DatumGetFloat8(DirectFunctionCall5Coll(eqjoinsel,
													   collation,
													   PointerGetDatum(root),
													   ObjectIdGetDatum(eqop),
													   PointerGetDatum(args),
													   Int16GetDatum(jointype),
													   PointerGetDatum(sjinfo)));
		}
		else
		{
			/* Use default selectivity (should we raise an error instead?) */
			result = DEFAULT_EQ_SEL;
		}
		result = 1.0 - result;
	}

	PG_RETURN_FLOAT8(result);
}

/*
 * scalarltjoinsel - Join selectivity of "<" for scalars
 */
Datum
scalarltjoinsel(PG_FUNCTION_ARGS)
{
	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}

/*
 * scalarlejoinsel - Join selectivity of "<=" for scalars
 */
Datum
scalarlejoinsel(PG_FUNCTION_ARGS)
{
	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}

/*
 * scalargtjoinsel - Join selectivity of ">" for scalars
 */
Datum
scalargtjoinsel(PG_FUNCTION_ARGS)
{
	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}

/*
 * scalargejoinsel - Join selectivity of ">=" for scalars
 */
Datum
scalargejoinsel(PG_FUNCTION_ARGS)
{
	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}


/*
 * mergejoinscansel			- Scan selectivity of merge join.
 *
 * A merge join will stop as soon as it exhausts either input stream.
 * Therefore, if we can estimate the ranges of both input variables,
 * we can estimate how much of the input will actually be read.  This
 * can have a considerable impact on the cost when using indexscans.
 *
 * Also, we can estimate how much of each input has to be read before the
 * first join pair is found, which will affect the join's startup time.
 *
 * clause should be a clause already known to be mergejoinable.  opfamily,
 * strategy, and nulls_first specify the sort ordering being used.
 *
 * The outputs are:
 *		*leftstart is set to the fraction of the left-hand variable expected
 *		 to be scanned before the first join pair is found (0 to 1).
 *		*leftend is set to the fraction of the left-hand variable expected
 *		 to be scanned before the join terminates (0 to 1).
 *		*rightstart, *rightend similarly for the right-hand variable.
2906 */ 2907 void 2908 mergejoinscansel(PlannerInfo *root, Node *clause, 2909 Oid opfamily, int strategy, bool nulls_first, 2910 Selectivity *leftstart, Selectivity *leftend, 2911 Selectivity *rightstart, Selectivity *rightend) 2912 { 2913 Node *left, 2914 *right; 2915 VariableStatData leftvar, 2916 rightvar; 2917 int op_strategy; 2918 Oid op_lefttype; 2919 Oid op_righttype; 2920 Oid opno, 2921 collation, 2922 lsortop, 2923 rsortop, 2924 lstatop, 2925 rstatop, 2926 ltop, 2927 leop, 2928 revltop, 2929 revleop; 2930 bool isgt; 2931 Datum leftmin, 2932 leftmax, 2933 rightmin, 2934 rightmax; 2935 double selec; 2936 2937 /* Set default results if we can't figure anything out. */ 2938 /* XXX should default "start" fraction be a bit more than 0? */ 2939 *leftstart = *rightstart = 0.0; 2940 *leftend = *rightend = 1.0; 2941 2942 /* Deconstruct the merge clause */ 2943 if (!is_opclause(clause)) 2944 return; /* shouldn't happen */ 2945 opno = ((OpExpr *) clause)->opno; 2946 collation = ((OpExpr *) clause)->inputcollid; 2947 left = get_leftop((Expr *) clause); 2948 right = get_rightop((Expr *) clause); 2949 if (!right) 2950 return; /* shouldn't happen */ 2951 2952 /* Look for stats for the inputs */ 2953 examine_variable(root, left, 0, &leftvar); 2954 examine_variable(root, right, 0, &rightvar); 2955 2956 /* Extract the operator's declared left/right datatypes */ 2957 get_op_opfamily_properties(opno, opfamily, false, 2958 &op_strategy, 2959 &op_lefttype, 2960 &op_righttype); 2961 Assert(op_strategy == BTEqualStrategyNumber); 2962 2963 /* 2964 * Look up the various operators we need. If we don't find them all, it 2965 * probably means the opfamily is broken, but we just fail silently. 2966 * 2967 * Note: we expect that pg_statistic histograms will be sorted by the '<' 2968 * operator, regardless of which sort direction we are considering. 
2969 */ 2970 switch (strategy) 2971 { 2972 case BTLessStrategyNumber: 2973 isgt = false; 2974 if (op_lefttype == op_righttype) 2975 { 2976 /* easy case */ 2977 ltop = get_opfamily_member(opfamily, 2978 op_lefttype, op_righttype, 2979 BTLessStrategyNumber); 2980 leop = get_opfamily_member(opfamily, 2981 op_lefttype, op_righttype, 2982 BTLessEqualStrategyNumber); 2983 lsortop = ltop; 2984 rsortop = ltop; 2985 lstatop = lsortop; 2986 rstatop = rsortop; 2987 revltop = ltop; 2988 revleop = leop; 2989 } 2990 else 2991 { 2992 ltop = get_opfamily_member(opfamily, 2993 op_lefttype, op_righttype, 2994 BTLessStrategyNumber); 2995 leop = get_opfamily_member(opfamily, 2996 op_lefttype, op_righttype, 2997 BTLessEqualStrategyNumber); 2998 lsortop = get_opfamily_member(opfamily, 2999 op_lefttype, op_lefttype, 3000 BTLessStrategyNumber); 3001 rsortop = get_opfamily_member(opfamily, 3002 op_righttype, op_righttype, 3003 BTLessStrategyNumber); 3004 lstatop = lsortop; 3005 rstatop = rsortop; 3006 revltop = get_opfamily_member(opfamily, 3007 op_righttype, op_lefttype, 3008 BTLessStrategyNumber); 3009 revleop = get_opfamily_member(opfamily, 3010 op_righttype, op_lefttype, 3011 BTLessEqualStrategyNumber); 3012 } 3013 break; 3014 case BTGreaterStrategyNumber: 3015 /* descending-order case */ 3016 isgt = true; 3017 if (op_lefttype == op_righttype) 3018 { 3019 /* easy case */ 3020 ltop = get_opfamily_member(opfamily, 3021 op_lefttype, op_righttype, 3022 BTGreaterStrategyNumber); 3023 leop = get_opfamily_member(opfamily, 3024 op_lefttype, op_righttype, 3025 BTGreaterEqualStrategyNumber); 3026 lsortop = ltop; 3027 rsortop = ltop; 3028 lstatop = get_opfamily_member(opfamily, 3029 op_lefttype, op_lefttype, 3030 BTLessStrategyNumber); 3031 rstatop = lstatop; 3032 revltop = ltop; 3033 revleop = leop; 3034 } 3035 else 3036 { 3037 ltop = get_opfamily_member(opfamily, 3038 op_lefttype, op_righttype, 3039 BTGreaterStrategyNumber); 3040 leop = get_opfamily_member(opfamily, 3041 op_lefttype, 
op_righttype, 3042 BTGreaterEqualStrategyNumber); 3043 lsortop = get_opfamily_member(opfamily, 3044 op_lefttype, op_lefttype, 3045 BTGreaterStrategyNumber); 3046 rsortop = get_opfamily_member(opfamily, 3047 op_righttype, op_righttype, 3048 BTGreaterStrategyNumber); 3049 lstatop = get_opfamily_member(opfamily, 3050 op_lefttype, op_lefttype, 3051 BTLessStrategyNumber); 3052 rstatop = get_opfamily_member(opfamily, 3053 op_righttype, op_righttype, 3054 BTLessStrategyNumber); 3055 revltop = get_opfamily_member(opfamily, 3056 op_righttype, op_lefttype, 3057 BTGreaterStrategyNumber); 3058 revleop = get_opfamily_member(opfamily, 3059 op_righttype, op_lefttype, 3060 BTGreaterEqualStrategyNumber); 3061 } 3062 break; 3063 default: 3064 goto fail; /* shouldn't get here */ 3065 } 3066 3067 if (!OidIsValid(lsortop) || 3068 !OidIsValid(rsortop) || 3069 !OidIsValid(lstatop) || 3070 !OidIsValid(rstatop) || 3071 !OidIsValid(ltop) || 3072 !OidIsValid(leop) || 3073 !OidIsValid(revltop) || 3074 !OidIsValid(revleop)) 3075 goto fail; /* insufficient info in catalogs */ 3076 3077 /* Try to get ranges of both inputs */ 3078 if (!isgt) 3079 { 3080 if (!get_variable_range(root, &leftvar, lstatop, collation, 3081 &leftmin, &leftmax)) 3082 goto fail; /* no range available from stats */ 3083 if (!get_variable_range(root, &rightvar, rstatop, collation, 3084 &rightmin, &rightmax)) 3085 goto fail; /* no range available from stats */ 3086 } 3087 else 3088 { 3089 /* need to swap the max and min */ 3090 if (!get_variable_range(root, &leftvar, lstatop, collation, 3091 &leftmax, &leftmin)) 3092 goto fail; /* no range available from stats */ 3093 if (!get_variable_range(root, &rightvar, rstatop, collation, 3094 &rightmax, &rightmin)) 3095 goto fail; /* no range available from stats */ 3096 } 3097 3098 /* 3099 * Now, the fraction of the left variable that will be scanned is the 3100 * fraction that's <= the right-side maximum value. But only believe 3101 * non-default estimates, else stick with our 1.0. 
3102 */ 3103 selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar, 3104 rightmax, op_righttype); 3105 if (selec != DEFAULT_INEQ_SEL) 3106 *leftend = selec; 3107 3108 /* And similarly for the right variable. */ 3109 selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar, 3110 leftmax, op_lefttype); 3111 if (selec != DEFAULT_INEQ_SEL) 3112 *rightend = selec; 3113 3114 /* 3115 * Only one of the two "end" fractions can really be less than 1.0; 3116 * believe the smaller estimate and reset the other one to exactly 1.0. If 3117 * we get exactly equal estimates (as can easily happen with self-joins), 3118 * believe neither. 3119 */ 3120 if (*leftend > *rightend) 3121 *leftend = 1.0; 3122 else if (*leftend < *rightend) 3123 *rightend = 1.0; 3124 else 3125 *leftend = *rightend = 1.0; 3126 3127 /* 3128 * Also, the fraction of the left variable that will be scanned before the 3129 * first join pair is found is the fraction that's < the right-side 3130 * minimum value. But only believe non-default estimates, else stick with 3131 * our own default. 3132 */ 3133 selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar, 3134 rightmin, op_righttype); 3135 if (selec != DEFAULT_INEQ_SEL) 3136 *leftstart = selec; 3137 3138 /* And similarly for the right variable. */ 3139 selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar, 3140 leftmin, op_lefttype); 3141 if (selec != DEFAULT_INEQ_SEL) 3142 *rightstart = selec; 3143 3144 /* 3145 * Only one of the two "start" fractions can really be more than zero; 3146 * believe the larger estimate and reset the other one to exactly 0.0. If 3147 * we get exactly equal estimates (as can easily happen with self-joins), 3148 * believe neither. 
3149 */ 3150 if (*leftstart < *rightstart) 3151 *leftstart = 0.0; 3152 else if (*leftstart > *rightstart) 3153 *rightstart = 0.0; 3154 else 3155 *leftstart = *rightstart = 0.0; 3156 3157 /* 3158 * If the sort order is nulls-first, we're going to have to skip over any 3159 * nulls too. These would not have been counted by scalarineqsel, and we 3160 * can safely add in this fraction regardless of whether we believe 3161 * scalarineqsel's results or not. But be sure to clamp the sum to 1.0! 3162 */ 3163 if (nulls_first) 3164 { 3165 Form_pg_statistic stats; 3166 3167 if (HeapTupleIsValid(leftvar.statsTuple)) 3168 { 3169 stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple); 3170 *leftstart += stats->stanullfrac; 3171 CLAMP_PROBABILITY(*leftstart); 3172 *leftend += stats->stanullfrac; 3173 CLAMP_PROBABILITY(*leftend); 3174 } 3175 if (HeapTupleIsValid(rightvar.statsTuple)) 3176 { 3177 stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple); 3178 *rightstart += stats->stanullfrac; 3179 CLAMP_PROBABILITY(*rightstart); 3180 *rightend += stats->stanullfrac; 3181 CLAMP_PROBABILITY(*rightend); 3182 } 3183 } 3184 3185 /* Disbelieve start >= end, just in case that can happen */ 3186 if (*leftstart >= *leftend) 3187 { 3188 *leftstart = 0.0; 3189 *leftend = 1.0; 3190 } 3191 if (*rightstart >= *rightend) 3192 { 3193 *rightstart = 0.0; 3194 *rightend = 1.0; 3195 } 3196 3197 fail: 3198 ReleaseVariableStats(leftvar); 3199 ReleaseVariableStats(rightvar); 3200 } 3201 3202 3203 /* 3204 * matchingsel -- generic matching-operator selectivity support 3205 * 3206 * Use these for any operators that (a) are on data types for which we collect 3207 * standard statistics, and (b) have behavior for which the default estimate 3208 * (twice DEFAULT_EQ_SEL) is sane. Typically that is good for match-like 3209 * operators. 
3210 */ 3211 3212 Datum 3213 matchingsel(PG_FUNCTION_ARGS) 3214 { 3215 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); 3216 Oid operator = PG_GETARG_OID(1); 3217 List *args = (List *) PG_GETARG_POINTER(2); 3218 int varRelid = PG_GETARG_INT32(3); 3219 Oid collation = PG_GET_COLLATION(); 3220 double selec; 3221 3222 /* Use generic restriction selectivity logic. */ 3223 selec = generic_restriction_selectivity(root, operator, collation, 3224 args, varRelid, 3225 DEFAULT_MATCHING_SEL); 3226 3227 PG_RETURN_FLOAT8((float8) selec); 3228 } 3229 3230 Datum 3231 matchingjoinsel(PG_FUNCTION_ARGS) 3232 { 3233 /* Just punt, for the moment. */ 3234 PG_RETURN_FLOAT8(DEFAULT_MATCHING_SEL); 3235 } 3236 3237 3238 /* 3239 * Helper routine for estimate_num_groups: add an item to a list of 3240 * GroupVarInfos, but only if it's not known equal to any of the existing 3241 * entries. 3242 */ 3243 typedef struct 3244 { 3245 Node *var; /* might be an expression, not just a Var */ 3246 RelOptInfo *rel; /* relation it belongs to */ 3247 double ndistinct; /* # distinct values */ 3248 } GroupVarInfo; 3249 3250 static List * 3251 add_unique_group_var(PlannerInfo *root, List *varinfos, 3252 Node *var, VariableStatData *vardata) 3253 { 3254 GroupVarInfo *varinfo; 3255 double ndistinct; 3256 bool isdefault; 3257 ListCell *lc; 3258 3259 ndistinct = get_variable_numdistinct(vardata, &isdefault); 3260 3261 foreach(lc, varinfos) 3262 { 3263 varinfo = (GroupVarInfo *) lfirst(lc); 3264 3265 /* Drop exact duplicates */ 3266 if (equal(var, varinfo->var)) 3267 return varinfos; 3268 3269 /* 3270 * Drop known-equal vars, but only if they belong to different 3271 * relations (see comments for estimate_num_groups) 3272 */ 3273 if (vardata->rel != varinfo->rel && 3274 exprs_known_equal(root, var, varinfo->var)) 3275 { 3276 if (varinfo->ndistinct <= ndistinct) 3277 { 3278 /* Keep older item, forget new one */ 3279 return varinfos; 3280 } 3281 else 3282 { 3283 /* Delete the older item */ 3284 varinfos = 
foreach_delete_current(varinfos, lc); 3285 } 3286 } 3287 } 3288 3289 varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo)); 3290 3291 varinfo->var = var; 3292 varinfo->rel = vardata->rel; 3293 varinfo->ndistinct = ndistinct; 3294 varinfos = lappend(varinfos, varinfo); 3295 return varinfos; 3296 } 3297 3298 /* 3299 * estimate_num_groups - Estimate number of groups in a grouped query 3300 * 3301 * Given a query having a GROUP BY clause, estimate how many groups there 3302 * will be --- ie, the number of distinct combinations of the GROUP BY 3303 * expressions. 3304 * 3305 * This routine is also used to estimate the number of rows emitted by 3306 * a DISTINCT filtering step; that is an isomorphic problem. (Note: 3307 * actually, we only use it for DISTINCT when there's no grouping or 3308 * aggregation ahead of the DISTINCT.) 3309 * 3310 * Inputs: 3311 * root - the query 3312 * groupExprs - list of expressions being grouped by 3313 * input_rows - number of rows estimated to arrive at the group/unique 3314 * filter step 3315 * pgset - NULL, or a List** pointing to a grouping set to filter the 3316 * groupExprs against 3317 * 3318 * Given the lack of any cross-correlation statistics in the system, it's 3319 * impossible to do anything really trustworthy with GROUP BY conditions 3320 * involving multiple Vars. We should however avoid assuming the worst 3321 * case (all possible cross-product terms actually appear as groups) since 3322 * very often the grouped-by Vars are highly correlated. Our current approach 3323 * is as follows: 3324 * 1. Expressions yielding boolean are assumed to contribute two groups, 3325 * independently of their content, and are ignored in the subsequent 3326 * steps. This is mainly because tests like "col IS NULL" break the 3327 * heuristic used in step 2 especially badly. 3328 * 2. Reduce the given expressions to a list of unique Vars used. For 3329 * example, GROUP BY a, a + b is treated the same as GROUP BY a, b. 
3330 * It is clearly correct not to count the same Var more than once. 3331 * It is also reasonable to treat f(x) the same as x: f() cannot 3332 * increase the number of distinct values (unless it is volatile, 3333 * which we consider unlikely for grouping), but it probably won't 3334 * reduce the number of distinct values much either. 3335 * As a special case, if a GROUP BY expression can be matched to an 3336 * expressional index for which we have statistics, then we treat the 3337 * whole expression as though it were just a Var. 3338 * 3. If the list contains Vars of different relations that are known equal 3339 * due to equivalence classes, then drop all but one of the Vars from each 3340 * known-equal set, keeping the one with smallest estimated # of values 3341 * (since the extra values of the others can't appear in joined rows). 3342 * Note the reason we only consider Vars of different relations is that 3343 * if we considered ones of the same rel, we'd be double-counting the 3344 * restriction selectivity of the equality in the next step. 3345 * 4. For Vars within a single source rel, we multiply together the numbers 3346 * of values, clamp to the number of rows in the rel (divided by 10 if 3347 * more than one Var), and then multiply by a factor based on the 3348 * selectivity of the restriction clauses for that rel. When there's 3349 * more than one Var, the initial product is probably too high (it's the 3350 * worst case) but clamping to a fraction of the rel's rows seems to be a 3351 * helpful heuristic for not letting the estimate get out of hand. (The 3352 * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor 3353 * we multiply by to adjust for the restriction selectivity assumes that 3354 * the restriction clauses are independent of the grouping, which may not 3355 * be a valid assumption, but it's hard to do better. 3356 * 5. 
If there are Vars from multiple rels, we repeat step 4 for each such 3357 * rel, and multiply the results together. 3358 * Note that rels not containing grouped Vars are ignored completely, as are 3359 * join clauses. Such rels cannot increase the number of groups, and we 3360 * assume such clauses do not reduce the number either (somewhat bogus, 3361 * but we don't have the info to do better). 3362 */ 3363 double 3364 estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, 3365 List **pgset) 3366 { 3367 List *varinfos = NIL; 3368 double srf_multiplier = 1.0; 3369 double numdistinct; 3370 ListCell *l; 3371 int i; 3372 3373 /* 3374 * We don't ever want to return an estimate of zero groups, as that tends 3375 * to lead to division-by-zero and other unpleasantness. The input_rows 3376 * estimate is usually already at least 1, but clamp it just in case it 3377 * isn't. 3378 */ 3379 input_rows = clamp_row_est(input_rows); 3380 3381 /* 3382 * If no grouping columns, there's exactly one group. (This can't happen 3383 * for normal cases with GROUP BY or DISTINCT, but it is possible for 3384 * corner cases with set operations.) 3385 */ 3386 if (groupExprs == NIL || (pgset && list_length(*pgset) < 1)) 3387 return 1.0; 3388 3389 /* 3390 * Count groups derived from boolean grouping expressions. For other 3391 * expressions, find the unique Vars used, treating an expression as a Var 3392 * if we can find stats for it. For each one, record the statistical 3393 * estimate of number of distinct values (total in its table, without 3394 * regard for filtering). 3395 */ 3396 numdistinct = 1.0; 3397 3398 i = 0; 3399 foreach(l, groupExprs) 3400 { 3401 Node *groupexpr = (Node *) lfirst(l); 3402 double this_srf_multiplier; 3403 VariableStatData vardata; 3404 List *varshere; 3405 ListCell *l2; 3406 3407 /* is expression in this grouping set? 
*/ 3408 if (pgset && !list_member_int(*pgset, i++)) 3409 continue; 3410 3411 /* 3412 * Set-returning functions in grouping columns are a bit problematic. 3413 * The code below will effectively ignore their SRF nature and come up 3414 * with a numdistinct estimate as though they were scalar functions. 3415 * We compensate by scaling up the end result by the largest SRF 3416 * rowcount estimate. (This will be an overestimate if the SRF 3417 * produces multiple copies of any output value, but it seems best to 3418 * assume the SRF's outputs are distinct. In any case, it's probably 3419 * pointless to worry too much about this without much better 3420 * estimates for SRF output rowcounts than we have today.) 3421 */ 3422 this_srf_multiplier = expression_returns_set_rows(root, groupexpr); 3423 if (srf_multiplier < this_srf_multiplier) 3424 srf_multiplier = this_srf_multiplier; 3425 3426 /* Short-circuit for expressions returning boolean */ 3427 if (exprType(groupexpr) == BOOLOID) 3428 { 3429 numdistinct *= 2.0; 3430 continue; 3431 } 3432 3433 /* 3434 * If examine_variable is able to deduce anything about the GROUP BY 3435 * expression, treat it as a single variable even if it's really more 3436 * complicated. 3437 */ 3438 examine_variable(root, groupexpr, 0, &vardata); 3439 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique) 3440 { 3441 varinfos = add_unique_group_var(root, varinfos, 3442 groupexpr, &vardata); 3443 ReleaseVariableStats(vardata); 3444 continue; 3445 } 3446 ReleaseVariableStats(vardata); 3447 3448 /* 3449 * Else pull out the component Vars. Handle PlaceHolderVars by 3450 * recursing into their arguments (effectively assuming that the 3451 * PlaceHolderVar doesn't change the number of groups, which boils 3452 * down to ignoring the possible addition of nulls to the result set). 
3453 */ 3454 varshere = pull_var_clause(groupexpr, 3455 PVC_RECURSE_AGGREGATES | 3456 PVC_RECURSE_WINDOWFUNCS | 3457 PVC_RECURSE_PLACEHOLDERS); 3458 3459 /* 3460 * If we find any variable-free GROUP BY item, then either it is a 3461 * constant (and we can ignore it) or it contains a volatile function; 3462 * in the latter case we punt and assume that each input row will 3463 * yield a distinct group. 3464 */ 3465 if (varshere == NIL) 3466 { 3467 if (contain_volatile_functions(groupexpr)) 3468 return input_rows; 3469 continue; 3470 } 3471 3472 /* 3473 * Else add variables to varinfos list 3474 */ 3475 foreach(l2, varshere) 3476 { 3477 Node *var = (Node *) lfirst(l2); 3478 3479 examine_variable(root, var, 0, &vardata); 3480 varinfos = add_unique_group_var(root, varinfos, var, &vardata); 3481 ReleaseVariableStats(vardata); 3482 } 3483 } 3484 3485 /* 3486 * If now no Vars, we must have an all-constant or all-boolean GROUP BY 3487 * list. 3488 */ 3489 if (varinfos == NIL) 3490 { 3491 /* Apply SRF multiplier as we would do in the long path */ 3492 numdistinct *= srf_multiplier; 3493 /* Round off */ 3494 numdistinct = ceil(numdistinct); 3495 /* Guard against out-of-range answers */ 3496 if (numdistinct > input_rows) 3497 numdistinct = input_rows; 3498 if (numdistinct < 1.0) 3499 numdistinct = 1.0; 3500 return numdistinct; 3501 } 3502 3503 /* 3504 * Group Vars by relation and estimate total numdistinct. 3505 * 3506 * For each iteration of the outer loop, we process the frontmost Var in 3507 * varinfos, plus all other Vars in the same relation. We remove these 3508 * Vars from the newvarinfos list for the next iteration. This is the 3509 * easiest way to group Vars of same rel together. 
3510 */ 3511 do 3512 { 3513 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos); 3514 RelOptInfo *rel = varinfo1->rel; 3515 double reldistinct = 1; 3516 double relmaxndistinct = reldistinct; 3517 int relvarcount = 0; 3518 List *newvarinfos = NIL; 3519 List *relvarinfos = NIL; 3520 3521 /* 3522 * Split the list of varinfos in two - one for the current rel, one 3523 * for remaining Vars on other rels. 3524 */ 3525 relvarinfos = lappend(relvarinfos, varinfo1); 3526 for_each_from(l, varinfos, 1) 3527 { 3528 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); 3529 3530 if (varinfo2->rel == varinfo1->rel) 3531 { 3532 /* varinfos on current rel */ 3533 relvarinfos = lappend(relvarinfos, varinfo2); 3534 } 3535 else 3536 { 3537 /* not time to process varinfo2 yet */ 3538 newvarinfos = lappend(newvarinfos, varinfo2); 3539 } 3540 } 3541 3542 /* 3543 * Get the numdistinct estimate for the Vars of this rel. We 3544 * iteratively search for multivariate n-distinct with maximum number 3545 * of vars; assuming that each var group is independent of the others, 3546 * we multiply them together. Any remaining relvarinfos after no more 3547 * multivariate matches are found are assumed independent too, so 3548 * their individual ndistinct estimates are multiplied also. 3549 * 3550 * While iterating, count how many separate numdistinct values we 3551 * apply. We apply a fudge factor below, but only if we multiplied 3552 * more than one such values. 
3553 */ 3554 while (relvarinfos) 3555 { 3556 double mvndistinct; 3557 3558 if (estimate_multivariate_ndistinct(root, rel, &relvarinfos, 3559 &mvndistinct)) 3560 { 3561 reldistinct *= mvndistinct; 3562 if (relmaxndistinct < mvndistinct) 3563 relmaxndistinct = mvndistinct; 3564 relvarcount++; 3565 } 3566 else 3567 { 3568 foreach(l, relvarinfos) 3569 { 3570 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); 3571 3572 reldistinct *= varinfo2->ndistinct; 3573 if (relmaxndistinct < varinfo2->ndistinct) 3574 relmaxndistinct = varinfo2->ndistinct; 3575 relvarcount++; 3576 } 3577 3578 /* we're done with this relation */ 3579 relvarinfos = NIL; 3580 } 3581 } 3582 3583 /* 3584 * Sanity check --- don't divide by zero if empty relation. 3585 */ 3586 Assert(IS_SIMPLE_REL(rel)); 3587 if (rel->tuples > 0) 3588 { 3589 /* 3590 * Clamp to size of rel, or size of rel / 10 if multiple Vars. The 3591 * fudge factor is because the Vars are probably correlated but we 3592 * don't know by how much. We should never clamp to less than the 3593 * largest ndistinct value for any of the Vars, though, since 3594 * there will surely be at least that many groups. 3595 */ 3596 double clamp = rel->tuples; 3597 3598 if (relvarcount > 1) 3599 { 3600 clamp *= 0.1; 3601 if (clamp < relmaxndistinct) 3602 { 3603 clamp = relmaxndistinct; 3604 /* for sanity in case some ndistinct is too large: */ 3605 if (clamp > rel->tuples) 3606 clamp = rel->tuples; 3607 } 3608 } 3609 if (reldistinct > clamp) 3610 reldistinct = clamp; 3611 3612 /* 3613 * Update the estimate based on the restriction selectivity, 3614 * guarding against division by zero when reldistinct is zero. 3615 * Also skip this if we know that we are returning all rows. 
3616 */ 3617 if (reldistinct > 0 && rel->rows < rel->tuples) 3618 { 3619 /* 3620 * Given a table containing N rows with n distinct values in a 3621 * uniform distribution, if we select p rows at random then 3622 * the expected number of distinct values selected is 3623 * 3624 * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1)) 3625 * 3626 * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!) 3627 * 3628 * See "Approximating block accesses in database 3629 * organizations", S. B. Yao, Communications of the ACM, 3630 * Volume 20 Issue 4, April 1977 Pages 260-261. 3631 * 3632 * Alternatively, re-arranging the terms from the factorials, 3633 * this may be written as 3634 * 3635 * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1)) 3636 * 3637 * This form of the formula is more efficient to compute in 3638 * the common case where p is larger than N/n. Additionally, 3639 * as pointed out by Dell'Era, if i << N for all terms in the 3640 * product, it can be approximated by 3641 * 3642 * n * (1 - ((N-p)/N)^(N/n)) 3643 * 3644 * See "Expected distinct values when selecting from a bag 3645 * without replacement", Alberto Dell'Era, 3646 * http://www.adellera.it/investigations/distinct_balls/. 3647 * 3648 * The condition i << N is equivalent to n >> 1, so this is a 3649 * good approximation when the number of distinct values in 3650 * the table is large. It turns out that this formula also 3651 * works well even when n is small. 3652 */ 3653 reldistinct *= 3654 (1 - pow((rel->tuples - rel->rows) / rel->tuples, 3655 rel->tuples / reldistinct)); 3656 } 3657 reldistinct = clamp_row_est(reldistinct); 3658 3659 /* 3660 * Update estimate of total distinct groups. 
3661 */ 3662 numdistinct *= reldistinct; 3663 } 3664 3665 varinfos = newvarinfos; 3666 } while (varinfos != NIL); 3667 3668 /* Now we can account for the effects of any SRFs */ 3669 numdistinct *= srf_multiplier; 3670 3671 /* Round off */ 3672 numdistinct = ceil(numdistinct); 3673 3674 /* Guard against out-of-range answers */ 3675 if (numdistinct > input_rows) 3676 numdistinct = input_rows; 3677 if (numdistinct < 1.0) 3678 numdistinct = 1.0; 3679 3680 return numdistinct; 3681 } 3682 3683 /* 3684 * Estimate hash bucket statistics when the specified expression is used 3685 * as a hash key for the given number of buckets. 3686 * 3687 * This attempts to determine two values: 3688 * 3689 * 1. The frequency of the most common value of the expression (returns 3690 * zero into *mcv_freq if we can't get that). 3691 * 3692 * 2. The "bucketsize fraction", ie, average number of entries in a bucket 3693 * divided by total tuples in relation. 3694 * 3695 * XXX This is really pretty bogus since we're effectively assuming that the 3696 * distribution of hash keys will be the same after applying restriction 3697 * clauses as it was in the underlying relation. However, we are not nearly 3698 * smart enough to figure out how the restrict clauses might change the 3699 * distribution, so this will have to do for now. 3700 * 3701 * We are passed the number of buckets the executor will use for the given 3702 * input relation. If the data were perfectly distributed, with the same 3703 * number of tuples going into each available bucket, then the bucketsize 3704 * fraction would be 1/nbuckets. But this happy state of affairs will occur 3705 * only if (a) there are at least nbuckets distinct data values, and (b) 3706 * we have a not-too-skewed data distribution. Otherwise the buckets will 3707 * be nonuniformly occupied. If the other relation in the join has a key 3708 * distribution similar to this one's, then the most-loaded buckets are 3709 * exactly those that will be probed most often. 
Therefore, the "average" 3710 * bucket size for costing purposes should really be taken as something close 3711 * to the "worst case" bucket size. We try to estimate this by adjusting the 3712 * fraction if there are too few distinct data values, and then scaling up 3713 * by the ratio of the most common value's frequency to the average frequency. 3714 * 3715 * If no statistics are available, use a default estimate of 0.1. This will 3716 * discourage use of a hash rather strongly if the inner relation is large, 3717 * which is what we want. We do not want to hash unless we know that the 3718 * inner rel is well-dispersed (or the alternatives seem much worse). 3719 * 3720 * The caller should also check that the mcv_freq is not so large that the 3721 * most common value would by itself require an impractically large bucket. 3722 * In a hash join, the executor can split buckets if they get too big, but 3723 * obviously that doesn't help for a bucket that contains many duplicates of 3724 * the same value. 3725 */ 3726 void 3727 estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets, 3728 Selectivity *mcv_freq, 3729 Selectivity *bucketsize_frac) 3730 { 3731 VariableStatData vardata; 3732 double estfract, 3733 ndistinct, 3734 stanullfrac, 3735 avgfreq; 3736 bool isdefault; 3737 AttStatsSlot sslot; 3738 3739 examine_variable(root, hashkey, 0, &vardata); 3740 3741 /* Look up the frequency of the most common value, if available */ 3742 *mcv_freq = 0.0; 3743 3744 if (HeapTupleIsValid(vardata.statsTuple)) 3745 { 3746 if (get_attstatsslot(&sslot, vardata.statsTuple, 3747 STATISTIC_KIND_MCV, InvalidOid, 3748 ATTSTATSSLOT_NUMBERS)) 3749 { 3750 /* 3751 * The first MCV stat is for the most common value. 
3752 */ 3753 if (sslot.nnumbers > 0) 3754 *mcv_freq = sslot.numbers[0]; 3755 free_attstatsslot(&sslot); 3756 } 3757 } 3758 3759 /* Get number of distinct values */ 3760 ndistinct = get_variable_numdistinct(&vardata, &isdefault); 3761 3762 /* 3763 * If ndistinct isn't real, punt. We normally return 0.1, but if the 3764 * mcv_freq is known to be even higher than that, use it instead. 3765 */ 3766 if (isdefault) 3767 { 3768 *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq); 3769 ReleaseVariableStats(vardata); 3770 return; 3771 } 3772 3773 /* Get fraction that are null */ 3774 if (HeapTupleIsValid(vardata.statsTuple)) 3775 { 3776 Form_pg_statistic stats; 3777 3778 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); 3779 stanullfrac = stats->stanullfrac; 3780 } 3781 else 3782 stanullfrac = 0.0; 3783 3784 /* Compute avg freq of all distinct data values in raw relation */ 3785 avgfreq = (1.0 - stanullfrac) / ndistinct; 3786 3787 /* 3788 * Adjust ndistinct to account for restriction clauses. Observe we are 3789 * assuming that the data distribution is affected uniformly by the 3790 * restriction clauses! 3791 * 3792 * XXX Possibly better way, but much more expensive: multiply by 3793 * selectivity of rel's restriction clauses that mention the target Var. 3794 */ 3795 if (vardata.rel && vardata.rel->tuples > 0) 3796 { 3797 ndistinct *= vardata.rel->rows / vardata.rel->tuples; 3798 ndistinct = clamp_row_est(ndistinct); 3799 } 3800 3801 /* 3802 * Initial estimate of bucketsize fraction is 1/nbuckets as long as the 3803 * number of buckets is less than the expected number of distinct values; 3804 * otherwise it is 1/ndistinct. 3805 */ 3806 if (ndistinct > nbuckets) 3807 estfract = 1.0 / nbuckets; 3808 else 3809 estfract = 1.0 / ndistinct; 3810 3811 /* 3812 * Adjust estimated bucketsize upward to account for skewed distribution. 
3813 */ 3814 if (avgfreq > 0.0 && *mcv_freq > avgfreq) 3815 estfract *= *mcv_freq / avgfreq; 3816 3817 /* 3818 * Clamp bucketsize to sane range (the above adjustment could easily 3819 * produce an out-of-range result). We set the lower bound a little above 3820 * zero, since zero isn't a very sane result. 3821 */ 3822 if (estfract < 1.0e-6) 3823 estfract = 1.0e-6; 3824 else if (estfract > 1.0) 3825 estfract = 1.0; 3826 3827 *bucketsize_frac = (Selectivity) estfract; 3828 3829 ReleaseVariableStats(vardata); 3830 } 3831 3832 /* 3833 * estimate_hashagg_tablesize 3834 * estimate the number of bytes that a hash aggregate hashtable will 3835 * require based on the agg_costs, path width and number of groups. 3836 * 3837 * We return the result as "double" to forestall any possible overflow 3838 * problem in the multiplication by dNumGroups. 3839 * 3840 * XXX this may be over-estimating the size now that hashagg knows to omit 3841 * unneeded columns from the hashtable. Also for mixed-mode grouping sets, 3842 * grouping columns not in the hashed set are counted here even though hashagg 3843 * won't store them. Is this a problem? 3844 */ 3845 double 3846 estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, 3847 double dNumGroups) 3848 { 3849 Size hashentrysize = hash_agg_entry_size(agg_costs->numAggs, 3850 path->pathtarget->width, 3851 agg_costs->transitionSpace); 3852 3853 /* 3854 * Note that this disregards the effect of fill-factor and growth policy 3855 * of the hash table. That's probably ok, given that the default 3856 * fill-factor is relatively high. It'd be hard to meaningfully factor in 3857 * "double-in-size" growth policies here. 
 */
	return hashentrysize * dNumGroups;
}


/*-------------------------------------------------------------------------
 *
 * Support routines
 *
 *-------------------------------------------------------------------------
 */

/*
 * Find applicable ndistinct statistics for the given list of VarInfos (which
 * must all belong to the given rel), and update *ndistinct to the estimate of
 * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
 * updated to remove the list of matched varinfos.
 *
 * Varinfos that aren't for simple Vars are ignored.
 *
 * Return true if we're able to find a match, false otherwise.
 */
static bool
estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
								List **varinfos, double *ndistinct)
{
	ListCell   *lc;
	Bitmapset  *attnums = NULL;
	int			nmatches;
	Oid			statOid = InvalidOid;
	MVNDistinct *stats;
	Bitmapset  *matched = NULL;

	/* bail out immediately if the table has no extended statistics */
	if (!rel->statlist)
		return false;

	/* Determine the attnums we're looking for */
	foreach(lc, *varinfos)
	{
		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
		AttrNumber	attnum;

		Assert(varinfo->rel == rel);

		/* Non-Var expressions cannot be matched to ndistinct stats */
		if (!IsA(varinfo->var, Var))
			continue;

		attnum = ((Var *) varinfo->var)->varattno;

		/* System columns are never covered by extended statistics */
		if (!AttrNumberIsForUserDefinedAttr(attnum))
			continue;

		attnums = bms_add_member(attnums, attnum);
	}

	/* look for the ndistinct statistics matching the most vars */
	nmatches = 1;				/* we require at least two matches */
	foreach(lc, rel->statlist)
	{
		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
		Bitmapset  *shared;
		int			nshared;

		/* skip statistics of other kinds */
		if (info->kind != STATS_EXT_NDISTINCT)
			continue;

		/* compute attnums shared by the vars and the statistics object */
		shared = bms_intersect(info->keys, attnums);
		nshared = bms_num_members(shared);

		/*
		 * Does this statistics object match more columns than the currently
		 * best object?  If so, use this one instead.
		 *
		 * XXX This should break ties using name of the object, or something
		 * like that, to make the outcome stable.
		 */
		if (nshared > nmatches)
		{
			statOid = info->statOid;
			nmatches = nshared;
			matched = shared;
		}
	}

	/* No match? */
	if (statOid == InvalidOid)
		return false;
	Assert(nmatches > 1 && matched != NULL);

	stats = statext_ndistinct_load(statOid);

	/*
	 * If we have a match, search it for the specific item that matches (there
	 * must be one), and construct the output values.
	 */
	if (stats)
	{
		int			i;
		List	   *newlist = NIL;
		MVNDistinctItem *item = NULL;

		/* Find the specific item that exactly matches the combination */
		for (i = 0; i < stats->nitems; i++)
		{
			MVNDistinctItem *tmpitem = &stats->items[i];

			if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
			{
				item = tmpitem;
				break;
			}
		}

		/* make sure we found an item */
		if (!item)
			elog(ERROR, "corrupt MVNDistinct entry");

		/* Form the output varinfo list, keeping only unmatched ones */
		foreach(lc, *varinfos)
		{
			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
			AttrNumber	attnum;

			/* Non-Var entries were never candidates; keep them */
			if (!IsA(varinfo->var, Var))
			{
				newlist = lappend(newlist, varinfo);
				continue;
			}

			attnum = ((Var *) varinfo->var)->varattno;

			/* Drop the varinfos covered by the chosen statistics object */
			if (AttrNumberIsForUserDefinedAttr(attnum) &&
				bms_is_member(attnum, matched))
				continue;

			newlist = lappend(newlist, varinfo);
		}

		*varinfos = newlist;
		*ndistinct = item->ndistinct;
		return true;
	}

	return false;
}

/*
 * convert_to_scalar
 *	  Convert non-NULL values of the indicated types to the comparison
 *	  scale needed by scalarineqsel().
 *	  Returns "true" if successful.
 *
 * XXX this routine is a hack: ideally we should look up the conversion
 * subroutines in pg_type.
 *
 * All numeric datatypes are simply converted to their equivalent
 * "double" values.  (NUMERIC values that are outside the range of "double"
 * are clamped to +/- HUGE_VAL.)
 *
 * String datatypes are converted by convert_string_to_scalar(),
 * which is explained below.  The reason why this routine deals with
 * three values at a time, not just one, is that we need it for strings.
 *
 * The bytea datatype is just enough different from strings that it has
 * to be treated separately.
 *
 * The several datatypes representing absolute times are all converted
 * to Timestamp, which is actually a double, and then we just use that
 * double value.  Note this will give correct results even for the "special"
 * values of Timestamp, since those are chosen to compare correctly;
 * see timestamp_cmp.
 *
 * The several datatypes representing relative times (intervals) are all
 * converted to measurements expressed in seconds.
 */
static bool
convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound)
{
	bool		failure = false;

	/*
	 * Both the valuetypid and the boundstypid should exactly match the
	 * declared input type(s) of the operator we are invoked for.  However,
	 * extensions might try to use scalarineqsel as estimator for operators
	 * with input type(s) we don't handle here; in such cases, we want to
	 * return false, not fail.  In any case, we mustn't assume that valuetypid
	 * and boundstypid are identical.
	 *
	 * XXX The histogram we are interpolating between points of could belong
	 * to a column that's only binary-compatible with the declared type. In
	 * essence we are assuming that the semantics of binary-compatible types
	 * are enough alike that we can use a histogram generated with one type's
	 * operators to estimate selectivity for the other's.  This is outright
	 * wrong in some cases --- in particular signed versus unsigned
	 * interpretation could trip us up.  But it's useful enough in the
	 * majority of cases that we do it anyway.  Should think about more
	 * rigorous ways to do it.
	 */
	switch (valuetypid)
	{
			/*
			 * Built-in numeric types
			 */
		case BOOLOID:
		case INT2OID:
		case INT4OID:
		case INT8OID:
		case FLOAT4OID:
		case FLOAT8OID:
		case NUMERICOID:
		case OIDOID:
		case REGPROCOID:
		case REGPROCEDUREOID:
		case REGOPEROID:
		case REGOPERATOROID:
		case REGCLASSOID:
		case REGTYPEOID:
		case REGCONFIGOID:
		case REGDICTIONARYOID:
		case REGROLEOID:
		case REGNAMESPACEOID:
			*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;

			/*
			 * Built-in string types
			 */
		case CHAROID:
		case BPCHAROID:
		case VARCHAROID:
		case TEXTOID:
		case NAMEOID:
			{
				char	   *valstr = convert_string_datum(value, valuetypid,
														  collid, &failure);
				char	   *lostr = convert_string_datum(lobound, boundstypid,
														 collid, &failure);
				char	   *histr = convert_string_datum(hibound, boundstypid,
														 collid, &failure);

				/*
				 * Bail out if any of the values is not of string type.  We
				 * might leak converted strings for the other value(s), but
				 * that's not worth troubling over.
				 */
				if (failure)
					return false;

				convert_string_to_scalar(valstr, scaledvalue,
										 lostr, scaledlobound,
										 histr, scaledhibound);
				pfree(valstr);
				pfree(lostr);
				pfree(histr);
				return true;
			}

			/*
			 * Built-in bytea type
			 */
		case BYTEAOID:
			{
				/* We only support bytea vs bytea comparison */
				if (boundstypid != BYTEAOID)
					return false;
				convert_bytea_to_scalar(value, scaledvalue,
										lobound, scaledlobound,
										hibound, scaledhibound);
				return true;
			}

			/*
			 * Built-in time types
			 */
		case TIMESTAMPOID:
		case TIMESTAMPTZOID:
		case DATEOID:
		case INTERVALOID:
		case TIMEOID:
		case TIMETZOID:
			*scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
													   &failure);
			*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
														 &failure);
			*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
														 &failure);
			return !failure;

			/*
			 * Built-in network types
			 */
		case INETOID:
		case CIDROID:
		case MACADDROID:
		case MACADDR8OID:
			*scaledvalue = convert_network_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_network_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;
	}
	/* Don't know how to convert */
	*scaledvalue = *scaledlobound = *scaledhibound = 0;
	return false;
}
4181 */ 4182 static double 4183 convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) 4184 { 4185 switch (typid) 4186 { 4187 case BOOLOID: 4188 return (double) DatumGetBool(value); 4189 case INT2OID: 4190 return (double) DatumGetInt16(value); 4191 case INT4OID: 4192 return (double) DatumGetInt32(value); 4193 case INT8OID: 4194 return (double) DatumGetInt64(value); 4195 case FLOAT4OID: 4196 return (double) DatumGetFloat4(value); 4197 case FLOAT8OID: 4198 return (double) DatumGetFloat8(value); 4199 case NUMERICOID: 4200 /* Note: out-of-range values will be clamped to +-HUGE_VAL */ 4201 return (double) 4202 DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow, 4203 value)); 4204 case OIDOID: 4205 case REGPROCOID: 4206 case REGPROCEDUREOID: 4207 case REGOPEROID: 4208 case REGOPERATOROID: 4209 case REGCLASSOID: 4210 case REGTYPEOID: 4211 case REGCONFIGOID: 4212 case REGDICTIONARYOID: 4213 case REGROLEOID: 4214 case REGNAMESPACEOID: 4215 /* we can treat OIDs as integers... */ 4216 return (double) DatumGetObjectId(value); 4217 } 4218 4219 *failure = true; 4220 return 0; 4221 } 4222 4223 /* 4224 * Do convert_to_scalar()'s work for any character-string data type. 4225 * 4226 * String datatypes are converted to a scale that ranges from 0 to 1, 4227 * where we visualize the bytes of the string as fractional digits. 4228 * 4229 * We do not want the base to be 256, however, since that tends to 4230 * generate inflated selectivity estimates; few databases will have 4231 * occurrences of all 256 possible byte values at each position. 4232 * Instead, use the smallest and largest byte values seen in the bounds 4233 * as the estimated range for each byte, after some fudging to deal with 4234 * the fact that we probably aren't going to see the full range that way. 4235 * 4236 * An additional refinement is that we discard any common prefix of the 4237 * three strings before computing the scaled values. 
/*
 * Do convert_to_scalar()'s work for any character-string data type.
 *
 * String datatypes are converted to a scale that ranges from 0 to 1,
 * where we visualize the bytes of the string as fractional digits.
 *
 * We do not want the base to be 256, however, since that tends to
 * generate inflated selectivity estimates; few databases will have
 * occurrences of all 256 possible byte values at each position.
 * Instead, use the smallest and largest byte values seen in the bounds
 * as the estimated range for each byte, after some fudging to deal with
 * the fact that we probably aren't going to see the full range that way.
 *
 * An additional refinement is that we discard any common prefix of the
 * three strings before computing the scaled values.  This allows us to
 * "zoom in" when we encounter a narrow data range.  An example is a phone
 * number database where all the values begin with the same area code.
 * (Actually, the bounds will be adjacent histogram-bin-boundary values,
 * so this is more likely to happen than you might think.)
 */

/*
 * Widen *rangelo/*rangehi to cover the whole byte class [lo, hi], but only
 * when the current range already overlaps that class.
 */
static void
widen_byte_range(int *rangelo, int *rangehi, int lo, int hi)
{
	if (*rangelo <= hi && *rangehi >= lo)
	{
		if (*rangelo > lo)
			*rangelo = lo;
		if (*rangehi < hi)
			*rangehi = hi;
	}
}

/*
 * Map one string to a fraction in [0, 1), treating its leading bytes as
 * fractional digits in base (rangehi - rangelo + 1).
 */
static double
convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
{
	int			len = strlen(value);
	double		base;
	double		denom;
	double		num;
	int			i;

	if (len <= 0)
		return 0.0;				/* empty string has scalar value 0 */

	/*
	 * There is little point in looking at more than a dozen bytes: with base
	 * at least 10 that already gives ~12 decimal digits of resolution, far
	 * beyond the precision of this estimation technique, and it also keeps
	 * denom comfortably below the double overflow threshold (256^13 ~ 2e31).
	 */
	if (len > 12)
		len = 12;

	/* Accumulate the byte "digits" as an ever-finer fraction */
	base = rangehi - rangelo + 1;
	num = 0.0;
	denom = base;
	for (i = 0; i < len; i++)
	{
		int			ch = (unsigned char) value[i];

		/* clamp out-of-range bytes to just outside the assumed range */
		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;
		num += ((double) (ch - rangelo)) / denom;
		denom *= base;
	}

	return num;
}

static void
convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound)
{
	int			rangelo,
				rangehi;
	char	   *p;

	/*
	 * Scan both bound strings for the smallest and largest byte values
	 * present; these seed the per-byte "digit" range.
	 */
	rangelo = rangehi = (unsigned char) hibound[0];
	for (p = lobound; *p; p++)
	{
		int			b = (unsigned char) *p;

		if (b < rangelo)
			rangelo = b;
		if (b > rangehi)
			rangehi = b;
	}
	for (p = hibound; *p; p++)
	{
		int			b = (unsigned char) *p;

		if (b < rangelo)
			rangelo = b;
		if (b > rangehi)
			rangehi = b;
	}

	/*
	 * If the range touches upper-case ASCII, lower-case ASCII, or digits,
	 * assume the entire character class may occur.
	 */
	widen_byte_range(&rangelo, &rangehi, 'A', 'Z');
	widen_byte_range(&rangelo, &rangehi, 'a', 'z');
	widen_byte_range(&rangelo, &rangehi, '0', '9');

	/*
	 * If the range still covers fewer than 10 byte values, assume we have
	 * not got enough data, and fall back to the regular ASCII set.
	 */
	if (rangehi - rangelo < 9)
	{
		rangelo = ' ';
		rangehi = 127;
	}

	/* Strip any common prefix of the three strings to "zoom in". */
	while (*lobound && *lobound == *hibound && *lobound == *value)
	{
		lobound++;
		hibound++;
		value++;
	}

	/* Convert what remains of each string. */
	*scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
	*scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
	*scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
}
/*
 * Convert a string-type Datum into a palloc'd, null-terminated string.
 *
 * On failure (e.g., unsupported typid), set *failure to true;
 * otherwise, that variable is not changed.  (We'll return NULL on failure.)
 *
 * When using a non-C locale, we must pass the string through strxfrm()
 * before continuing, so as to generate correct locale-specific results.
 */
static char *
convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
{
	char	   *val;

	switch (typid)
	{
		case CHAROID:
			/* "char" is a single byte; build a two-byte C string from it */
			val = (char *) palloc(2);
			val[0] = DatumGetChar(value);
			val[1] = '\0';
			break;
		case BPCHAROID:
		case VARCHAROID:
		case TEXTOID:
			val = TextDatumGetCString(value);
			break;
		case NAMEOID:
			{
				NameData   *nm = (NameData *) DatumGetPointer(value);

				val = pstrdup(NameStr(*nm));
				break;
			}
		default:
			*failure = true;
			return NULL;
	}

	if (!lc_collate_is_c(collid))
	{
		char	   *xfrmstr;
		size_t		xfrmlen;
		size_t		xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;

		/*
		 * XXX: We could guess at a suitable output buffer size and only call
		 * strxfrm twice if our guess is too small.
		 *
		 * XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
		 * bogus data or set an error. This is not really a problem unless it
		 * crashes since it will only give an estimation error and nothing
		 * fatal.
		 */
		xfrmlen = strxfrm(NULL, val, 0);
#ifdef WIN32

		/*
		 * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
		 * of trying to allocate this much memory (and fail), just return the
		 * original string unmodified as if we were in the C locale.
		 */
		if (xfrmlen == INT_MAX)
			return val;
#endif
		xfrmstr = (char *) palloc(xfrmlen + 1);
		xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);

		/*
		 * Some systems (e.g., glibc) can return a smaller value from the
		 * second call than the first; thus the Assert must be <= not ==.
		 */
		Assert(xfrmlen2 <= xfrmlen);
		pfree(val);
		val = xfrmstr;
	}

	return val;
}

/*
 * Do convert_to_scalar()'s work for any bytea data type.
 *
 * Very similar to convert_string_to_scalar except we can't assume
 * null-termination and therefore pass explicit lengths around.
 *
 * Also, assumptions about likely "normal" ranges of characters have been
 * removed - a data range of 0..255 is always used, for now.  (Perhaps
 * someday we will add information about actual byte data range to
 * pg_statistic.)
 */
static void
convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound)
{
	bytea	   *valuep = DatumGetByteaPP(value);
	bytea	   *loboundp = DatumGetByteaPP(lobound);
	bytea	   *hiboundp = DatumGetByteaPP(hibound);
	int			rangelo,
				rangehi,
				valuelen = VARSIZE_ANY_EXHDR(valuep),
				loboundlen = VARSIZE_ANY_EXHDR(loboundp),
				hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
				i,
				minlen;
	unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
	unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
	unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);

	/*
	 * Assume bytea data is uniformly distributed across all byte values.
	 */
	rangelo = 0;
	rangehi = 255;

	/*
	 * Now strip any common prefix of the three strings.
	 */
	minlen = Min(Min(valuelen, loboundlen), hiboundlen);
	for (i = 0; i < minlen; i++)
	{
		if (*lostr != *histr || *lostr != *valstr)
			break;
		lostr++, histr++, valstr++;
		loboundlen--, hiboundlen--, valuelen--;
	}

	/*
	 * Now we can do the conversions.
	 */
	*scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
	*scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
	*scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
}

/*
 * Map one bytea value to a fraction in [0, 1), treating its leading bytes
 * as fractional digits in base (rangehi - rangelo + 1).
 */
static double
convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi)
{
	double		num,
				denom,
				base;

	if (valuelen <= 0)
		return 0.0;				/* empty string has scalar value 0 */

	/*
	 * Since base is 256, need not consider more than about 10 chars (even
	 * this many seems like overkill)
	 */
	if (valuelen > 10)
		valuelen = 10;

	/* Convert initial characters to fraction */
	base = rangehi - rangelo + 1;
	num = 0.0;
	denom = base;
	while (valuelen-- > 0)
	{
		int			ch = *value++;

		/* clamp out-of-range bytes to just outside the assumed range */
		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;
		num += ((double) (ch - rangelo)) / denom;
		denom *= base;
	}

	return num;
}

/*
 * Do convert_to_scalar()'s work for any timevalue data type.
 *
 * On failure (e.g., unsupported typid), set *failure to true;
 * otherwise, that variable is not changed.
 */
static double
convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
{
	switch (typid)
	{
		case TIMESTAMPOID:
			return DatumGetTimestamp(value);
		case TIMESTAMPTZOID:
			return DatumGetTimestampTz(value);
		case DATEOID:
			return date2timestamp_no_overflow(DatumGetDateADT(value));
		case INTERVALOID:
			{
				Interval   *interval = DatumGetIntervalP(value);

				/*
				 * Convert the month part of Interval to days using assumed
				 * average month length of 365.25/12.0 days.  Not too
				 * accurate, but plenty good enough for our purposes.
				 */
				return interval->time + interval->day * (double) USECS_PER_DAY +
					interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
			}
		case TIMEOID:
			return DatumGetTimeADT(value);
		case TIMETZOID:
			{
				TimeTzADT  *timetz = DatumGetTimeTzADTP(value);

				/* use GMT-equivalent time */
				return (double) (timetz->time + (timetz->zone * 1000000.0));
			}
	}

	*failure = true;
	return 0;
}
/*
 * get_restriction_variable
 *		Examine the args of a restriction clause to see if it's of the
 *		form (variable op pseudoconstant) or (pseudoconstant op variable),
 *		where "variable" could be either a Var or an expression in vars of a
 *		single relation.  If so, extract information about the variable,
 *		and also indicate which side it was on and the other argument.
 *
 * Inputs:
 *	root: the planner info
 *	args: clause argument list
 *	varRelid: see specs for restriction selectivity functions
 *
 * Outputs: (these are valid only if true is returned)
 *	*vardata: gets information about variable (see examine_variable)
 *	*other: gets other clause argument, aggressively reduced to a constant
 *	*varonleft: set true if variable is on the left, false if on the right
 *
 * Returns true if a variable is identified, otherwise false.
 *
 * Note: if there are Vars on both sides of the clause, we must fail, because
 * callers are expecting that the other side will act like a pseudoconstant.
 */
bool
get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
						 VariableStatData *vardata, Node **other,
						 bool *varonleft)
{
	Node	   *left,
			   *right;
	VariableStatData rdata;

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
		return false;

	left = (Node *) linitial(args);
	right = (Node *) lsecond(args);

	/*
	 * Examine both sides.  Note that when varRelid is nonzero, Vars of other
	 * relations will be treated as pseudoconstants.
	 */
	examine_variable(root, left, varRelid, vardata);
	examine_variable(root, right, varRelid, &rdata);

	/*
	 * If one side is a variable and the other not, we win.
	 */
	if (vardata->rel && rdata.rel == NULL)
	{
		*varonleft = true;
		*other = estimate_expression_value(root, rdata.var);
		/* Assume we need no ReleaseVariableStats(rdata) here */
		return true;
	}

	if (vardata->rel == NULL && rdata.rel)
	{
		*varonleft = false;
		*other = estimate_expression_value(root, vardata->var);
		/* Assume we need no ReleaseVariableStats(*vardata) here */
		*vardata = rdata;		/* copy rdata's stats into caller's struct */
		return true;
	}

	/* Oops, clause has wrong structure (probably var op var) */
	ReleaseVariableStats(*vardata);
	ReleaseVariableStats(rdata);

	return false;
}
4667 */ 4668 void 4669 get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo, 4670 VariableStatData *vardata1, VariableStatData *vardata2, 4671 bool *join_is_reversed) 4672 { 4673 Node *left, 4674 *right; 4675 4676 if (list_length(args) != 2) 4677 elog(ERROR, "join operator should take two arguments"); 4678 4679 left = (Node *) linitial(args); 4680 right = (Node *) lsecond(args); 4681 4682 examine_variable(root, left, 0, vardata1); 4683 examine_variable(root, right, 0, vardata2); 4684 4685 if (vardata1->rel && 4686 bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand)) 4687 *join_is_reversed = true; /* var1 is on RHS */ 4688 else if (vardata2->rel && 4689 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand)) 4690 *join_is_reversed = true; /* var2 is on LHS */ 4691 else 4692 *join_is_reversed = false; 4693 } 4694 4695 /* 4696 * examine_variable 4697 * Try to look up statistical data about an expression. 4698 * Fill in a VariableStatData struct to describe the expression. 4699 * 4700 * Inputs: 4701 * root: the planner info 4702 * node: the expression tree to examine 4703 * varRelid: see specs for restriction selectivity functions 4704 * 4705 * Outputs: *vardata is filled as follows: 4706 * var: the input expression (with any binary relabeling stripped, if 4707 * it is or contains a variable; but otherwise the type is preserved) 4708 * rel: RelOptInfo for relation containing variable; NULL if expression 4709 * contains no Vars (NOTE this could point to a RelOptInfo of a 4710 * subquery, not one in the current query). 4711 * statsTuple: the pg_statistic entry for the variable, if one exists; 4712 * otherwise NULL. 4713 * freefunc: pointer to a function to release statsTuple with. 4714 * vartype: exposed type of the expression; this should always match 4715 * the declared input type of the operator we are estimating for. 4716 * atttype, atttypmod: actual type/typmod of the "var" expression. 
/*
 * examine_variable
 *		Try to look up statistical data about an expression.
 *		Fill in a VariableStatData struct to describe the expression.
 *
 * Inputs:
 *	root: the planner info
 *	node: the expression tree to examine
 *	varRelid: see specs for restriction selectivity functions
 *
 * Outputs: *vardata is filled as follows:
 *	var: the input expression (with any binary relabeling stripped, if
 *		it is or contains a variable; but otherwise the type is preserved)
 *	rel: RelOptInfo for relation containing variable; NULL if expression
 *		contains no Vars (NOTE this could point to a RelOptInfo of a
 *		subquery, not one in the current query).
 *	statsTuple: the pg_statistic entry for the variable, if one exists;
 *		otherwise NULL.
 *	freefunc: pointer to a function to release statsTuple with.
 *	vartype: exposed type of the expression; this should always match
 *		the declared input type of the operator we are estimating for.
 *	atttype, atttypmod: actual type/typmod of the "var" expression.  This is
 *		commonly the same as the exposed type of the variable argument,
 *		but can be different in binary-compatible-type cases.
 *	isunique: true if we were able to match the var to a unique index or a
 *		single-column DISTINCT clause, implying its values are unique for
 *		this query.  (Caution: this should be trusted for statistical
 *		purposes only, since we do not check indimmediate nor verify that
 *		the exact same definition of equality applies.)
 *	acl_ok: true if current user has permission to read the column(s)
 *		underlying the pg_statistic entry.  This is consulted by
 *		statistic_proc_security_check().
 *
 * Caller is responsible for doing ReleaseVariableStats() before exiting.
 */
void
examine_variable(PlannerInfo *root, Node *node, int varRelid,
				 VariableStatData *vardata)
{
	Node	   *basenode;
	Relids		varnos;
	RelOptInfo *onerel;

	/* Make sure we don't return dangling pointers in vardata */
	MemSet(vardata, 0, sizeof(VariableStatData));

	/* Save the exposed type of the expression */
	vardata->vartype = exprType(node);

	/* Look inside any binary-compatible relabeling */

	if (IsA(node, RelabelType))
		basenode = (Node *) ((RelabelType *) node)->arg;
	else
		basenode = node;

	/* Fast path for a simple Var */

	if (IsA(basenode, Var) &&
		(varRelid == 0 || varRelid == ((Var *) basenode)->varno))
	{
		Var		   *var = (Var *) basenode;

		/* Set up result fields other than the stats tuple */
		vardata->var = basenode;	/* return Var without relabeling */
		vardata->rel = find_base_rel(root, var->varno);
		vardata->atttype = var->vartype;
		vardata->atttypmod = var->vartypmod;
		vardata->isunique = has_unique_index(vardata->rel, var->varattno);

		/* Try to locate some stats */
		examine_simple_variable(root, var, vardata);

		return;
	}

	/*
	 * Okay, it's a more complicated expression.  Determine variable
	 * membership.  Note that when varRelid isn't zero, only vars of that
	 * relation are considered "real" vars.
	 */
	varnos = pull_varnos(root, basenode);

	onerel = NULL;

	switch (bms_membership(varnos))
	{
		case BMS_EMPTY_SET:
			/* No Vars at all ... must be pseudo-constant clause */
			break;
		case BMS_SINGLETON:
			if (varRelid == 0 || bms_is_member(varRelid, varnos))
			{
				onerel = find_base_rel(root,
									   (varRelid ? varRelid : bms_singleton_member(varnos)));
				vardata->rel = onerel;
				node = basenode;	/* strip any relabeling */
			}
			/* else treat it as a constant */
			break;
		case BMS_MULTIPLE:
			if (varRelid == 0)
			{
				/* treat it as a variable of a join relation */
				vardata->rel = find_join_rel(root, varnos);
				node = basenode;	/* strip any relabeling */
			}
			else if (bms_is_member(varRelid, varnos))
			{
				/* ignore the vars belonging to other relations */
				vardata->rel = find_base_rel(root, varRelid);
				node = basenode;	/* strip any relabeling */
				/* note: no point in expressional-index search here */
			}
			/* else treat it as a constant */
			break;
	}

	bms_free(varnos);

	vardata->var = node;
	vardata->atttype = exprType(node);
	vardata->atttypmod = exprTypmod(node);

	if (onerel)
	{
		/*
		 * We have an expression in vars of a single relation.  Try to match
		 * it to expressional index columns, in hopes of finding some
		 * statistics.
		 *
		 * Note that we consider all index columns including INCLUDE columns,
		 * since there could be stats for such columns.  But the test for
		 * uniqueness needs to be warier.
		 *
		 * XXX it's conceivable that there are multiple matches with different
		 * index opfamilies; if so, we need to pick one that matches the
		 * operator we are estimating for.  FIXME later.
		 */
		ListCell   *ilist;

		foreach(ilist, onerel->indexlist)
		{
			IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
			ListCell   *indexpr_item;
			int			pos;

			indexpr_item = list_head(index->indexprs);
			if (indexpr_item == NULL)
				continue;		/* no expressions here... */

			for (pos = 0; pos < index->ncolumns; pos++)
			{
				/* indexkeys[pos] == 0 marks an expression column */
				if (index->indexkeys[pos] == 0)
				{
					Node	   *indexkey;

					if (indexpr_item == NULL)
						elog(ERROR, "too few entries in indexprs list");
					indexkey = (Node *) lfirst(indexpr_item);
					if (indexkey && IsA(indexkey, RelabelType))
						indexkey = (Node *) ((RelabelType *) indexkey)->arg;
					if (equal(node, indexkey))
					{
						/*
						 * Found a match ... is it a unique index? Tests here
						 * should match has_unique_index().
						 */
						if (index->unique &&
							index->nkeycolumns == 1 &&
							pos == 0 &&
							(index->indpred == NIL || index->predOK))
							vardata->isunique = true;

						/*
						 * Has it got stats?  We only consider stats for
						 * non-partial indexes, since partial indexes probably
						 * don't reflect whole-relation statistics; the above
						 * check for uniqueness is the only info we take from
						 * a partial index.
						 *
						 * An index stats hook, however, must make its own
						 * decisions about what to do with partial indexes.
						 */
						if (get_index_stats_hook &&
							(*get_index_stats_hook) (root, index->indexoid,
													 pos + 1, vardata))
						{
							/*
							 * The hook took control of acquiring a stats
							 * tuple.  If it did supply a tuple, it'd better
							 * have supplied a freefunc.
							 */
							if (HeapTupleIsValid(vardata->statsTuple) &&
								!vardata->freefunc)
								elog(ERROR, "no function provided to release variable stats with");
						}
						else if (index->indpred == NIL)
						{
							vardata->statsTuple =
								SearchSysCache3(STATRELATTINH,
												ObjectIdGetDatum(index->indexoid),
												Int16GetDatum(pos + 1),
												BoolGetDatum(false));
							vardata->freefunc = ReleaseSysCache;

							if (HeapTupleIsValid(vardata->statsTuple))
							{
								/* Get index's table for permission check */
								RangeTblEntry *rte;
								Oid			userid;

								rte = planner_rt_fetch(index->rel->relid, root);
								Assert(rte->rtekind == RTE_RELATION);

								/*
								 * Use checkAsUser if it's set, in case we're
								 * accessing the table via a view.
								 */
								userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

								/*
								 * For simplicity, we insist on the whole
								 * table being selectable, rather than trying
								 * to identify which column(s) the index
								 * depends on.  Also require all rows to be
								 * selectable --- there must be no
								 * securityQuals from security barrier views
								 * or RLS policies.
								 */
								vardata->acl_ok =
									rte->securityQuals == NIL &&
									(pg_class_aclcheck(rte->relid, userid,
													   ACL_SELECT) == ACLCHECK_OK);

								/*
								 * If the user doesn't have permissions to
								 * access an inheritance child relation, check
								 * the permissions of the table actually
								 * mentioned in the query, since most likely
								 * the user does have that permission.  Note
								 * that whole-table select privilege on the
								 * parent doesn't quite guarantee that the
								 * user could read all columns of the child.
								 * But in practice it's unlikely that any
								 * interesting security violation could result
								 * from allowing access to the expression
								 * index's stats, so we allow it anyway.  See
								 * similar code in examine_simple_variable()
								 * for additional comments.
								 */
								if (!vardata->acl_ok &&
									root->append_rel_array != NULL)
								{
									AppendRelInfo *appinfo;
									Index		varno = index->rel->relid;

									/* walk up to the topmost RTE_RELATION ancestor */
									appinfo = root->append_rel_array[varno];
									while (appinfo &&
										   planner_rt_fetch(appinfo->parent_relid,
															root)->rtekind == RTE_RELATION)
									{
										varno = appinfo->parent_relid;
										appinfo = root->append_rel_array[varno];
									}
									if (varno != index->rel->relid)
									{
										/* Repeat access check on this rel */
										rte = planner_rt_fetch(varno, root);
										Assert(rte->rtekind == RTE_RELATION);

										userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

										vardata->acl_ok =
											rte->securityQuals == NIL &&
											(pg_class_aclcheck(rte->relid,
															   userid,
															   ACL_SELECT) == ACLCHECK_OK);
									}
								}
							}
							else
							{
								/* suppress leakproofness checks later */
								vardata->acl_ok = true;
							}
						}
						if (vardata->statsTuple)
							break;
					}
					indexpr_item = lnext(index->indexprs, indexpr_item);
				}
			}
			if (vardata->statsTuple)
				break;
		}
	}
}

/*
 * examine_simple_variable
 *		Handle a simple Var for examine_variable
 *
 * This is split out as a subroutine so that we can recurse to deal with
 * Vars referencing subqueries.
 *
 * We already filled in all the fields of *vardata except for the stats tuple.
 */
static void
examine_simple_variable(PlannerInfo *root, Var *var,
						VariableStatData *vardata)
{
	RangeTblEntry *rte = root->simple_rte_array[var->varno];

	Assert(IsA(rte, RangeTblEntry));

	if (get_relation_stats_hook &&
		(*get_relation_stats_hook) (root, rte, var->varattno, vardata))
	{
		/*
		 * The hook took control of acquiring a stats tuple.  If it did supply
		 * a tuple, it'd better have supplied a freefunc.
5017 */ 5018 if (HeapTupleIsValid(vardata->statsTuple) && 5019 !vardata->freefunc) 5020 elog(ERROR, "no function provided to release variable stats with"); 5021 } 5022 else if (rte->rtekind == RTE_RELATION) 5023 { 5024 /* 5025 * Plain table or parent of an inheritance appendrel, so look up the 5026 * column in pg_statistic 5027 */ 5028 vardata->statsTuple = SearchSysCache3(STATRELATTINH, 5029 ObjectIdGetDatum(rte->relid), 5030 Int16GetDatum(var->varattno), 5031 BoolGetDatum(rte->inh)); 5032 vardata->freefunc = ReleaseSysCache; 5033 5034 if (HeapTupleIsValid(vardata->statsTuple)) 5035 { 5036 Oid userid; 5037 5038 /* 5039 * Check if user has permission to read this column. We require 5040 * all rows to be accessible, so there must be no securityQuals 5041 * from security barrier views or RLS policies. Use checkAsUser 5042 * if it's set, in case we're accessing the table via a view. 5043 */ 5044 userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); 5045 5046 vardata->acl_ok = 5047 rte->securityQuals == NIL && 5048 ((pg_class_aclcheck(rte->relid, userid, 5049 ACL_SELECT) == ACLCHECK_OK) || 5050 (pg_attribute_aclcheck(rte->relid, var->varattno, userid, 5051 ACL_SELECT) == ACLCHECK_OK)); 5052 5053 /* 5054 * If the user doesn't have permissions to access an inheritance 5055 * child relation or specifically this attribute, check the 5056 * permissions of the table/column actually mentioned in the 5057 * query, since most likely the user does have that permission 5058 * (else the query will fail at runtime), and if the user can read 5059 * the column there then he can get the values of the child table 5060 * too. To do that, we must find out which of the root parent's 5061 * attributes the child relation's attribute corresponds to. 
5062 */ 5063 if (!vardata->acl_ok && var->varattno > 0 && 5064 root->append_rel_array != NULL) 5065 { 5066 AppendRelInfo *appinfo; 5067 Index varno = var->varno; 5068 int varattno = var->varattno; 5069 bool found = false; 5070 5071 appinfo = root->append_rel_array[varno]; 5072 5073 /* 5074 * Partitions are mapped to their immediate parent, not the 5075 * root parent, so must be ready to walk up multiple 5076 * AppendRelInfos. But stop if we hit a parent that is not 5077 * RTE_RELATION --- that's a flattened UNION ALL subquery, not 5078 * an inheritance parent. 5079 */ 5080 while (appinfo && 5081 planner_rt_fetch(appinfo->parent_relid, 5082 root)->rtekind == RTE_RELATION) 5083 { 5084 int parent_varattno; 5085 5086 found = false; 5087 if (varattno <= 0 || varattno > appinfo->num_child_cols) 5088 break; /* safety check */ 5089 parent_varattno = appinfo->parent_colnos[varattno - 1]; 5090 if (parent_varattno == 0) 5091 break; /* Var is local to child */ 5092 5093 varno = appinfo->parent_relid; 5094 varattno = parent_varattno; 5095 found = true; 5096 5097 /* If the parent is itself a child, continue up. */ 5098 appinfo = root->append_rel_array[varno]; 5099 } 5100 5101 /* 5102 * In rare cases, the Var may be local to the child table, in 5103 * which case, we've got to live with having no access to this 5104 * column's stats. 5105 */ 5106 if (!found) 5107 return; 5108 5109 /* Repeat the access check on this parent rel & column */ 5110 rte = planner_rt_fetch(varno, root); 5111 Assert(rte->rtekind == RTE_RELATION); 5112 5113 userid = rte->checkAsUser ? 
rte->checkAsUser : GetUserId(); 5114 5115 vardata->acl_ok = 5116 rte->securityQuals == NIL && 5117 ((pg_class_aclcheck(rte->relid, userid, 5118 ACL_SELECT) == ACLCHECK_OK) || 5119 (pg_attribute_aclcheck(rte->relid, varattno, userid, 5120 ACL_SELECT) == ACLCHECK_OK)); 5121 } 5122 } 5123 else 5124 { 5125 /* suppress any possible leakproofness checks later */ 5126 vardata->acl_ok = true; 5127 } 5128 } 5129 else if (rte->rtekind == RTE_SUBQUERY && !rte->inh) 5130 { 5131 /* 5132 * Plain subquery (not one that was converted to an appendrel). 5133 */ 5134 Query *subquery = rte->subquery; 5135 RelOptInfo *rel; 5136 TargetEntry *ste; 5137 5138 /* 5139 * Punt if it's a whole-row var rather than a plain column reference. 5140 */ 5141 if (var->varattno == InvalidAttrNumber) 5142 return; 5143 5144 /* 5145 * Punt if subquery uses set operations or GROUP BY, as these will 5146 * mash underlying columns' stats beyond recognition. (Set ops are 5147 * particularly nasty; if we forged ahead, we would return stats 5148 * relevant to only the leftmost subselect...) DISTINCT is also 5149 * problematic, but we check that later because there is a possibility 5150 * of learning something even with it. 5151 */ 5152 if (subquery->setOperations || 5153 subquery->groupClause || 5154 subquery->groupingSets) 5155 return; 5156 5157 /* 5158 * OK, fetch RelOptInfo for subquery. Note that we don't change the 5159 * rel returned in vardata, since caller expects it to be a rel of the 5160 * caller's query level. Because we might already be recursing, we 5161 * can't use that rel pointer either, but have to look up the Var's 5162 * rel afresh. 5163 */ 5164 rel = find_base_rel(root, var->varno); 5165 5166 /* If the subquery hasn't been planned yet, we have to punt */ 5167 if (rel->subroot == NULL) 5168 return; 5169 Assert(IsA(rel->subroot, PlannerInfo)); 5170 5171 /* 5172 * Switch our attention to the subquery as mangled by the planner. 
It 5173 * was okay to look at the pre-planning version for the tests above, 5174 * but now we need a Var that will refer to the subroot's live 5175 * RelOptInfos. For instance, if any subquery pullup happened during 5176 * planning, Vars in the targetlist might have gotten replaced, and we 5177 * need to see the replacement expressions. 5178 */ 5179 subquery = rel->subroot->parse; 5180 Assert(IsA(subquery, Query)); 5181 5182 /* Get the subquery output expression referenced by the upper Var */ 5183 ste = get_tle_by_resno(subquery->targetList, var->varattno); 5184 if (ste == NULL || ste->resjunk) 5185 elog(ERROR, "subquery %s does not have attribute %d", 5186 rte->eref->aliasname, var->varattno); 5187 var = (Var *) ste->expr; 5188 5189 /* 5190 * If subquery uses DISTINCT, we can't make use of any stats for the 5191 * variable ... but, if it's the only DISTINCT column, we are entitled 5192 * to consider it unique. We do the test this way so that it works 5193 * for cases involving DISTINCT ON. 5194 */ 5195 if (subquery->distinctClause) 5196 { 5197 if (list_length(subquery->distinctClause) == 1 && 5198 targetIsInSortList(ste, InvalidOid, subquery->distinctClause)) 5199 vardata->isunique = true; 5200 /* cannot go further */ 5201 return; 5202 } 5203 5204 /* 5205 * If the sub-query originated from a view with the security_barrier 5206 * attribute, we must not look at the variable's statistics, though it 5207 * seems all right to notice the existence of a DISTINCT clause. So 5208 * stop here. 5209 * 5210 * This is probably a harsher restriction than necessary; it's 5211 * certainly OK for the selectivity estimator (which is a C function, 5212 * and therefore omnipotent anyway) to look at the statistics. But 5213 * many selectivity estimators will happily *invoke the operator 5214 * function* to try to work out a good estimate - and that's not OK. 5215 * So for now, don't dig down for stats. 
5216 */ 5217 if (rte->security_barrier) 5218 return; 5219 5220 /* Can only handle a simple Var of subquery's query level */ 5221 if (var && IsA(var, Var) && 5222 var->varlevelsup == 0) 5223 { 5224 /* 5225 * OK, recurse into the subquery. Note that the original setting 5226 * of vardata->isunique (which will surely be false) is left 5227 * unchanged in this situation. That's what we want, since even 5228 * if the underlying column is unique, the subquery may have 5229 * joined to other tables in a way that creates duplicates. 5230 */ 5231 examine_simple_variable(rel->subroot, var, vardata); 5232 } 5233 } 5234 else 5235 { 5236 /* 5237 * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We 5238 * won't see RTE_JOIN here because join alias Vars have already been 5239 * flattened.) There's not much we can do with function outputs, but 5240 * maybe someday try to be smarter about VALUES and/or CTEs. 5241 */ 5242 } 5243 } 5244 5245 /* 5246 * Check whether it is permitted to call func_oid passing some of the 5247 * pg_statistic data in vardata. We allow this either if the user has SELECT 5248 * privileges on the table or column underlying the pg_statistic data or if 5249 * the function is marked leak-proof. 5250 */ 5251 bool 5252 statistic_proc_security_check(VariableStatData *vardata, Oid func_oid) 5253 { 5254 if (vardata->acl_ok) 5255 return true; 5256 5257 if (!OidIsValid(func_oid)) 5258 return false; 5259 5260 if (get_func_leakproof(func_oid)) 5261 return true; 5262 5263 ereport(DEBUG2, 5264 (errmsg_internal("not using statistics because function \"%s\" is not leak-proof", 5265 get_func_name(func_oid)))); 5266 return false; 5267 } 5268 5269 /* 5270 * get_variable_numdistinct 5271 * Estimate the number of distinct values of a variable. 5272 * 5273 * vardata: results of examine_variable 5274 * *isdefault: set to true if the result is a default rather than based on 5275 * anything meaningful. 
 *
 * NB: be careful to produce a positive integral result, since callers may
 * compare the result to exact integer counts, or might divide by it.
 */
double
get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
{
	double		stadistinct;
	double		stanullfrac = 0.0;
	double		ntuples;

	*isdefault = false;

	/*
	 * Determine the stadistinct value to use.  There are cases where we can
	 * get an estimate even without a pg_statistic entry, or can get a better
	 * value than is in pg_statistic.  Grab stanullfrac too if we can find it
	 * (otherwise, assume no nulls, for lack of any better idea).
	 */
	if (HeapTupleIsValid(vardata->statsTuple))
	{
		/* Use the pg_statistic entry */
		Form_pg_statistic stats;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
		stadistinct = stats->stadistinct;
		stanullfrac = stats->stanullfrac;
	}
	else if (vardata->vartype == BOOLOID)
	{
		/*
		 * Special-case boolean columns: presumably, two distinct values.
		 *
		 * Are there any other datatypes we should wire in special estimates
		 * for?
		 */
		stadistinct = 2.0;
	}
	else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
	{
		/*
		 * If the Var represents a column of a VALUES RTE, assume it's unique.
		 * This could of course be very wrong, but it should tend to be true
		 * in well-written queries.  We could consider examining the VALUES'
		 * contents to get some real statistics; but that only works if the
		 * entries are all constants, and it would be pretty expensive anyway.
		 */
		stadistinct = -1.0;		/* unique (and all non null) */
	}
	else
	{
		/*
		 * We don't keep statistics for system columns, but in some cases we
		 * can infer distinctness anyway.
		 */
		if (vardata->var && IsA(vardata->var, Var))
		{
			switch (((Var *) vardata->var)->varattno)
			{
				case SelfItemPointerAttributeNumber:
					/* ctid is unique by definition */
					stadistinct = -1.0; /* unique (and all non null) */
					break;
				case TableOidAttributeNumber:
					/* all rows of one relation share a tableoid */
					stadistinct = 1.0;	/* only 1 value */
					break;
				default:
					stadistinct = 0.0;	/* means "unknown" */
					break;
			}
		}
		else
			stadistinct = 0.0;	/* means "unknown" */

		/*
		 * XXX consider using estimate_num_groups on expressions?
		 */
	}

	/*
	 * If there is a unique index or DISTINCT clause for the variable, assume
	 * it is unique no matter what pg_statistic says; the statistics could be
	 * out of date, or we might have found a partial unique index that proves
	 * the var is unique for this query.  However, we'd better still believe
	 * the null-fraction statistic.
	 */
	if (vardata->isunique)
		stadistinct = -1.0 * (1.0 - stanullfrac);

	/*
	 * If we had an absolute estimate, use that.
	 */
	if (stadistinct > 0.0)
		return clamp_row_est(stadistinct);

	/*
	 * Otherwise we need to get the relation size; punt if not available.
	 */
	if (vardata->rel == NULL)
	{
		*isdefault = true;
		return DEFAULT_NUM_DISTINCT;
	}
	ntuples = vardata->rel->tuples;
	if (ntuples <= 0.0)
	{
		*isdefault = true;
		return DEFAULT_NUM_DISTINCT;
	}

	/*
	 * If we had a relative estimate, use that.
	 */
	if (stadistinct < 0.0)
		return clamp_row_est(-stadistinct * ntuples);

	/*
	 * With no data, estimate ndistinct = ntuples if the table is small, else
	 * use default.  We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
	 * that the behavior isn't discontinuous.
	 */
	if (ntuples < DEFAULT_NUM_DISTINCT)
		return clamp_row_est(ntuples);

	*isdefault = true;
	return DEFAULT_NUM_DISTINCT;
}

/*
 * get_variable_range
 *		Estimate the minimum and maximum value of the specified variable.
 *		If successful, store values in *min and *max, and return true.
 *		If no data available, return false.
 *
 * sortop is the "<" comparison operator to use.  This should generally
 * be "<" not ">", as only the former is likely to be found in pg_statistic.
 * The collation must be specified too.
 */
static bool
get_variable_range(PlannerInfo *root, VariableStatData *vardata,
				   Oid sortop, Oid collation,
				   Datum *min, Datum *max)
{
	Datum		tmin = 0;
	Datum		tmax = 0;
	bool		have_data = false;
	int16		typLen;
	bool		typByVal;
	Oid			opfuncoid;
	FmgrInfo	opproc;
	AttStatsSlot sslot;

	/*
	 * XXX It's very tempting to try to use the actual column min and max, if
	 * we can get them relatively-cheaply with an index probe.  However, since
	 * this function is called many times during join planning, that could
	 * have unpleasant effects on planning speed.  Need more investigation
	 * before enabling this.
	 */
#ifdef NOT_USED
	if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
		return true;
#endif

	if (!HeapTupleIsValid(vardata->statsTuple))
	{
		/* no stats available, so default result */
		return false;
	}

	/*
	 * If we can't apply the sortop to the stats data, just fail.  In
	 * principle, if there's a histogram and no MCVs, we could return the
	 * histogram endpoints without ever applying the sortop ... but it's
	 * probably not worth trying, because whatever the caller wants to do with
	 * the endpoints would likely fail the security check too.
	 */
	if (!statistic_proc_security_check(vardata,
									   (opfuncoid = get_opcode(sortop))))
		return false;

	opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */

	get_typlenbyval(vardata->atttype, &typLen, &typByVal);

	/*
	 * If there is a histogram with the ordering we want, grab the first and
	 * last values.
	 */
	if (get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_HISTOGRAM, sortop,
						 ATTSTATSSLOT_VALUES))
	{
		/* endpoints are usable only if the histogram's collation matches */
		if (sslot.stacoll == collation && sslot.nvalues > 0)
		{
			tmin = datumCopy(sslot.values[0], typByVal, typLen);
			tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
			have_data = true;
		}
		free_attstatsslot(&sslot);
	}

	/*
	 * Otherwise, if there is a histogram with some other ordering, scan it
	 * and get the min and max values according to the ordering we want.  This
	 * of course may not find values that are really extremal according to our
	 * ordering, but it beats ignoring available data.
	 */
	if (!have_data &&
		get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 ATTSTATSSLOT_VALUES))
	{
		get_stats_slot_range(&sslot, opfuncoid, &opproc,
							 collation, typLen, typByVal,
							 &tmin, &tmax, &have_data);
		free_attstatsslot(&sslot);
	}

	/*
	 * If we have most-common-values info, look for extreme MCVs.  This is
	 * needed even if we also have a histogram, since the histogram excludes
	 * the MCVs.  However, if we *only* have MCVs and no histogram, we should
	 * be pretty wary of deciding that that is a full representation of the
	 * data.  Proceed only if the MCVs represent the whole table (to within
	 * roundoff error).
	 */
	if (get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_MCV, InvalidOid,
						 have_data ? ATTSTATSSLOT_VALUES :
						 (ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)))
	{
		bool		use_mcvs = have_data;

		if (!have_data)
		{
			double		sumcommon = 0.0;
			double		nullfrac;
			int			i;

			/* Sum the MCV frequencies; with nulls they must cover ~everything */
			for (i = 0; i < sslot.nnumbers; i++)
				sumcommon += sslot.numbers[i];
			nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac;
			if (sumcommon + nullfrac > 0.99999)
				use_mcvs = true;
		}

		if (use_mcvs)
			get_stats_slot_range(&sslot, opfuncoid, &opproc,
								 collation, typLen, typByVal,
								 &tmin, &tmax, &have_data);
		free_attstatsslot(&sslot);
	}

	*min = tmin;
	*max = tmax;
	return have_data;
}

/*
 * get_stats_slot_range: scan sslot for min/max values
 *
 * Subroutine for get_variable_range: update min/max/have_data according
 * to what we find in the statistics array.
 */
static void
get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
					 Oid collation, int16 typLen, bool typByVal,
					 Datum *min, Datum *max, bool *p_have_data)
{
	Datum		tmin = *min;
	Datum		tmax = *max;
	bool		have_data = *p_have_data;
	bool		found_tmin = false;
	bool		found_tmax = false;

	/* Look up the comparison function, if we didn't already do so */
	if (opproc->fn_oid != opfuncoid)
		fmgr_info(opfuncoid, opproc);

	/* Scan all the slot's values */
	for (int i = 0; i < sslot->nvalues; i++)
	{
		if (!have_data)
		{
			/* first value seen initializes both extremes */
			tmin = tmax = sslot->values[i];
			found_tmin = found_tmax = true;
			*p_have_data = have_data = true;
			continue;
		}
		if (DatumGetBool(FunctionCall2Coll(opproc,
										   collation,
										   sslot->values[i], tmin)))
		{
			tmin = sslot->values[i];
			found_tmin = true;
		}
		if (DatumGetBool(FunctionCall2Coll(opproc,
										   collation,
										   tmax, sslot->values[i])))
		{
			tmax = sslot->values[i];
			found_tmax = true;
		}
	}

	/*
	 * Copy the slot's values, if we found new extreme values.
	 */
	if (found_tmin)
		*min = datumCopy(tmin, typByVal, typLen);
	if (found_tmax)
		*max = datumCopy(tmax, typByVal, typLen);
}


/*
 * get_actual_variable_range
 *		Attempt to identify the current *actual* minimum and/or maximum
 *		of the specified variable, by looking for a suitable btree index
 *		and fetching its low and/or high values.
 *		If successful, store values in *min and *max, and return true.
 *		(Either pointer can be NULL if that endpoint isn't needed.)
 *		If no data available, return false.
 *
 * sortop is the "<" comparison operator to use.
 * collation is the required collation.
 */
static bool
get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
						  Oid sortop, Oid collation,
						  Datum *min, Datum *max)
{
	bool		have_data = false;
	RelOptInfo *rel = vardata->rel;
	RangeTblEntry *rte;
	ListCell   *lc;

	/* No hope if no relation or it doesn't have indexes */
	if (rel == NULL || rel->indexlist == NIL)
		return false;
	/* If it has indexes it must be a plain relation */
	rte = root->simple_rte_array[rel->relid];
	Assert(rte->rtekind == RTE_RELATION);

	/* Search through the indexes to see if any match our problem */
	foreach(lc, rel->indexlist)
	{
		IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
		ScanDirection indexscandir;

		/* Ignore non-btree indexes */
		if (index->relam != BTREE_AM_OID)
			continue;

		/*
		 * Ignore partial indexes --- we only want stats that cover the entire
		 * relation.
		 */
		if (index->indpred != NIL)
			continue;

		/*
		 * The index list might include hypothetical indexes inserted by a
		 * get_relation_info hook --- don't try to access them.
		 */
		if (index->hypothetical)
			continue;

		/*
		 * The first index column must match the desired variable, sortop, and
		 * collation --- but we can use a descending-order index.
		 */
		if (collation != index->indexcollations[0])
			continue;			/* test first 'cause it's cheapest */
		if (!match_index_to_operand(vardata->var, 0, index))
			continue;
		switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
		{
			case BTLessStrategyNumber:
				/* ascending index: min is at the front */
				if (index->reverse_sort[0])
					indexscandir = BackwardScanDirection;
				else
					indexscandir = ForwardScanDirection;
				break;
			case BTGreaterStrategyNumber:
				/* sortop sorts descending relative to the index order */
				if (index->reverse_sort[0])
					indexscandir = ForwardScanDirection;
				else
					indexscandir = BackwardScanDirection;
				break;
			default:
				/* index doesn't match the sortop */
				continue;
		}

		/*
		 * Found a suitable index to extract data from.  Set up some data that
		 * can be used by both invocations of get_actual_variable_endpoint.
		 */
		{
			MemoryContext tmpcontext;
			MemoryContext oldcontext;
			Relation	heapRel;
			Relation	indexRel;
			TupleTableSlot *slot;
			int16		typLen;
			bool		typByVal;
			ScanKeyData scankeys[1];

			/* Make sure any cruft gets recycled when we're done */
			tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
											   "get_actual_variable_range workspace",
											   ALLOCSET_DEFAULT_SIZES);
			oldcontext = MemoryContextSwitchTo(tmpcontext);

			/*
			 * Open the table and index so we can read from them.  We should
			 * already have some type of lock on each.
			 */
			heapRel = table_open(rte->relid, NoLock);
			indexRel = index_open(index->indexoid, NoLock);

			/* build some stuff needed for indexscan execution */
			slot = table_slot_create(heapRel, NULL);
			get_typlenbyval(vardata->atttype, &typLen, &typByVal);

			/* set up an IS NOT NULL scan key so that we ignore nulls */
			ScanKeyEntryInitialize(&scankeys[0],
								   SK_ISNULL | SK_SEARCHNOTNULL,
								   1,	/* index col to scan */
								   InvalidStrategy, /* no strategy */
								   InvalidOid,	/* no strategy subtype */
								   InvalidOid,	/* no collation */
								   InvalidOid,	/* no reg proc for this */
								   (Datum) 0);	/* constant */

			/* If min is requested ... */
			if (min)
			{
				have_data = get_actual_variable_endpoint(heapRel,
														 indexRel,
														 indexscandir,
														 scankeys,
														 typLen,
														 typByVal,
														 slot,
														 oldcontext,
														 min);
			}
			else
			{
				/* If min not requested, assume index is nonempty */
				have_data = true;
			}

			/* If max is requested, and we didn't find the index is empty */
			if (max && have_data)
			{
				/* scan in the opposite direction; all else is the same */
				have_data = get_actual_variable_endpoint(heapRel,
														 indexRel,
														 -indexscandir,
														 scankeys,
														 typLen,
														 typByVal,
														 slot,
														 oldcontext,
														 max);
			}

			/* Clean everything up */
			ExecDropSingleTupleTableSlot(slot);

			index_close(indexRel, NoLock);
			table_close(heapRel, NoLock);

			MemoryContextSwitchTo(oldcontext);
			MemoryContextDelete(tmpcontext);

			/* And we're done */
			break;
		}
	}

	return have_data;
}

/*
 * Get one endpoint datum (min or max depending on indexscandir) from the
 * specified index.  Return true if successful, false if index is empty.
 * On success, endpoint value is stored to *endpointDatum (and copied into
 * outercontext).
 *
 * scankeys is a 1-element scankey array set up to reject nulls.
 * typLen/typByVal describe the datatype of the index's first column.
 * tableslot is a slot suitable to hold table tuples, in case we need
 * to probe the heap.
 * (We could compute these values locally, but that would mean computing them
 * twice when get_actual_variable_range needs both the min and the max.)
 */
static bool
get_actual_variable_endpoint(Relation heapRel,
							 Relation indexRel,
							 ScanDirection indexscandir,
							 ScanKey scankeys,
							 int16 typLen,
							 bool typByVal,
							 TupleTableSlot *tableslot,
							 MemoryContext outercontext,
							 Datum *endpointDatum)
{
	bool		have_data = false;
	SnapshotData SnapshotNonVacuumable;
	IndexScanDesc index_scan;
	Buffer		vmbuffer = InvalidBuffer;
	ItemPointer tid;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	MemoryContext oldcontext;

	/*
	 * We use the index-only-scan machinery for this.  With mostly-static
	 * tables that's a win because it avoids a heap visit.  It's also a win
	 * for dynamic data, but the reason is less obvious; read on for details.
	 *
	 * In principle, we should scan the index with our current active
	 * snapshot, which is the best approximation we've got to what the query
	 * will see when executed.  But that won't be exact if a new snap is taken
	 * before running the query, and it can be very expensive if a lot of
	 * recently-dead or uncommitted rows exist at the beginning or end of the
	 * index (because we'll laboriously fetch each one and reject it).
	 * Instead, we use SnapshotNonVacuumable.  That will accept recently-dead
	 * and uncommitted rows as well as normal visible rows.  On the other
	 * hand, it will reject known-dead rows, and thus not give a bogus answer
	 * when the extreme value has been deleted (unless the deletion was quite
	 * recent); that case motivates not using SnapshotAny here.
	 *
	 * A crucial point here is that SnapshotNonVacuumable, with
	 * RecentGlobalXmin as horizon, yields the inverse of the condition that
	 * the indexscan will use to decide that index entries are killable (see
	 * heap_hot_search_buffer()).  Therefore, if the snapshot rejects a tuple
	 * (or more precisely, all tuples of a HOT chain) and we have to continue
	 * scanning past it, we know that the indexscan will mark that index entry
	 * killed.  That means that the next get_actual_variable_endpoint() call
	 * will not have to re-consider that index entry.  In this way we avoid
	 * repetitive work when this function is used a lot during planning.
	 *
	 * But using SnapshotNonVacuumable creates a hazard of its own.  In a
	 * recently-created index, some index entries may point at "broken" HOT
	 * chains in which not all the tuple versions contain data matching the
	 * index entry.  The live tuple version(s) certainly do match the index,
	 * but SnapshotNonVacuumable can accept recently-dead tuple versions that
	 * don't match.  Hence, if we took data from the selected heap tuple, we
	 * might get a bogus answer that's not close to the index extremal value,
	 * or could even be NULL.  We avoid this hazard because we take the data
	 * from the index entry not the heap.
	 */
	InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);

	index_scan = index_beginscan(heapRel, indexRel,
								 &SnapshotNonVacuumable,
								 1, 0);
	/* Set it up for index-only scan */
	index_scan->xs_want_itup = true;
	index_rescan(index_scan, scankeys, 1, NULL, 0);

	/*
	 * Fetch first/next tuple in specified direction.
	 *
	 * NOTE(review): this loop has no bound on how many dead/invisible index
	 * entries it may walk past; later PostgreSQL releases cap the number of
	 * heap page visits here --- confirm whether that matters for this tree.
	 */
	while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
	{
		if (!VM_ALL_VISIBLE(heapRel,
							ItemPointerGetBlockNumber(tid),
							&vmbuffer))
		{
			/* Rats, we have to visit the heap to check visibility */
			if (!index_fetch_heap(index_scan, tableslot))
				continue;		/* no visible tuple, try next index entry */

			/* We don't actually need the heap tuple for anything */
			ExecClearTuple(tableslot);

			/*
			 * We don't care whether there's more than one visible tuple in
			 * the HOT chain; if any are visible, that's good enough.
			 */
		}

		/*
		 * We expect that btree will return data in IndexTuple not HeapTuple
		 * format.  It's not lossy either.
		 */
		if (!index_scan->xs_itup)
			elog(ERROR, "no data returned for index-only scan");
		if (index_scan->xs_recheck)
			elog(ERROR, "unexpected recheck indication from btree");

		/* OK to deconstruct the index tuple */
		index_deform_tuple(index_scan->xs_itup,
						   index_scan->xs_itupdesc,
						   values, isnull);

		/* Shouldn't have got a null, but be careful */
		if (isnull[0])
			elog(ERROR, "found unexpected null value in index \"%s\"",
				 RelationGetRelationName(indexRel));

		/* Copy the index column value out to caller's context */
		oldcontext = MemoryContextSwitchTo(outercontext);
		*endpointDatum = datumCopy(values[0], typByVal, typLen);
		MemoryContextSwitchTo(oldcontext);
		have_data = true;
		break;
	}

	if (vmbuffer != InvalidBuffer)
		ReleaseBuffer(vmbuffer);
	index_endscan(index_scan);

	return have_data;
}

/*
 * find_join_input_rel
 *		Look up the input relation for a join.
 *
 * We assume that the input relation's RelOptInfo must have been constructed
 * already.
5902 */ 5903 static RelOptInfo * 5904 find_join_input_rel(PlannerInfo *root, Relids relids) 5905 { 5906 RelOptInfo *rel = NULL; 5907 5908 switch (bms_membership(relids)) 5909 { 5910 case BMS_EMPTY_SET: 5911 /* should not happen */ 5912 break; 5913 case BMS_SINGLETON: 5914 rel = find_base_rel(root, bms_singleton_member(relids)); 5915 break; 5916 case BMS_MULTIPLE: 5917 rel = find_join_rel(root, relids); 5918 break; 5919 } 5920 5921 if (rel == NULL) 5922 elog(ERROR, "could not find RelOptInfo for given relids"); 5923 5924 return rel; 5925 } 5926 5927 5928 /*------------------------------------------------------------------------- 5929 * 5930 * Index cost estimation functions 5931 * 5932 *------------------------------------------------------------------------- 5933 */ 5934 5935 /* 5936 * Extract the actual indexquals (as RestrictInfos) from an IndexClause list 5937 */ 5938 List * 5939 get_quals_from_indexclauses(List *indexclauses) 5940 { 5941 List *result = NIL; 5942 ListCell *lc; 5943 5944 foreach(lc, indexclauses) 5945 { 5946 IndexClause *iclause = lfirst_node(IndexClause, lc); 5947 ListCell *lc2; 5948 5949 foreach(lc2, iclause->indexquals) 5950 { 5951 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); 5952 5953 result = lappend(result, rinfo); 5954 } 5955 } 5956 return result; 5957 } 5958 5959 /* 5960 * Compute the total evaluation cost of the comparison operands in a list 5961 * of index qual expressions. Since we know these will be evaluated just 5962 * once per scan, there's no need to distinguish startup from per-row cost. 5963 * 5964 * This can be used either on the result of get_quals_from_indexclauses(), 5965 * or directly on an indexorderbys list. In both cases, we expect that the 5966 * index key expression is on the left side of binary clauses. 
 */
Cost
index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
{
	Cost		qual_arg_cost = 0;
	ListCell   *lc;

	foreach(lc, indexquals)
	{
		Expr	   *clause = (Expr *) lfirst(lc);
		Node	   *other_operand;
		QualCost	index_qual_cost;

		/*
		 * Index quals will have RestrictInfos, indexorderbys won't.  Look
		 * through RestrictInfo if present.
		 */
		if (IsA(clause, RestrictInfo))
			clause = ((RestrictInfo *) clause)->clause;

		if (IsA(clause, OpExpr))
		{
			OpExpr	   *op = (OpExpr *) clause;

			/* cost the right-hand (non-index) operand */
			other_operand = (Node *) lsecond(op->args);
		}
		else if (IsA(clause, RowCompareExpr))
		{
			RowCompareExpr *rc = (RowCompareExpr *) clause;

			/* cost the whole list of right-hand operands */
			other_operand = (Node *) rc->rargs;
		}
		else if (IsA(clause, ScalarArrayOpExpr))
		{
			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;

			other_operand = (Node *) lsecond(saop->args);
		}
		else if (IsA(clause, NullTest))
		{
			/* IS [NOT] NULL has no comparison operand to evaluate */
			other_operand = NULL;
		}
		else
		{
			elog(ERROR, "unsupported indexqual type: %d",
				 (int) nodeTag(clause));
			other_operand = NULL;	/* keep compiler quiet */
		}

		/* cost_qual_eval_node treats NULL as zero cost */
		cost_qual_eval_node(&index_qual_cost, other_operand, root);
		qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
	}
	return qual_arg_cost;
}

void
genericcostestimate(PlannerInfo *root,
					IndexPath *path,
					double loop_count,
					GenericCosts *costs)
{
	IndexOptInfo *index = path->indexinfo;
	List	   *indexQuals = get_quals_from_indexclauses(path->indexclauses);
	List	   *indexOrderBys = path->indexorderbys;
	Cost		indexStartupCost;
	Cost		indexTotalCost;
	Selectivity indexSelectivity;
	double		indexCorrelation;
	double		numIndexPages;
	double		numIndexTuples;
	double		spc_random_page_cost;
	double		num_sa_scans;
	double		num_outer_scans;
	double		num_scans;
	double
qual_op_cost; 6042 double qual_arg_cost; 6043 List *selectivityQuals; 6044 ListCell *l; 6045 6046 /* 6047 * If the index is partial, AND the index predicate with the explicitly 6048 * given indexquals to produce a more accurate idea of the index 6049 * selectivity. 6050 */ 6051 selectivityQuals = add_predicate_to_index_quals(index, indexQuals); 6052 6053 /* 6054 * Check for ScalarArrayOpExpr index quals, and estimate the number of 6055 * index scans that will be performed. 6056 */ 6057 num_sa_scans = 1; 6058 foreach(l, indexQuals) 6059 { 6060 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); 6061 6062 if (IsA(rinfo->clause, ScalarArrayOpExpr)) 6063 { 6064 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause; 6065 int alength = estimate_array_length(lsecond(saop->args)); 6066 6067 if (alength > 1) 6068 num_sa_scans *= alength; 6069 } 6070 } 6071 6072 /* Estimate the fraction of main-table tuples that will be visited */ 6073 indexSelectivity = clauselist_selectivity(root, selectivityQuals, 6074 index->rel->relid, 6075 JOIN_INNER, 6076 NULL); 6077 6078 /* 6079 * If caller didn't give us an estimate, estimate the number of index 6080 * tuples that will be visited. We do it in this rather peculiar-looking 6081 * way in order to get the right answer for partial indexes. 6082 */ 6083 numIndexTuples = costs->numIndexTuples; 6084 if (numIndexTuples <= 0.0) 6085 { 6086 numIndexTuples = indexSelectivity * index->rel->tuples; 6087 6088 /* 6089 * The above calculation counts all the tuples visited across all 6090 * scans induced by ScalarArrayOpExpr nodes. We want to consider the 6091 * average per-indexscan number, so adjust. This is a handy place to 6092 * round to integer, too. (If caller supplied tuple estimate, it's 6093 * responsible for handling these considerations.) 6094 */ 6095 numIndexTuples = rint(numIndexTuples / num_sa_scans); 6096 } 6097 6098 /* 6099 * We can bound the number of tuples by the index size in any case. 
Also, 6100 * always estimate at least one tuple is touched, even when 6101 * indexSelectivity estimate is tiny. 6102 */ 6103 if (numIndexTuples > index->tuples) 6104 numIndexTuples = index->tuples; 6105 if (numIndexTuples < 1.0) 6106 numIndexTuples = 1.0; 6107 6108 /* 6109 * Estimate the number of index pages that will be retrieved. 6110 * 6111 * We use the simplistic method of taking a pro-rata fraction of the total 6112 * number of index pages. In effect, this counts only leaf pages and not 6113 * any overhead such as index metapage or upper tree levels. 6114 * 6115 * In practice access to upper index levels is often nearly free because 6116 * those tend to stay in cache under load; moreover, the cost involved is 6117 * highly dependent on index type. We therefore ignore such costs here 6118 * and leave it to the caller to add a suitable charge if needed. 6119 */ 6120 if (index->pages > 1 && index->tuples > 1) 6121 numIndexPages = ceil(numIndexTuples * index->pages / index->tuples); 6122 else 6123 numIndexPages = 1.0; 6124 6125 /* fetch estimated page cost for tablespace containing index */ 6126 get_tablespace_page_costs(index->reltablespace, 6127 &spc_random_page_cost, 6128 NULL); 6129 6130 /* 6131 * Now compute the disk access costs. 6132 * 6133 * The above calculations are all per-index-scan. However, if we are in a 6134 * nestloop inner scan, we can expect the scan to be repeated (with 6135 * different search keys) for each row of the outer relation. Likewise, 6136 * ScalarArrayOpExpr quals result in multiple index scans. This creates 6137 * the potential for cache effects to reduce the number of disk page 6138 * fetches needed. We want to estimate the average per-scan I/O cost in 6139 * the presence of caching. 6140 * 6141 * We use the Mackert-Lohman formula (see costsize.c for details) to 6142 * estimate the total number of page fetches that occur. While this 6143 * wasn't what it was designed for, it seems a reasonable model anyway. 
6144 * Note that we are counting pages not tuples anymore, so we take N = T = 6145 * index size, as if there were one "tuple" per page. 6146 */ 6147 num_outer_scans = loop_count; 6148 num_scans = num_sa_scans * num_outer_scans; 6149 6150 if (num_scans > 1) 6151 { 6152 double pages_fetched; 6153 6154 /* total page fetches ignoring cache effects */ 6155 pages_fetched = numIndexPages * num_scans; 6156 6157 /* use Mackert and Lohman formula to adjust for cache effects */ 6158 pages_fetched = index_pages_fetched(pages_fetched, 6159 index->pages, 6160 (double) index->pages, 6161 root); 6162 6163 /* 6164 * Now compute the total disk access cost, and then report a pro-rated 6165 * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr, 6166 * since that's internal to the indexscan.) 6167 */ 6168 indexTotalCost = (pages_fetched * spc_random_page_cost) 6169 / num_outer_scans; 6170 } 6171 else 6172 { 6173 /* 6174 * For a single index scan, we just charge spc_random_page_cost per 6175 * page touched. 6176 */ 6177 indexTotalCost = numIndexPages * spc_random_page_cost; 6178 } 6179 6180 /* 6181 * CPU cost: any complex expressions in the indexquals will need to be 6182 * evaluated once at the start of the scan to reduce them to runtime keys 6183 * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple 6184 * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per 6185 * indexqual operator. Because we have numIndexTuples as a per-scan 6186 * number, we have to multiply by num_sa_scans to get the correct result 6187 * for ScalarArrayOpExpr cases. Similarly add in costs for any index 6188 * ORDER BY expressions. 6189 * 6190 * Note: this neglects the possible costs of rechecking lossy operators. 6191 * Detecting that that might be needed seems more expensive than it's 6192 * worth, though, considering all the other inaccuracies here ... 
6193 */ 6194 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) + 6195 index_other_operands_eval_cost(root, indexOrderBys); 6196 qual_op_cost = cpu_operator_cost * 6197 (list_length(indexQuals) + list_length(indexOrderBys)); 6198 6199 indexStartupCost = qual_arg_cost; 6200 indexTotalCost += qual_arg_cost; 6201 indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost); 6202 6203 /* 6204 * Generic assumption about index correlation: there isn't any. 6205 */ 6206 indexCorrelation = 0.0; 6207 6208 /* 6209 * Return everything to caller. 6210 */ 6211 costs->indexStartupCost = indexStartupCost; 6212 costs->indexTotalCost = indexTotalCost; 6213 costs->indexSelectivity = indexSelectivity; 6214 costs->indexCorrelation = indexCorrelation; 6215 costs->numIndexPages = numIndexPages; 6216 costs->numIndexTuples = numIndexTuples; 6217 costs->spc_random_page_cost = spc_random_page_cost; 6218 costs->num_sa_scans = num_sa_scans; 6219 } 6220 6221 /* 6222 * If the index is partial, add its predicate to the given qual list. 6223 * 6224 * ANDing the index predicate with the explicitly given indexquals produces 6225 * a more accurate idea of the index's selectivity. However, we need to be 6226 * careful not to insert redundant clauses, because clauselist_selectivity() 6227 * is easily fooled into computing a too-low selectivity estimate. Our 6228 * approach is to add only the predicate clause(s) that cannot be proven to 6229 * be implied by the given indexquals. This successfully handles cases such 6230 * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50". 6231 * There are many other cases where we won't detect redundancy, leading to a 6232 * too-low selectivity estimate, which will bias the system in favor of using 6233 * partial indexes where possible. That is not necessarily bad though. 6234 * 6235 * Note that indexQuals contains RestrictInfo nodes while the indpred 6236 * does not, so the output list will be mixed. 
 * This is OK for both
 * predicate_implied_by() and clauselist_selectivity(), but might be
 * problematic if the result were passed to other things.
 */
List *
add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
{
	List	   *predExtraQuals = NIL;
	ListCell   *lc;

	/* Nothing to do for a non-partial index */
	if (index->indpred == NIL)
		return indexQuals;

	foreach(lc, index->indpred)
	{
		Node	   *predQual = (Node *) lfirst(lc);
		List	   *oneQual = list_make1(predQual);

		/* Add only predicate clauses that the quals don't already imply */
		if (!predicate_implied_by(oneQual, indexQuals, false))
			predExtraQuals = list_concat(predExtraQuals, oneQual);
	}
	return list_concat(predExtraQuals, indexQuals);
}


/*
 * btcostestimate --- cost estimator for btree indexes.
 *
 * Refines genericcostestimate's numIndexTuples estimate using only the
 * "boundary quals", then adds btree-specific descent charges.
 */
void
btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
			   Cost *indexStartupCost, Cost *indexTotalCost,
			   Selectivity *indexSelectivity, double *indexCorrelation,
			   double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	GenericCosts costs;
	Oid			relid;
	AttrNumber	colnum;
	VariableStatData vardata;
	double		numIndexTuples;
	Cost		descentCost;
	List	   *indexBoundQuals;
	int			indexcol;
	bool		eqQualHere;
	bool		found_saop;
	bool		found_is_null_op;
	double		num_sa_scans;
	ListCell   *lc;

	/*
	 * For a btree scan, only leading '=' quals plus inequality quals for the
	 * immediately next attribute contribute to index selectivity (these are
	 * the "boundary quals" that determine the starting and stopping points of
	 * the index scan).  Additional quals can suppress visits to the heap, so
	 * it's OK to count them in indexSelectivity, but they should not count
	 * for estimating numIndexTuples.  So we must examine the given indexquals
	 * to find out which ones count as boundary quals.  We rely on the
	 * knowledge that they are given in index column order.
	 *
	 * For a RowCompareExpr, we consider only the first column, just as
	 * rowcomparesel() does.
	 *
	 * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
	 * index scans not one, but the ScalarArrayOpExpr's operator can be
	 * considered to act the same as it normally does.
	 */
	indexBoundQuals = NIL;
	indexcol = 0;
	eqQualHere = false;
	found_saop = false;
	found_is_null_op = false;
	num_sa_scans = 1;
	foreach(lc, path->indexclauses)
	{
		IndexClause *iclause = lfirst_node(IndexClause, lc);
		ListCell   *lc2;

		if (indexcol != iclause->indexcol)
		{
			/* Beginning of a new column's quals */
			if (!eqQualHere)
				break;			/* done if no '=' qual for indexcol */
			eqQualHere = false;
			indexcol++;
			if (indexcol != iclause->indexcol)
				break;			/* no quals at all for indexcol */
		}

		/* Examine each indexqual associated with this index clause */
		foreach(lc2, iclause->indexquals)
		{
			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
			Expr	   *clause = rinfo->clause;
			Oid			clause_op = InvalidOid;
			int			op_strategy;

			if (IsA(clause, OpExpr))
			{
				OpExpr	   *op = (OpExpr *) clause;

				clause_op = op->opno;
			}
			else if (IsA(clause, RowCompareExpr))
			{
				RowCompareExpr *rc = (RowCompareExpr *) clause;

				clause_op = linitial_oid(rc->opnos);
			}
			else if (IsA(clause, ScalarArrayOpExpr))
			{
				ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
				Node	   *other_operand = (Node *) lsecond(saop->args);
				int			alength = estimate_array_length(other_operand);

				clause_op = saop->opno;
				found_saop = true;
				/* count number of SA scans induced by indexBoundQuals only */
				if (alength > 1)
					num_sa_scans *= alength;
			}
			else if (IsA(clause, NullTest))
			{
				NullTest   *nt = (NullTest *) clause;

				if (nt->nulltesttype == IS_NULL)
				{
					found_is_null_op = true;
					/* IS NULL is like = for selectivity purposes */
					eqQualHere = true;
				}
			}
			else
				elog(ERROR, "unsupported indexqual type: %d",
					 (int) nodeTag(clause));

			/* check for equality operator */
			if (OidIsValid(clause_op))
			{
				op_strategy = get_op_opfamily_strategy(clause_op,
													   index->opfamily[indexcol]);
				Assert(op_strategy != 0);	/* not a member of opfamily?? */
				if (op_strategy == BTEqualStrategyNumber)
					eqQualHere = true;
			}

			indexBoundQuals = lappend(indexBoundQuals, rinfo);
		}
	}

	/*
	 * If index is unique and we found an '=' clause for each column, we can
	 * just assume numIndexTuples = 1 and skip the expensive
	 * clauselist_selectivity calculations.  However, a ScalarArrayOp or
	 * NullTest invalidates that theory, even though it sets eqQualHere.
	 */
	if (index->unique &&
		indexcol == index->nkeycolumns - 1 &&
		eqQualHere &&
		!found_saop &&
		!found_is_null_op)
		numIndexTuples = 1.0;
	else
	{
		List	   *selectivityQuals;
		Selectivity btreeSelectivity;

		/*
		 * If the index is partial, AND the index predicate with the
		 * index-bound quals to produce a more accurate idea of the number of
		 * rows covered by the bound conditions.
		 */
		selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);

		btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
												  index->rel->relid,
												  JOIN_INNER,
												  NULL);
		numIndexTuples = btreeSelectivity * index->rel->tuples;

		/*
		 * As in genericcostestimate(), we have to adjust for any
		 * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
		 * to integer.
		 */
		numIndexTuples = rint(numIndexTuples / num_sa_scans);
	}

	/*
	 * Now do generic index cost estimation.
	 */
	MemSet(&costs, 0, sizeof(costs));
	costs.numIndexTuples = numIndexTuples;

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * Add a CPU-cost component to represent the costs of initial btree
	 * descent.  We don't charge any I/O cost for touching upper btree levels,
	 * since they tend to stay in cache, but we still have to do about log2(N)
	 * comparisons to descend a btree of N leaf tuples.  We charge one
	 * cpu_operator_cost per comparison.
	 *
	 * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
	 * ones after the first one are not startup cost so far as the overall
	 * plan is concerned, so add them only to "total" cost.
	 */
	if (index->tuples > 1)		/* avoid computing log(0) */
	{
		descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
		costs.indexStartupCost += descentCost;
		costs.indexTotalCost += costs.num_sa_scans * descentCost;
	}

	/*
	 * Even though we're not charging I/O cost for touching upper btree pages,
	 * it's still reasonable to charge some CPU cost per page descended
	 * through.  Moreover, if we had no such charge at all, bloated indexes
	 * would appear to have the same search cost as unbloated ones, at least
	 * in cases where only a single leaf page is expected to be visited.  This
	 * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
	 * touched.  The number of such pages is btree tree height plus one (ie,
	 * we charge for the leaf page too).  As above, charge once per SA scan.
	 */
	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
	costs.indexStartupCost += descentCost;
	costs.indexTotalCost += costs.num_sa_scans * descentCost;

	/*
	 * If we can get an estimate of the first column's ordering correlation C
	 * from pg_statistic, estimate the index correlation as C for a
	 * single-column index, or C * 0.75 for multiple columns. (The idea here
	 * is that multiple columns dilute the importance of the first column's
	 * ordering, but don't negate it entirely.  Before 8.0 we divided the
	 * correlation by the number of columns, but that seems too strong.)
	 */
	MemSet(&vardata, 0, sizeof(vardata));

	if (index->indexkeys[0] != 0)
	{
		/* Simple variable --- look to stats for the underlying table */
		RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);

		Assert(rte->rtekind == RTE_RELATION);
		relid = rte->relid;
		Assert(relid != InvalidOid);
		colnum = index->indexkeys[0];

		if (get_relation_stats_hook &&
			(*get_relation_stats_hook) (root, rte, colnum, &vardata))
		{
			/*
			 * The hook took control of acquiring a stats tuple.  If it did
			 * supply a tuple, it'd better have supplied a freefunc.
			 */
			if (HeapTupleIsValid(vardata.statsTuple) &&
				!vardata.freefunc)
				elog(ERROR, "no function provided to release variable stats with");
		}
		else
		{
			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
												 ObjectIdGetDatum(relid),
												 Int16GetDatum(colnum),
												 BoolGetDatum(rte->inh));
			vardata.freefunc = ReleaseSysCache;
		}
	}
	else
	{
		/* Expression --- maybe there are stats for the index itself */
		relid = index->indexoid;
		colnum = 1;

		if (get_index_stats_hook &&
			(*get_index_stats_hook) (root, relid, colnum, &vardata))
		{
			/*
			 * The hook took control of acquiring a stats tuple.  If it did
			 * supply a tuple, it'd better have supplied a freefunc.
			 */
			if (HeapTupleIsValid(vardata.statsTuple) &&
				!vardata.freefunc)
				elog(ERROR, "no function provided to release variable stats with");
		}
		else
		{
			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
												 ObjectIdGetDatum(relid),
												 Int16GetDatum(colnum),
												 BoolGetDatum(false));
			vardata.freefunc = ReleaseSysCache;
		}
	}

	if (HeapTupleIsValid(vardata.statsTuple))
	{
		Oid			sortop;
		AttStatsSlot sslot;

		sortop = get_opfamily_member(index->opfamily[0],
									 index->opcintype[0],
									 index->opcintype[0],
									 BTLessStrategyNumber);
		if (OidIsValid(sortop) &&
			get_attstatsslot(&sslot, vardata.statsTuple,
							 STATISTIC_KIND_CORRELATION, sortop,
							 ATTSTATSSLOT_NUMBERS))
		{
			double		varCorrelation;

			Assert(sslot.nnumbers == 1);
			varCorrelation = sslot.numbers[0];

			/* DESC index column reverses the correlation's sign */
			if (index->reverse_sort[0])
				varCorrelation = -varCorrelation;

			if (index->nkeycolumns > 1)
				costs.indexCorrelation = varCorrelation * 0.75;
			else
				costs.indexCorrelation = varCorrelation;

			free_attstatsslot(&sslot);
		}
	}

	ReleaseVariableStats(vardata);

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}

/*
 * hashcostestimate --- cost estimator for hash indexes.
 */
void
hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				 Cost *indexStartupCost, Cost *indexTotalCost,
				 Selectivity *indexSelectivity, double *indexCorrelation,
				 double *indexPages)
{
	GenericCosts costs;

	MemSet(&costs, 0, sizeof(costs));

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * A hash index has no descent costs as such, since the index AM can go
	 * directly to the target
	 * bucket after computing the hash value.  There
	 * are a couple of other hash-specific costs that we could conceivably add
	 * here, though:
	 *
	 * Ideally we'd charge spc_random_page_cost for each page in the target
	 * bucket, not just the numIndexPages pages that genericcostestimate
	 * thought we'd visit.  However in most cases we don't know which bucket
	 * that will be.  There's no point in considering the average bucket size
	 * because the hash AM makes sure that's always one page.
	 *
	 * Likewise, we could consider charging some CPU for each index tuple in
	 * the bucket, if we knew how many there were.  But the per-tuple cost is
	 * just a hash value comparison, not a general datatype-dependent
	 * comparison, so any such charge ought to be quite a bit less than
	 * cpu_operator_cost; which makes it probably not worth worrying about.
	 *
	 * A bigger issue is that chance hash-value collisions will result in
	 * wasted probes into the heap.  We don't currently attempt to model this
	 * cost on the grounds that it's rare, but maybe it's not rare enough.
	 * (Any fix for this ought to consider the generic lossy-operator problem,
	 * though; it's not entirely hash-specific.)
	 */

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}

/*
 * gistcostestimate --- cost estimator for GiST indexes.
 */
void
gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				 Cost *indexStartupCost, Cost *indexTotalCost,
				 Selectivity *indexSelectivity, double *indexCorrelation,
				 double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	GenericCosts costs;
	Cost		descentCost;

	MemSet(&costs, 0, sizeof(costs));

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * We model index descent costs similarly to those for btree, but to do
	 * that we first need an idea of the tree height.  We somewhat arbitrarily
	 * assume that the fanout is 100, meaning the tree height is at most
	 * log100(index->pages).
	 *
	 * Although this computation isn't really expensive enough to require
	 * caching, we might as well use index->tree_height to cache it.
	 */
	if (index->tree_height < 0) /* unknown? */
	{
		if (index->pages > 1)	/* avoid computing log(0) */
			index->tree_height = (int) (log(index->pages) / log(100.0));
		else
			index->tree_height = 0;
	}

	/*
	 * Add a CPU-cost component to represent the costs of initial descent. We
	 * just use log(N) here not log2(N) since the branching factor isn't
	 * necessarily two anyway.  As for btree, charge once per SA scan.
	 */
	if (index->tuples > 1)		/* avoid computing log(0) */
	{
		descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
		costs.indexStartupCost += descentCost;
		costs.indexTotalCost += costs.num_sa_scans * descentCost;
	}

	/*
	 * Likewise add a per-page charge, calculated the same as for btrees.
	 */
	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
	costs.indexStartupCost += descentCost;
	costs.indexTotalCost += costs.num_sa_scans * descentCost;

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}

/*
 * spgcostestimate --- cost estimator for SP-GiST indexes.
 */
void
spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				Cost *indexStartupCost, Cost *indexTotalCost,
				Selectivity *indexSelectivity, double *indexCorrelation,
				double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	GenericCosts costs;
	Cost		descentCost;

	MemSet(&costs, 0, sizeof(costs));

	genericcostestimate(root, path, loop_count, &costs);

	/*
	 * We model index descent costs similarly to those for btree, but to do
	 * that we first need an idea of the tree height.  We somewhat arbitrarily
	 * assume that the fanout is 100, meaning the tree height is at most
	 * log100(index->pages).
	 *
	 * Although this computation isn't really expensive enough to require
	 * caching, we might as well use index->tree_height to cache it.
	 */
	if (index->tree_height < 0) /* unknown? */
	{
		if (index->pages > 1)	/* avoid computing log(0) */
			index->tree_height = (int) (log(index->pages) / log(100.0));
		else
			index->tree_height = 0;
	}

	/*
	 * Add a CPU-cost component to represent the costs of initial descent. We
	 * just use log(N) here not log2(N) since the branching factor isn't
	 * necessarily two anyway.  As for btree, charge once per SA scan.
	 */
	if (index->tuples > 1)		/* avoid computing log(0) */
	{
		descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
		costs.indexStartupCost += descentCost;
		costs.indexTotalCost += costs.num_sa_scans * descentCost;
	}

	/*
	 * Likewise add a per-page charge, calculated the same as for btrees.
	 */
	descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
	costs.indexStartupCost += descentCost;
	costs.indexTotalCost += costs.num_sa_scans * descentCost;

	*indexStartupCost = costs.indexStartupCost;
	*indexTotalCost = costs.indexTotalCost;
	*indexSelectivity = costs.indexSelectivity;
	*indexCorrelation = costs.indexCorrelation;
	*indexPages = costs.numIndexPages;
}


/*
 * Support routines for gincostestimate
 */

/* Working state for accumulating per-qual GIN search statistics */
typedef struct
{
	bool		attHasFullScan[INDEX_MAX_KEYS];		/* column needs full index scan */
	bool		attHasNormalScan[INDEX_MAX_KEYS];	/* column has a normal (entry) scan */
	double		partialEntries; /* number of partial-match entries */
	double		exactEntries;	/* number of exact-match entries */
	double		searchEntries;	/* total entries searched for */
	double		arrayScans;		/* # of index scans forced by SAOP arrays */
} GinQualCounts;

/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN query, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
 */
static bool
gincost_pattern(IndexOptInfo *index, int indexcol,
				Oid clause_op, Datum query,
				GinQualCounts *counts)
{
	FmgrInfo	flinfo;
	Oid			extractProcOid;
	Oid			collation;
	int			strategy_op;
	Oid			lefttype,
				righttype;
	int32		nentries = 0;
	bool	   *partial_matches = NULL;
	Pointer    *extra_data = NULL;
	bool	   *nullFlags = NULL;
	int32		searchMode = GIN_SEARCH_MODE_DEFAULT;
	int32		i;

	Assert(indexcol < index->nkeycolumns);

	/*
	 * Get the operator's strategy number and declared input data types within
	 * the index opfamily.  (We don't need the latter, but we use
	 * get_op_opfamily_properties because it will throw error if it fails to
	 * find a matching pg_amop entry.)
	 */
	get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
							   &strategy_op, &lefttype, &righttype);

	/*
	 * GIN always uses the "default" support functions, which are those with
	 * lefttype == righttype == the opclass' opcintype (see
	 * IndexSupportInitialize in relcache.c).
	 */
	extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
									   index->opcintype[indexcol],
									   index->opcintype[indexcol],
									   GIN_EXTRACTQUERY_PROC);

	if (!OidIsValid(extractProcOid))
	{
		/* should not happen; throw same error as index_getprocinfo */
		elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
			 GIN_EXTRACTQUERY_PROC, indexcol + 1,
			 get_rel_name(index->indexoid));
	}

	/*
	 * Choose collation to pass to extractProc (should match initGinState).
	 */
	if (OidIsValid(index->indexcollations[indexcol]))
		collation = index->indexcollations[indexcol];
	else
		collation = DEFAULT_COLLATION_OID;

	fmgr_info(extractProcOid, &flinfo);

	set_fn_opclass_options(&flinfo, index->opclassoptions[indexcol]);

	FunctionCall7Coll(&flinfo,
					  collation,
					  query,
					  PointerGetDatum(&nentries),
					  UInt16GetDatum(strategy_op),
					  PointerGetDatum(&partial_matches),
					  PointerGetDatum(&extra_data),
					  PointerGetDatum(&nullFlags),
					  PointerGetDatum(&searchMode));

	if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
	{
		/* No match is possible */
		return false;
	}

	for (i = 0; i < nentries; i++)
	{
		/*
		 * For partial match we haven't any information to estimate number of
		 * matched entries in index, so, we just estimate it as 100
		 */
		if (partial_matches && partial_matches[i])
			counts->partialEntries += 100;
		else
			counts->exactEntries++;

		counts->searchEntries++;
	}

	if (searchMode == GIN_SEARCH_MODE_DEFAULT)
	{
		counts->attHasNormalScan[indexcol] = true;
	}
	else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
	{
		/* Treat "include empty" like an exact-match item */
		counts->attHasNormalScan[indexcol] = true;
		counts->exactEntries++;
		counts->searchEntries++;
	}
	else
	{
		/* It's GIN_SEARCH_MODE_ALL */
		counts->attHasFullScan[indexcol] = true;
	}

	return true;
}

/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN index clause, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
6858 */ 6859 static bool 6860 gincost_opexpr(PlannerInfo *root, 6861 IndexOptInfo *index, 6862 int indexcol, 6863 OpExpr *clause, 6864 GinQualCounts *counts) 6865 { 6866 Oid clause_op = clause->opno; 6867 Node *operand = (Node *) lsecond(clause->args); 6868 6869 /* aggressively reduce to a constant, and look through relabeling */ 6870 operand = estimate_expression_value(root, operand); 6871 6872 if (IsA(operand, RelabelType)) 6873 operand = (Node *) ((RelabelType *) operand)->arg; 6874 6875 /* 6876 * It's impossible to call extractQuery method for unknown operand. So 6877 * unless operand is a Const we can't do much; just assume there will be 6878 * one ordinary search entry from the operand at runtime. 6879 */ 6880 if (!IsA(operand, Const)) 6881 { 6882 counts->exactEntries++; 6883 counts->searchEntries++; 6884 return true; 6885 } 6886 6887 /* If Const is null, there can be no matches */ 6888 if (((Const *) operand)->constisnull) 6889 return false; 6890 6891 /* Otherwise, apply extractQuery and get the actual term counts */ 6892 return gincost_pattern(index, indexcol, clause_op, 6893 ((Const *) operand)->constvalue, 6894 counts); 6895 } 6896 6897 /* 6898 * Estimate the number of index terms that need to be searched for while 6899 * testing the given GIN index clause, and increment the counts in *counts 6900 * appropriately. If the query is unsatisfiable, return false. 6901 * 6902 * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime, 6903 * each of which involves one value from the RHS array, plus all the 6904 * non-array quals (if any). To model this, we average the counts across 6905 * the RHS elements, and add the averages to the counts in *counts (which 6906 * correspond to per-indexscan costs). We also multiply counts->arrayScans 6907 * by N, causing gincostestimate to scale up its estimates accordingly. 
 */
static bool
gincost_scalararrayopexpr(PlannerInfo *root,
						  IndexOptInfo *index,
						  int indexcol,
						  ScalarArrayOpExpr *clause,
						  double numIndexEntries,
						  GinQualCounts *counts)
{
	Oid			clause_op = clause->opno;
	Node	   *rightop = (Node *) lsecond(clause->args);
	ArrayType  *arrayval;
	int16		elmlen;
	bool		elmbyval;
	char		elmalign;
	int			numElems;
	Datum	   *elemValues;
	bool	   *elemNulls;
	GinQualCounts arraycounts;
	int			numPossible = 0;	/* count of satisfiable array elements */
	int			i;

	Assert(clause->useOr);

	/* aggressively reduce to a constant, and look through relabeling */
	rightop = estimate_expression_value(root, rightop);

	if (IsA(rightop, RelabelType))
		rightop = (Node *) ((RelabelType *) rightop)->arg;

	/*
	 * It's impossible to call extractQuery method for unknown operand. So
	 * unless operand is a Const we can't do much; just assume there will be
	 * one ordinary search entry from each array entry at runtime, and fall
	 * back on a probably-bad estimate of the number of array entries.
	 */
	if (!IsA(rightop, Const))
	{
		counts->exactEntries++;
		counts->searchEntries++;
		counts->arrayScans *= estimate_array_length(rightop);
		return true;
	}

	/* If Const is null, there can be no matches */
	if (((Const *) rightop)->constisnull)
		return false;

	/* Otherwise, extract the array elements and iterate over them */
	arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
	get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
						 &elmlen, &elmbyval, &elmalign);
	deconstruct_array(arrayval,
					  ARR_ELEMTYPE(arrayval),
					  elmlen, elmbyval, elmalign,
					  &elemValues, &elemNulls, &numElems);

	memset(&arraycounts, 0, sizeof(arraycounts));

	for (i = 0; i < numElems; i++)
	{
		GinQualCounts elemcounts;

		/* NULL can't match anything, so ignore, as the executor will */
		if (elemNulls[i])
			continue;

		/* Otherwise, apply extractQuery and get the actual term counts */
		memset(&elemcounts, 0, sizeof(elemcounts));

		if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
							&elemcounts))
		{
			/* We ignore array elements that are unsatisfiable patterns */
			numPossible++;

			if (elemcounts.attHasFullScan[indexcol] &&
				!elemcounts.attHasNormalScan[indexcol])
			{
				/*
				 * Full index scan will be required.  We treat this as if
				 * every key in the index had been listed in the query; is
				 * that reasonable?
				 */
				elemcounts.partialEntries = 0;
				elemcounts.exactEntries = numIndexEntries;
				elemcounts.searchEntries = numIndexEntries;
			}
			arraycounts.partialEntries += elemcounts.partialEntries;
			arraycounts.exactEntries += elemcounts.exactEntries;
			arraycounts.searchEntries += elemcounts.searchEntries;
		}
	}

	if (numPossible == 0)
	{
		/* No satisfiable patterns in the array */
		return false;
	}

	/*
	 * Now add the averages to the global counts.  This will give us an
	 * estimate of the average number of terms searched for in each indexscan,
	 * including contributions from both array and non-array quals.
	 */
	counts->partialEntries += arraycounts.partialEntries / numPossible;
	counts->exactEntries += arraycounts.exactEntries / numPossible;
	counts->searchEntries += arraycounts.searchEntries / numPossible;

	counts->arrayScans *= numPossible;

	return true;
}

/*
 * GIN has search behavior completely different from other index types
 */
void
gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				Cost *indexStartupCost, Cost *indexTotalCost,
				Selectivity *indexSelectivity, double *indexCorrelation,
				double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	List	   *indexQuals = get_quals_from_indexclauses(path->indexclauses);
	List	   *selectivityQuals;
	double		numPages = index->pages,
				numTuples = index->tuples;
	double		numEntryPages,
				numDataPages,
				numPendingPages,
				numEntries;
	GinQualCounts counts;
	bool		matchPossible;
	bool		fullIndexScan;
	double		partialScale;
	double		entryPagesFetched,
				dataPagesFetched,
				dataPagesFetchedBySel;
	double		qual_op_cost,
				qual_arg_cost,
				spc_random_page_cost,
				outer_scans;
	Relation	indexRel;
	GinStatsData ginStats;
	ListCell   *lc;
	int			i;

	/*
	 * Obtain
statistical information from the meta page, if possible. Else 7058 * set ginStats to zeroes, and we'll cope below. 7059 */ 7060 if (!index->hypothetical) 7061 { 7062 /* Lock should have already been obtained in plancat.c */ 7063 indexRel = index_open(index->indexoid, NoLock); 7064 ginGetStats(indexRel, &ginStats); 7065 index_close(indexRel, NoLock); 7066 } 7067 else 7068 { 7069 memset(&ginStats, 0, sizeof(ginStats)); 7070 } 7071 7072 /* 7073 * Assuming we got valid (nonzero) stats at all, nPendingPages can be 7074 * trusted, but the other fields are data as of the last VACUUM. We can 7075 * scale them up to account for growth since then, but that method only 7076 * goes so far; in the worst case, the stats might be for a completely 7077 * empty index, and scaling them will produce pretty bogus numbers. 7078 * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if 7079 * it's grown more than that, fall back to estimating things only from the 7080 * assumed-accurate index size. But we'll trust nPendingPages in any case 7081 * so long as it's not clearly insane, ie, more than the index size. 7082 */ 7083 if (ginStats.nPendingPages < numPages) 7084 numPendingPages = ginStats.nPendingPages; 7085 else 7086 numPendingPages = 0; 7087 7088 if (numPages > 0 && ginStats.nTotalPages <= numPages && 7089 ginStats.nTotalPages > numPages / 4 && 7090 ginStats.nEntryPages > 0 && ginStats.nEntries > 0) 7091 { 7092 /* 7093 * OK, the stats seem close enough to sane to be trusted. But we 7094 * still need to scale them by the ratio numPages / nTotalPages to 7095 * account for growth since the last VACUUM. 
7096 */ 7097 double scale = numPages / ginStats.nTotalPages; 7098 7099 numEntryPages = ceil(ginStats.nEntryPages * scale); 7100 numDataPages = ceil(ginStats.nDataPages * scale); 7101 numEntries = ceil(ginStats.nEntries * scale); 7102 /* ensure we didn't round up too much */ 7103 numEntryPages = Min(numEntryPages, numPages - numPendingPages); 7104 numDataPages = Min(numDataPages, 7105 numPages - numPendingPages - numEntryPages); 7106 } 7107 else 7108 { 7109 /* 7110 * We might get here because it's a hypothetical index, or an index 7111 * created pre-9.1 and never vacuumed since upgrading (in which case 7112 * its stats would read as zeroes), or just because it's grown too 7113 * much since the last VACUUM for us to put our faith in scaling. 7114 * 7115 * Invent some plausible internal statistics based on the index page 7116 * count (and clamp that to at least 10 pages, just in case). We 7117 * estimate that 90% of the index is entry pages, and the rest is data 7118 * pages. Estimate 100 entries per entry page; this is rather bogus 7119 * since it'll depend on the size of the keys, but it's more robust 7120 * than trying to predict the number of entries per heap tuple. 7121 */ 7122 numPages = Max(numPages, 10); 7123 numEntryPages = floor((numPages - numPendingPages) * 0.90); 7124 numDataPages = numPages - numPendingPages - numEntryPages; 7125 numEntries = floor(numEntryPages * 100); 7126 } 7127 7128 /* In an empty index, numEntries could be zero. Avoid divide-by-zero */ 7129 if (numEntries < 1) 7130 numEntries = 1; 7131 7132 /* 7133 * If the index is partial, AND the index predicate with the index-bound 7134 * quals to produce a more accurate idea of the number of rows covered by 7135 * the bound conditions. 
7136 */ 7137 selectivityQuals = add_predicate_to_index_quals(index, indexQuals); 7138 7139 /* Estimate the fraction of main-table tuples that will be visited */ 7140 *indexSelectivity = clauselist_selectivity(root, selectivityQuals, 7141 index->rel->relid, 7142 JOIN_INNER, 7143 NULL); 7144 7145 /* fetch estimated page cost for tablespace containing index */ 7146 get_tablespace_page_costs(index->reltablespace, 7147 &spc_random_page_cost, 7148 NULL); 7149 7150 /* 7151 * Generic assumption about index correlation: there isn't any. 7152 */ 7153 *indexCorrelation = 0.0; 7154 7155 /* 7156 * Examine quals to estimate number of search entries & partial matches 7157 */ 7158 memset(&counts, 0, sizeof(counts)); 7159 counts.arrayScans = 1; 7160 matchPossible = true; 7161 7162 foreach(lc, path->indexclauses) 7163 { 7164 IndexClause *iclause = lfirst_node(IndexClause, lc); 7165 ListCell *lc2; 7166 7167 foreach(lc2, iclause->indexquals) 7168 { 7169 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); 7170 Expr *clause = rinfo->clause; 7171 7172 if (IsA(clause, OpExpr)) 7173 { 7174 matchPossible = gincost_opexpr(root, 7175 index, 7176 iclause->indexcol, 7177 (OpExpr *) clause, 7178 &counts); 7179 if (!matchPossible) 7180 break; 7181 } 7182 else if (IsA(clause, ScalarArrayOpExpr)) 7183 { 7184 matchPossible = gincost_scalararrayopexpr(root, 7185 index, 7186 iclause->indexcol, 7187 (ScalarArrayOpExpr *) clause, 7188 numEntries, 7189 &counts); 7190 if (!matchPossible) 7191 break; 7192 } 7193 else 7194 { 7195 /* shouldn't be anything else for a GIN index */ 7196 elog(ERROR, "unsupported GIN indexqual type: %d", 7197 (int) nodeTag(clause)); 7198 } 7199 } 7200 } 7201 7202 /* Fall out if there were any provably-unsatisfiable quals */ 7203 if (!matchPossible) 7204 { 7205 *indexStartupCost = 0; 7206 *indexTotalCost = 0; 7207 *indexSelectivity = 0; 7208 return; 7209 } 7210 7211 /* 7212 * If attribute has a full scan and at the same time doesn't have normal 7213 * scan, then we'll have to 
scan all non-null entries of that attribute. 7214 * Currently, we don't have per-attribute statistics for GIN. Thus, we 7215 * must assume the whole GIN index has to be scanned in this case. 7216 */ 7217 fullIndexScan = false; 7218 for (i = 0; i < index->nkeycolumns; i++) 7219 { 7220 if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i]) 7221 { 7222 fullIndexScan = true; 7223 break; 7224 } 7225 } 7226 7227 if (fullIndexScan || indexQuals == NIL) 7228 { 7229 /* 7230 * Full index scan will be required. We treat this as if every key in 7231 * the index had been listed in the query; is that reasonable? 7232 */ 7233 counts.partialEntries = 0; 7234 counts.exactEntries = numEntries; 7235 counts.searchEntries = numEntries; 7236 } 7237 7238 /* Will we have more than one iteration of a nestloop scan? */ 7239 outer_scans = loop_count; 7240 7241 /* 7242 * Compute cost to begin scan, first of all, pay attention to pending 7243 * list. 7244 */ 7245 entryPagesFetched = numPendingPages; 7246 7247 /* 7248 * Estimate number of entry pages read. We need to do 7249 * counts.searchEntries searches. Use a power function as it should be, 7250 * but tuples on leaf pages usually is much greater. Here we include all 7251 * searches in entry tree, including search of first entry in partial 7252 * match algorithm 7253 */ 7254 entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15))); 7255 7256 /* 7257 * Add an estimate of entry pages read by partial match algorithm. It's a 7258 * scan over leaf pages in entry tree. We haven't any useful stats here, 7259 * so estimate it as proportion. Because counts.partialEntries is really 7260 * pretty bogus (see code above), it's possible that it is more than 7261 * numEntries; clamp the proportion to ensure sanity. 
7262 */ 7263 partialScale = counts.partialEntries / numEntries; 7264 partialScale = Min(partialScale, 1.0); 7265 7266 entryPagesFetched += ceil(numEntryPages * partialScale); 7267 7268 /* 7269 * Partial match algorithm reads all data pages before doing actual scan, 7270 * so it's a startup cost. Again, we haven't any useful stats here, so 7271 * estimate it as proportion. 7272 */ 7273 dataPagesFetched = ceil(numDataPages * partialScale); 7274 7275 /* 7276 * Calculate cache effects if more than one scan due to nestloops or array 7277 * quals. The result is pro-rated per nestloop scan, but the array qual 7278 * factor shouldn't be pro-rated (compare genericcostestimate). 7279 */ 7280 if (outer_scans > 1 || counts.arrayScans > 1) 7281 { 7282 entryPagesFetched *= outer_scans * counts.arrayScans; 7283 entryPagesFetched = index_pages_fetched(entryPagesFetched, 7284 (BlockNumber) numEntryPages, 7285 numEntryPages, root); 7286 entryPagesFetched /= outer_scans; 7287 dataPagesFetched *= outer_scans * counts.arrayScans; 7288 dataPagesFetched = index_pages_fetched(dataPagesFetched, 7289 (BlockNumber) numDataPages, 7290 numDataPages, root); 7291 dataPagesFetched /= outer_scans; 7292 } 7293 7294 /* 7295 * Here we use random page cost because logically-close pages could be far 7296 * apart on disk. 7297 */ 7298 *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost; 7299 7300 /* 7301 * Now compute the number of data pages fetched during the scan. 7302 * 7303 * We assume every entry to have the same number of items, and that there 7304 * is no overlap between them. (XXX: tsvector and array opclasses collect 7305 * statistics on the frequency of individual keys; it would be nice to use 7306 * those here.) 
7307 */ 7308 dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries); 7309 7310 /* 7311 * If there is a lot of overlap among the entries, in particular if one of 7312 * the entries is very frequent, the above calculation can grossly 7313 * under-estimate. As a simple cross-check, calculate a lower bound based 7314 * on the overall selectivity of the quals. At a minimum, we must read 7315 * one item pointer for each matching entry. 7316 * 7317 * The width of each item pointer varies, based on the level of 7318 * compression. We don't have statistics on that, but an average of 7319 * around 3 bytes per item is fairly typical. 7320 */ 7321 dataPagesFetchedBySel = ceil(*indexSelectivity * 7322 (numTuples / (BLCKSZ / 3))); 7323 if (dataPagesFetchedBySel > dataPagesFetched) 7324 dataPagesFetched = dataPagesFetchedBySel; 7325 7326 /* Account for cache effects, the same as above */ 7327 if (outer_scans > 1 || counts.arrayScans > 1) 7328 { 7329 dataPagesFetched *= outer_scans * counts.arrayScans; 7330 dataPagesFetched = index_pages_fetched(dataPagesFetched, 7331 (BlockNumber) numDataPages, 7332 numDataPages, root); 7333 dataPagesFetched /= outer_scans; 7334 } 7335 7336 /* And apply random_page_cost as the cost per page */ 7337 *indexTotalCost = *indexStartupCost + 7338 dataPagesFetched * spc_random_page_cost; 7339 7340 /* 7341 * Add on index qual eval costs, much as in genericcostestimate. But we 7342 * can disregard indexorderbys, since GIN doesn't support those. 
	 */
	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
	qual_op_cost = cpu_operator_cost * list_length(indexQuals);

	*indexStartupCost += qual_arg_cost;
	*indexTotalCost += qual_arg_cost;
	*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
	*indexPages = dataPagesFetched;
}

/*
 * BRIN has search behavior completely different from other index types
 */
void
brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
				 Cost *indexStartupCost, Cost *indexTotalCost,
				 Selectivity *indexSelectivity, double *indexCorrelation,
				 double *indexPages)
{
	IndexOptInfo *index = path->indexinfo;
	List	   *indexQuals = get_quals_from_indexclauses(path->indexclauses);
	double		numPages = index->pages;
	RelOptInfo *baserel = index->rel;
	RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
	Cost		spc_seq_page_cost;
	Cost		spc_random_page_cost;
	double		qual_arg_cost;
	double		qualSelectivity;
	BrinStatsData statsData;
	double		indexRanges;
	double		minimalRanges;
	double		estimatedRanges;
	double		selec;
	Relation	indexRel;
	ListCell   *l;
	VariableStatData vardata;

	Assert(rte->rtekind == RTE_RELATION);

	/* fetch estimated page cost for the tablespace containing the index */
	get_tablespace_page_costs(index->reltablespace,
							  &spc_random_page_cost,
							  &spc_seq_page_cost);

	/*
	 * Obtain some data from the index itself, if possible.  Otherwise invent
	 * some plausible internal statistics based on the relation page count.
	 */
	if (!index->hypothetical)
	{
		/*
		 * A lock should have already been obtained on the index in plancat.c.
		 */
		indexRel = index_open(index->indexoid, NoLock);
		brinGetStats(indexRel, &statsData);
		index_close(indexRel, NoLock);

		/* work out the actual number of ranges in the index */
		indexRanges = Max(ceil((double) baserel->pages /
							   statsData.pagesPerRange), 1.0);
	}
	else
	{
		/*
		 * Assume default number of pages per range, and estimate the number
		 * of ranges based on that.
		 */
		indexRanges = Max(ceil((double) baserel->pages /
							   BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);

		statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
		statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
	}

	/*
	 * Compute index correlation
	 *
	 * Because we can use all index quals equally when scanning, we can use
	 * the largest correlation (in absolute value) among columns used by the
	 * query.  Start at zero, the worst possible case.  If we cannot find any
	 * correlation statistics, we will keep it as 0.
	 */
	*indexCorrelation = 0;

	foreach(l, path->indexclauses)
	{
		IndexClause *iclause = lfirst_node(IndexClause, l);
		AttrNumber	attnum = index->indexkeys[iclause->indexcol];

		/* attempt to lookup stats in relation for this index column */
		if (attnum != 0)
		{
			/* Simple variable -- look to stats for the underlying table */
			if (get_relation_stats_hook &&
				(*get_relation_stats_hook) (root, rte, attnum, &vardata))
			{
				/*
				 * The hook took control of acquiring a stats tuple.  If it
				 * did supply a tuple, it'd better have supplied a freefunc.
				 */
				if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
					elog(ERROR,
						 "no function provided to release variable stats with");
			}
			else
			{
				vardata.statsTuple =
					SearchSysCache3(STATRELATTINH,
									ObjectIdGetDatum(rte->relid),
									Int16GetDatum(attnum),
									BoolGetDatum(false));
				vardata.freefunc = ReleaseSysCache;
			}
		}
		else
		{
			/*
			 * Looks like we've found an expression column in the index. Let's
			 * see if there's any stats for it.
			 */

			/* get the attnum from the 0-based index. */
			attnum = iclause->indexcol + 1;

			if (get_index_stats_hook &&
				(*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
			{
				/*
				 * The hook took control of acquiring a stats tuple.  If it
				 * did supply a tuple, it'd better have supplied a freefunc.
				 */
				if (HeapTupleIsValid(vardata.statsTuple) &&
					!vardata.freefunc)
					elog(ERROR, "no function provided to release variable stats with");
			}
			else
			{
				vardata.statsTuple = SearchSysCache3(STATRELATTINH,
													 ObjectIdGetDatum(index->indexoid),
													 Int16GetDatum(attnum),
													 BoolGetDatum(false));
				vardata.freefunc = ReleaseSysCache;
			}
		}

		if (HeapTupleIsValid(vardata.statsTuple))
		{
			AttStatsSlot sslot;

			if (get_attstatsslot(&sslot, vardata.statsTuple,
								 STATISTIC_KIND_CORRELATION, InvalidOid,
								 ATTSTATSSLOT_NUMBERS))
			{
				double		varCorrelation = 0.0;

				if (sslot.nnumbers > 0)
					varCorrelation = Abs(sslot.numbers[0]);

				if (varCorrelation > *indexCorrelation)
					*indexCorrelation = varCorrelation;

				free_attstatsslot(&sslot);
			}
		}

		ReleaseVariableStats(vardata);
	}

	qualSelectivity = clauselist_selectivity(root, indexQuals,
											 baserel->relid,
											 JOIN_INNER, NULL);

	/*
	 * Now calculate the minimum possible ranges we could match with if all of
	 * the rows were in the perfect order in the table's heap.
	 */
	minimalRanges = ceil(indexRanges * qualSelectivity);

	/*
	 * Now estimate the number of ranges that we'll touch by using the
	 * indexCorrelation from the stats.  Careful not to divide by zero (note
	 * we're using the absolute value of the correlation).
	 */
	if (*indexCorrelation < 1.0e-10)
		estimatedRanges = indexRanges;
	else
		estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);

	/* we expect to visit this portion of the table */
	selec = estimatedRanges / indexRanges;

	CLAMP_PROBABILITY(selec);

	*indexSelectivity = selec;

	/*
	 * Compute the index qual costs, much as in genericcostestimate, to add to
	 * the index costs.  We can disregard indexorderbys, since BRIN doesn't
	 * support those.
	 */
	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);

	/*
	 * Compute the startup cost as the cost to read the whole revmap
	 * sequentially, including the cost to execute the index quals.
	 */
	*indexStartupCost =
		spc_seq_page_cost * statsData.revmapNumPages * loop_count;
	*indexStartupCost += qual_arg_cost;

	/*
	 * To read a BRIN index there might be a bit of back and forth over
	 * regular pages, as revmap might point to them out of sequential order;
	 * calculate the total cost as reading the whole index in random order.
	 */
	*indexTotalCost = *indexStartupCost +
		spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;

	/*
	 * Charge a small amount per range tuple which we expect to match to.  This
	 * is meant to reflect the costs of manipulating the bitmap.  The BRIN scan
	 * will set a bit for each page in the range when we find a matching
	 * range, so we must multiply the charge by the number of pages in the
	 * range.
	 */
	*indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
		statsData.pagesPerRange;

	*indexPages = index->pages;
}