1 /*------------------------------------------------------------------------- 2 * 3 * partcache.c 4 * Support routines for manipulating partition information cached in 5 * relcache 6 * 7 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group 8 * Portions Copyright (c) 1994, Regents of the University of California 9 * 10 * IDENTIFICATION 11 * src/backend/utils/cache/partcache.c 12 * 13 *------------------------------------------------------------------------- 14 */ 15 #include "postgres.h" 16 17 #include "access/hash.h" 18 #include "access/heapam.h" 19 #include "access/htup_details.h" 20 #include "access/nbtree.h" 21 #include "catalog/partition.h" 22 #include "catalog/pg_inherits.h" 23 #include "catalog/pg_opclass.h" 24 #include "catalog/pg_partitioned_table.h" 25 #include "miscadmin.h" 26 #include "nodes/makefuncs.h" 27 #include "nodes/nodeFuncs.h" 28 #include "optimizer/clauses.h" 29 #include "optimizer/planner.h" 30 #include "partitioning/partbounds.h" 31 #include "utils/builtins.h" 32 #include "utils/datum.h" 33 #include "utils/lsyscache.h" 34 #include "utils/memutils.h" 35 #include "utils/partcache.h" 36 #include "utils/rel.h" 37 #include "utils/syscache.h" 38 39 40 static List *generate_partition_qual(Relation rel); 41 static int32 qsort_partition_hbound_cmp(const void *a, const void *b); 42 static int32 qsort_partition_list_value_cmp(const void *a, const void *b, 43 void *arg); 44 static int32 qsort_partition_rbound_cmp(const void *a, const void *b, 45 void *arg); 46 47 48 /* 49 * RelationBuildPartitionKey 50 * Build partition key data of relation, and attach to relcache 51 * 52 * Partitioning key data is a complex structure; to avoid complicated logic to 53 * free individual elements whenever the relcache entry is flushed, we give it 54 * its own memory context, a child of CacheMemoryContext, which can easily be 55 * deleted on its own. To avoid leaking memory in that context in case of an 56 * error partway through this function, the context is initially created as a 57 * child of CurTransactionContext and only re-parented to CacheMemoryContext 58 * at the end, when no further errors are possible. Also, we don't make this 59 * context the current context except in very brief code sections, out of fear 60 * that some of our callees allocate memory on their own which would be leaked 61 * permanently. 62 */ 63 void 64 RelationBuildPartitionKey(Relation relation) 65 { 66 Form_pg_partitioned_table form; 67 HeapTuple tuple; 68 bool isnull; 69 int i; 70 PartitionKey key; 71 AttrNumber *attrs; 72 oidvector *opclass; 73 oidvector *collation; 74 ListCell *partexprs_item; 75 Datum datum; 76 MemoryContext partkeycxt, 77 oldcxt; 78 int16 procnum; 79 80 tuple = SearchSysCache1(PARTRELID, 81 ObjectIdGetDatum(RelationGetRelid(relation))); 82 83 /* 84 * The following happens when we have created our pg_class entry but not 85 * the pg_partitioned_table entry yet. 86 */ 87 if (!HeapTupleIsValid(tuple)) 88 return; 89 90 partkeycxt = AllocSetContextCreate(CurTransactionContext, 91 "partition key", 92 ALLOCSET_SMALL_SIZES); 93 MemoryContextCopyAndSetIdentifier(partkeycxt, 94 RelationGetRelationName(relation)); 95 96 key = (PartitionKey) MemoryContextAllocZero(partkeycxt, 97 sizeof(PartitionKeyData)); 98 99 /* Fixed-length attributes */ 100 form = (Form_pg_partitioned_table) GETSTRUCT(tuple); 101 key->strategy = form->partstrat; 102 key->partnatts = form->partnatts; 103 104 /* 105 * We can rely on the first variable-length attribute being mapped to the 106 * relevant field of the catalog's C struct, because all previous 107 * attributes are non-nullable and fixed-length. 108 */ 109 attrs = form->partattrs.values; 110 111 /* But use the hard way to retrieve further variable-length attributes */ 112 /* Operator class */ 113 datum = SysCacheGetAttr(PARTRELID, tuple, 114 Anum_pg_partitioned_table_partclass, &isnull); 115 Assert(!isnull); 116 opclass = (oidvector *) DatumGetPointer(datum); 117 118 /* Collation */ 119 datum = SysCacheGetAttr(PARTRELID, tuple, 120 Anum_pg_partitioned_table_partcollation, &isnull); 121 Assert(!isnull); 122 collation = (oidvector *) DatumGetPointer(datum); 123 124 /* Expressions */ 125 datum = SysCacheGetAttr(PARTRELID, tuple, 126 Anum_pg_partitioned_table_partexprs, &isnull); 127 if (!isnull) 128 { 129 char *exprString; 130 Node *expr; 131 132 exprString = TextDatumGetCString(datum); 133 expr = stringToNode(exprString); 134 pfree(exprString); 135 136 /* 137 * Run the expressions through const-simplification since the planner 138 * will be comparing them to similarly-processed qual clause operands, 139 * and may fail to detect valid matches without this step; fix 140 * opfuncids while at it. We don't need to bother with 141 * canonicalize_qual() though, because partition expressions should be 142 * in canonical form already (ie, no need for OR-merging or constant 143 * elimination). 144 */ 145 expr = eval_const_expressions(NULL, expr); 146 fix_opfuncids(expr); 147 148 oldcxt = MemoryContextSwitchTo(partkeycxt); 149 key->partexprs = (List *) copyObject(expr); 150 MemoryContextSwitchTo(oldcxt); 151 } 152 153 /* Allocate assorted arrays in the partkeycxt, which we'll fill below */ 154 oldcxt = MemoryContextSwitchTo(partkeycxt); 155 key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); 156 key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); 157 key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid)); 158 key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); 159 160 key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); 161 key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); 162 key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); 163 key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); 164 key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool)); 165 key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char)); 166 key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid)); 167 MemoryContextSwitchTo(oldcxt); 168 169 /* determine support function number to search for */ 170 procnum = (key->strategy == PARTITION_STRATEGY_HASH) ? 171 HASHEXTENDED_PROC : BTORDER_PROC; 172 173 /* Copy partattrs and fill other per-attribute info */ 174 memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16)); 175 partexprs_item = list_head(key->partexprs); 176 for (i = 0; i < key->partnatts; i++) 177 { 178 AttrNumber attno = key->partattrs[i]; 179 HeapTuple opclasstup; 180 Form_pg_opclass opclassform; 181 Oid funcid; 182 183 /* Collect opfamily information */ 184 opclasstup = SearchSysCache1(CLAOID, 185 ObjectIdGetDatum(opclass->values[i])); 186 if (!HeapTupleIsValid(opclasstup)) 187 elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]); 188 189 opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup); 190 key->partopfamily[i] = opclassform->opcfamily; 191 key->partopcintype[i] = opclassform->opcintype; 192 193 /* Get a support function for the specified opfamily and datatypes */ 194 funcid = get_opfamily_proc(opclassform->opcfamily, 195 opclassform->opcintype, 196 opclassform->opcintype, 197 procnum); 198 if (!OidIsValid(funcid)) 199 ereport(ERROR, 200 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), 201 errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s", 202 NameStr(opclassform->opcname), 203 (key->strategy == PARTITION_STRATEGY_HASH) ? 204 "hash" : "btree", 205 procnum, 206 format_type_be(opclassform->opcintype)))); 207 208 fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt); 209 210 /* Collation */ 211 key->partcollation[i] = collation->values[i]; 212 213 /* Collect type information */ 214 if (attno != 0) 215 { 216 Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1); 217 218 key->parttypid[i] = att->atttypid; 219 key->parttypmod[i] = att->atttypmod; 220 key->parttypcoll[i] = att->attcollation; 221 } 222 else 223 { 224 if (partexprs_item == NULL) 225 elog(ERROR, "wrong number of partition key expressions"); 226 227 key->parttypid[i] = exprType(lfirst(partexprs_item)); 228 key->parttypmod[i] = exprTypmod(lfirst(partexprs_item)); 229 key->parttypcoll[i] = exprCollation(lfirst(partexprs_item)); 230 231 partexprs_item = lnext(partexprs_item); 232 } 233 get_typlenbyvalalign(key->parttypid[i], 234 &key->parttyplen[i], 235 &key->parttypbyval[i], 236 &key->parttypalign[i]); 237 238 ReleaseSysCache(opclasstup); 239 } 240 241 ReleaseSysCache(tuple); 242 243 /* Assert that we're not leaking any old data during assignments below */ 244 Assert(relation->rd_partkeycxt == NULL); 245 Assert(relation->rd_partkey == NULL); 246 247 /* 248 * Success --- reparent our context and make the relcache point to the 249 * newly constructed key 250 */ 251 MemoryContextSetParent(partkeycxt, CacheMemoryContext); 252 relation->rd_partkeycxt = partkeycxt; 253 relation->rd_partkey = key; 254 } 255 256 /* 257 * RelationBuildPartitionDesc 258 * Form rel's partition descriptor, and store in relcache entry 259 * 260 * Note: the descriptor won't be flushed from the cache by 261 * RelationClearRelation() unless it's changed because of 262 * addition or removal of a partition. Hence, code holding a lock 263 * that's sufficient to prevent that can assume that rd_partdesc 264 * won't change underneath it. 265 */ 266 void 267 RelationBuildPartitionDesc(Relation rel) 268 { 269 List *inhoids, 270 *partoids; 271 Oid *oids = NULL; 272 List *boundspecs = NIL; 273 ListCell *cell; 274 int i, 275 nparts; 276 PartitionKey key = RelationGetPartitionKey(rel); 277 PartitionDesc result; 278 MemoryContext oldcxt; 279 280 int ndatums = 0; 281 int default_index = -1; 282 283 /* Hash partitioning specific */ 284 PartitionHashBound **hbounds = NULL; 285 286 /* List partitioning specific */ 287 PartitionListValue **all_values = NULL; 288 int null_index = -1; 289 290 /* Range partitioning specific */ 291 PartitionRangeBound **rbounds = NULL; 292 293 /* Get partition oids from pg_inherits */ 294 inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock); 295 296 /* Collect bound spec nodes in a list */ 297 i = 0; 298 partoids = NIL; 299 foreach(cell, inhoids) 300 { 301 Oid inhrelid = lfirst_oid(cell); 302 HeapTuple tuple; 303 Datum datum; 304 bool isnull; 305 Node *boundspec; 306 307 tuple = SearchSysCache1(RELOID, inhrelid); 308 if (!HeapTupleIsValid(tuple)) 309 elog(ERROR, "cache lookup failed for relation %u", inhrelid); 310 311 datum = SysCacheGetAttr(RELOID, tuple, 312 Anum_pg_class_relpartbound, 313 &isnull); 314 if (isnull) 315 elog(ERROR, "null relpartbound for relation %u", inhrelid); 316 boundspec = (Node *) stringToNode(TextDatumGetCString(datum)); 317 318 /* 319 * Sanity check: If the PartitionBoundSpec says this is the default 320 * partition, its OID should correspond to whatever's stored in 321 * pg_partitioned_table.partdefid; if not, the catalog is corrupt. 322 */ 323 if (castNode(PartitionBoundSpec, boundspec)->is_default) 324 { 325 Oid partdefid; 326 327 partdefid = get_default_partition_oid(RelationGetRelid(rel)); 328 if (partdefid != inhrelid) 329 elog(ERROR, "expected partdefid %u, but got %u", 330 inhrelid, partdefid); 331 } 332 333 boundspecs = lappend(boundspecs, boundspec); 334 partoids = lappend_oid(partoids, inhrelid); 335 ReleaseSysCache(tuple); 336 } 337 338 nparts = list_length(partoids); 339 340 if (nparts > 0) 341 { 342 oids = (Oid *) palloc(nparts * sizeof(Oid)); 343 i = 0; 344 foreach(cell, partoids) 345 oids[i++] = lfirst_oid(cell); 346 347 /* Convert from node to the internal representation */ 348 if (key->strategy == PARTITION_STRATEGY_HASH) 349 { 350 ndatums = nparts; 351 hbounds = (PartitionHashBound **) 352 palloc(nparts * sizeof(PartitionHashBound *)); 353 354 i = 0; 355 foreach(cell, boundspecs) 356 { 357 PartitionBoundSpec *spec = castNode(PartitionBoundSpec, 358 lfirst(cell)); 359 360 if (spec->strategy != PARTITION_STRATEGY_HASH) 361 elog(ERROR, "invalid strategy in partition bound spec"); 362 363 hbounds[i] = (PartitionHashBound *) 364 palloc(sizeof(PartitionHashBound)); 365 366 hbounds[i]->modulus = spec->modulus; 367 hbounds[i]->remainder = spec->remainder; 368 hbounds[i]->index = i; 369 i++; 370 } 371 372 /* Sort all the bounds in ascending order */ 373 qsort(hbounds, nparts, sizeof(PartitionHashBound *), 374 qsort_partition_hbound_cmp); 375 } 376 else if (key->strategy == PARTITION_STRATEGY_LIST) 377 { 378 List *non_null_values = NIL; 379 380 /* 381 * Create a unified list of non-null values across all partitions. 382 */ 383 i = 0; 384 null_index = -1; 385 foreach(cell, boundspecs) 386 { 387 PartitionBoundSpec *spec = castNode(PartitionBoundSpec, 388 lfirst(cell)); 389 ListCell *c; 390 391 if (spec->strategy != PARTITION_STRATEGY_LIST) 392 elog(ERROR, "invalid strategy in partition bound spec"); 393 394 /* 395 * Note the index of the partition bound spec for the default 396 * partition. There's no datum to add to the list of non-null 397 * datums for this partition. 398 */ 399 if (spec->is_default) 400 { 401 default_index = i; 402 i++; 403 continue; 404 } 405 406 foreach(c, spec->listdatums) 407 { 408 Const *val = castNode(Const, lfirst(c)); 409 PartitionListValue *list_value = NULL; 410 411 if (!val->constisnull) 412 { 413 list_value = (PartitionListValue *) 414 palloc0(sizeof(PartitionListValue)); 415 list_value->index = i; 416 list_value->value = val->constvalue; 417 } 418 else 419 { 420 /* 421 * Never put a null into the values array, flag 422 * instead for the code further down below where we 423 * construct the actual relcache struct. 424 */ 425 if (null_index != -1) 426 elog(ERROR, "found null more than once"); 427 null_index = i; 428 } 429 430 if (list_value) 431 non_null_values = lappend(non_null_values, 432 list_value); 433 } 434 435 i++; 436 } 437 438 ndatums = list_length(non_null_values); 439 440 /* 441 * Collect all list values in one array. Alongside the value, we 442 * also save the index of partition the value comes from. 443 */ 444 all_values = (PartitionListValue **) palloc(ndatums * 445 sizeof(PartitionListValue *)); 446 i = 0; 447 foreach(cell, non_null_values) 448 { 449 PartitionListValue *src = lfirst(cell); 450 451 all_values[i] = (PartitionListValue *) 452 palloc(sizeof(PartitionListValue)); 453 all_values[i]->value = src->value; 454 all_values[i]->index = src->index; 455 i++; 456 } 457 458 qsort_arg(all_values, ndatums, sizeof(PartitionListValue *), 459 qsort_partition_list_value_cmp, (void *) key); 460 } 461 else if (key->strategy == PARTITION_STRATEGY_RANGE) 462 { 463 int k; 464 PartitionRangeBound **all_bounds, 465 *prev; 466 467 all_bounds = (PartitionRangeBound **) palloc0(2 * nparts * 468 sizeof(PartitionRangeBound *)); 469 470 /* 471 * Create a unified list of range bounds across all the 472 * partitions. 473 */ 474 i = ndatums = 0; 475 foreach(cell, boundspecs) 476 { 477 PartitionBoundSpec *spec = castNode(PartitionBoundSpec, 478 lfirst(cell)); 479 PartitionRangeBound *lower, 480 *upper; 481 482 if (spec->strategy != PARTITION_STRATEGY_RANGE) 483 elog(ERROR, "invalid strategy in partition bound spec"); 484 485 /* 486 * Note the index of the partition bound spec for the default 487 * partition. There's no datum to add to the allbounds array 488 * for this partition. 489 */ 490 if (spec->is_default) 491 { 492 default_index = i++; 493 continue; 494 } 495 496 lower = make_one_partition_rbound(key, i, spec->lowerdatums, 497 true); 498 upper = make_one_partition_rbound(key, i, spec->upperdatums, 499 false); 500 all_bounds[ndatums++] = lower; 501 all_bounds[ndatums++] = upper; 502 i++; 503 } 504 505 Assert(ndatums == nparts * 2 || 506 (default_index != -1 && ndatums == (nparts - 1) * 2)); 507 508 /* Sort all the bounds in ascending order */ 509 qsort_arg(all_bounds, ndatums, 510 sizeof(PartitionRangeBound *), 511 qsort_partition_rbound_cmp, 512 (void *) key); 513 514 /* Save distinct bounds from all_bounds into rbounds. */ 515 rbounds = (PartitionRangeBound **) 516 palloc(ndatums * sizeof(PartitionRangeBound *)); 517 k = 0; 518 prev = NULL; 519 for (i = 0; i < ndatums; i++) 520 { 521 PartitionRangeBound *cur = all_bounds[i]; 522 bool is_distinct = false; 523 int j; 524 525 /* Is the current bound distinct from the previous one? */ 526 for (j = 0; j < key->partnatts; j++) 527 { 528 Datum cmpval; 529 530 if (prev == NULL || cur->kind[j] != prev->kind[j]) 531 { 532 is_distinct = true; 533 break; 534 } 535 536 /* 537 * If the bounds are both MINVALUE or MAXVALUE, stop now 538 * and treat them as equal, since any values after this 539 * point must be ignored. 540 */ 541 if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE) 542 break; 543 544 cmpval = FunctionCall2Coll(&key->partsupfunc[j], 545 key->partcollation[j], 546 cur->datums[j], 547 prev->datums[j]); 548 if (DatumGetInt32(cmpval) != 0) 549 { 550 is_distinct = true; 551 break; 552 } 553 } 554 555 /* 556 * Only if the bound is distinct save it into a temporary 557 * array i.e. rbounds which is later copied into boundinfo 558 * datums array. 559 */ 560 if (is_distinct) 561 rbounds[k++] = all_bounds[i]; 562 563 prev = cur; 564 } 565 566 /* Update ndatums to hold the count of distinct datums. */ 567 ndatums = k; 568 } 569 else 570 elog(ERROR, "unexpected partition strategy: %d", 571 (int) key->strategy); 572 } 573 574 /* Assert we aren't about to leak any old data structure */ 575 Assert(rel->rd_pdcxt == NULL); 576 Assert(rel->rd_partdesc == NULL); 577 578 /* 579 * Now build the actual relcache partition descriptor. Note that the 580 * order of operations here is fairly critical. If we fail partway 581 * through this code, we won't have leaked memory because the rd_pdcxt is 582 * attached to the relcache entry immediately, so it'll be freed whenever 583 * the entry is rebuilt or destroyed. However, we don't assign to 584 * rd_partdesc until the cached data structure is fully complete and 585 * valid, so that no other code might try to use it. 586 */ 587 rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext, 588 "partition descriptor", 589 ALLOCSET_SMALL_SIZES); 590 MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt, 591 RelationGetRelationName(rel)); 592 593 oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt); 594 595 result = (PartitionDescData *) palloc0(sizeof(PartitionDescData)); 596 result->nparts = nparts; 597 if (nparts > 0) 598 { 599 PartitionBoundInfo boundinfo; 600 int *mapping; 601 int next_index = 0; 602 603 result->oids = (Oid *) palloc0(nparts * sizeof(Oid)); 604 605 boundinfo = (PartitionBoundInfoData *) 606 palloc0(sizeof(PartitionBoundInfoData)); 607 boundinfo->strategy = key->strategy; 608 boundinfo->default_index = -1; 609 boundinfo->ndatums = ndatums; 610 boundinfo->null_index = -1; 611 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); 612 613 /* Initialize mapping array with invalid values */ 614 mapping = (int *) palloc(sizeof(int) * nparts); 615 for (i = 0; i < nparts; i++) 616 mapping[i] = -1; 617 618 switch (key->strategy) 619 { 620 case PARTITION_STRATEGY_HASH: 621 { 622 /* Moduli are stored in ascending order */ 623 int greatest_modulus = hbounds[ndatums - 1]->modulus; 624 625 boundinfo->nindexes = greatest_modulus; 626 boundinfo->indexes = (int *) palloc(greatest_modulus * 627 sizeof(int)); 628 629 for (i = 0; i < greatest_modulus; i++) 630 boundinfo->indexes[i] = -1; 631 632 for (i = 0; i < nparts; i++) 633 { 634 int modulus = hbounds[i]->modulus; 635 int remainder = hbounds[i]->remainder; 636 637 boundinfo->datums[i] = (Datum *) palloc(2 * 638 sizeof(Datum)); 639 boundinfo->datums[i][0] = Int32GetDatum(modulus); 640 boundinfo->datums[i][1] = Int32GetDatum(remainder); 641 642 while (remainder < greatest_modulus) 643 { 644 /* overlap? */ 645 Assert(boundinfo->indexes[remainder] == -1); 646 boundinfo->indexes[remainder] = i; 647 remainder += modulus; 648 } 649 650 mapping[hbounds[i]->index] = i; 651 pfree(hbounds[i]); 652 } 653 pfree(hbounds); 654 break; 655 } 656 657 case PARTITION_STRATEGY_LIST: 658 { 659 boundinfo->nindexes = ndatums; 660 boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); 661 662 /* 663 * Copy values. Indexes of individual values are mapped 664 * to canonical values so that they match for any two list 665 * partitioned tables with same number of partitions and 666 * same lists per partition. One way to canonicalize is 667 * to assign the index in all_values[] of the smallest 668 * value of each partition, as the index of all of the 669 * partition's values. 670 */ 671 for (i = 0; i < ndatums; i++) 672 { 673 boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum)); 674 boundinfo->datums[i][0] = datumCopy(all_values[i]->value, 675 key->parttypbyval[0], 676 key->parttyplen[0]); 677 678 /* If the old index has no mapping, assign one */ 679 if (mapping[all_values[i]->index] == -1) 680 mapping[all_values[i]->index] = next_index++; 681 682 boundinfo->indexes[i] = mapping[all_values[i]->index]; 683 } 684 685 /* 686 * If null-accepting partition has no mapped index yet, 687 * assign one. This could happen if such partition 688 * accepts only null and hence not covered in the above 689 * loop which only handled non-null values. 690 */ 691 if (null_index != -1) 692 { 693 Assert(null_index >= 0); 694 if (mapping[null_index] == -1) 695 mapping[null_index] = next_index++; 696 boundinfo->null_index = mapping[null_index]; 697 } 698 699 /* Assign mapped index for the default partition. */ 700 if (default_index != -1) 701 { 702 /* 703 * The default partition accepts any value not 704 * specified in the lists of other partitions, hence 705 * it should not get mapped index while assigning 706 * those for non-null datums. 707 */ 708 Assert(default_index >= 0 && 709 mapping[default_index] == -1); 710 mapping[default_index] = next_index++; 711 boundinfo->default_index = mapping[default_index]; 712 } 713 714 /* All partitions must now have a valid mapping */ 715 Assert(next_index == nparts); 716 break; 717 } 718 719 case PARTITION_STRATEGY_RANGE: 720 { 721 boundinfo->kind = (PartitionRangeDatumKind **) 722 palloc(ndatums * 723 sizeof(PartitionRangeDatumKind *)); 724 boundinfo->nindexes = ndatums + 1; 725 boundinfo->indexes = (int *) palloc((ndatums + 1) * 726 sizeof(int)); 727 728 for (i = 0; i < ndatums; i++) 729 { 730 int j; 731 732 boundinfo->datums[i] = (Datum *) palloc(key->partnatts * 733 sizeof(Datum)); 734 boundinfo->kind[i] = (PartitionRangeDatumKind *) 735 palloc(key->partnatts * 736 sizeof(PartitionRangeDatumKind)); 737 for (j = 0; j < key->partnatts; j++) 738 { 739 if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE) 740 boundinfo->datums[i][j] = 741 datumCopy(rbounds[i]->datums[j], 742 key->parttypbyval[j], 743 key->parttyplen[j]); 744 boundinfo->kind[i][j] = rbounds[i]->kind[j]; 745 } 746 747 /* 748 * There is no mapping for invalid indexes. 749 * 750 * Any lower bounds in the rbounds array have invalid 751 * indexes assigned, because the values between the 752 * previous bound (if there is one) and this (lower) 753 * bound are not part of the range of any existing 754 * partition. 755 */ 756 if (rbounds[i]->lower) 757 boundinfo->indexes[i] = -1; 758 else 759 { 760 int orig_index = rbounds[i]->index; 761 762 /* If the old index has no mapping, assign one */ 763 if (mapping[orig_index] == -1) 764 mapping[orig_index] = next_index++; 765 766 boundinfo->indexes[i] = mapping[orig_index]; 767 } 768 } 769 770 /* Assign mapped index for the default partition. */ 771 if (default_index != -1) 772 { 773 Assert(default_index >= 0 && mapping[default_index] == -1); 774 mapping[default_index] = next_index++; 775 boundinfo->default_index = mapping[default_index]; 776 } 777 boundinfo->indexes[i] = -1; 778 break; 779 } 780 781 default: 782 elog(ERROR, "unexpected partition strategy: %d", 783 (int) key->strategy); 784 } 785 786 result->boundinfo = boundinfo; 787 788 /* 789 * Now assign OIDs from the original array into mapped indexes of the 790 * result array. Order of OIDs in the former is defined by the 791 * catalog scan that retrieved them, whereas that in the latter is 792 * defined by canonicalized representation of the partition bounds. 793 */ 794 for (i = 0; i < nparts; i++) 795 result->oids[mapping[i]] = oids[i]; 796 pfree(mapping); 797 } 798 799 MemoryContextSwitchTo(oldcxt); 800 rel->rd_partdesc = result; 801 } 802 803 /* 804 * RelationGetPartitionQual 805 * 806 * Returns a list of partition quals 807 */ 808 List * 809 RelationGetPartitionQual(Relation rel) 810 { 811 /* Quick exit */ 812 if (!rel->rd_rel->relispartition) 813 return NIL; 814 815 return generate_partition_qual(rel); 816 } 817 818 /* 819 * get_partition_qual_relid 820 * 821 * Returns an expression tree describing the passed-in relation's partition 822 * constraint. 823 * 824 * If the relation is not found, or is not a partition, or there is no 825 * partition constraint, return NULL. We must guard against the first two 826 * cases because this supports a SQL function that could be passed any OID. 827 * The last case can happen even if relispartition is true, when a default 828 * partition is the only partition. 829 */ 830 Expr * 831 get_partition_qual_relid(Oid relid) 832 { 833 Expr *result = NULL; 834 835 /* Do the work only if this relation exists and is a partition. */ 836 if (get_rel_relispartition(relid)) 837 { 838 Relation rel = relation_open(relid, AccessShareLock); 839 List *and_args; 840 841 and_args = generate_partition_qual(rel); 842 843 /* Convert implicit-AND list format to boolean expression */ 844 if (and_args == NIL) 845 result = NULL; 846 else if (list_length(and_args) > 1) 847 result = makeBoolExpr(AND_EXPR, and_args, -1); 848 else 849 result = linitial(and_args); 850 851 /* Keep the lock, to allow safe deparsing against the rel by caller. */ 852 relation_close(rel, NoLock); 853 } 854 855 return result; 856 } 857 858 /* 859 * generate_partition_qual 860 * 861 * Generate partition predicate from rel's partition bound expression. The 862 * function returns a NIL list if there is no predicate. 863 * 864 * We cache a copy of the result in the relcache entry, after constructing 865 * it using the caller's context. This approach avoids leaking any data 866 * into long-lived cache contexts, especially if we fail partway through. 867 */ 868 static List * 869 generate_partition_qual(Relation rel) 870 { 871 HeapTuple tuple; 872 MemoryContext oldcxt; 873 Datum boundDatum; 874 bool isnull; 875 List *my_qual = NIL, 876 *result = NIL; 877 Relation parent; 878 bool found_whole_row; 879 880 /* Guard against stack overflow due to overly deep partition tree */ 881 check_stack_depth(); 882 883 /* If we already cached the result, just return a copy */ 884 if (rel->rd_partcheckvalid) 885 return copyObject(rel->rd_partcheck); 886 887 /* Grab at least an AccessShareLock on the parent table */ 888 parent = relation_open(get_partition_parent(RelationGetRelid(rel)), 889 AccessShareLock); 890 891 /* Get pg_class.relpartbound */ 892 tuple = SearchSysCache1(RELOID, RelationGetRelid(rel)); 893 if (!HeapTupleIsValid(tuple)) 894 elog(ERROR, "cache lookup failed for relation %u", 895 RelationGetRelid(rel)); 896 897 boundDatum = SysCacheGetAttr(RELOID, tuple, 898 Anum_pg_class_relpartbound, 899 &isnull); 900 if (!isnull) 901 { 902 PartitionBoundSpec *bound; 903 904 bound = castNode(PartitionBoundSpec, 905 stringToNode(TextDatumGetCString(boundDatum))); 906 907 my_qual = get_qual_from_partbound(rel, parent, bound); 908 } 909 910 ReleaseSysCache(tuple); 911 912 /* Add the parent's quals to the list (if any) */ 913 if (parent->rd_rel->relispartition) 914 result = list_concat(generate_partition_qual(parent), my_qual); 915 else 916 result = my_qual; 917 918 /* 919 * Change Vars to have partition's attnos instead of the parent's. We do 920 * this after we concatenate the parent's quals, because we want every Var 921 * in it to bear this relation's attnos. It's safe to assume varno = 1 922 * here. 923 */ 924 result = map_partition_varattnos(result, 1, rel, parent, 925 &found_whole_row); 926 /* There can never be a whole-row reference here */ 927 if (found_whole_row) 928 elog(ERROR, "unexpected whole-row reference found in partition key"); 929 930 /* Assert that we're not leaking any old data during assignments below */ 931 Assert(rel->rd_partcheckcxt == NULL); 932 Assert(rel->rd_partcheck == NIL); 933 934 /* 935 * Save a copy in the relcache. The order of these operations is fairly 936 * critical to avoid memory leaks and ensure that we don't leave a corrupt 937 * relcache entry if we fail partway through copyObject. 938 * 939 * If, as is definitely possible, the partcheck list is NIL, then we do 940 * not need to make a context to hold it. 941 */ 942 if (result != NIL) 943 { 944 rel->rd_partcheckcxt = AllocSetContextCreate(CacheMemoryContext, 945 "partition constraint", 946 ALLOCSET_SMALL_SIZES); 947 MemoryContextCopyAndSetIdentifier(rel->rd_partcheckcxt, 948 RelationGetRelationName(rel)); 949 oldcxt = MemoryContextSwitchTo(rel->rd_partcheckcxt); 950 rel->rd_partcheck = copyObject(result); 951 MemoryContextSwitchTo(oldcxt); 952 } 953 else 954 rel->rd_partcheck = NIL; 955 rel->rd_partcheckvalid = true; 956 957 /* Keep the parent locked until commit */ 958 relation_close(parent, NoLock); 959 960 /* Return the working copy to the caller */ 961 return result; 962 } 963 964 /* 965 * qsort_partition_hbound_cmp 966 * 967 * We sort hash bounds by modulus, then by remainder. 968 */ 969 static int32 970 qsort_partition_hbound_cmp(const void *a, const void *b) 971 { 972 PartitionHashBound *h1 = (*(PartitionHashBound *const *) a); 973 PartitionHashBound *h2 = (*(PartitionHashBound *const *) b); 974 975 return partition_hbound_cmp(h1->modulus, h1->remainder, 976 h2->modulus, h2->remainder); 977 } 978 979 /* 980 * qsort_partition_list_value_cmp 981 * 982 * Compare two list partition bound datums 983 */ 984 static int32 985 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) 986 { 987 Datum val1 = (*(const PartitionListValue **) a)->value, 988 val2 = (*(const PartitionListValue **) b)->value; 989 PartitionKey key = (PartitionKey) arg; 990 991 return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], 992 key->partcollation[0], 993 val1, val2)); 994 } 995 996 /* Used when sorting range bounds across all range partitions */ 997 static int32 998 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) 999 { 1000 PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a); 1001 PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b); 1002 PartitionKey key = (PartitionKey) arg; 1003 1004 return partition_rbound_cmp(key->partnatts, key->partsupfunc, 1005 key->partcollation, b1->datums, b1->kind, 1006 b1->lower, b2); 1007 } 1008