1 /*-------------------------------------------------------------------------
2 *
3 * partbounds.c
4 * Support routines for manipulating partition bounds
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/partitioning/partbounds.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15 #include "postgres.h"
16
17 #include "access/relation.h"
18 #include "access/table.h"
19 #include "access/tableam.h"
20 #include "catalog/partition.h"
21 #include "catalog/pg_inherits.h"
22 #include "catalog/pg_type.h"
23 #include "commands/tablecmds.h"
24 #include "executor/executor.h"
25 #include "miscadmin.h"
26 #include "nodes/makefuncs.h"
27 #include "nodes/nodeFuncs.h"
28 #include "parser/parse_coerce.h"
29 #include "partitioning/partbounds.h"
30 #include "partitioning/partdesc.h"
31 #include "partitioning/partprune.h"
32 #include "utils/builtins.h"
33 #include "utils/datum.h"
34 #include "utils/fmgroids.h"
35 #include "utils/hashutils.h"
36 #include "utils/lsyscache.h"
37 #include "utils/partcache.h"
38 #include "utils/rel.h"
39 #include "utils/snapmgr.h"
40 #include "utils/ruleutils.h"
41 #include "utils/syscache.h"
42
43 /*
44 * When qsort'ing partition bounds after reading from the catalog, each bound
45 * is represented with one of the following structs.
46 */
47
48 /* One bound of a hash partition */
49 typedef struct PartitionHashBound
50 {
51 int modulus;
52 int remainder;
53 int index;
54 } PartitionHashBound;
55
56 /* One value coming from some (index'th) list partition */
57 typedef struct PartitionListValue
58 {
59 int index;
60 Datum value;
61 } PartitionListValue;
62
63 /* One bound of a range partition */
64 typedef struct PartitionRangeBound
65 {
66 int index;
67 Datum *datums; /* range bound datums */
68 PartitionRangeDatumKind *kind; /* the kind of each datum */
69 bool lower; /* this is the lower (vs upper) bound */
70 } PartitionRangeBound;
71
72 static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
73 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
74 void *arg);
75 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
76 void *arg);
77 static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs,
78 int nparts, PartitionKey key, int **mapping);
79 static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs,
80 int nparts, PartitionKey key, int **mapping);
81 static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs,
82 int nparts, PartitionKey key, int **mapping);
83 static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index,
84 List *datums, bool lower);
85 static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
86 int remainder2);
87 static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
88 Oid *partcollation, Datum *datums1,
89 PartitionRangeDatumKind *kind1, bool lower1,
90 PartitionRangeBound *b2);
91 static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
92 Oid *partcollation,
93 PartitionBoundInfo boundinfo,
94 PartitionRangeBound *probe, bool *is_equal);
95 static Expr *make_partition_op_expr(PartitionKey key, int keynum,
96 uint16 strategy, Expr *arg1, Expr *arg2);
97 static Oid get_partition_operator(PartitionKey key, int col,
98 StrategyNumber strategy, bool *need_relabel);
99 static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
100 static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
101 static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
102 bool for_default);
103 static void get_range_key_properties(PartitionKey key, int keynum,
104 PartitionRangeDatum *ldatum,
105 PartitionRangeDatum *udatum,
106 ListCell **partexprs_item,
107 Expr **keyCol,
108 Const **lower_val, Const **upper_val);
109 static List *get_range_nulltest(PartitionKey key);
110
111 /*
112 * get_qual_from_partbound
113 * Given a parser node for partition bound, return the list of executable
114 * expressions as partition constraint
115 */
116 List *
get_qual_from_partbound(Relation rel,Relation parent,PartitionBoundSpec * spec)117 get_qual_from_partbound(Relation rel, Relation parent,
118 PartitionBoundSpec *spec)
119 {
120 PartitionKey key = RelationGetPartitionKey(parent);
121 List *my_qual = NIL;
122
123 Assert(key != NULL);
124
125 switch (key->strategy)
126 {
127 case PARTITION_STRATEGY_HASH:
128 Assert(spec->strategy == PARTITION_STRATEGY_HASH);
129 my_qual = get_qual_for_hash(parent, spec);
130 break;
131
132 case PARTITION_STRATEGY_LIST:
133 Assert(spec->strategy == PARTITION_STRATEGY_LIST);
134 my_qual = get_qual_for_list(parent, spec);
135 break;
136
137 case PARTITION_STRATEGY_RANGE:
138 Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
139 my_qual = get_qual_for_range(parent, spec, false);
140 break;
141
142 default:
143 elog(ERROR, "unexpected partition strategy: %d",
144 (int) key->strategy);
145 }
146
147 return my_qual;
148 }
149
150 /*
151 * partition_bounds_create
152 * Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
153 * nodes
154 *
155 * This function creates a PartitionBoundInfo and fills the values of its
156 * various members based on the input list. Importantly, 'datums' array will
157 * contain Datum representation of individual bounds (possibly after
158 * de-duplication as in case of range bounds), sorted in a canonical order
159 * defined by qsort_partition_* functions of respective partitioning methods.
160 * 'indexes' array will contain as many elements as there are bounds (specific
161 * exceptions to this rule are listed in the function body), which represent
162 * the 0-based canonical positions of partitions.
163 *
164 * Upon return from this function, *mapping is set to an array of
165 * list_length(boundspecs) elements, each of which maps the original index of
166 * a partition to its canonical index.
167 *
168 * Note: The objects returned by this function are wholly allocated in the
169 * current memory context.
170 */
171 PartitionBoundInfo
partition_bounds_create(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)172 partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts,
173 PartitionKey key, int **mapping)
174 {
175 int i;
176
177 Assert(nparts > 0);
178
179 /*
180 * For each partitioning method, we first convert the partition bounds
181 * from their parser node representation to the internal representation,
182 * along with any additional preprocessing (such as de-duplicating range
183 * bounds). Resulting bound datums are then added to the 'datums' array
184 * in PartitionBoundInfo. For each datum added, an integer indicating the
185 * canonical partition index is added to the 'indexes' array.
186 *
187 * For each bound, we remember its partition's position (0-based) in the
188 * original list to later map it to the canonical index.
189 */
190
191 /*
192 * Initialize mapping array with invalid values, this is filled within
193 * each sub-routine below depending on the bound type.
194 */
195 *mapping = (int *) palloc(sizeof(int) * nparts);
196 for (i = 0; i < nparts; i++)
197 (*mapping)[i] = -1;
198
199 switch (key->strategy)
200 {
201 case PARTITION_STRATEGY_HASH:
202 return create_hash_bounds(boundspecs, nparts, key, mapping);
203
204 case PARTITION_STRATEGY_LIST:
205 return create_list_bounds(boundspecs, nparts, key, mapping);
206
207 case PARTITION_STRATEGY_RANGE:
208 return create_range_bounds(boundspecs, nparts, key, mapping);
209
210 default:
211 elog(ERROR, "unexpected partition strategy: %d",
212 (int) key->strategy);
213 break;
214 }
215
216 Assert(false);
217 return NULL; /* keep compiler quiet */
218 }
219
220 /*
221 * create_hash_bounds
222 * Create a PartitionBoundInfo for a hash partitioned table
223 */
224 static PartitionBoundInfo
create_hash_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)225 create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
226 PartitionKey key, int **mapping)
227 {
228 PartitionBoundInfo boundinfo;
229 PartitionHashBound **hbounds = NULL;
230 int i;
231 int ndatums = 0;
232 int greatest_modulus;
233
234 boundinfo = (PartitionBoundInfoData *)
235 palloc0(sizeof(PartitionBoundInfoData));
236 boundinfo->strategy = key->strategy;
237 /* No special hash partitions. */
238 boundinfo->null_index = -1;
239 boundinfo->default_index = -1;
240
241 ndatums = nparts;
242 hbounds = (PartitionHashBound **)
243 palloc(nparts * sizeof(PartitionHashBound *));
244
245 /* Convert from node to the internal representation */
246 for (i = 0; i < nparts; i++)
247 {
248 PartitionBoundSpec *spec = boundspecs[i];
249
250 if (spec->strategy != PARTITION_STRATEGY_HASH)
251 elog(ERROR, "invalid strategy in partition bound spec");
252
253 hbounds[i] = (PartitionHashBound *) palloc(sizeof(PartitionHashBound));
254 hbounds[i]->modulus = spec->modulus;
255 hbounds[i]->remainder = spec->remainder;
256 hbounds[i]->index = i;
257 }
258
259 /* Sort all the bounds in ascending order */
260 qsort(hbounds, nparts, sizeof(PartitionHashBound *),
261 qsort_partition_hbound_cmp);
262
263 /* After sorting, moduli are now stored in ascending order. */
264 greatest_modulus = hbounds[ndatums - 1]->modulus;
265
266 boundinfo->ndatums = ndatums;
267 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
268 boundinfo->nindexes = greatest_modulus;
269 boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
270 for (i = 0; i < greatest_modulus; i++)
271 boundinfo->indexes[i] = -1;
272
273 /*
274 * For hash partitioning, there are as many datums (modulus and remainder
275 * pairs) as there are partitions. Indexes are simply values ranging from
276 * 0 to (nparts - 1).
277 */
278 for (i = 0; i < nparts; i++)
279 {
280 int modulus = hbounds[i]->modulus;
281 int remainder = hbounds[i]->remainder;
282
283 boundinfo->datums[i] = (Datum *) palloc(2 * sizeof(Datum));
284 boundinfo->datums[i][0] = Int32GetDatum(modulus);
285 boundinfo->datums[i][1] = Int32GetDatum(remainder);
286
287 while (remainder < greatest_modulus)
288 {
289 /* overlap? */
290 Assert(boundinfo->indexes[remainder] == -1);
291 boundinfo->indexes[remainder] = i;
292 remainder += modulus;
293 }
294
295 (*mapping)[hbounds[i]->index] = i;
296 pfree(hbounds[i]);
297 }
298 pfree(hbounds);
299
300 return boundinfo;
301 }
302
303 /*
304 * create_list_bounds
305 * Create a PartitionBoundInfo for a list partitioned table
306 */
307 static PartitionBoundInfo
create_list_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)308 create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
309 PartitionKey key, int **mapping)
310 {
311 PartitionBoundInfo boundinfo;
312 PartitionListValue **all_values = NULL;
313 ListCell *cell;
314 int i = 0;
315 int ndatums = 0;
316 int next_index = 0;
317 int default_index = -1;
318 int null_index = -1;
319 List *non_null_values = NIL;
320
321 boundinfo = (PartitionBoundInfoData *)
322 palloc0(sizeof(PartitionBoundInfoData));
323 boundinfo->strategy = key->strategy;
324 /* Will be set correctly below. */
325 boundinfo->null_index = -1;
326 boundinfo->default_index = -1;
327
328 /* Create a unified list of non-null values across all partitions. */
329 for (i = 0; i < nparts; i++)
330 {
331 PartitionBoundSpec *spec = boundspecs[i];
332 ListCell *c;
333
334 if (spec->strategy != PARTITION_STRATEGY_LIST)
335 elog(ERROR, "invalid strategy in partition bound spec");
336
337 /*
338 * Note the index of the partition bound spec for the default
339 * partition. There's no datum to add to the list on non-null datums
340 * for this partition.
341 */
342 if (spec->is_default)
343 {
344 default_index = i;
345 continue;
346 }
347
348 foreach(c, spec->listdatums)
349 {
350 Const *val = castNode(Const, lfirst(c));
351 PartitionListValue *list_value = NULL;
352
353 if (!val->constisnull)
354 {
355 list_value = (PartitionListValue *)
356 palloc0(sizeof(PartitionListValue));
357 list_value->index = i;
358 list_value->value = val->constvalue;
359 }
360 else
361 {
362 /*
363 * Never put a null into the values array; save the index of
364 * the partition that stores nulls, instead.
365 */
366 if (null_index != -1)
367 elog(ERROR, "found null more than once");
368 null_index = i;
369 }
370
371 if (list_value)
372 non_null_values = lappend(non_null_values, list_value);
373 }
374 }
375
376 ndatums = list_length(non_null_values);
377
378 /*
379 * Collect all list values in one array. Alongside the value, we also save
380 * the index of partition the value comes from.
381 */
382 all_values = (PartitionListValue **)
383 palloc(ndatums * sizeof(PartitionListValue *));
384 i = 0;
385 foreach(cell, non_null_values)
386 {
387 PartitionListValue *src = lfirst(cell);
388
389 all_values[i] = (PartitionListValue *)
390 palloc(sizeof(PartitionListValue));
391 all_values[i]->value = src->value;
392 all_values[i]->index = src->index;
393 i++;
394 }
395
396 qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
397 qsort_partition_list_value_cmp, (void *) key);
398
399 boundinfo->ndatums = ndatums;
400 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
401 boundinfo->nindexes = ndatums;
402 boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
403
404 /*
405 * Copy values. Canonical indexes are values ranging from 0 to (nparts -
406 * 1) assigned to each partition such that all datums of a given partition
407 * receive the same value. The value for a given partition is the index of
408 * that partition's smallest datum in the all_values[] array.
409 */
410 for (i = 0; i < ndatums; i++)
411 {
412 int orig_index = all_values[i]->index;
413
414 boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
415 boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
416 key->parttypbyval[0],
417 key->parttyplen[0]);
418
419 /* If the old index has no mapping, assign one */
420 if ((*mapping)[orig_index] == -1)
421 (*mapping)[orig_index] = next_index++;
422
423 boundinfo->indexes[i] = (*mapping)[orig_index];
424 }
425
426 /*
427 * Set the canonical value for null_index, if any.
428 *
429 * It is possible that the null-accepting partition has not been assigned
430 * an index yet, which could happen if such partition accepts only null
431 * and hence not handled in the above loop which only looked at non-null
432 * values.
433 */
434 if (null_index != -1)
435 {
436 Assert(null_index >= 0);
437 if ((*mapping)[null_index] == -1)
438 (*mapping)[null_index] = next_index++;
439 boundinfo->null_index = (*mapping)[null_index];
440 }
441
442 /* Set the canonical value for default_index, if any. */
443 if (default_index != -1)
444 {
445 /*
446 * The default partition accepts any value not specified in the lists
447 * of other partitions, hence it should not get mapped index while
448 * assigning those for non-null datums.
449 */
450 Assert(default_index >= 0);
451 Assert((*mapping)[default_index] == -1);
452 (*mapping)[default_index] = next_index++;
453 boundinfo->default_index = (*mapping)[default_index];
454 }
455
456 /* All partitions must now have been assigned canonical indexes. */
457 Assert(next_index == nparts);
458 return boundinfo;
459 }
460
461 /*
462 * create_range_bounds
463 * Create a PartitionBoundInfo for a range partitioned table
464 */
465 static PartitionBoundInfo
create_range_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)466 create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
467 PartitionKey key, int **mapping)
468 {
469 PartitionBoundInfo boundinfo;
470 PartitionRangeBound **rbounds = NULL;
471 PartitionRangeBound **all_bounds,
472 *prev;
473 int i,
474 k;
475 int ndatums = 0;
476 int default_index = -1;
477 int next_index = 0;
478
479 boundinfo = (PartitionBoundInfoData *)
480 palloc0(sizeof(PartitionBoundInfoData));
481 boundinfo->strategy = key->strategy;
482 /* There is no special null-accepting range partition. */
483 boundinfo->null_index = -1;
484 /* Will be set correctly below. */
485 boundinfo->default_index = -1;
486
487 all_bounds = (PartitionRangeBound **)
488 palloc0(2 * nparts * sizeof(PartitionRangeBound *));
489
490 /* Create a unified list of range bounds across all the partitions. */
491 ndatums = 0;
492 for (i = 0; i < nparts; i++)
493 {
494 PartitionBoundSpec *spec = boundspecs[i];
495 PartitionRangeBound *lower,
496 *upper;
497
498 if (spec->strategy != PARTITION_STRATEGY_RANGE)
499 elog(ERROR, "invalid strategy in partition bound spec");
500
501 /*
502 * Note the index of the partition bound spec for the default
503 * partition. There's no datum to add to the all_bounds array for
504 * this partition.
505 */
506 if (spec->is_default)
507 {
508 default_index = i;
509 continue;
510 }
511
512 lower = make_one_partition_rbound(key, i, spec->lowerdatums, true);
513 upper = make_one_partition_rbound(key, i, spec->upperdatums, false);
514 all_bounds[ndatums++] = lower;
515 all_bounds[ndatums++] = upper;
516 }
517
518 Assert(ndatums == nparts * 2 ||
519 (default_index != -1 && ndatums == (nparts - 1) * 2));
520
521 /* Sort all the bounds in ascending order */
522 qsort_arg(all_bounds, ndatums,
523 sizeof(PartitionRangeBound *),
524 qsort_partition_rbound_cmp,
525 (void *) key);
526
527 /* Save distinct bounds from all_bounds into rbounds. */
528 rbounds = (PartitionRangeBound **)
529 palloc(ndatums * sizeof(PartitionRangeBound *));
530 k = 0;
531 prev = NULL;
532 for (i = 0; i < ndatums; i++)
533 {
534 PartitionRangeBound *cur = all_bounds[i];
535 bool is_distinct = false;
536 int j;
537
538 /* Is the current bound distinct from the previous one? */
539 for (j = 0; j < key->partnatts; j++)
540 {
541 Datum cmpval;
542
543 if (prev == NULL || cur->kind[j] != prev->kind[j])
544 {
545 is_distinct = true;
546 break;
547 }
548
549 /*
550 * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
551 * them as equal, since any values after this point must be
552 * ignored.
553 */
554 if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
555 break;
556
557 cmpval = FunctionCall2Coll(&key->partsupfunc[j],
558 key->partcollation[j],
559 cur->datums[j],
560 prev->datums[j]);
561 if (DatumGetInt32(cmpval) != 0)
562 {
563 is_distinct = true;
564 break;
565 }
566 }
567
568 /*
569 * Only if the bound is distinct save it into a temporary array, i.e,
570 * rbounds which is later copied into boundinfo datums array.
571 */
572 if (is_distinct)
573 rbounds[k++] = all_bounds[i];
574
575 prev = cur;
576 }
577
578 /* Update ndatums to hold the count of distinct datums. */
579 ndatums = k;
580
581 /*
582 * Add datums to boundinfo. Canonical indexes are values ranging from 0
583 * to nparts - 1, assigned in that order to each partition's upper bound.
584 * For 'datums' elements that are lower bounds, there is -1 in the
585 * 'indexes' array to signify that no partition exists for the values less
586 * than such a bound and greater than or equal to the previous upper
587 * bound.
588 */
589 boundinfo->ndatums = ndatums;
590 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
591 boundinfo->kind = (PartitionRangeDatumKind **)
592 palloc(ndatums *
593 sizeof(PartitionRangeDatumKind *));
594
595 /*
596 * For range partitioning, an additional value of -1 is stored as the last
597 * element of the indexes[] array.
598 */
599 boundinfo->nindexes = ndatums + 1;
600 boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
601
602 for (i = 0; i < ndatums; i++)
603 {
604 int j;
605
606 boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
607 sizeof(Datum));
608 boundinfo->kind[i] = (PartitionRangeDatumKind *)
609 palloc(key->partnatts *
610 sizeof(PartitionRangeDatumKind));
611 for (j = 0; j < key->partnatts; j++)
612 {
613 if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
614 boundinfo->datums[i][j] =
615 datumCopy(rbounds[i]->datums[j],
616 key->parttypbyval[j],
617 key->parttyplen[j]);
618 boundinfo->kind[i][j] = rbounds[i]->kind[j];
619 }
620
621 /*
622 * There is no mapping for invalid indexes.
623 *
624 * Any lower bounds in the rbounds array have invalid indexes
625 * assigned, because the values between the previous bound (if there
626 * is one) and this (lower) bound are not part of the range of any
627 * existing partition.
628 */
629 if (rbounds[i]->lower)
630 boundinfo->indexes[i] = -1;
631 else
632 {
633 int orig_index = rbounds[i]->index;
634
635 /* If the old index has no mapping, assign one */
636 if ((*mapping)[orig_index] == -1)
637 (*mapping)[orig_index] = next_index++;
638
639 boundinfo->indexes[i] = (*mapping)[orig_index];
640 }
641 }
642
643 /* Set the canonical value for default_index, if any. */
644 if (default_index != -1)
645 {
646 Assert(default_index >= 0 && (*mapping)[default_index] == -1);
647 (*mapping)[default_index] = next_index++;
648 boundinfo->default_index = (*mapping)[default_index];
649 }
650
651 /* The extra -1 element. */
652 Assert(i == ndatums);
653 boundinfo->indexes[i] = -1;
654
655 /* All partitions must now have been assigned canonical indexes. */
656 Assert(next_index == nparts);
657 return boundinfo;
658 }
659
660 /*
661 * Are two partition bound collections logically equal?
662 *
663 * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
664 * This is also useful when b1 and b2 are bound collections of two separate
665 * relations, respectively, because PartitionBoundInfo is a canonical
666 * representation of partition bounds.
667 */
668 bool
partition_bounds_equal(int partnatts,int16 * parttyplen,bool * parttypbyval,PartitionBoundInfo b1,PartitionBoundInfo b2)669 partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
670 PartitionBoundInfo b1, PartitionBoundInfo b2)
671 {
672 int i;
673
674 if (b1->strategy != b2->strategy)
675 return false;
676
677 if (b1->ndatums != b2->ndatums)
678 return false;
679
680 if (b1->nindexes != b2->nindexes)
681 return false;
682
683 if (b1->null_index != b2->null_index)
684 return false;
685
686 if (b1->default_index != b2->default_index)
687 return false;
688
689 /* For all partition strategies, the indexes[] arrays have to match */
690 for (i = 0; i < b1->nindexes; i++)
691 {
692 if (b1->indexes[i] != b2->indexes[i])
693 return false;
694 }
695
696 /* Finally, compare the datums[] arrays */
697 if (b1->strategy == PARTITION_STRATEGY_HASH)
698 {
699 /*
700 * We arrange the partitions in the ascending order of their moduli
701 * and remainders. Also every modulus is factor of next larger
702 * modulus. Therefore we can safely store index of a given partition
703 * in indexes array at remainder of that partition. Also entries at
704 * (remainder + N * modulus) positions in indexes array are all same
705 * for (modulus, remainder) specification for any partition. Thus the
706 * datums arrays from the given bounds are the same, if and only if
707 * their indexes arrays are the same. So, it suffices to compare the
708 * indexes arrays.
709 *
710 * Nonetheless make sure that the bounds are indeed the same when the
711 * indexes match. Hash partition bound stores modulus and remainder
712 * at b1->datums[i][0] and b1->datums[i][1] position respectively.
713 */
714 #ifdef USE_ASSERT_CHECKING
715 for (i = 0; i < b1->ndatums; i++)
716 Assert((b1->datums[i][0] == b2->datums[i][0] &&
717 b1->datums[i][1] == b2->datums[i][1]));
718 #endif
719 }
720 else
721 {
722 for (i = 0; i < b1->ndatums; i++)
723 {
724 int j;
725
726 for (j = 0; j < partnatts; j++)
727 {
728 /* For range partitions, the bounds might not be finite. */
729 if (b1->kind != NULL)
730 {
731 /* The different kinds of bound all differ from each other */
732 if (b1->kind[i][j] != b2->kind[i][j])
733 return false;
734
735 /*
736 * Non-finite bounds are equal without further
737 * examination.
738 */
739 if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
740 continue;
741 }
742
743 /*
744 * Compare the actual values. Note that it would be both
745 * incorrect and unsafe to invoke the comparison operator
746 * derived from the partitioning specification here. It would
747 * be incorrect because we want the relcache entry to be
748 * updated for ANY change to the partition bounds, not just
749 * those that the partitioning operator thinks are
750 * significant. It would be unsafe because we might reach
751 * this code in the context of an aborted transaction, and an
752 * arbitrary partitioning operator might not be safe in that
753 * context. datumIsEqual() should be simple enough to be
754 * safe.
755 */
756 if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
757 parttypbyval[j], parttyplen[j]))
758 return false;
759 }
760 }
761 }
762 return true;
763 }
764
765 /*
766 * Return a copy of given PartitionBoundInfo structure. The data types of bounds
767 * are described by given partition key specification.
768 */
769 PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,PartitionKey key)770 partition_bounds_copy(PartitionBoundInfo src,
771 PartitionKey key)
772 {
773 PartitionBoundInfo dest;
774 int i;
775 int ndatums;
776 int nindexes;
777 int partnatts;
778
779 dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
780
781 dest->strategy = src->strategy;
782 ndatums = dest->ndatums = src->ndatums;
783 nindexes = dest->nindexes = src->nindexes;
784 partnatts = key->partnatts;
785
786 /* List partitioned tables have only a single partition key. */
787 Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
788
789 dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
790
791 if (src->kind != NULL)
792 {
793 dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
794 sizeof(PartitionRangeDatumKind *));
795 for (i = 0; i < ndatums; i++)
796 {
797 dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts *
798 sizeof(PartitionRangeDatumKind));
799
800 memcpy(dest->kind[i], src->kind[i],
801 sizeof(PartitionRangeDatumKind) * key->partnatts);
802 }
803 }
804 else
805 dest->kind = NULL;
806
807 for (i = 0; i < ndatums; i++)
808 {
809 int j;
810
811 /*
812 * For a corresponding hash partition, datums array will have two
813 * elements - modulus and remainder.
814 */
815 bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
816 int natts = hash_part ? 2 : partnatts;
817
818 dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts);
819
820 for (j = 0; j < natts; j++)
821 {
822 bool byval;
823 int typlen;
824
825 if (hash_part)
826 {
827 typlen = sizeof(int32); /* Always int4 */
828 byval = true; /* int4 is pass-by-value */
829 }
830 else
831 {
832 byval = key->parttypbyval[j];
833 typlen = key->parttyplen[j];
834 }
835
836 if (dest->kind == NULL ||
837 dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
838 dest->datums[i][j] = datumCopy(src->datums[i][j],
839 byval, typlen);
840 }
841 }
842
843 dest->indexes = (int *) palloc(sizeof(int) * nindexes);
844 memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);
845
846 dest->null_index = src->null_index;
847 dest->default_index = src->default_index;
848
849 return dest;
850 }
851
852 /*
853 * partitions_are_ordered
854 * Determine whether the partitions described by 'boundinfo' are ordered,
855 * that is partitions appearing earlier in the PartitionDesc sequence
856 * contain partition keys strictly less than those appearing later.
857 * Also, if NULL values are possible, they must come in the last
858 * partition defined in the PartitionDesc.
859 *
860 * If out of order, or there is insufficient info to know the order,
861 * then we return false.
862 */
863 bool
partitions_are_ordered(PartitionBoundInfo boundinfo,int nparts)864 partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts)
865 {
866 Assert(boundinfo != NULL);
867
868 switch (boundinfo->strategy)
869 {
870 case PARTITION_STRATEGY_RANGE:
871
872 /*
873 * RANGE-type partitioning guarantees that the partitions can be
874 * scanned in the order that they're defined in the PartitionDesc
875 * to provide sequential, non-overlapping ranges of tuples.
876 * However, if a DEFAULT partition exists then it doesn't work, as
877 * that could contain tuples from either below or above the
878 * defined range, or tuples belonging to gaps between partitions.
879 */
880 if (!partition_bound_has_default(boundinfo))
881 return true;
882 break;
883
884 case PARTITION_STRATEGY_LIST:
885
886 /*
887 * LIST partitioning can also guarantee ordering, but only if the
888 * partitions don't accept interleaved values. We could likely
889 * check for this by looping over the PartitionBound's indexes
890 * array to check that the indexes are in order. For now, let's
891 * just keep it simple and just accept LIST partitioning when
892 * there's no DEFAULT partition, exactly one value per partition,
893 * and optionally a NULL partition that does not accept any other
894 * values. Such a NULL partition will come last in the
895 * PartitionDesc, and the other partitions will be properly
896 * ordered. This is a cheap test to make as it does not require
897 * any per-partition processing. Maybe we'd like to handle more
898 * complex cases in the future.
899 */
900 if (partition_bound_has_default(boundinfo))
901 return false;
902
903 if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo)
904 == nparts)
905 return true;
906 break;
907
908 default:
909 /* HASH, or some other strategy */
910 break;
911 }
912
913 return false;
914 }
915
916 /*
917 * check_new_partition_bound
918 *
919 * Checks if the new partition's bound overlaps any of the existing partitions
920 * of parent. Also performs additional checks as necessary per strategy.
921 */
922 void
check_new_partition_bound(char * relname,Relation parent,PartitionBoundSpec * spec)923 check_new_partition_bound(char *relname, Relation parent,
924 PartitionBoundSpec *spec)
925 {
926 PartitionKey key = RelationGetPartitionKey(parent);
927 PartitionDesc partdesc = RelationGetPartitionDesc(parent);
928 PartitionBoundInfo boundinfo = partdesc->boundinfo;
929 ParseState *pstate = make_parsestate(NULL);
930 int with = -1;
931 bool overlap = false;
932
933 if (spec->is_default)
934 {
935 /*
936 * The default partition bound never conflicts with any other
937 * partition's; if that's what we're attaching, the only possible
938 * problem is that one already exists, so check for that and we're
939 * done.
940 */
941 if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
942 return;
943
944 /* Default partition already exists, error out. */
945 ereport(ERROR,
946 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
947 errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
948 relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
949 parser_errposition(pstate, spec->location)));
950 }
951
952 switch (key->strategy)
953 {
954 case PARTITION_STRATEGY_HASH:
955 {
956 Assert(spec->strategy == PARTITION_STRATEGY_HASH);
957 Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
958
959 if (partdesc->nparts > 0)
960 {
961 Datum **datums = boundinfo->datums;
962 int ndatums = boundinfo->ndatums;
963 int greatest_modulus;
964 int remainder;
965 int offset;
966 bool valid_modulus = true;
967 int prev_modulus, /* Previous largest modulus */
968 next_modulus; /* Next largest modulus */
969
970 /*
971 * Check rule that every modulus must be a factor of the
972 * next larger modulus. For example, if you have a bunch
973 * of partitions that all have modulus 5, you can add a
974 * new partition with modulus 10 or a new partition with
975 * modulus 15, but you cannot add both a partition with
976 * modulus 10 and a partition with modulus 15, because 10
977 * is not a factor of 15.
978 *
979 * Get the greatest (modulus, remainder) pair contained in
980 * boundinfo->datums that is less than or equal to the
981 * (spec->modulus, spec->remainder) pair.
982 */
983 offset = partition_hash_bsearch(boundinfo,
984 spec->modulus,
985 spec->remainder);
986 if (offset < 0)
987 {
988 next_modulus = DatumGetInt32(datums[0][0]);
989 valid_modulus = (next_modulus % spec->modulus) == 0;
990 }
991 else
992 {
993 prev_modulus = DatumGetInt32(datums[offset][0]);
994 valid_modulus = (spec->modulus % prev_modulus) == 0;
995
996 if (valid_modulus && (offset + 1) < ndatums)
997 {
998 next_modulus = DatumGetInt32(datums[offset + 1][0]);
999 valid_modulus = (next_modulus % spec->modulus) == 0;
1000 }
1001 }
1002
1003 if (!valid_modulus)
1004 ereport(ERROR,
1005 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1006 errmsg("every hash partition modulus must be a factor of the next larger modulus")));
1007
1008 greatest_modulus = boundinfo->nindexes;
1009 remainder = spec->remainder;
1010
1011 /*
1012 * Normally, the lowest remainder that could conflict with
1013 * the new partition is equal to the remainder specified
1014 * for the new partition, but when the new partition has a
1015 * modulus higher than any used so far, we need to adjust.
1016 */
1017 if (remainder >= greatest_modulus)
1018 remainder = remainder % greatest_modulus;
1019
1020 /* Check every potentially-conflicting remainder. */
1021 do
1022 {
1023 if (boundinfo->indexes[remainder] != -1)
1024 {
1025 overlap = true;
1026 with = boundinfo->indexes[remainder];
1027 break;
1028 }
1029 remainder += spec->modulus;
1030 } while (remainder < greatest_modulus);
1031 }
1032
1033 break;
1034 }
1035
1036 case PARTITION_STRATEGY_LIST:
1037 {
1038 Assert(spec->strategy == PARTITION_STRATEGY_LIST);
1039
1040 if (partdesc->nparts > 0)
1041 {
1042 ListCell *cell;
1043
1044 Assert(boundinfo &&
1045 boundinfo->strategy == PARTITION_STRATEGY_LIST &&
1046 (boundinfo->ndatums > 0 ||
1047 partition_bound_accepts_nulls(boundinfo) ||
1048 partition_bound_has_default(boundinfo)));
1049
1050 foreach(cell, spec->listdatums)
1051 {
1052 Const *val = castNode(Const, lfirst(cell));
1053
1054 if (!val->constisnull)
1055 {
1056 int offset;
1057 bool equal;
1058
1059 offset = partition_list_bsearch(&key->partsupfunc[0],
1060 key->partcollation,
1061 boundinfo,
1062 val->constvalue,
1063 &equal);
1064 if (offset >= 0 && equal)
1065 {
1066 overlap = true;
1067 with = boundinfo->indexes[offset];
1068 break;
1069 }
1070 }
1071 else if (partition_bound_accepts_nulls(boundinfo))
1072 {
1073 overlap = true;
1074 with = boundinfo->null_index;
1075 break;
1076 }
1077 }
1078 }
1079
1080 break;
1081 }
1082
1083 case PARTITION_STRATEGY_RANGE:
1084 {
1085 PartitionRangeBound *lower,
1086 *upper;
1087
1088 Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
1089 lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
1090 upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
1091
1092 /*
1093 * First check if the resulting range would be empty with
1094 * specified lower and upper bounds
1095 */
1096 if (partition_rbound_cmp(key->partnatts, key->partsupfunc,
1097 key->partcollation, lower->datums,
1098 lower->kind, true, upper) >= 0)
1099 {
1100 ereport(ERROR,
1101 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1102 errmsg("empty range bound specified for partition \"%s\"",
1103 relname),
1104 errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
1105 get_range_partbound_string(spec->lowerdatums),
1106 get_range_partbound_string(spec->upperdatums)),
1107 parser_errposition(pstate, spec->location)));
1108 }
1109
1110 if (partdesc->nparts > 0)
1111 {
1112 int offset;
1113 bool equal;
1114
1115 Assert(boundinfo &&
1116 boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
1117 (boundinfo->ndatums > 0 ||
1118 partition_bound_has_default(boundinfo)));
1119
1120 /*
1121 * Test whether the new lower bound (which is treated
1122 * inclusively as part of the new partition) lies inside
1123 * an existing partition, or in a gap.
1124 *
1125 * If it's inside an existing partition, the bound at
1126 * offset + 1 will be the upper bound of that partition,
1127 * and its index will be >= 0.
1128 *
1129 * If it's in a gap, the bound at offset + 1 will be the
1130 * lower bound of the next partition, and its index will
1131 * be -1. This is also true if there is no next partition,
1132 * since the index array is initialised with an extra -1
1133 * at the end.
1134 */
1135 offset = partition_range_bsearch(key->partnatts,
1136 key->partsupfunc,
1137 key->partcollation,
1138 boundinfo, lower,
1139 &equal);
1140
1141 if (boundinfo->indexes[offset + 1] < 0)
1142 {
1143 /*
1144 * Check that the new partition will fit in the gap.
1145 * For it to fit, the new upper bound must be less
1146 * than or equal to the lower bound of the next
1147 * partition, if there is one.
1148 */
1149 if (offset + 1 < boundinfo->ndatums)
1150 {
1151 int32 cmpval;
1152 Datum *datums;
1153 PartitionRangeDatumKind *kind;
1154 bool is_lower;
1155
1156 datums = boundinfo->datums[offset + 1];
1157 kind = boundinfo->kind[offset + 1];
1158 is_lower = (boundinfo->indexes[offset + 1] == -1);
1159
1160 cmpval = partition_rbound_cmp(key->partnatts,
1161 key->partsupfunc,
1162 key->partcollation,
1163 datums, kind,
1164 is_lower, upper);
1165 if (cmpval < 0)
1166 {
1167 /*
1168 * The new partition overlaps with the
1169 * existing partition between offset + 1 and
1170 * offset + 2.
1171 */
1172 overlap = true;
1173 with = boundinfo->indexes[offset + 2];
1174 }
1175 }
1176 }
1177 else
1178 {
1179 /*
1180 * The new partition overlaps with the existing
1181 * partition between offset and offset + 1.
1182 */
1183 overlap = true;
1184 with = boundinfo->indexes[offset + 1];
1185 }
1186 }
1187
1188 break;
1189 }
1190
1191 default:
1192 elog(ERROR, "unexpected partition strategy: %d",
1193 (int) key->strategy);
1194 }
1195
1196 if (overlap)
1197 {
1198 Assert(with >= 0);
1199 ereport(ERROR,
1200 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1201 errmsg("partition \"%s\" would overlap partition \"%s\"",
1202 relname, get_rel_name(partdesc->oids[with])),
1203 parser_errposition(pstate, spec->location)));
1204 }
1205 }
1206
1207 /*
1208 * check_default_partition_contents
1209 *
1210 * This function checks if there exists a row in the default partition that
1211 * would properly belong to the new partition being added. If it finds one,
1212 * it throws an error.
1213 */
1214 void
check_default_partition_contents(Relation parent,Relation default_rel,PartitionBoundSpec * new_spec)1215 check_default_partition_contents(Relation parent, Relation default_rel,
1216 PartitionBoundSpec *new_spec)
1217 {
1218 List *new_part_constraints;
1219 List *def_part_constraints;
1220 List *all_parts;
1221 ListCell *lc;
1222
1223 new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
1224 ? get_qual_for_list(parent, new_spec)
1225 : get_qual_for_range(parent, new_spec, false);
1226 def_part_constraints =
1227 get_proposed_default_constraint(new_part_constraints);
1228
1229 /*
1230 * Map the Vars in the constraint expression from parent's attnos to
1231 * default_rel's.
1232 */
1233 def_part_constraints =
1234 map_partition_varattnos(def_part_constraints, 1, default_rel,
1235 parent, NULL);
1236
1237 /*
1238 * If the existing constraints on the default partition imply that it will
1239 * not contain any row that would belong to the new partition, we can
1240 * avoid scanning the default partition.
1241 */
1242 if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
1243 {
1244 ereport(DEBUG1,
1245 (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
1246 RelationGetRelationName(default_rel))));
1247 return;
1248 }
1249
1250 /*
1251 * Scan the default partition and its subpartitions, and check for rows
1252 * that do not satisfy the revised partition constraints.
1253 */
1254 if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1255 all_parts = find_all_inheritors(RelationGetRelid(default_rel),
1256 AccessExclusiveLock, NULL);
1257 else
1258 all_parts = list_make1_oid(RelationGetRelid(default_rel));
1259
1260 foreach(lc, all_parts)
1261 {
1262 Oid part_relid = lfirst_oid(lc);
1263 Relation part_rel;
1264 Expr *partition_constraint;
1265 EState *estate;
1266 ExprState *partqualstate = NULL;
1267 Snapshot snapshot;
1268 ExprContext *econtext;
1269 TableScanDesc scan;
1270 MemoryContext oldCxt;
1271 TupleTableSlot *tupslot;
1272
1273 /* Lock already taken above. */
1274 if (part_relid != RelationGetRelid(default_rel))
1275 {
1276 part_rel = table_open(part_relid, NoLock);
1277
1278 /*
1279 * Map the Vars in the constraint expression from default_rel's
1280 * the sub-partition's.
1281 */
1282 partition_constraint = make_ands_explicit(def_part_constraints);
1283 partition_constraint = (Expr *)
1284 map_partition_varattnos((List *) partition_constraint, 1,
1285 part_rel, default_rel, NULL);
1286
1287 /*
1288 * If the partition constraints on default partition child imply
1289 * that it will not contain any row that would belong to the new
1290 * partition, we can avoid scanning the child table.
1291 */
1292 if (PartConstraintImpliedByRelConstraint(part_rel,
1293 def_part_constraints))
1294 {
1295 ereport(DEBUG1,
1296 (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
1297 RelationGetRelationName(part_rel))));
1298
1299 table_close(part_rel, NoLock);
1300 continue;
1301 }
1302 }
1303 else
1304 {
1305 part_rel = default_rel;
1306 partition_constraint = make_ands_explicit(def_part_constraints);
1307 }
1308
1309 /*
1310 * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
1311 * scanned.
1312 */
1313 if (part_rel->rd_rel->relkind != RELKIND_RELATION)
1314 {
1315 if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1316 ereport(WARNING,
1317 (errcode(ERRCODE_CHECK_VIOLATION),
1318 errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
1319 RelationGetRelationName(part_rel),
1320 RelationGetRelationName(default_rel))));
1321
1322 if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
1323 table_close(part_rel, NoLock);
1324
1325 continue;
1326 }
1327
1328 estate = CreateExecutorState();
1329
1330 /* Build expression execution states for partition check quals */
1331 partqualstate = ExecPrepareExpr(partition_constraint, estate);
1332
1333 econtext = GetPerTupleExprContext(estate);
1334 snapshot = RegisterSnapshot(GetLatestSnapshot());
1335 tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
1336 scan = table_beginscan(part_rel, snapshot, 0, NULL);
1337
1338 /*
1339 * Switch to per-tuple memory context and reset it for each tuple
1340 * produced, so we don't leak memory.
1341 */
1342 oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1343
1344 while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
1345 {
1346 econtext->ecxt_scantuple = tupslot;
1347
1348 if (!ExecCheck(partqualstate, econtext))
1349 ereport(ERROR,
1350 (errcode(ERRCODE_CHECK_VIOLATION),
1351 errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
1352 RelationGetRelationName(default_rel))));
1353
1354 ResetExprContext(econtext);
1355 CHECK_FOR_INTERRUPTS();
1356 }
1357
1358 MemoryContextSwitchTo(oldCxt);
1359 table_endscan(scan);
1360 UnregisterSnapshot(snapshot);
1361 ExecDropSingleTupleTableSlot(tupslot);
1362 FreeExecutorState(estate);
1363
1364 if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
1365 table_close(part_rel, NoLock); /* keep the lock until commit */
1366 }
1367 }
1368
1369 /*
1370 * get_hash_partition_greatest_modulus
1371 *
1372 * Returns the greatest modulus of the hash partition bound.
1373 * This is no longer used in the core code, but we keep it around
1374 * in case external modules are using it.
1375 */
1376 int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)1377 get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
1378 {
1379 Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
1380 return bound->nindexes;
1381 }
1382
1383 /*
1384 * make_one_partition_rbound
1385 *
1386 * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
1387 * and a flag telling whether the bound is lower or not. Made into a function
1388 * because there are multiple sites that want to use this facility.
1389 */
1390 static PartitionRangeBound *
make_one_partition_rbound(PartitionKey key,int index,List * datums,bool lower)1391 make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
1392 {
1393 PartitionRangeBound *bound;
1394 ListCell *lc;
1395 int i;
1396
1397 Assert(datums != NIL);
1398
1399 bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
1400 bound->index = index;
1401 bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
1402 bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
1403 sizeof(PartitionRangeDatumKind));
1404 bound->lower = lower;
1405
1406 i = 0;
1407 foreach(lc, datums)
1408 {
1409 PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
1410
1411 /* What's contained in this range datum? */
1412 bound->kind[i] = datum->kind;
1413
1414 if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
1415 {
1416 Const *val = castNode(Const, datum->value);
1417
1418 if (val->constisnull)
1419 elog(ERROR, "invalid range bound datum");
1420 bound->datums[i] = val->constvalue;
1421 }
1422
1423 i++;
1424 }
1425
1426 return bound;
1427 }
1428
1429 /*
1430 * partition_rbound_cmp
1431 *
1432 * Return for two range bounds whether the 1st one (specified in datums1,
1433 * kind1, and lower1) is <, =, or > the bound specified in *b2.
1434 *
1435 * partnatts, partsupfunc and partcollation give the number of attributes in the
1436 * bounds to be compared, comparison function to be used and the collations of
1437 * attributes, respectively.
1438 *
1439 * Note that if the values of the two range bounds compare equal, then we take
1440 * into account whether they are upper or lower bounds, and an upper bound is
1441 * considered to be smaller than a lower bound. This is important to the way
1442 * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
1443 * structure, which only stores the upper bound of a common boundary between
1444 * two contiguous partitions.
1445 */
1446 static int32
partition_rbound_cmp(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * datums1,PartitionRangeDatumKind * kind1,bool lower1,PartitionRangeBound * b2)1447 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
1448 Oid *partcollation,
1449 Datum *datums1, PartitionRangeDatumKind *kind1,
1450 bool lower1, PartitionRangeBound *b2)
1451 {
1452 int32 cmpval = 0; /* placate compiler */
1453 int i;
1454 Datum *datums2 = b2->datums;
1455 PartitionRangeDatumKind *kind2 = b2->kind;
1456 bool lower2 = b2->lower;
1457
1458 for (i = 0; i < partnatts; i++)
1459 {
1460 /*
1461 * First, handle cases where the column is unbounded, which should not
1462 * invoke the comparison procedure, and should not consider any later
1463 * columns. Note that the PartitionRangeDatumKind enum elements
1464 * compare the same way as the values they represent.
1465 */
1466 if (kind1[i] < kind2[i])
1467 return -1;
1468 else if (kind1[i] > kind2[i])
1469 return 1;
1470 else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
1471
1472 /*
1473 * The column bounds are both MINVALUE or both MAXVALUE. No later
1474 * columns should be considered, but we still need to compare
1475 * whether they are upper or lower bounds.
1476 */
1477 break;
1478
1479 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
1480 partcollation[i],
1481 datums1[i],
1482 datums2[i]));
1483 if (cmpval != 0)
1484 break;
1485 }
1486
1487 /*
1488 * If the comparison is anything other than equal, we're done. If they
1489 * compare equal though, we still have to consider whether the boundaries
1490 * are inclusive or exclusive. Exclusive one is considered smaller of the
1491 * two.
1492 */
1493 if (cmpval == 0 && lower1 != lower2)
1494 cmpval = lower1 ? 1 : -1;
1495
1496 return cmpval;
1497 }
1498
1499 /*
1500 * partition_rbound_datum_cmp
1501 *
1502 * Return whether range bound (specified in rb_datums, rb_kind, and rb_lower)
1503 * is <, =, or > partition key of tuple (tuple_datums)
1504 *
1505 * n_tuple_datums, partsupfunc and partcollation give number of attributes in
1506 * the bounds to be compared, comparison function to be used and the collations
1507 * of attributes resp.
1508 *
1509 */
1510 int32
partition_rbound_datum_cmp(FmgrInfo * partsupfunc,Oid * partcollation,Datum * rb_datums,PartitionRangeDatumKind * rb_kind,Datum * tuple_datums,int n_tuple_datums)1511 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
1512 Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
1513 Datum *tuple_datums, int n_tuple_datums)
1514 {
1515 int i;
1516 int32 cmpval = -1;
1517
1518 for (i = 0; i < n_tuple_datums; i++)
1519 {
1520 if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
1521 return -1;
1522 else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
1523 return 1;
1524
1525 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
1526 partcollation[i],
1527 rb_datums[i],
1528 tuple_datums[i]));
1529 if (cmpval != 0)
1530 break;
1531 }
1532
1533 return cmpval;
1534 }
1535
1536 /*
1537 * partition_hbound_cmp
1538 *
1539 * Compares modulus first, then remainder if modulus is equal.
1540 */
1541 static int32
partition_hbound_cmp(int modulus1,int remainder1,int modulus2,int remainder2)1542 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
1543 {
1544 if (modulus1 < modulus2)
1545 return -1;
1546 if (modulus1 > modulus2)
1547 return 1;
1548 if (modulus1 == modulus2 && remainder1 != remainder2)
1549 return (remainder1 > remainder2) ? 1 : -1;
1550 return 0;
1551 }
1552
1553 /*
1554 * partition_list_bsearch
1555 * Returns the index of the greatest bound datum that is less than equal
1556 * to the given value or -1 if all of the bound datums are greater
1557 *
1558 * *is_equal is set to true if the bound datum at the returned index is equal
1559 * to the input value.
1560 */
1561 int
partition_list_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,Datum value,bool * is_equal)1562 partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
1563 PartitionBoundInfo boundinfo,
1564 Datum value, bool *is_equal)
1565 {
1566 int lo,
1567 hi,
1568 mid;
1569
1570 lo = -1;
1571 hi = boundinfo->ndatums - 1;
1572 while (lo < hi)
1573 {
1574 int32 cmpval;
1575
1576 mid = (lo + hi + 1) / 2;
1577 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
1578 partcollation[0],
1579 boundinfo->datums[mid][0],
1580 value));
1581 if (cmpval <= 0)
1582 {
1583 lo = mid;
1584 *is_equal = (cmpval == 0);
1585 if (*is_equal)
1586 break;
1587 }
1588 else
1589 hi = mid - 1;
1590 }
1591
1592 return lo;
1593 }
1594
1595 /*
1596 * partition_range_bsearch
1597 * Returns the index of the greatest range bound that is less than or
1598 * equal to the given range bound or -1 if all of the range bounds are
1599 * greater
1600 *
1601 * *is_equal is set to true if the range bound at the returned index is equal
1602 * to the input range bound
1603 */
1604 static int
partition_range_bsearch(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,PartitionRangeBound * probe,bool * is_equal)1605 partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
1606 Oid *partcollation,
1607 PartitionBoundInfo boundinfo,
1608 PartitionRangeBound *probe, bool *is_equal)
1609 {
1610 int lo,
1611 hi,
1612 mid;
1613
1614 lo = -1;
1615 hi = boundinfo->ndatums - 1;
1616 while (lo < hi)
1617 {
1618 int32 cmpval;
1619
1620 mid = (lo + hi + 1) / 2;
1621 cmpval = partition_rbound_cmp(partnatts, partsupfunc,
1622 partcollation,
1623 boundinfo->datums[mid],
1624 boundinfo->kind[mid],
1625 (boundinfo->indexes[mid] == -1),
1626 probe);
1627 if (cmpval <= 0)
1628 {
1629 lo = mid;
1630 *is_equal = (cmpval == 0);
1631
1632 if (*is_equal)
1633 break;
1634 }
1635 else
1636 hi = mid - 1;
1637 }
1638
1639 return lo;
1640 }
1641
1642 /*
1643 * partition_range_bsearch
1644 * Returns the index of the greatest range bound that is less than or
1645 * equal to the given tuple or -1 if all of the range bounds are greater
1646 *
1647 * *is_equal is set to true if the range bound at the returned index is equal
1648 * to the input tuple.
1649 */
1650 int
partition_range_datum_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,int nvalues,Datum * values,bool * is_equal)1651 partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
1652 PartitionBoundInfo boundinfo,
1653 int nvalues, Datum *values, bool *is_equal)
1654 {
1655 int lo,
1656 hi,
1657 mid;
1658
1659 lo = -1;
1660 hi = boundinfo->ndatums - 1;
1661 while (lo < hi)
1662 {
1663 int32 cmpval;
1664
1665 mid = (lo + hi + 1) / 2;
1666 cmpval = partition_rbound_datum_cmp(partsupfunc,
1667 partcollation,
1668 boundinfo->datums[mid],
1669 boundinfo->kind[mid],
1670 values,
1671 nvalues);
1672 if (cmpval <= 0)
1673 {
1674 lo = mid;
1675 *is_equal = (cmpval == 0);
1676
1677 if (*is_equal)
1678 break;
1679 }
1680 else
1681 hi = mid - 1;
1682 }
1683
1684 return lo;
1685 }
1686
1687 /*
1688 * partition_hash_bsearch
1689 * Returns the index of the greatest (modulus, remainder) pair that is
1690 * less than or equal to the given (modulus, remainder) pair or -1 if
1691 * all of them are greater
1692 */
1693 int
partition_hash_bsearch(PartitionBoundInfo boundinfo,int modulus,int remainder)1694 partition_hash_bsearch(PartitionBoundInfo boundinfo,
1695 int modulus, int remainder)
1696 {
1697 int lo,
1698 hi,
1699 mid;
1700
1701 lo = -1;
1702 hi = boundinfo->ndatums - 1;
1703 while (lo < hi)
1704 {
1705 int32 cmpval,
1706 bound_modulus,
1707 bound_remainder;
1708
1709 mid = (lo + hi + 1) / 2;
1710 bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
1711 bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
1712 cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
1713 modulus, remainder);
1714 if (cmpval <= 0)
1715 {
1716 lo = mid;
1717
1718 if (cmpval == 0)
1719 break;
1720 }
1721 else
1722 hi = mid - 1;
1723 }
1724
1725 return lo;
1726 }
1727
1728 /*
1729 * qsort_partition_hbound_cmp
1730 *
1731 * Hash bounds are sorted by modulus, then by remainder.
1732 */
1733 static int32
qsort_partition_hbound_cmp(const void * a,const void * b)1734 qsort_partition_hbound_cmp(const void *a, const void *b)
1735 {
1736 PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
1737 PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
1738
1739 return partition_hbound_cmp(h1->modulus, h1->remainder,
1740 h2->modulus, h2->remainder);
1741 }
1742
1743 /*
1744 * qsort_partition_list_value_cmp
1745 *
1746 * Compare two list partition bound datums.
1747 */
1748 static int32
qsort_partition_list_value_cmp(const void * a,const void * b,void * arg)1749 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
1750 {
1751 Datum val1 = (*(PartitionListValue *const *) a)->value,
1752 val2 = (*(PartitionListValue *const *) b)->value;
1753 PartitionKey key = (PartitionKey) arg;
1754
1755 return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1756 key->partcollation[0],
1757 val1, val2));
1758 }
1759
1760 /*
1761 * qsort_partition_rbound_cmp
1762 *
1763 * Used when sorting range bounds across all range partitions.
1764 */
1765 static int32
qsort_partition_rbound_cmp(const void * a,const void * b,void * arg)1766 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
1767 {
1768 PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
1769 PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
1770 PartitionKey key = (PartitionKey) arg;
1771
1772 return partition_rbound_cmp(key->partnatts, key->partsupfunc,
1773 key->partcollation, b1->datums, b1->kind,
1774 b1->lower, b2);
1775 }
1776
1777 /*
1778 * get_partition_operator
1779 *
1780 * Return oid of the operator of the given strategy for the given partition
1781 * key column. It is assumed that the partitioning key is of the same type as
1782 * the chosen partitioning opclass, or at least binary-compatible. In the
1783 * latter case, *need_relabel is set to true if the opclass is not of a
1784 * polymorphic type (indicating a RelabelType node needed on top), otherwise
1785 * false.
1786 */
1787 static Oid
get_partition_operator(PartitionKey key,int col,StrategyNumber strategy,bool * need_relabel)1788 get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
1789 bool *need_relabel)
1790 {
1791 Oid operoid;
1792
1793 /*
1794 * Get the operator in the partitioning opfamily using the opclass'
1795 * declared input type as both left- and righttype.
1796 */
1797 operoid = get_opfamily_member(key->partopfamily[col],
1798 key->partopcintype[col],
1799 key->partopcintype[col],
1800 strategy);
1801 if (!OidIsValid(operoid))
1802 elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
1803 strategy, key->partopcintype[col], key->partopcintype[col],
1804 key->partopfamily[col]);
1805
1806 /*
1807 * If the partition key column is not of the same type as the operator
1808 * class and not polymorphic, tell caller to wrap the non-Const expression
1809 * in a RelabelType. This matches what parse_coerce.c does.
1810 */
1811 *need_relabel = (key->parttypid[col] != key->partopcintype[col] &&
1812 key->partopcintype[col] != RECORDOID &&
1813 !IsPolymorphicType(key->partopcintype[col]));
1814
1815 return operoid;
1816 }
1817
1818 /*
1819 * make_partition_op_expr
1820 * Returns an Expr for the given partition key column with arg1 and
1821 * arg2 as its leftop and rightop, respectively
1822 */
1823 static Expr *
make_partition_op_expr(PartitionKey key,int keynum,uint16 strategy,Expr * arg1,Expr * arg2)1824 make_partition_op_expr(PartitionKey key, int keynum,
1825 uint16 strategy, Expr *arg1, Expr *arg2)
1826 {
1827 Oid operoid;
1828 bool need_relabel = false;
1829 Expr *result = NULL;
1830
1831 /* Get the correct btree operator for this partitioning column */
1832 operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
1833
1834 /*
1835 * Chosen operator may be such that the non-Const operand needs to be
1836 * coerced, so apply the same; see the comment in
1837 * get_partition_operator().
1838 */
1839 if (!IsA(arg1, Const) &&
1840 (need_relabel ||
1841 key->partcollation[keynum] != key->parttypcoll[keynum]))
1842 arg1 = (Expr *) makeRelabelType(arg1,
1843 key->partopcintype[keynum],
1844 -1,
1845 key->partcollation[keynum],
1846 COERCE_EXPLICIT_CAST);
1847
1848 /* Generate the actual expression */
1849 switch (key->strategy)
1850 {
1851 case PARTITION_STRATEGY_LIST:
1852 {
1853 List *elems = (List *) arg2;
1854 int nelems = list_length(elems);
1855
1856 Assert(nelems >= 1);
1857 Assert(keynum == 0);
1858
1859 if (nelems > 1 &&
1860 !type_is_array(key->parttypid[keynum]))
1861 {
1862 ArrayExpr *arrexpr;
1863 ScalarArrayOpExpr *saopexpr;
1864
1865 /* Construct an ArrayExpr for the right-hand inputs */
1866 arrexpr = makeNode(ArrayExpr);
1867 arrexpr->array_typeid =
1868 get_array_type(key->parttypid[keynum]);
1869 arrexpr->array_collid = key->parttypcoll[keynum];
1870 arrexpr->element_typeid = key->parttypid[keynum];
1871 arrexpr->elements = elems;
1872 arrexpr->multidims = false;
1873 arrexpr->location = -1;
1874
1875 /* Build leftop = ANY (rightop) */
1876 saopexpr = makeNode(ScalarArrayOpExpr);
1877 saopexpr->opno = operoid;
1878 saopexpr->opfuncid = get_opcode(operoid);
1879 saopexpr->useOr = true;
1880 saopexpr->inputcollid = key->partcollation[keynum];
1881 saopexpr->args = list_make2(arg1, arrexpr);
1882 saopexpr->location = -1;
1883
1884 result = (Expr *) saopexpr;
1885 }
1886 else
1887 {
1888 List *elemops = NIL;
1889 ListCell *lc;
1890
1891 foreach(lc, elems)
1892 {
1893 Expr *elem = lfirst(lc),
1894 *elemop;
1895
1896 elemop = make_opclause(operoid,
1897 BOOLOID,
1898 false,
1899 arg1, elem,
1900 InvalidOid,
1901 key->partcollation[keynum]);
1902 elemops = lappend(elemops, elemop);
1903 }
1904
1905 result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
1906 }
1907 break;
1908 }
1909
1910 case PARTITION_STRATEGY_RANGE:
1911 result = make_opclause(operoid,
1912 BOOLOID,
1913 false,
1914 arg1, arg2,
1915 InvalidOid,
1916 key->partcollation[keynum]);
1917 break;
1918
1919 default:
1920 elog(ERROR, "invalid partitioning strategy");
1921 break;
1922 }
1923
1924 return result;
1925 }
1926
1927 /*
1928 * get_qual_for_hash
1929 *
1930 * Returns a CHECK constraint expression to use as a hash partition's
1931 * constraint, given the parent relation and partition bound structure.
1932 *
1933 * The partition constraint for a hash partition is always a call to the
1934 * built-in function satisfies_hash_partition().
1935 */
1936 static List *
get_qual_for_hash(Relation parent,PartitionBoundSpec * spec)1937 get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
1938 {
1939 PartitionKey key = RelationGetPartitionKey(parent);
1940 FuncExpr *fexpr;
1941 Node *relidConst;
1942 Node *modulusConst;
1943 Node *remainderConst;
1944 List *args;
1945 ListCell *partexprs_item;
1946 int i;
1947
1948 /* Fixed arguments. */
1949 relidConst = (Node *) makeConst(OIDOID,
1950 -1,
1951 InvalidOid,
1952 sizeof(Oid),
1953 ObjectIdGetDatum(RelationGetRelid(parent)),
1954 false,
1955 true);
1956
1957 modulusConst = (Node *) makeConst(INT4OID,
1958 -1,
1959 InvalidOid,
1960 sizeof(int32),
1961 Int32GetDatum(spec->modulus),
1962 false,
1963 true);
1964
1965 remainderConst = (Node *) makeConst(INT4OID,
1966 -1,
1967 InvalidOid,
1968 sizeof(int32),
1969 Int32GetDatum(spec->remainder),
1970 false,
1971 true);
1972
1973 args = list_make3(relidConst, modulusConst, remainderConst);
1974 partexprs_item = list_head(key->partexprs);
1975
1976 /* Add an argument for each key column. */
1977 for (i = 0; i < key->partnatts; i++)
1978 {
1979 Node *keyCol;
1980
1981 /* Left operand */
1982 if (key->partattrs[i] != 0)
1983 {
1984 keyCol = (Node *) makeVar(1,
1985 key->partattrs[i],
1986 key->parttypid[i],
1987 key->parttypmod[i],
1988 key->parttypcoll[i],
1989 0);
1990 }
1991 else
1992 {
1993 keyCol = (Node *) copyObject(lfirst(partexprs_item));
1994 partexprs_item = lnext(partexprs_item);
1995 }
1996
1997 args = lappend(args, keyCol);
1998 }
1999
2000 fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
2001 BOOLOID,
2002 args,
2003 InvalidOid,
2004 InvalidOid,
2005 COERCE_EXPLICIT_CALL);
2006
2007 return list_make1(fexpr);
2008 }
2009
2010 /*
2011 * get_qual_for_list
2012 *
2013 * Returns an implicit-AND list of expressions to use as a list partition's
2014 * constraint, given the parent relation and partition bound structure.
2015 *
2016 * The function returns NIL for a default partition when it's the only
2017 * partition since in that case there is no constraint.
2018 */
2019 static List *
get_qual_for_list(Relation parent,PartitionBoundSpec * spec)2020 get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
2021 {
2022 PartitionKey key = RelationGetPartitionKey(parent);
2023 List *result;
2024 Expr *keyCol;
2025 Expr *opexpr;
2026 NullTest *nulltest;
2027 ListCell *cell;
2028 List *elems = NIL;
2029 bool list_has_null = false;
2030
2031 /*
2032 * Only single-column list partitioning is supported, so we are worried
2033 * only about the partition key with index 0.
2034 */
2035 Assert(key->partnatts == 1);
2036
2037 /* Construct Var or expression representing the partition column */
2038 if (key->partattrs[0] != 0)
2039 keyCol = (Expr *) makeVar(1,
2040 key->partattrs[0],
2041 key->parttypid[0],
2042 key->parttypmod[0],
2043 key->parttypcoll[0],
2044 0);
2045 else
2046 keyCol = (Expr *) copyObject(linitial(key->partexprs));
2047
2048 /*
2049 * For default list partition, collect datums for all the partitions. The
2050 * default partition constraint should check that the partition key is
2051 * equal to none of those.
2052 */
2053 if (spec->is_default)
2054 {
2055 int i;
2056 int ndatums = 0;
2057 PartitionDesc pdesc = RelationGetPartitionDesc(parent);
2058 PartitionBoundInfo boundinfo = pdesc->boundinfo;
2059
2060 if (boundinfo)
2061 {
2062 ndatums = boundinfo->ndatums;
2063
2064 if (partition_bound_accepts_nulls(boundinfo))
2065 list_has_null = true;
2066 }
2067
2068 /*
2069 * If default is the only partition, there need not be any partition
2070 * constraint on it.
2071 */
2072 if (ndatums == 0 && !list_has_null)
2073 return NIL;
2074
2075 for (i = 0; i < ndatums; i++)
2076 {
2077 Const *val;
2078
2079 /*
2080 * Construct Const from known-not-null datum. We must be careful
2081 * to copy the value, because our result has to be able to outlive
2082 * the relcache entry we're copying from.
2083 */
2084 val = makeConst(key->parttypid[0],
2085 key->parttypmod[0],
2086 key->parttypcoll[0],
2087 key->parttyplen[0],
2088 datumCopy(*boundinfo->datums[i],
2089 key->parttypbyval[0],
2090 key->parttyplen[0]),
2091 false, /* isnull */
2092 key->parttypbyval[0]);
2093
2094 elems = lappend(elems, val);
2095 }
2096 }
2097 else
2098 {
2099 /*
2100 * Create list of Consts for the allowed values, excluding any nulls.
2101 */
2102 foreach(cell, spec->listdatums)
2103 {
2104 Const *val = castNode(Const, lfirst(cell));
2105
2106 if (val->constisnull)
2107 list_has_null = true;
2108 else
2109 elems = lappend(elems, copyObject(val));
2110 }
2111 }
2112
2113 if (elems)
2114 {
2115 /*
2116 * Generate the operator expression from the non-null partition
2117 * values.
2118 */
2119 opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
2120 keyCol, (Expr *) elems);
2121 }
2122 else
2123 {
2124 /*
2125 * If there are no partition values, we don't need an operator
2126 * expression.
2127 */
2128 opexpr = NULL;
2129 }
2130
2131 if (!list_has_null)
2132 {
2133 /*
2134 * Gin up a "col IS NOT NULL" test that will be AND'd with the main
2135 * expression. This might seem redundant, but the partition routing
2136 * machinery needs it.
2137 */
2138 nulltest = makeNode(NullTest);
2139 nulltest->arg = keyCol;
2140 nulltest->nulltesttype = IS_NOT_NULL;
2141 nulltest->argisrow = false;
2142 nulltest->location = -1;
2143
2144 result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
2145 }
2146 else
2147 {
2148 /*
2149 * Gin up a "col IS NULL" test that will be OR'd with the main
2150 * expression.
2151 */
2152 nulltest = makeNode(NullTest);
2153 nulltest->arg = keyCol;
2154 nulltest->nulltesttype = IS_NULL;
2155 nulltest->argisrow = false;
2156 nulltest->location = -1;
2157
2158 if (opexpr)
2159 {
2160 Expr *or;
2161
2162 or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
2163 result = list_make1(or);
2164 }
2165 else
2166 result = list_make1(nulltest);
2167 }
2168
2169 /*
2170 * Note that, in general, applying NOT to a constraint expression doesn't
2171 * necessarily invert the set of rows it accepts, because NOT (NULL) is
2172 * NULL. However, the partition constraints we construct here never
2173 * evaluate to NULL, so applying NOT works as intended.
2174 */
2175 if (spec->is_default)
2176 {
2177 result = list_make1(make_ands_explicit(result));
2178 result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
2179 }
2180
2181 return result;
2182 }
2183
2184 /*
2185 * get_qual_for_range
2186 *
2187 * Returns an implicit-AND list of expressions to use as a range partition's
2188 * constraint, given the parent relation and partition bound structure.
2189 *
2190 * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
2191 * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
2192 * generate an expression tree of the following form:
2193 *
2194 * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
2195 * AND
2196 * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
2197 * AND
2198 * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
2199 *
2200 * It is often the case that a prefix of lower and upper bound tuples contains
2201 * the same values, for example, (al = au), in which case, we will emit an
2202 * expression tree of the following form:
2203 *
2204 * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
2205 * AND
2206 * (a = al)
2207 * AND
2208 * (b > bl OR (b = bl AND c >= cl))
2209 * AND
2210 * (b < bu OR (b = bu AND c < cu))
2211 *
2212 * If a bound datum is either MINVALUE or MAXVALUE, these expressions are
2213 * simplified using the fact that any value is greater than MINVALUE and less
2214 * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
2215 * true, and we need not emit any expression for it, and the last line becomes
2216 *
2217 * (b < bu) OR (b = bu), which is simplified to (b <= bu)
2218 *
2219 * In most common cases with only one partition column, say a, the following
2220 * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
2221 *
2222 * For default partition, it returns the negation of the constraints of all
2223 * the other partitions.
2224 *
2225 * External callers should pass for_default as false; we set it to true only
2226 * when recursing.
2227 */
2228 static List *
get_qual_for_range(Relation parent,PartitionBoundSpec * spec,bool for_default)2229 get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
2230 bool for_default)
2231 {
2232 List *result = NIL;
2233 ListCell *cell1,
2234 *cell2,
2235 *partexprs_item,
2236 *partexprs_item_saved;
2237 int i,
2238 j;
2239 PartitionRangeDatum *ldatum,
2240 *udatum;
2241 PartitionKey key = RelationGetPartitionKey(parent);
2242 Expr *keyCol;
2243 Const *lower_val,
2244 *upper_val;
2245 List *lower_or_arms,
2246 *upper_or_arms;
2247 int num_or_arms,
2248 current_or_arm;
2249 ListCell *lower_or_start_datum,
2250 *upper_or_start_datum;
2251 bool need_next_lower_arm,
2252 need_next_upper_arm;
2253
2254 if (spec->is_default)
2255 {
2256 List *or_expr_args = NIL;
2257 PartitionDesc pdesc = RelationGetPartitionDesc(parent);
2258 Oid *inhoids = pdesc->oids;
2259 int nparts = pdesc->nparts,
2260 i;
2261
2262 for (i = 0; i < nparts; i++)
2263 {
2264 Oid inhrelid = inhoids[i];
2265 HeapTuple tuple;
2266 Datum datum;
2267 bool isnull;
2268 PartitionBoundSpec *bspec;
2269
2270 tuple = SearchSysCache1(RELOID, inhrelid);
2271 if (!HeapTupleIsValid(tuple))
2272 elog(ERROR, "cache lookup failed for relation %u", inhrelid);
2273
2274 datum = SysCacheGetAttr(RELOID, tuple,
2275 Anum_pg_class_relpartbound,
2276 &isnull);
2277 if (isnull)
2278 elog(ERROR, "null relpartbound for relation %u", inhrelid);
2279
2280 bspec = (PartitionBoundSpec *)
2281 stringToNode(TextDatumGetCString(datum));
2282 if (!IsA(bspec, PartitionBoundSpec))
2283 elog(ERROR, "expected PartitionBoundSpec");
2284
2285 if (!bspec->is_default)
2286 {
2287 List *part_qual;
2288
2289 part_qual = get_qual_for_range(parent, bspec, true);
2290
2291 /*
2292 * AND the constraints of the partition and add to
2293 * or_expr_args
2294 */
2295 or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
2296 ? makeBoolExpr(AND_EXPR, part_qual, -1)
2297 : linitial(part_qual));
2298 }
2299 ReleaseSysCache(tuple);
2300 }
2301
2302 if (or_expr_args != NIL)
2303 {
2304 Expr *other_parts_constr;
2305
2306 /*
2307 * Combine the constraints obtained for non-default partitions
2308 * using OR. As requested, each of the OR's args doesn't include
2309 * the NOT NULL test for partition keys (which is to avoid its
2310 * useless repetition). Add the same now.
2311 */
2312 other_parts_constr =
2313 makeBoolExpr(AND_EXPR,
2314 lappend(get_range_nulltest(key),
2315 list_length(or_expr_args) > 1
2316 ? makeBoolExpr(OR_EXPR, or_expr_args,
2317 -1)
2318 : linitial(or_expr_args)),
2319 -1);
2320
2321 /*
2322 * Finally, the default partition contains everything *NOT*
2323 * contained in the non-default partitions.
2324 */
2325 result = list_make1(makeBoolExpr(NOT_EXPR,
2326 list_make1(other_parts_constr), -1));
2327 }
2328
2329 return result;
2330 }
2331
2332 lower_or_start_datum = list_head(spec->lowerdatums);
2333 upper_or_start_datum = list_head(spec->upperdatums);
2334 num_or_arms = key->partnatts;
2335
2336 /*
2337 * If it is the recursive call for default, we skip the get_range_nulltest
2338 * to avoid accumulating the NullTest on the same keys for each partition.
2339 */
2340 if (!for_default)
2341 result = get_range_nulltest(key);
2342
2343 /*
2344 * Iterate over the key columns and check if the corresponding lower and
2345 * upper datums are equal using the btree equality operator for the
2346 * column's type. If equal, we emit single keyCol = common_value
2347 * expression. Starting from the first column for which the corresponding
2348 * lower and upper bound datums are not equal, we generate OR expressions
2349 * as shown in the function's header comment.
2350 */
2351 i = 0;
2352 partexprs_item = list_head(key->partexprs);
2353 partexprs_item_saved = partexprs_item; /* placate compiler */
2354 forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
2355 {
2356 EState *estate;
2357 MemoryContext oldcxt;
2358 Expr *test_expr;
2359 ExprState *test_exprstate;
2360 Datum test_result;
2361 bool isNull;
2362
2363 ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
2364 udatum = castNode(PartitionRangeDatum, lfirst(cell2));
2365
2366 /*
2367 * Since get_range_key_properties() modifies partexprs_item, and we
2368 * might need to start over from the previous expression in the later
2369 * part of this function, save away the current value.
2370 */
2371 partexprs_item_saved = partexprs_item;
2372
2373 get_range_key_properties(key, i, ldatum, udatum,
2374 &partexprs_item,
2375 &keyCol,
2376 &lower_val, &upper_val);
2377
2378 /*
2379 * If either value is NULL, the corresponding partition bound is
2380 * either MINVALUE or MAXVALUE, and we treat them as unequal, because
2381 * even if they're the same, there is no common value to equate the
2382 * key column with.
2383 */
2384 if (!lower_val || !upper_val)
2385 break;
2386
2387 /* Create the test expression */
2388 estate = CreateExecutorState();
2389 oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
2390 test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
2391 (Expr *) lower_val,
2392 (Expr *) upper_val);
2393 fix_opfuncids((Node *) test_expr);
2394 test_exprstate = ExecInitExpr(test_expr, NULL);
2395 test_result = ExecEvalExprSwitchContext(test_exprstate,
2396 GetPerTupleExprContext(estate),
2397 &isNull);
2398 MemoryContextSwitchTo(oldcxt);
2399 FreeExecutorState(estate);
2400
2401 /* If not equal, go generate the OR expressions */
2402 if (!DatumGetBool(test_result))
2403 break;
2404
2405 /*
2406 * The bounds for the last key column can't be equal, because such a
2407 * range partition would never be allowed to be defined (it would have
2408 * an empty range otherwise).
2409 */
2410 if (i == key->partnatts - 1)
2411 elog(ERROR, "invalid range bound specification");
2412
2413 /* Equal, so generate keyCol = lower_val expression */
2414 result = lappend(result,
2415 make_partition_op_expr(key, i, BTEqualStrategyNumber,
2416 keyCol, (Expr *) lower_val));
2417
2418 i++;
2419 }
2420
2421 /* First pair of lower_val and upper_val that are not equal. */
2422 lower_or_start_datum = cell1;
2423 upper_or_start_datum = cell2;
2424
2425 /* OR will have as many arms as there are key columns left. */
2426 num_or_arms = key->partnatts - i;
2427 current_or_arm = 0;
2428 lower_or_arms = upper_or_arms = NIL;
2429 need_next_lower_arm = need_next_upper_arm = true;
2430 while (current_or_arm < num_or_arms)
2431 {
2432 List *lower_or_arm_args = NIL,
2433 *upper_or_arm_args = NIL;
2434
2435 /* Restart scan of columns from the i'th one */
2436 j = i;
2437 partexprs_item = partexprs_item_saved;
2438
2439 for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
2440 {
2441 PartitionRangeDatum *ldatum_next = NULL,
2442 *udatum_next = NULL;
2443
2444 ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
2445 if (lnext(cell1))
2446 ldatum_next = castNode(PartitionRangeDatum,
2447 lfirst(lnext(cell1)));
2448 udatum = castNode(PartitionRangeDatum, lfirst(cell2));
2449 if (lnext(cell2))
2450 udatum_next = castNode(PartitionRangeDatum,
2451 lfirst(lnext(cell2)));
2452 get_range_key_properties(key, j, ldatum, udatum,
2453 &partexprs_item,
2454 &keyCol,
2455 &lower_val, &upper_val);
2456
2457 if (need_next_lower_arm && lower_val)
2458 {
2459 uint16 strategy;
2460
2461 /*
2462 * For the non-last columns of this arm, use the EQ operator.
2463 * For the last column of this arm, use GT, unless this is the
2464 * last column of the whole bound check, or the next bound
2465 * datum is MINVALUE, in which case use GE.
2466 */
2467 if (j - i < current_or_arm)
2468 strategy = BTEqualStrategyNumber;
2469 else if (j == key->partnatts - 1 ||
2470 (ldatum_next &&
2471 ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
2472 strategy = BTGreaterEqualStrategyNumber;
2473 else
2474 strategy = BTGreaterStrategyNumber;
2475
2476 lower_or_arm_args = lappend(lower_or_arm_args,
2477 make_partition_op_expr(key, j,
2478 strategy,
2479 keyCol,
2480 (Expr *) lower_val));
2481 }
2482
2483 if (need_next_upper_arm && upper_val)
2484 {
2485 uint16 strategy;
2486
2487 /*
2488 * For the non-last columns of this arm, use the EQ operator.
2489 * For the last column of this arm, use LT, unless the next
2490 * bound datum is MAXVALUE, in which case use LE.
2491 */
2492 if (j - i < current_or_arm)
2493 strategy = BTEqualStrategyNumber;
2494 else if (udatum_next &&
2495 udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
2496 strategy = BTLessEqualStrategyNumber;
2497 else
2498 strategy = BTLessStrategyNumber;
2499
2500 upper_or_arm_args = lappend(upper_or_arm_args,
2501 make_partition_op_expr(key, j,
2502 strategy,
2503 keyCol,
2504 (Expr *) upper_val));
2505 }
2506
2507 /*
2508 * Did we generate enough of OR's arguments? First arm considers
2509 * the first of the remaining columns, second arm considers first
2510 * two of the remaining columns, and so on.
2511 */
2512 ++j;
2513 if (j - i > current_or_arm)
2514 {
2515 /*
2516 * We must not emit any more arms if the new column that will
2517 * be considered is unbounded, or this one was.
2518 */
2519 if (!lower_val || !ldatum_next ||
2520 ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
2521 need_next_lower_arm = false;
2522 if (!upper_val || !udatum_next ||
2523 udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
2524 need_next_upper_arm = false;
2525 break;
2526 }
2527 }
2528
2529 if (lower_or_arm_args != NIL)
2530 lower_or_arms = lappend(lower_or_arms,
2531 list_length(lower_or_arm_args) > 1
2532 ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
2533 : linitial(lower_or_arm_args));
2534
2535 if (upper_or_arm_args != NIL)
2536 upper_or_arms = lappend(upper_or_arms,
2537 list_length(upper_or_arm_args) > 1
2538 ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
2539 : linitial(upper_or_arm_args));
2540
2541 /* If no work to do in the next iteration, break away. */
2542 if (!need_next_lower_arm && !need_next_upper_arm)
2543 break;
2544
2545 ++current_or_arm;
2546 }
2547
2548 /*
2549 * Generate the OR expressions for each of lower and upper bounds (if
2550 * required), and append to the list of implicitly ANDed list of
2551 * expressions.
2552 */
2553 if (lower_or_arms != NIL)
2554 result = lappend(result,
2555 list_length(lower_or_arms) > 1
2556 ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
2557 : linitial(lower_or_arms));
2558 if (upper_or_arms != NIL)
2559 result = lappend(result,
2560 list_length(upper_or_arms) > 1
2561 ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
2562 : linitial(upper_or_arms));
2563
2564 /*
2565 * As noted above, for non-default, we return list with constant TRUE. If
2566 * the result is NIL during the recursive call for default, it implies
2567 * this is the only other partition which can hold every value of the key
2568 * except NULL. Hence we return the NullTest result skipped earlier.
2569 */
2570 if (result == NIL)
2571 result = for_default
2572 ? get_range_nulltest(key)
2573 : list_make1(makeBoolConst(true, false));
2574
2575 return result;
2576 }
2577
2578 /*
2579 * get_range_key_properties
2580 * Returns range partition key information for a given column
2581 *
2582 * This is a subroutine for get_qual_for_range, and its API is pretty
2583 * specialized to that caller.
2584 *
2585 * Constructs an Expr for the key column (returned in *keyCol) and Consts
2586 * for the lower and upper range limits (returned in *lower_val and
2587 * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
2588 * a Const. All of these structures are freshly palloc'd.
2589 *
2590 * *partexprs_item points to the cell containing the next expression in
2591 * the key->partexprs list, or NULL. It may be advanced upon return.
2592 */
2593 static void
get_range_key_properties(PartitionKey key,int keynum,PartitionRangeDatum * ldatum,PartitionRangeDatum * udatum,ListCell ** partexprs_item,Expr ** keyCol,Const ** lower_val,Const ** upper_val)2594 get_range_key_properties(PartitionKey key, int keynum,
2595 PartitionRangeDatum *ldatum,
2596 PartitionRangeDatum *udatum,
2597 ListCell **partexprs_item,
2598 Expr **keyCol,
2599 Const **lower_val, Const **upper_val)
2600 {
2601 /* Get partition key expression for this column */
2602 if (key->partattrs[keynum] != 0)
2603 {
2604 *keyCol = (Expr *) makeVar(1,
2605 key->partattrs[keynum],
2606 key->parttypid[keynum],
2607 key->parttypmod[keynum],
2608 key->parttypcoll[keynum],
2609 0);
2610 }
2611 else
2612 {
2613 if (*partexprs_item == NULL)
2614 elog(ERROR, "wrong number of partition key expressions");
2615 *keyCol = copyObject(lfirst(*partexprs_item));
2616 *partexprs_item = lnext(*partexprs_item);
2617 }
2618
2619 /* Get appropriate Const nodes for the bounds */
2620 if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
2621 *lower_val = castNode(Const, copyObject(ldatum->value));
2622 else
2623 *lower_val = NULL;
2624
2625 if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
2626 *upper_val = castNode(Const, copyObject(udatum->value));
2627 else
2628 *upper_val = NULL;
2629 }
2630
2631 /*
2632 * get_range_nulltest
2633 *
2634 * A non-default range partition table does not currently allow partition
2635 * keys to be null, so emit an IS NOT NULL expression for each key column.
2636 */
2637 static List *
get_range_nulltest(PartitionKey key)2638 get_range_nulltest(PartitionKey key)
2639 {
2640 List *result = NIL;
2641 NullTest *nulltest;
2642 ListCell *partexprs_item;
2643 int i;
2644
2645 partexprs_item = list_head(key->partexprs);
2646 for (i = 0; i < key->partnatts; i++)
2647 {
2648 Expr *keyCol;
2649
2650 if (key->partattrs[i] != 0)
2651 {
2652 keyCol = (Expr *) makeVar(1,
2653 key->partattrs[i],
2654 key->parttypid[i],
2655 key->parttypmod[i],
2656 key->parttypcoll[i],
2657 0);
2658 }
2659 else
2660 {
2661 if (partexprs_item == NULL)
2662 elog(ERROR, "wrong number of partition key expressions");
2663 keyCol = copyObject(lfirst(partexprs_item));
2664 partexprs_item = lnext(partexprs_item);
2665 }
2666
2667 nulltest = makeNode(NullTest);
2668 nulltest->arg = keyCol;
2669 nulltest->nulltesttype = IS_NOT_NULL;
2670 nulltest->argisrow = false;
2671 nulltest->location = -1;
2672 result = lappend(result, nulltest);
2673 }
2674
2675 return result;
2676 }
2677
2678 /*
2679 * compute_partition_hash_value
2680 *
2681 * Compute the hash value for given partition key values.
2682 */
2683 uint64
compute_partition_hash_value(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * values,bool * isnull)2684 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
2685 Datum *values, bool *isnull)
2686 {
2687 int i;
2688 uint64 rowHash = 0;
2689 Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
2690
2691 for (i = 0; i < partnatts; i++)
2692 {
2693 /* Nulls are just ignored */
2694 if (!isnull[i])
2695 {
2696 Datum hash;
2697
2698 Assert(OidIsValid(partsupfunc[i].fn_oid));
2699
2700 /*
2701 * Compute hash for each datum value by calling respective
2702 * datatype-specific hash functions of each partition key
2703 * attribute.
2704 */
2705 hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i],
2706 values[i], seed);
2707
2708 /* Form a single 64-bit hash value */
2709 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
2710 }
2711 }
2712
2713 return rowHash;
2714 }
2715
2716 /*
2717 * satisfies_hash_partition
2718 *
2719 * This is an SQL-callable function for use in hash partition constraints.
2720 * The first three arguments are the parent table OID, modulus, and remainder.
2721 * The remaining arguments are the value of the partitioning columns (or
2722 * expressions); these are hashed and the results are combined into a single
2723 * hash value by calling hash_combine64.
2724 *
2725 * Returns true if remainder produced when this computed single hash value is
2726 * divided by the given modulus is equal to given remainder, otherwise false.
2727 * NB: it's important that this never return null, as the constraint machinery
2728 * would consider that to be a "pass".
2729 *
2730 * See get_qual_for_hash() for usage.
2731 */
2732 Datum
satisfies_hash_partition(PG_FUNCTION_ARGS)2733 satisfies_hash_partition(PG_FUNCTION_ARGS)
2734 {
2735 typedef struct ColumnsHashData
2736 {
2737 Oid relid;
2738 int nkeys;
2739 Oid variadic_type;
2740 int16 variadic_typlen;
2741 bool variadic_typbyval;
2742 char variadic_typalign;
2743 Oid partcollid[PARTITION_MAX_KEYS];
2744 FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
2745 } ColumnsHashData;
2746 Oid parentId;
2747 int modulus;
2748 int remainder;
2749 Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
2750 ColumnsHashData *my_extra;
2751 uint64 rowHash = 0;
2752
2753 /* Return false if the parent OID, modulus, or remainder is NULL. */
2754 if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
2755 PG_RETURN_BOOL(false);
2756 parentId = PG_GETARG_OID(0);
2757 modulus = PG_GETARG_INT32(1);
2758 remainder = PG_GETARG_INT32(2);
2759
2760 /* Sanity check modulus and remainder. */
2761 if (modulus <= 0)
2762 ereport(ERROR,
2763 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2764 errmsg("modulus for hash partition must be an integer value greater than zero")));
2765 if (remainder < 0)
2766 ereport(ERROR,
2767 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2768 errmsg("remainder for hash partition must be an integer value greater than or equal to zero")));
2769 if (remainder >= modulus)
2770 ereport(ERROR,
2771 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2772 errmsg("remainder for hash partition must be less than modulus")));
2773
2774 /*
2775 * Cache hash function information.
2776 */
2777 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
2778 if (my_extra == NULL || my_extra->relid != parentId)
2779 {
2780 Relation parent;
2781 PartitionKey key;
2782 int j;
2783
2784 /* Open parent relation and fetch partition key info */
2785 parent = relation_open(parentId, AccessShareLock);
2786 key = RelationGetPartitionKey(parent);
2787
2788 /* Reject parent table that is not hash-partitioned. */
2789 if (key == NULL || key->strategy != PARTITION_STRATEGY_HASH)
2790 ereport(ERROR,
2791 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2792 errmsg("\"%s\" is not a hash partitioned table",
2793 get_rel_name(parentId))));
2794
2795 if (!get_fn_expr_variadic(fcinfo->flinfo))
2796 {
2797 int nargs = PG_NARGS() - 3;
2798
2799 /* complain if wrong number of column values */
2800 if (key->partnatts != nargs)
2801 ereport(ERROR,
2802 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2803 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
2804 key->partnatts, nargs)));
2805
2806 /* allocate space for our cache */
2807 fcinfo->flinfo->fn_extra =
2808 MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
2809 offsetof(ColumnsHashData, partsupfunc) +
2810 sizeof(FmgrInfo) * nargs);
2811 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
2812 my_extra->relid = parentId;
2813 my_extra->nkeys = key->partnatts;
2814 memcpy(my_extra->partcollid, key->partcollation,
2815 key->partnatts * sizeof(Oid));
2816
2817 /* check argument types and save fmgr_infos */
2818 for (j = 0; j < key->partnatts; ++j)
2819 {
2820 Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
2821
2822 if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
2823 ereport(ERROR,
2824 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2825 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
2826 j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
2827
2828 fmgr_info_copy(&my_extra->partsupfunc[j],
2829 &key->partsupfunc[j],
2830 fcinfo->flinfo->fn_mcxt);
2831 }
2832 }
2833 else
2834 {
2835 ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
2836
2837 /* allocate space for our cache -- just one FmgrInfo in this case */
2838 fcinfo->flinfo->fn_extra =
2839 MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
2840 offsetof(ColumnsHashData, partsupfunc) +
2841 sizeof(FmgrInfo));
2842 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
2843 my_extra->relid = parentId;
2844 my_extra->nkeys = key->partnatts;
2845 my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
2846 get_typlenbyvalalign(my_extra->variadic_type,
2847 &my_extra->variadic_typlen,
2848 &my_extra->variadic_typbyval,
2849 &my_extra->variadic_typalign);
2850 my_extra->partcollid[0] = key->partcollation[0];
2851
2852 /* check argument types */
2853 for (j = 0; j < key->partnatts; ++j)
2854 if (key->parttypid[j] != my_extra->variadic_type)
2855 ereport(ERROR,
2856 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2857 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
2858 j + 1,
2859 format_type_be(key->parttypid[j]),
2860 format_type_be(my_extra->variadic_type))));
2861
2862 fmgr_info_copy(&my_extra->partsupfunc[0],
2863 &key->partsupfunc[0],
2864 fcinfo->flinfo->fn_mcxt);
2865 }
2866
2867 /* Hold lock until commit */
2868 relation_close(parent, NoLock);
2869 }
2870
2871 if (!OidIsValid(my_extra->variadic_type))
2872 {
2873 int nkeys = my_extra->nkeys;
2874 int i;
2875
2876 /*
2877 * For a non-variadic call, neither the number of arguments nor their
2878 * types can change across calls, so avoid the expense of rechecking
2879 * here.
2880 */
2881
2882 for (i = 0; i < nkeys; i++)
2883 {
2884 Datum hash;
2885
2886 /* keys start from fourth argument of function. */
2887 int argno = i + 3;
2888
2889 if (PG_ARGISNULL(argno))
2890 continue;
2891
2892 hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
2893 my_extra->partcollid[i],
2894 PG_GETARG_DATUM(argno),
2895 seed);
2896
2897 /* Form a single 64-bit hash value */
2898 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
2899 }
2900 }
2901 else
2902 {
2903 ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
2904 int i;
2905 int nelems;
2906 Datum *datum;
2907 bool *isnull;
2908
2909 deconstruct_array(variadic_array,
2910 my_extra->variadic_type,
2911 my_extra->variadic_typlen,
2912 my_extra->variadic_typbyval,
2913 my_extra->variadic_typalign,
2914 &datum, &isnull, &nelems);
2915
2916 /* complain if wrong number of column values */
2917 if (nelems != my_extra->nkeys)
2918 ereport(ERROR,
2919 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2920 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
2921 my_extra->nkeys, nelems)));
2922
2923 for (i = 0; i < nelems; i++)
2924 {
2925 Datum hash;
2926
2927 if (isnull[i])
2928 continue;
2929
2930 hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
2931 my_extra->partcollid[0],
2932 datum[i],
2933 seed);
2934
2935 /* Form a single 64-bit hash value */
2936 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
2937 }
2938 }
2939
2940 PG_RETURN_BOOL(rowHash % modulus == remainder);
2941 }
2942