1 /*-------------------------------------------------------------------------
2 *
3 * partbounds.c
4 * Support routines for manipulating partition bounds
5 *
6 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * IDENTIFICATION
10 * src/backend/partitioning/partbounds.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15 #include "postgres.h"
16
17 #include "access/relation.h"
18 #include "access/table.h"
19 #include "access/tableam.h"
20 #include "catalog/partition.h"
21 #include "catalog/pg_inherits.h"
22 #include "catalog/pg_type.h"
23 #include "commands/tablecmds.h"
24 #include "common/hashfn.h"
25 #include "executor/executor.h"
26 #include "miscadmin.h"
27 #include "nodes/makefuncs.h"
28 #include "nodes/nodeFuncs.h"
29 #include "nodes/pathnodes.h"
30 #include "parser/parse_coerce.h"
31 #include "partitioning/partbounds.h"
32 #include "partitioning/partdesc.h"
33 #include "partitioning/partprune.h"
34 #include "utils/builtins.h"
35 #include "utils/datum.h"
36 #include "utils/fmgroids.h"
37 #include "utils/lsyscache.h"
38 #include "utils/partcache.h"
39 #include "utils/ruleutils.h"
40 #include "utils/snapmgr.h"
41 #include "utils/syscache.h"
42
43 /*
44 * When qsort'ing partition bounds after reading from the catalog, each bound
45 * is represented with one of the following structs.
46 */
47
48 /* One bound of a hash partition */
49 typedef struct PartitionHashBound
50 {
51 int modulus;
52 int remainder;
53 int index;
54 } PartitionHashBound;
55
56 /* One value coming from some (index'th) list partition */
57 typedef struct PartitionListValue
58 {
59 int index;
60 Datum value;
61 } PartitionListValue;
62
63 /* One bound of a range partition */
64 typedef struct PartitionRangeBound
65 {
66 int index;
67 Datum *datums; /* range bound datums */
68 PartitionRangeDatumKind *kind; /* the kind of each datum */
69 bool lower; /* this is the lower (vs upper) bound */
70 } PartitionRangeBound;
71
72 /*
73 * Mapping from partitions of a joining relation to partitions of a join
74 * relation being computed (a.k.a merged partitions)
75 */
76 typedef struct PartitionMap
77 {
78 int nparts; /* number of partitions */
79 int *merged_indexes; /* indexes of merged partitions */
80 bool *merged; /* flags to indicate whether partitions are
81 * merged with non-dummy partitions */
82 bool did_remapping; /* did we re-map partitions? */
83 int *old_indexes; /* old indexes of merged partitions if
84 * did_remapping */
85 } PartitionMap;
86
87 /* Macro for comparing two range bounds */
88 #define compare_range_bounds(partnatts, partsupfunc, partcollations, \
89 bound1, bound2) \
90 (partition_rbound_cmp(partnatts, partsupfunc, partcollations, \
91 (bound1)->datums, (bound1)->kind, (bound1)->lower, \
92 bound2))
93
94 static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
95 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
96 void *arg);
97 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
98 void *arg);
99 static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs,
100 int nparts, PartitionKey key, int **mapping);
101 static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs,
102 int nparts, PartitionKey key, int **mapping);
103 static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs,
104 int nparts, PartitionKey key, int **mapping);
105 static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc,
106 Oid *collations,
107 RelOptInfo *outer_rel,
108 RelOptInfo *inner_rel,
109 JoinType jointype,
110 List **outer_parts,
111 List **inner_parts);
112 static PartitionBoundInfo merge_range_bounds(int partnatts,
113 FmgrInfo *partsupfuncs,
114 Oid *partcollations,
115 RelOptInfo *outer_rel,
116 RelOptInfo *inner_rel,
117 JoinType jointype,
118 List **outer_parts,
119 List **inner_parts);
120 static void init_partition_map(RelOptInfo *rel, PartitionMap *map);
121 static void free_partition_map(PartitionMap *map);
122 static bool is_dummy_partition(RelOptInfo *rel, int part_index);
123 static int merge_matching_partitions(PartitionMap *outer_map,
124 PartitionMap *inner_map,
125 int outer_part,
126 int inner_part,
127 int *next_index);
128 static int process_outer_partition(PartitionMap *outer_map,
129 PartitionMap *inner_map,
130 bool outer_has_default,
131 bool inner_has_default,
132 int outer_index,
133 int inner_default,
134 JoinType jointype,
135 int *next_index,
136 int *default_index);
137 static int process_inner_partition(PartitionMap *outer_map,
138 PartitionMap *inner_map,
139 bool outer_has_default,
140 bool inner_has_default,
141 int inner_index,
142 int outer_default,
143 JoinType jointype,
144 int *next_index,
145 int *default_index);
146 static void merge_null_partitions(PartitionMap *outer_map,
147 PartitionMap *inner_map,
148 bool outer_has_null,
149 bool inner_has_null,
150 int outer_null,
151 int inner_null,
152 JoinType jointype,
153 int *next_index,
154 int *null_index);
155 static void merge_default_partitions(PartitionMap *outer_map,
156 PartitionMap *inner_map,
157 bool outer_has_default,
158 bool inner_has_default,
159 int outer_default,
160 int inner_default,
161 JoinType jointype,
162 int *next_index,
163 int *default_index);
164 static int merge_partition_with_dummy(PartitionMap *map, int index,
165 int *next_index);
166 static void fix_merged_indexes(PartitionMap *outer_map,
167 PartitionMap *inner_map,
168 int nmerged, List *merged_indexes);
169 static void generate_matching_part_pairs(RelOptInfo *outer_rel,
170 RelOptInfo *inner_rel,
171 PartitionMap *outer_map,
172 PartitionMap *inner_map,
173 int nmerged,
174 List **outer_parts,
175 List **inner_parts);
176 static PartitionBoundInfo build_merged_partition_bounds(char strategy,
177 List *merged_datums,
178 List *merged_kinds,
179 List *merged_indexes,
180 int null_index,
181 int default_index);
182 static int get_range_partition(RelOptInfo *rel,
183 PartitionBoundInfo bi,
184 int *lb_pos,
185 PartitionRangeBound *lb,
186 PartitionRangeBound *ub);
187 static int get_range_partition_internal(PartitionBoundInfo bi,
188 int *lb_pos,
189 PartitionRangeBound *lb,
190 PartitionRangeBound *ub);
191 static bool compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
192 Oid *partcollations,
193 PartitionRangeBound *outer_lb,
194 PartitionRangeBound *outer_ub,
195 PartitionRangeBound *inner_lb,
196 PartitionRangeBound *inner_ub,
197 int *lb_cmpval, int *ub_cmpval);
198 static void get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
199 Oid *partcollations, JoinType jointype,
200 PartitionRangeBound *outer_lb,
201 PartitionRangeBound *outer_ub,
202 PartitionRangeBound *inner_lb,
203 PartitionRangeBound *inner_ub,
204 int lb_cmpval, int ub_cmpval,
205 PartitionRangeBound *merged_lb,
206 PartitionRangeBound *merged_ub);
207 static void add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
208 Oid *partcollations,
209 PartitionRangeBound *merged_lb,
210 PartitionRangeBound *merged_ub,
211 int merged_index,
212 List **merged_datums,
213 List **merged_kinds,
214 List **merged_indexes);
215 static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index,
216 List *datums, bool lower);
217 static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
218 int remainder2);
219 static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
220 Oid *partcollation, Datum *datums1,
221 PartitionRangeDatumKind *kind1, bool lower1,
222 PartitionRangeBound *b2);
223 static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
224 Oid *partcollation,
225 PartitionBoundInfo boundinfo,
226 PartitionRangeBound *probe, bool *is_equal);
227 static Expr *make_partition_op_expr(PartitionKey key, int keynum,
228 uint16 strategy, Expr *arg1, Expr *arg2);
229 static Oid get_partition_operator(PartitionKey key, int col,
230 StrategyNumber strategy, bool *need_relabel);
231 static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
232 static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
233 static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
234 bool for_default);
235 static void get_range_key_properties(PartitionKey key, int keynum,
236 PartitionRangeDatum *ldatum,
237 PartitionRangeDatum *udatum,
238 ListCell **partexprs_item,
239 Expr **keyCol,
240 Const **lower_val, Const **upper_val);
241 static List *get_range_nulltest(PartitionKey key);
242
243 /*
244 * get_qual_from_partbound
245 * Given a parser node for partition bound, return the list of executable
246 * expressions as partition constraint
247 */
248 List *
get_qual_from_partbound(Relation rel,Relation parent,PartitionBoundSpec * spec)249 get_qual_from_partbound(Relation rel, Relation parent,
250 PartitionBoundSpec *spec)
251 {
252 PartitionKey key = RelationGetPartitionKey(parent);
253 List *my_qual = NIL;
254
255 Assert(key != NULL);
256
257 switch (key->strategy)
258 {
259 case PARTITION_STRATEGY_HASH:
260 Assert(spec->strategy == PARTITION_STRATEGY_HASH);
261 my_qual = get_qual_for_hash(parent, spec);
262 break;
263
264 case PARTITION_STRATEGY_LIST:
265 Assert(spec->strategy == PARTITION_STRATEGY_LIST);
266 my_qual = get_qual_for_list(parent, spec);
267 break;
268
269 case PARTITION_STRATEGY_RANGE:
270 Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
271 my_qual = get_qual_for_range(parent, spec, false);
272 break;
273
274 default:
275 elog(ERROR, "unexpected partition strategy: %d",
276 (int) key->strategy);
277 }
278
279 return my_qual;
280 }
281
282 /*
283 * partition_bounds_create
284 * Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
285 * nodes
286 *
287 * This function creates a PartitionBoundInfo and fills the values of its
288 * various members based on the input list. Importantly, 'datums' array will
289 * contain Datum representation of individual bounds (possibly after
290 * de-duplication as in case of range bounds), sorted in a canonical order
291 * defined by qsort_partition_* functions of respective partitioning methods.
292 * 'indexes' array will contain as many elements as there are bounds (specific
293 * exceptions to this rule are listed in the function body), which represent
294 * the 0-based canonical positions of partitions.
295 *
296 * Upon return from this function, *mapping is set to an array of
297 * list_length(boundspecs) elements, each of which maps the original index of
298 * a partition to its canonical index.
299 *
300 * Note: The objects returned by this function are wholly allocated in the
301 * current memory context.
302 */
303 PartitionBoundInfo
partition_bounds_create(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)304 partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts,
305 PartitionKey key, int **mapping)
306 {
307 int i;
308
309 Assert(nparts > 0);
310
311 /*
312 * For each partitioning method, we first convert the partition bounds
313 * from their parser node representation to the internal representation,
314 * along with any additional preprocessing (such as de-duplicating range
315 * bounds). Resulting bound datums are then added to the 'datums' array
316 * in PartitionBoundInfo. For each datum added, an integer indicating the
317 * canonical partition index is added to the 'indexes' array.
318 *
319 * For each bound, we remember its partition's position (0-based) in the
320 * original list to later map it to the canonical index.
321 */
322
323 /*
324 * Initialize mapping array with invalid values, this is filled within
325 * each sub-routine below depending on the bound type.
326 */
327 *mapping = (int *) palloc(sizeof(int) * nparts);
328 for (i = 0; i < nparts; i++)
329 (*mapping)[i] = -1;
330
331 switch (key->strategy)
332 {
333 case PARTITION_STRATEGY_HASH:
334 return create_hash_bounds(boundspecs, nparts, key, mapping);
335
336 case PARTITION_STRATEGY_LIST:
337 return create_list_bounds(boundspecs, nparts, key, mapping);
338
339 case PARTITION_STRATEGY_RANGE:
340 return create_range_bounds(boundspecs, nparts, key, mapping);
341
342 default:
343 elog(ERROR, "unexpected partition strategy: %d",
344 (int) key->strategy);
345 break;
346 }
347
348 Assert(false);
349 return NULL; /* keep compiler quiet */
350 }
351
352 /*
353 * create_hash_bounds
354 * Create a PartitionBoundInfo for a hash partitioned table
355 */
356 static PartitionBoundInfo
create_hash_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)357 create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
358 PartitionKey key, int **mapping)
359 {
360 PartitionBoundInfo boundinfo;
361 PartitionHashBound **hbounds = NULL;
362 int i;
363 int ndatums = 0;
364 int greatest_modulus;
365
366 boundinfo = (PartitionBoundInfoData *)
367 palloc0(sizeof(PartitionBoundInfoData));
368 boundinfo->strategy = key->strategy;
369 /* No special hash partitions. */
370 boundinfo->null_index = -1;
371 boundinfo->default_index = -1;
372
373 ndatums = nparts;
374 hbounds = (PartitionHashBound **)
375 palloc(nparts * sizeof(PartitionHashBound *));
376
377 /* Convert from node to the internal representation */
378 for (i = 0; i < nparts; i++)
379 {
380 PartitionBoundSpec *spec = boundspecs[i];
381
382 if (spec->strategy != PARTITION_STRATEGY_HASH)
383 elog(ERROR, "invalid strategy in partition bound spec");
384
385 hbounds[i] = (PartitionHashBound *) palloc(sizeof(PartitionHashBound));
386 hbounds[i]->modulus = spec->modulus;
387 hbounds[i]->remainder = spec->remainder;
388 hbounds[i]->index = i;
389 }
390
391 /* Sort all the bounds in ascending order */
392 qsort(hbounds, nparts, sizeof(PartitionHashBound *),
393 qsort_partition_hbound_cmp);
394
395 /* After sorting, moduli are now stored in ascending order. */
396 greatest_modulus = hbounds[ndatums - 1]->modulus;
397
398 boundinfo->ndatums = ndatums;
399 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
400 boundinfo->nindexes = greatest_modulus;
401 boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
402 for (i = 0; i < greatest_modulus; i++)
403 boundinfo->indexes[i] = -1;
404
405 /*
406 * For hash partitioning, there are as many datums (modulus and remainder
407 * pairs) as there are partitions. Indexes are simply values ranging from
408 * 0 to (nparts - 1).
409 */
410 for (i = 0; i < nparts; i++)
411 {
412 int modulus = hbounds[i]->modulus;
413 int remainder = hbounds[i]->remainder;
414
415 boundinfo->datums[i] = (Datum *) palloc(2 * sizeof(Datum));
416 boundinfo->datums[i][0] = Int32GetDatum(modulus);
417 boundinfo->datums[i][1] = Int32GetDatum(remainder);
418
419 while (remainder < greatest_modulus)
420 {
421 /* overlap? */
422 Assert(boundinfo->indexes[remainder] == -1);
423 boundinfo->indexes[remainder] = i;
424 remainder += modulus;
425 }
426
427 (*mapping)[hbounds[i]->index] = i;
428 pfree(hbounds[i]);
429 }
430 pfree(hbounds);
431
432 return boundinfo;
433 }
434
435 /*
436 * create_list_bounds
437 * Create a PartitionBoundInfo for a list partitioned table
438 */
439 static PartitionBoundInfo
create_list_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)440 create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
441 PartitionKey key, int **mapping)
442 {
443 PartitionBoundInfo boundinfo;
444 PartitionListValue **all_values = NULL;
445 ListCell *cell;
446 int i = 0;
447 int ndatums = 0;
448 int next_index = 0;
449 int default_index = -1;
450 int null_index = -1;
451 List *non_null_values = NIL;
452
453 boundinfo = (PartitionBoundInfoData *)
454 palloc0(sizeof(PartitionBoundInfoData));
455 boundinfo->strategy = key->strategy;
456 /* Will be set correctly below. */
457 boundinfo->null_index = -1;
458 boundinfo->default_index = -1;
459
460 /* Create a unified list of non-null values across all partitions. */
461 for (i = 0; i < nparts; i++)
462 {
463 PartitionBoundSpec *spec = boundspecs[i];
464 ListCell *c;
465
466 if (spec->strategy != PARTITION_STRATEGY_LIST)
467 elog(ERROR, "invalid strategy in partition bound spec");
468
469 /*
470 * Note the index of the partition bound spec for the default
471 * partition. There's no datum to add to the list on non-null datums
472 * for this partition.
473 */
474 if (spec->is_default)
475 {
476 default_index = i;
477 continue;
478 }
479
480 foreach(c, spec->listdatums)
481 {
482 Const *val = castNode(Const, lfirst(c));
483 PartitionListValue *list_value = NULL;
484
485 if (!val->constisnull)
486 {
487 list_value = (PartitionListValue *)
488 palloc0(sizeof(PartitionListValue));
489 list_value->index = i;
490 list_value->value = val->constvalue;
491 }
492 else
493 {
494 /*
495 * Never put a null into the values array; save the index of
496 * the partition that stores nulls, instead.
497 */
498 if (null_index != -1)
499 elog(ERROR, "found null more than once");
500 null_index = i;
501 }
502
503 if (list_value)
504 non_null_values = lappend(non_null_values, list_value);
505 }
506 }
507
508 ndatums = list_length(non_null_values);
509
510 /*
511 * Collect all list values in one array. Alongside the value, we also save
512 * the index of partition the value comes from.
513 */
514 all_values = (PartitionListValue **)
515 palloc(ndatums * sizeof(PartitionListValue *));
516 i = 0;
517 foreach(cell, non_null_values)
518 {
519 PartitionListValue *src = lfirst(cell);
520
521 all_values[i] = (PartitionListValue *)
522 palloc(sizeof(PartitionListValue));
523 all_values[i]->value = src->value;
524 all_values[i]->index = src->index;
525 i++;
526 }
527
528 qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
529 qsort_partition_list_value_cmp, (void *) key);
530
531 boundinfo->ndatums = ndatums;
532 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
533 boundinfo->nindexes = ndatums;
534 boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
535
536 /*
537 * Copy values. Canonical indexes are values ranging from 0 to (nparts -
538 * 1) assigned to each partition such that all datums of a given partition
539 * receive the same value. The value for a given partition is the index of
540 * that partition's smallest datum in the all_values[] array.
541 */
542 for (i = 0; i < ndatums; i++)
543 {
544 int orig_index = all_values[i]->index;
545
546 boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
547 boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
548 key->parttypbyval[0],
549 key->parttyplen[0]);
550
551 /* If the old index has no mapping, assign one */
552 if ((*mapping)[orig_index] == -1)
553 (*mapping)[orig_index] = next_index++;
554
555 boundinfo->indexes[i] = (*mapping)[orig_index];
556 }
557
558 /*
559 * Set the canonical value for null_index, if any.
560 *
561 * It is possible that the null-accepting partition has not been assigned
562 * an index yet, which could happen if such partition accepts only null
563 * and hence not handled in the above loop which only looked at non-null
564 * values.
565 */
566 if (null_index != -1)
567 {
568 Assert(null_index >= 0);
569 if ((*mapping)[null_index] == -1)
570 (*mapping)[null_index] = next_index++;
571 boundinfo->null_index = (*mapping)[null_index];
572 }
573
574 /* Set the canonical value for default_index, if any. */
575 if (default_index != -1)
576 {
577 /*
578 * The default partition accepts any value not specified in the lists
579 * of other partitions, hence it should not get mapped index while
580 * assigning those for non-null datums.
581 */
582 Assert(default_index >= 0);
583 Assert((*mapping)[default_index] == -1);
584 (*mapping)[default_index] = next_index++;
585 boundinfo->default_index = (*mapping)[default_index];
586 }
587
588 /* All partitions must now have been assigned canonical indexes. */
589 Assert(next_index == nparts);
590 return boundinfo;
591 }
592
593 /*
594 * create_range_bounds
595 * Create a PartitionBoundInfo for a range partitioned table
596 */
597 static PartitionBoundInfo
create_range_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)598 create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
599 PartitionKey key, int **mapping)
600 {
601 PartitionBoundInfo boundinfo;
602 PartitionRangeBound **rbounds = NULL;
603 PartitionRangeBound **all_bounds,
604 *prev;
605 int i,
606 k;
607 int ndatums = 0;
608 int default_index = -1;
609 int next_index = 0;
610
611 boundinfo = (PartitionBoundInfoData *)
612 palloc0(sizeof(PartitionBoundInfoData));
613 boundinfo->strategy = key->strategy;
614 /* There is no special null-accepting range partition. */
615 boundinfo->null_index = -1;
616 /* Will be set correctly below. */
617 boundinfo->default_index = -1;
618
619 all_bounds = (PartitionRangeBound **)
620 palloc0(2 * nparts * sizeof(PartitionRangeBound *));
621
622 /* Create a unified list of range bounds across all the partitions. */
623 ndatums = 0;
624 for (i = 0; i < nparts; i++)
625 {
626 PartitionBoundSpec *spec = boundspecs[i];
627 PartitionRangeBound *lower,
628 *upper;
629
630 if (spec->strategy != PARTITION_STRATEGY_RANGE)
631 elog(ERROR, "invalid strategy in partition bound spec");
632
633 /*
634 * Note the index of the partition bound spec for the default
635 * partition. There's no datum to add to the all_bounds array for
636 * this partition.
637 */
638 if (spec->is_default)
639 {
640 default_index = i;
641 continue;
642 }
643
644 lower = make_one_partition_rbound(key, i, spec->lowerdatums, true);
645 upper = make_one_partition_rbound(key, i, spec->upperdatums, false);
646 all_bounds[ndatums++] = lower;
647 all_bounds[ndatums++] = upper;
648 }
649
650 Assert(ndatums == nparts * 2 ||
651 (default_index != -1 && ndatums == (nparts - 1) * 2));
652
653 /* Sort all the bounds in ascending order */
654 qsort_arg(all_bounds, ndatums,
655 sizeof(PartitionRangeBound *),
656 qsort_partition_rbound_cmp,
657 (void *) key);
658
659 /* Save distinct bounds from all_bounds into rbounds. */
660 rbounds = (PartitionRangeBound **)
661 palloc(ndatums * sizeof(PartitionRangeBound *));
662 k = 0;
663 prev = NULL;
664 for (i = 0; i < ndatums; i++)
665 {
666 PartitionRangeBound *cur = all_bounds[i];
667 bool is_distinct = false;
668 int j;
669
670 /* Is the current bound distinct from the previous one? */
671 for (j = 0; j < key->partnatts; j++)
672 {
673 Datum cmpval;
674
675 if (prev == NULL || cur->kind[j] != prev->kind[j])
676 {
677 is_distinct = true;
678 break;
679 }
680
681 /*
682 * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
683 * them as equal, since any values after this point must be
684 * ignored.
685 */
686 if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
687 break;
688
689 cmpval = FunctionCall2Coll(&key->partsupfunc[j],
690 key->partcollation[j],
691 cur->datums[j],
692 prev->datums[j]);
693 if (DatumGetInt32(cmpval) != 0)
694 {
695 is_distinct = true;
696 break;
697 }
698 }
699
700 /*
701 * Only if the bound is distinct save it into a temporary array, i.e,
702 * rbounds which is later copied into boundinfo datums array.
703 */
704 if (is_distinct)
705 rbounds[k++] = all_bounds[i];
706
707 prev = cur;
708 }
709
710 /* Update ndatums to hold the count of distinct datums. */
711 ndatums = k;
712
713 /*
714 * Add datums to boundinfo. Canonical indexes are values ranging from 0
715 * to nparts - 1, assigned in that order to each partition's upper bound.
716 * For 'datums' elements that are lower bounds, there is -1 in the
717 * 'indexes' array to signify that no partition exists for the values less
718 * than such a bound and greater than or equal to the previous upper
719 * bound.
720 */
721 boundinfo->ndatums = ndatums;
722 boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
723 boundinfo->kind = (PartitionRangeDatumKind **)
724 palloc(ndatums *
725 sizeof(PartitionRangeDatumKind *));
726
727 /*
728 * For range partitioning, an additional value of -1 is stored as the last
729 * element of the indexes[] array.
730 */
731 boundinfo->nindexes = ndatums + 1;
732 boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
733
734 for (i = 0; i < ndatums; i++)
735 {
736 int j;
737
738 boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
739 sizeof(Datum));
740 boundinfo->kind[i] = (PartitionRangeDatumKind *)
741 palloc(key->partnatts *
742 sizeof(PartitionRangeDatumKind));
743 for (j = 0; j < key->partnatts; j++)
744 {
745 if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
746 boundinfo->datums[i][j] =
747 datumCopy(rbounds[i]->datums[j],
748 key->parttypbyval[j],
749 key->parttyplen[j]);
750 boundinfo->kind[i][j] = rbounds[i]->kind[j];
751 }
752
753 /*
754 * There is no mapping for invalid indexes.
755 *
756 * Any lower bounds in the rbounds array have invalid indexes
757 * assigned, because the values between the previous bound (if there
758 * is one) and this (lower) bound are not part of the range of any
759 * existing partition.
760 */
761 if (rbounds[i]->lower)
762 boundinfo->indexes[i] = -1;
763 else
764 {
765 int orig_index = rbounds[i]->index;
766
767 /* If the old index has no mapping, assign one */
768 if ((*mapping)[orig_index] == -1)
769 (*mapping)[orig_index] = next_index++;
770
771 boundinfo->indexes[i] = (*mapping)[orig_index];
772 }
773 }
774
775 /* Set the canonical value for default_index, if any. */
776 if (default_index != -1)
777 {
778 Assert(default_index >= 0 && (*mapping)[default_index] == -1);
779 (*mapping)[default_index] = next_index++;
780 boundinfo->default_index = (*mapping)[default_index];
781 }
782
783 /* The extra -1 element. */
784 Assert(i == ndatums);
785 boundinfo->indexes[i] = -1;
786
787 /* All partitions must now have been assigned canonical indexes. */
788 Assert(next_index == nparts);
789 return boundinfo;
790 }
791
792 /*
793 * Are two partition bound collections logically equal?
794 *
795 * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
796 * This is also useful when b1 and b2 are bound collections of two separate
797 * relations, respectively, because PartitionBoundInfo is a canonical
798 * representation of partition bounds.
799 */
800 bool
partition_bounds_equal(int partnatts,int16 * parttyplen,bool * parttypbyval,PartitionBoundInfo b1,PartitionBoundInfo b2)801 partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
802 PartitionBoundInfo b1, PartitionBoundInfo b2)
803 {
804 int i;
805
806 if (b1->strategy != b2->strategy)
807 return false;
808
809 if (b1->ndatums != b2->ndatums)
810 return false;
811
812 if (b1->nindexes != b2->nindexes)
813 return false;
814
815 if (b1->null_index != b2->null_index)
816 return false;
817
818 if (b1->default_index != b2->default_index)
819 return false;
820
821 /* For all partition strategies, the indexes[] arrays have to match */
822 for (i = 0; i < b1->nindexes; i++)
823 {
824 if (b1->indexes[i] != b2->indexes[i])
825 return false;
826 }
827
828 /* Finally, compare the datums[] arrays */
829 if (b1->strategy == PARTITION_STRATEGY_HASH)
830 {
831 /*
832 * We arrange the partitions in the ascending order of their moduli
833 * and remainders. Also every modulus is factor of next larger
834 * modulus. Therefore we can safely store index of a given partition
835 * in indexes array at remainder of that partition. Also entries at
836 * (remainder + N * modulus) positions in indexes array are all same
837 * for (modulus, remainder) specification for any partition. Thus the
838 * datums arrays from the given bounds are the same, if and only if
839 * their indexes arrays are the same. So, it suffices to compare the
840 * indexes arrays.
841 *
842 * Nonetheless make sure that the bounds are indeed the same when the
843 * indexes match. Hash partition bound stores modulus and remainder
844 * at b1->datums[i][0] and b1->datums[i][1] position respectively.
845 */
846 #ifdef USE_ASSERT_CHECKING
847 for (i = 0; i < b1->ndatums; i++)
848 Assert((b1->datums[i][0] == b2->datums[i][0] &&
849 b1->datums[i][1] == b2->datums[i][1]));
850 #endif
851 }
852 else
853 {
854 for (i = 0; i < b1->ndatums; i++)
855 {
856 int j;
857
858 for (j = 0; j < partnatts; j++)
859 {
860 /* For range partitions, the bounds might not be finite. */
861 if (b1->kind != NULL)
862 {
863 /* The different kinds of bound all differ from each other */
864 if (b1->kind[i][j] != b2->kind[i][j])
865 return false;
866
867 /*
868 * Non-finite bounds are equal without further
869 * examination.
870 */
871 if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
872 continue;
873 }
874
875 /*
876 * Compare the actual values. Note that it would be both
877 * incorrect and unsafe to invoke the comparison operator
878 * derived from the partitioning specification here. It would
879 * be incorrect because we want the relcache entry to be
880 * updated for ANY change to the partition bounds, not just
881 * those that the partitioning operator thinks are
882 * significant. It would be unsafe because we might reach
883 * this code in the context of an aborted transaction, and an
884 * arbitrary partitioning operator might not be safe in that
885 * context. datumIsEqual() should be simple enough to be
886 * safe.
887 */
888 if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
889 parttypbyval[j], parttyplen[j]))
890 return false;
891 }
892 }
893 }
894 return true;
895 }
896
897 /*
898 * Return a copy of given PartitionBoundInfo structure. The data types of bounds
899 * are described by given partition key specification.
900 *
901 * Note: it's important that this function and its callees not do any catalog
902 * access, nor anything else that would result in allocating memory other than
903 * the returned data structure. Since this is called in a long-lived context,
904 * that would result in unwanted memory leaks.
905 */
906 PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,PartitionKey key)907 partition_bounds_copy(PartitionBoundInfo src,
908 PartitionKey key)
909 {
910 PartitionBoundInfo dest;
911 int i;
912 int ndatums;
913 int nindexes;
914 int partnatts;
915 bool hash_part;
916 int natts;
917
918 dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
919
920 dest->strategy = src->strategy;
921 ndatums = dest->ndatums = src->ndatums;
922 nindexes = dest->nindexes = src->nindexes;
923 partnatts = key->partnatts;
924
925 /* List partitioned tables have only a single partition key. */
926 Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
927
928 dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
929
930 if (src->kind != NULL)
931 {
932 dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
933 sizeof(PartitionRangeDatumKind *));
934 for (i = 0; i < ndatums; i++)
935 {
936 dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts *
937 sizeof(PartitionRangeDatumKind));
938
939 memcpy(dest->kind[i], src->kind[i],
940 sizeof(PartitionRangeDatumKind) * key->partnatts);
941 }
942 }
943 else
944 dest->kind = NULL;
945
946 /*
947 * For hash partitioning, datums array will have two elements - modulus
948 * and remainder.
949 */
950 hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
951 natts = hash_part ? 2 : partnatts;
952
953 for (i = 0; i < ndatums; i++)
954 {
955 int j;
956
957 dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts);
958
959 for (j = 0; j < natts; j++)
960 {
961 bool byval;
962 int typlen;
963
964 if (hash_part)
965 {
966 typlen = sizeof(int32); /* Always int4 */
967 byval = true; /* int4 is pass-by-value */
968 }
969 else
970 {
971 byval = key->parttypbyval[j];
972 typlen = key->parttyplen[j];
973 }
974
975 if (dest->kind == NULL ||
976 dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
977 dest->datums[i][j] = datumCopy(src->datums[i][j],
978 byval, typlen);
979 }
980 }
981
982 dest->indexes = (int *) palloc(sizeof(int) * nindexes);
983 memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);
984
985 dest->null_index = src->null_index;
986 dest->default_index = src->default_index;
987
988 return dest;
989 }
990
991 /*
992 * partition_bounds_merge
993 * Check to see whether every partition of 'outer_rel' matches/overlaps
994 * one partition of 'inner_rel' at most, and vice versa; and if so, build
995 * and return the partition bounds for a join relation between the rels,
996 * generating two lists of the matching/overlapping partitions, which are
997 * returned to *outer_parts and *inner_parts respectively.
998 *
999 * The lists contain the same number of partitions, and the partitions at the
1000 * same positions in the lists indicate join pairs used for partitioned join.
1001 * If a partition on one side matches/overlaps multiple partitions on the other
1002 * side, this function returns NULL, setting *outer_parts and *inner_parts to
1003 * NIL.
1004 */
1005 PartitionBoundInfo
partition_bounds_merge(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,RelOptInfo * outer_rel,RelOptInfo * inner_rel,JoinType jointype,List ** outer_parts,List ** inner_parts)1006 partition_bounds_merge(int partnatts,
1007 FmgrInfo *partsupfunc, Oid *partcollation,
1008 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1009 JoinType jointype,
1010 List **outer_parts, List **inner_parts)
1011 {
1012 /*
1013 * Currently, this function is called only from try_partitionwise_join(),
1014 * so the join type should be INNER, LEFT, FULL, SEMI, or ANTI.
1015 */
1016 Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT ||
1017 jointype == JOIN_FULL || jointype == JOIN_SEMI ||
1018 jointype == JOIN_ANTI);
1019
1020 /* The partitioning strategies should be the same. */
1021 Assert(outer_rel->boundinfo->strategy == inner_rel->boundinfo->strategy);
1022
1023 *outer_parts = *inner_parts = NIL;
1024 switch (outer_rel->boundinfo->strategy)
1025 {
1026 case PARTITION_STRATEGY_HASH:
1027
1028 /*
1029 * For hash partitioned tables, we currently support partitioned
1030 * join only when they have exactly the same partition bounds.
1031 *
1032 * XXX: it might be possible to relax the restriction to support
1033 * cases where hash partitioned tables have missing partitions
1034 * and/or different moduli, but it's not clear if it would be
1035 * useful to support the former case since it's unusual to have
1036 * missing partitions. On the other hand, it would be useful to
1037 * support the latter case, but in that case, there is a high
1038 * probability that a partition on one side will match multiple
1039 * partitions on the other side, which is the scenario the current
1040 * implementation of partitioned join can't handle.
1041 */
1042 return NULL;
1043
1044 case PARTITION_STRATEGY_LIST:
1045 return merge_list_bounds(partsupfunc,
1046 partcollation,
1047 outer_rel,
1048 inner_rel,
1049 jointype,
1050 outer_parts,
1051 inner_parts);
1052
1053 case PARTITION_STRATEGY_RANGE:
1054 return merge_range_bounds(partnatts,
1055 partsupfunc,
1056 partcollation,
1057 outer_rel,
1058 inner_rel,
1059 jointype,
1060 outer_parts,
1061 inner_parts);
1062
1063 default:
1064 elog(ERROR, "unexpected partition strategy: %d",
1065 (int) outer_rel->boundinfo->strategy);
1066 return NULL; /* keep compiler quiet */
1067 }
1068 }
1069
1070 /*
1071 * merge_list_bounds
1072 * Create the partition bounds for a join relation between list
1073 * partitioned tables, if possible
1074 *
1075 * In this function we try to find sets of matching partitions from both sides
1076 * by comparing list values stored in their partition bounds. Since the list
1077 * values appear in the ascending order, an algorithm similar to merge join is
1078 * used for that. If a partition on one side doesn't have a matching
1079 * partition on the other side, the algorithm tries to match it with the
1080 * default partition on the other side if any; if not, the algorithm tries to
1081 * match it with a dummy partition on the other side if it's on the
1082 * non-nullable side of an outer join. Also, if both sides have the default
1083 * partitions, the algorithm tries to match them with each other. We give up
1084 * if the algorithm finds a partition matching multiple partitions on the
1085 * other side, which is the scenario the current implementation of partitioned
1086 * join can't handle.
1087 */
1088 static PartitionBoundInfo
merge_list_bounds(FmgrInfo * partsupfunc,Oid * partcollation,RelOptInfo * outer_rel,RelOptInfo * inner_rel,JoinType jointype,List ** outer_parts,List ** inner_parts)1089 merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation,
1090 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1091 JoinType jointype,
1092 List **outer_parts, List **inner_parts)
1093 {
1094 PartitionBoundInfo merged_bounds = NULL;
1095 PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1096 PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1097 bool outer_has_default = partition_bound_has_default(outer_bi);
1098 bool inner_has_default = partition_bound_has_default(inner_bi);
1099 int outer_default = outer_bi->default_index;
1100 int inner_default = inner_bi->default_index;
1101 bool outer_has_null = partition_bound_accepts_nulls(outer_bi);
1102 bool inner_has_null = partition_bound_accepts_nulls(inner_bi);
1103 PartitionMap outer_map;
1104 PartitionMap inner_map;
1105 int outer_pos;
1106 int inner_pos;
1107 int next_index = 0;
1108 int null_index = -1;
1109 int default_index = -1;
1110 List *merged_datums = NIL;
1111 List *merged_indexes = NIL;
1112
1113 Assert(*outer_parts == NIL);
1114 Assert(*inner_parts == NIL);
1115 Assert(outer_bi->strategy == inner_bi->strategy &&
1116 outer_bi->strategy == PARTITION_STRATEGY_LIST);
1117 /* List partitioning doesn't require kinds. */
1118 Assert(!outer_bi->kind && !inner_bi->kind);
1119
1120 init_partition_map(outer_rel, &outer_map);
1121 init_partition_map(inner_rel, &inner_map);
1122
1123 /*
1124 * If the default partitions (if any) have been proven empty, deem them
1125 * non-existent.
1126 */
1127 if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1128 outer_has_default = false;
1129 if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1130 inner_has_default = false;
1131
1132 /*
1133 * Merge partitions from both sides. In each iteration we compare a pair
1134 * of list values, one from each side, and decide whether the
1135 * corresponding partitions match or not. If the two values match
1136 * exactly, move to the next pair of list values, otherwise move to the
1137 * next list value on the side with a smaller list value.
1138 */
1139 outer_pos = inner_pos = 0;
1140 while (outer_pos < outer_bi->ndatums || inner_pos < inner_bi->ndatums)
1141 {
1142 int outer_index = -1;
1143 int inner_index = -1;
1144 Datum *outer_datums;
1145 Datum *inner_datums;
1146 int cmpval;
1147 Datum *merged_datum = NULL;
1148 int merged_index = -1;
1149
1150 if (outer_pos < outer_bi->ndatums)
1151 {
1152 /*
1153 * If the partition on the outer side has been proven empty,
1154 * ignore it and move to the next datum on the outer side.
1155 */
1156 outer_index = outer_bi->indexes[outer_pos];
1157 if (is_dummy_partition(outer_rel, outer_index))
1158 {
1159 outer_pos++;
1160 continue;
1161 }
1162 }
1163 if (inner_pos < inner_bi->ndatums)
1164 {
1165 /*
1166 * If the partition on the inner side has been proven empty,
1167 * ignore it and move to the next datum on the inner side.
1168 */
1169 inner_index = inner_bi->indexes[inner_pos];
1170 if (is_dummy_partition(inner_rel, inner_index))
1171 {
1172 inner_pos++;
1173 continue;
1174 }
1175 }
1176
1177 /* Get the list values. */
1178 outer_datums = outer_pos < outer_bi->ndatums ?
1179 outer_bi->datums[outer_pos] : NULL;
1180 inner_datums = inner_pos < inner_bi->ndatums ?
1181 inner_bi->datums[inner_pos] : NULL;
1182
1183 /*
1184 * We run this loop till both sides finish. This allows us to avoid
1185 * duplicating code to handle the remaining values on the side which
1186 * finishes later. For that we set the comparison parameter cmpval in
1187 * such a way that it appears as if the side which finishes earlier
1188 * has an extra value higher than any other value on the unfinished
1189 * side. That way we advance the values on the unfinished side till
1190 * all of its values are exhausted.
1191 */
1192 if (outer_pos >= outer_bi->ndatums)
1193 cmpval = 1;
1194 else if (inner_pos >= inner_bi->ndatums)
1195 cmpval = -1;
1196 else
1197 {
1198 Assert(outer_datums != NULL && inner_datums != NULL);
1199 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
1200 partcollation[0],
1201 outer_datums[0],
1202 inner_datums[0]));
1203 }
1204
1205 if (cmpval == 0)
1206 {
1207 /* Two list values match exactly. */
1208 Assert(outer_pos < outer_bi->ndatums);
1209 Assert(inner_pos < inner_bi->ndatums);
1210 Assert(outer_index >= 0);
1211 Assert(inner_index >= 0);
1212
1213 /*
1214 * Try merging both partitions. If successful, add the list value
1215 * and index of the merged partition below.
1216 */
1217 merged_index = merge_matching_partitions(&outer_map, &inner_map,
1218 outer_index, inner_index,
1219 &next_index);
1220 if (merged_index == -1)
1221 goto cleanup;
1222
1223 merged_datum = outer_datums;
1224
1225 /* Move to the next pair of list values. */
1226 outer_pos++;
1227 inner_pos++;
1228 }
1229 else if (cmpval < 0)
1230 {
1231 /* A list value missing from the inner side. */
1232 Assert(outer_pos < outer_bi->ndatums);
1233
1234 /*
1235 * If the inner side has the default partition, or this is an
1236 * outer join, try to assign a merged partition to the outer
1237 * partition (see process_outer_partition()). Otherwise, the
1238 * outer partition will not contribute to the result.
1239 */
1240 if (inner_has_default || IS_OUTER_JOIN(jointype))
1241 {
1242 /* Get the outer partition. */
1243 outer_index = outer_bi->indexes[outer_pos];
1244 Assert(outer_index >= 0);
1245 merged_index = process_outer_partition(&outer_map,
1246 &inner_map,
1247 outer_has_default,
1248 inner_has_default,
1249 outer_index,
1250 inner_default,
1251 jointype,
1252 &next_index,
1253 &default_index);
1254 if (merged_index == -1)
1255 goto cleanup;
1256 merged_datum = outer_datums;
1257 }
1258
1259 /* Move to the next list value on the outer side. */
1260 outer_pos++;
1261 }
1262 else
1263 {
1264 /* A list value missing from the outer side. */
1265 Assert(cmpval > 0);
1266 Assert(inner_pos < inner_bi->ndatums);
1267
1268 /*
1269 * If the outer side has the default partition, or this is a FULL
1270 * join, try to assign a merged partition to the inner partition
1271 * (see process_inner_partition()). Otherwise, the inner
1272 * partition will not contribute to the result.
1273 */
1274 if (outer_has_default || jointype == JOIN_FULL)
1275 {
1276 /* Get the inner partition. */
1277 inner_index = inner_bi->indexes[inner_pos];
1278 Assert(inner_index >= 0);
1279 merged_index = process_inner_partition(&outer_map,
1280 &inner_map,
1281 outer_has_default,
1282 inner_has_default,
1283 inner_index,
1284 outer_default,
1285 jointype,
1286 &next_index,
1287 &default_index);
1288 if (merged_index == -1)
1289 goto cleanup;
1290 merged_datum = inner_datums;
1291 }
1292
1293 /* Move to the next list value on the inner side. */
1294 inner_pos++;
1295 }
1296
1297 /*
1298 * If we assigned a merged partition, add the list value and index of
1299 * the merged partition if appropriate.
1300 */
1301 if (merged_index >= 0 && merged_index != default_index)
1302 {
1303 merged_datums = lappend(merged_datums, merged_datum);
1304 merged_indexes = lappend_int(merged_indexes, merged_index);
1305 }
1306 }
1307
1308 /*
1309 * If the NULL partitions (if any) have been proven empty, deem them
1310 * non-existent.
1311 */
1312 if (outer_has_null &&
1313 is_dummy_partition(outer_rel, outer_bi->null_index))
1314 outer_has_null = false;
1315 if (inner_has_null &&
1316 is_dummy_partition(inner_rel, inner_bi->null_index))
1317 inner_has_null = false;
1318
1319 /* Merge the NULL partitions if any. */
1320 if (outer_has_null || inner_has_null)
1321 merge_null_partitions(&outer_map, &inner_map,
1322 outer_has_null, inner_has_null,
1323 outer_bi->null_index, inner_bi->null_index,
1324 jointype, &next_index, &null_index);
1325 else
1326 Assert(null_index == -1);
1327
1328 /* Merge the default partitions if any. */
1329 if (outer_has_default || inner_has_default)
1330 merge_default_partitions(&outer_map, &inner_map,
1331 outer_has_default, inner_has_default,
1332 outer_default, inner_default,
1333 jointype, &next_index, &default_index);
1334 else
1335 Assert(default_index == -1);
1336
1337 /* If we have merged partitions, create the partition bounds. */
1338 if (next_index > 0)
1339 {
1340 /* Fix the merged_indexes list if necessary. */
1341 if (outer_map.did_remapping || inner_map.did_remapping)
1342 {
1343 Assert(jointype == JOIN_FULL);
1344 fix_merged_indexes(&outer_map, &inner_map,
1345 next_index, merged_indexes);
1346 }
1347
1348 /* Use maps to match partitions from inputs. */
1349 generate_matching_part_pairs(outer_rel, inner_rel,
1350 &outer_map, &inner_map,
1351 next_index,
1352 outer_parts, inner_parts);
1353 Assert(*outer_parts != NIL);
1354 Assert(*inner_parts != NIL);
1355 Assert(list_length(*outer_parts) == list_length(*inner_parts));
1356 Assert(list_length(*outer_parts) <= next_index);
1357
1358 /* Make a PartitionBoundInfo struct to return. */
1359 merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1360 merged_datums,
1361 NIL,
1362 merged_indexes,
1363 null_index,
1364 default_index);
1365 Assert(merged_bounds);
1366 }
1367
1368 cleanup:
1369 /* Free local memory before returning. */
1370 list_free(merged_datums);
1371 list_free(merged_indexes);
1372 free_partition_map(&outer_map);
1373 free_partition_map(&inner_map);
1374
1375 return merged_bounds;
1376 }
1377
1378 /*
1379 * merge_range_bounds
1380 * Create the partition bounds for a join relation between range
1381 * partitioned tables, if possible
1382 *
1383 * In this function we try to find sets of overlapping partitions from both
1384 * sides by comparing ranges stored in their partition bounds. Since the
1385 * ranges appear in the ascending order, an algorithm similar to merge join is
1386 * used for that. If a partition on one side doesn't have an overlapping
1387 * partition on the other side, the algorithm tries to match it with the
1388 * default partition on the other side if any; if not, the algorithm tries to
1389 * match it with a dummy partition on the other side if it's on the
1390 * non-nullable side of an outer join. Also, if both sides have the default
1391 * partitions, the algorithm tries to match them with each other. We give up
1392 * if the algorithm finds a partition overlapping multiple partitions on the
1393 * other side, which is the scenario the current implementation of partitioned
1394 * join can't handle.
1395 */
1396 static PartitionBoundInfo
merge_range_bounds(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,RelOptInfo * outer_rel,RelOptInfo * inner_rel,JoinType jointype,List ** outer_parts,List ** inner_parts)1397 merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
1398 Oid *partcollations,
1399 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1400 JoinType jointype,
1401 List **outer_parts, List **inner_parts)
1402 {
1403 PartitionBoundInfo merged_bounds = NULL;
1404 PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1405 PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1406 bool outer_has_default = partition_bound_has_default(outer_bi);
1407 bool inner_has_default = partition_bound_has_default(inner_bi);
1408 int outer_default = outer_bi->default_index;
1409 int inner_default = inner_bi->default_index;
1410 PartitionMap outer_map;
1411 PartitionMap inner_map;
1412 int outer_index;
1413 int inner_index;
1414 int outer_lb_pos;
1415 int inner_lb_pos;
1416 PartitionRangeBound outer_lb;
1417 PartitionRangeBound outer_ub;
1418 PartitionRangeBound inner_lb;
1419 PartitionRangeBound inner_ub;
1420 int next_index = 0;
1421 int default_index = -1;
1422 List *merged_datums = NIL;
1423 List *merged_kinds = NIL;
1424 List *merged_indexes = NIL;
1425
1426 Assert(*outer_parts == NIL);
1427 Assert(*inner_parts == NIL);
1428 Assert(outer_bi->strategy == inner_bi->strategy &&
1429 outer_bi->strategy == PARTITION_STRATEGY_RANGE);
1430
1431 init_partition_map(outer_rel, &outer_map);
1432 init_partition_map(inner_rel, &inner_map);
1433
1434 /*
1435 * If the default partitions (if any) have been proven empty, deem them
1436 * non-existent.
1437 */
1438 if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1439 outer_has_default = false;
1440 if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1441 inner_has_default = false;
1442
1443 /*
1444 * Merge partitions from both sides. In each iteration we compare a pair
1445 * of ranges, one from each side, and decide whether the corresponding
1446 * partitions match or not. If the two ranges overlap, move to the next
1447 * pair of ranges, otherwise move to the next range on the side with a
1448 * lower range. outer_lb_pos/inner_lb_pos keep track of the positions of
1449 * lower bounds in the datums arrays in the outer/inner
1450 * PartitionBoundInfos respectively.
1451 */
1452 outer_lb_pos = inner_lb_pos = 0;
1453 outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1454 &outer_lb, &outer_ub);
1455 inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1456 &inner_lb, &inner_ub);
1457 while (outer_index >= 0 || inner_index >= 0)
1458 {
1459 bool overlap;
1460 int ub_cmpval;
1461 int lb_cmpval;
1462 PartitionRangeBound merged_lb = {-1, NULL, NULL, true};
1463 PartitionRangeBound merged_ub = {-1, NULL, NULL, false};
1464 int merged_index = -1;
1465
1466 /*
1467 * We run this loop till both sides finish. This allows us to avoid
1468 * duplicating code to handle the remaining ranges on the side which
1469 * finishes later. For that we set the comparison parameter cmpval in
1470 * such a way that it appears as if the side which finishes earlier
1471 * has an extra range higher than any other range on the unfinished
1472 * side. That way we advance the ranges on the unfinished side till
1473 * all of its ranges are exhausted.
1474 */
1475 if (outer_index == -1)
1476 {
1477 overlap = false;
1478 lb_cmpval = 1;
1479 ub_cmpval = 1;
1480 }
1481 else if (inner_index == -1)
1482 {
1483 overlap = false;
1484 lb_cmpval = -1;
1485 ub_cmpval = -1;
1486 }
1487 else
1488 overlap = compare_range_partitions(partnatts, partsupfuncs,
1489 partcollations,
1490 &outer_lb, &outer_ub,
1491 &inner_lb, &inner_ub,
1492 &lb_cmpval, &ub_cmpval);
1493
1494 if (overlap)
1495 {
1496 /* Two ranges overlap; form a join pair. */
1497
1498 PartitionRangeBound save_outer_ub;
1499 PartitionRangeBound save_inner_ub;
1500
1501 /* Both partitions should not have been merged yet. */
1502 Assert(outer_index >= 0);
1503 Assert(outer_map.merged_indexes[outer_index] == -1 &&
1504 outer_map.merged[outer_index] == false);
1505 Assert(inner_index >= 0);
1506 Assert(inner_map.merged_indexes[inner_index] == -1 &&
1507 inner_map.merged[inner_index] == false);
1508
1509 /*
1510 * Get the index of the merged partition. Both partitions aren't
1511 * merged yet, so the partitions should be merged successfully.
1512 */
1513 merged_index = merge_matching_partitions(&outer_map, &inner_map,
1514 outer_index, inner_index,
1515 &next_index);
1516 Assert(merged_index >= 0);
1517
1518 /* Get the range bounds of the merged partition. */
1519 get_merged_range_bounds(partnatts, partsupfuncs,
1520 partcollations, jointype,
1521 &outer_lb, &outer_ub,
1522 &inner_lb, &inner_ub,
1523 lb_cmpval, ub_cmpval,
1524 &merged_lb, &merged_ub);
1525
1526 /* Save the upper bounds of both partitions for use below. */
1527 save_outer_ub = outer_ub;
1528 save_inner_ub = inner_ub;
1529
1530 /* Move to the next pair of ranges. */
1531 outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1532 &outer_lb, &outer_ub);
1533 inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1534 &inner_lb, &inner_ub);
1535
1536 /*
1537 * If the range of a partition on one side overlaps the range of
1538 * the next partition on the other side, that will cause the
1539 * partition on one side to match at least two partitions on the
1540 * other side, which is the case that we currently don't support
1541 * partitioned join for; give up.
1542 */
1543 if (ub_cmpval > 0 && inner_index >= 0 &&
1544 compare_range_bounds(partnatts, partsupfuncs, partcollations,
1545 &save_outer_ub, &inner_lb) > 0)
1546 goto cleanup;
1547 if (ub_cmpval < 0 && outer_index >= 0 &&
1548 compare_range_bounds(partnatts, partsupfuncs, partcollations,
1549 &outer_lb, &save_inner_ub) < 0)
1550 goto cleanup;
1551
1552 /*
1553 * A row from a non-overlapping portion (if any) of a partition on
1554 * one side might find its join partner in the default partition
1555 * (if any) on the other side, causing the same situation as
1556 * above; give up in that case.
1557 */
1558 if ((outer_has_default && (lb_cmpval > 0 || ub_cmpval < 0)) ||
1559 (inner_has_default && (lb_cmpval < 0 || ub_cmpval > 0)))
1560 goto cleanup;
1561 }
1562 else if (ub_cmpval < 0)
1563 {
1564 /* A non-overlapping outer range. */
1565
1566 /* The outer partition should not have been merged yet. */
1567 Assert(outer_index >= 0);
1568 Assert(outer_map.merged_indexes[outer_index] == -1 &&
1569 outer_map.merged[outer_index] == false);
1570
1571 /*
1572 * If the inner side has the default partition, or this is an
1573 * outer join, try to assign a merged partition to the outer
1574 * partition (see process_outer_partition()). Otherwise, the
1575 * outer partition will not contribute to the result.
1576 */
1577 if (inner_has_default || IS_OUTER_JOIN(jointype))
1578 {
1579 merged_index = process_outer_partition(&outer_map,
1580 &inner_map,
1581 outer_has_default,
1582 inner_has_default,
1583 outer_index,
1584 inner_default,
1585 jointype,
1586 &next_index,
1587 &default_index);
1588 if (merged_index == -1)
1589 goto cleanup;
1590 merged_lb = outer_lb;
1591 merged_ub = outer_ub;
1592 }
1593
1594 /* Move to the next range on the outer side. */
1595 outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1596 &outer_lb, &outer_ub);
1597 }
1598 else
1599 {
1600 /* A non-overlapping inner range. */
1601 Assert(ub_cmpval > 0);
1602
1603 /* The inner partition should not have been merged yet. */
1604 Assert(inner_index >= 0);
1605 Assert(inner_map.merged_indexes[inner_index] == -1 &&
1606 inner_map.merged[inner_index] == false);
1607
1608 /*
1609 * If the outer side has the default partition, or this is a FULL
1610 * join, try to assign a merged partition to the inner partition
1611 * (see process_inner_partition()). Otherwise, the inner
1612 * partition will not contribute to the result.
1613 */
1614 if (outer_has_default || jointype == JOIN_FULL)
1615 {
1616 merged_index = process_inner_partition(&outer_map,
1617 &inner_map,
1618 outer_has_default,
1619 inner_has_default,
1620 inner_index,
1621 outer_default,
1622 jointype,
1623 &next_index,
1624 &default_index);
1625 if (merged_index == -1)
1626 goto cleanup;
1627 merged_lb = inner_lb;
1628 merged_ub = inner_ub;
1629 }
1630
1631 /* Move to the next range on the inner side. */
1632 inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1633 &inner_lb, &inner_ub);
1634 }
1635
1636 /*
1637 * If we assigned a merged partition, add the range bounds and index
1638 * of the merged partition if appropriate.
1639 */
1640 if (merged_index >= 0 && merged_index != default_index)
1641 add_merged_range_bounds(partnatts, partsupfuncs, partcollations,
1642 &merged_lb, &merged_ub, merged_index,
1643 &merged_datums, &merged_kinds,
1644 &merged_indexes);
1645 }
1646
1647 /* Merge the default partitions if any. */
1648 if (outer_has_default || inner_has_default)
1649 merge_default_partitions(&outer_map, &inner_map,
1650 outer_has_default, inner_has_default,
1651 outer_default, inner_default,
1652 jointype, &next_index, &default_index);
1653 else
1654 Assert(default_index == -1);
1655
1656 /* If we have merged partitions, create the partition bounds. */
1657 if (next_index > 0)
1658 {
1659 /*
1660 * Unlike the case of list partitioning, we wouldn't have re-merged
1661 * partitions, so did_remapping should be left alone.
1662 */
1663 Assert(!outer_map.did_remapping);
1664 Assert(!inner_map.did_remapping);
1665
1666 /* Use maps to match partitions from inputs. */
1667 generate_matching_part_pairs(outer_rel, inner_rel,
1668 &outer_map, &inner_map,
1669 next_index,
1670 outer_parts, inner_parts);
1671 Assert(*outer_parts != NIL);
1672 Assert(*inner_parts != NIL);
1673 Assert(list_length(*outer_parts) == list_length(*inner_parts));
1674 Assert(list_length(*outer_parts) == next_index);
1675
1676 /* Make a PartitionBoundInfo struct to return. */
1677 merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1678 merged_datums,
1679 merged_kinds,
1680 merged_indexes,
1681 -1,
1682 default_index);
1683 Assert(merged_bounds);
1684 }
1685
1686 cleanup:
1687 /* Free local memory before returning. */
1688 list_free(merged_datums);
1689 list_free(merged_kinds);
1690 list_free(merged_indexes);
1691 free_partition_map(&outer_map);
1692 free_partition_map(&inner_map);
1693
1694 return merged_bounds;
1695 }
1696
1697 /*
1698 * init_partition_map
1699 * Initialize a PartitionMap struct for given relation
1700 */
1701 static void
init_partition_map(RelOptInfo * rel,PartitionMap * map)1702 init_partition_map(RelOptInfo *rel, PartitionMap *map)
1703 {
1704 int nparts = rel->nparts;
1705 int i;
1706
1707 map->nparts = nparts;
1708 map->merged_indexes = (int *) palloc(sizeof(int) * nparts);
1709 map->merged = (bool *) palloc(sizeof(bool) * nparts);
1710 map->did_remapping = false;
1711 map->old_indexes = (int *) palloc(sizeof(int) * nparts);
1712 for (i = 0; i < nparts; i++)
1713 {
1714 map->merged_indexes[i] = map->old_indexes[i] = -1;
1715 map->merged[i] = false;
1716 }
1717 }
1718
1719 /*
1720 * free_partition_map
1721 */
1722 static void
free_partition_map(PartitionMap * map)1723 free_partition_map(PartitionMap *map)
1724 {
1725 pfree(map->merged_indexes);
1726 pfree(map->merged);
1727 pfree(map->old_indexes);
1728 }
1729
1730 /*
1731 * is_dummy_partition --- has partition been proven empty?
1732 */
1733 static bool
is_dummy_partition(RelOptInfo * rel,int part_index)1734 is_dummy_partition(RelOptInfo *rel, int part_index)
1735 {
1736 RelOptInfo *part_rel;
1737
1738 Assert(part_index >= 0);
1739 part_rel = rel->part_rels[part_index];
1740 if (part_rel == NULL || IS_DUMMY_REL(part_rel))
1741 return true;
1742 return false;
1743 }
1744
1745 /*
1746 * merge_matching_partitions
1747 * Try to merge given outer/inner partitions, and return the index of a
1748 * merged partition produced from them if successful, -1 otherwise
1749 *
1750 * If the merged partition is newly created, *next_index is incremented.
1751 */
1752 static int
merge_matching_partitions(PartitionMap * outer_map,PartitionMap * inner_map,int outer_index,int inner_index,int * next_index)1753 merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map,
1754 int outer_index, int inner_index, int *next_index)
1755 {
1756 int outer_merged_index;
1757 int inner_merged_index;
1758 bool outer_merged;
1759 bool inner_merged;
1760
1761 Assert(outer_index >= 0 && outer_index < outer_map->nparts);
1762 outer_merged_index = outer_map->merged_indexes[outer_index];
1763 outer_merged = outer_map->merged[outer_index];
1764 Assert(inner_index >= 0 && inner_index < inner_map->nparts);
1765 inner_merged_index = inner_map->merged_indexes[inner_index];
1766 inner_merged = inner_map->merged[inner_index];
1767
1768 /*
1769 * Handle cases where we have already assigned a merged partition to each
1770 * of the given partitions.
1771 */
1772 if (outer_merged_index >= 0 && inner_merged_index >= 0)
1773 {
1774 /*
1775 * If the mereged partitions are the same, no need to do anything;
1776 * return the index of the merged partitions. Otherwise, if each of
1777 * the given partitions has been merged with a dummy partition on the
1778 * other side, re-map them to either of the two merged partitions.
1779 * Otherwise, they can't be merged, so return -1.
1780 */
1781 if (outer_merged_index == inner_merged_index)
1782 {
1783 Assert(outer_merged);
1784 Assert(inner_merged);
1785 return outer_merged_index;
1786 }
1787 if (!outer_merged && !inner_merged)
1788 {
1789 /*
1790 * This can only happen for a list-partitioning case. We re-map
1791 * them to the merged partition with the smaller of the two merged
1792 * indexes to preserve the property that the canonical order of
1793 * list partitions is determined by the indexes assigned to the
1794 * smallest list value of each partition.
1795 */
1796 if (outer_merged_index < inner_merged_index)
1797 {
1798 outer_map->merged[outer_index] = true;
1799 inner_map->merged_indexes[inner_index] = outer_merged_index;
1800 inner_map->merged[inner_index] = true;
1801 inner_map->did_remapping = true;
1802 inner_map->old_indexes[inner_index] = inner_merged_index;
1803 return outer_merged_index;
1804 }
1805 else
1806 {
1807 inner_map->merged[inner_index] = true;
1808 outer_map->merged_indexes[outer_index] = inner_merged_index;
1809 outer_map->merged[outer_index] = true;
1810 outer_map->did_remapping = true;
1811 outer_map->old_indexes[outer_index] = outer_merged_index;
1812 return inner_merged_index;
1813 }
1814 }
1815 return -1;
1816 }
1817
1818 /* At least one of the given partitions should not have yet been merged. */
1819 Assert(outer_merged_index == -1 || inner_merged_index == -1);
1820
1821 /*
1822 * If neither of them has been merged, merge them. Otherwise, if one has
1823 * been merged with a dummy partition on the other side (and the other
1824 * hasn't yet been merged with anything), re-merge them. Otherwise, they
1825 * can't be merged, so return -1.
1826 */
1827 if (outer_merged_index == -1 && inner_merged_index == -1)
1828 {
1829 int merged_index = *next_index;
1830
1831 Assert(!outer_merged);
1832 Assert(!inner_merged);
1833 outer_map->merged_indexes[outer_index] = merged_index;
1834 outer_map->merged[outer_index] = true;
1835 inner_map->merged_indexes[inner_index] = merged_index;
1836 inner_map->merged[inner_index] = true;
1837 *next_index = *next_index + 1;
1838 return merged_index;
1839 }
1840 if (outer_merged_index >= 0 && !outer_map->merged[outer_index])
1841 {
1842 Assert(inner_merged_index == -1);
1843 Assert(!inner_merged);
1844 inner_map->merged_indexes[inner_index] = outer_merged_index;
1845 inner_map->merged[inner_index] = true;
1846 outer_map->merged[outer_index] = true;
1847 return outer_merged_index;
1848 }
1849 if (inner_merged_index >= 0 && !inner_map->merged[inner_index])
1850 {
1851 Assert(outer_merged_index == -1);
1852 Assert(!outer_merged);
1853 outer_map->merged_indexes[outer_index] = inner_merged_index;
1854 outer_map->merged[outer_index] = true;
1855 inner_map->merged[inner_index] = true;
1856 return inner_merged_index;
1857 }
1858 return -1;
1859 }
1860
1861 /*
1862 * process_outer_partition
1863 * Try to assign given outer partition a merged partition, and return the
1864 * index of the merged partition if successful, -1 otherwise
1865 *
1866 * If the partition is newly created, *next_index is incremented. Also, if it
1867 * is the default partition of the join relation, *default_index is set to the
1868 * index if not already done.
1869 */
1870 static int
process_outer_partition(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_default,bool inner_has_default,int outer_index,int inner_default,JoinType jointype,int * next_index,int * default_index)1871 process_outer_partition(PartitionMap *outer_map,
1872 PartitionMap *inner_map,
1873 bool outer_has_default,
1874 bool inner_has_default,
1875 int outer_index,
1876 int inner_default,
1877 JoinType jointype,
1878 int *next_index,
1879 int *default_index)
1880 {
1881 int merged_index = -1;
1882
1883 Assert(outer_index >= 0);
1884
1885 /*
1886 * If the inner side has the default partition, a row from the outer
1887 * partition might find its join partner in the default partition; try
1888 * merging the outer partition with the default partition. Otherwise,
1889 * this should be an outer join, in which case the outer partition has to
1890 * be scanned all the way anyway; merge the outer partition with a dummy
1891 * partition on the other side.
1892 */
1893 if (inner_has_default)
1894 {
1895 Assert(inner_default >= 0);
1896
1897 /*
1898 * If the outer side has the default partition as well, the default
1899 * partition on the inner side will have two matching partitions on
1900 * the other side: the outer partition and the default partition on
1901 * the outer side. Partitionwise join doesn't handle this scenario
1902 * yet.
1903 */
1904 if (outer_has_default)
1905 return -1;
1906
1907 merged_index = merge_matching_partitions(outer_map, inner_map,
1908 outer_index, inner_default,
1909 next_index);
1910 if (merged_index == -1)
1911 return -1;
1912
1913 /*
1914 * If this is a FULL join, the default partition on the inner side has
1915 * to be scanned all the way anyway, so the resulting partition will
1916 * contain all key values from the default partition, which any other
1917 * partition of the join relation will not contain. Thus the
1918 * resulting partition will act as the default partition of the join
1919 * relation; record the index in *default_index if not already done.
1920 */
1921 if (jointype == JOIN_FULL)
1922 {
1923 if (*default_index == -1)
1924 *default_index = merged_index;
1925 else
1926 Assert(*default_index == merged_index);
1927 }
1928 }
1929 else
1930 {
1931 Assert(IS_OUTER_JOIN(jointype));
1932 Assert(jointype != JOIN_RIGHT);
1933
1934 /* If we have already assigned a partition, no need to do anything. */
1935 merged_index = outer_map->merged_indexes[outer_index];
1936 if (merged_index == -1)
1937 merged_index = merge_partition_with_dummy(outer_map, outer_index,
1938 next_index);
1939 }
1940 return merged_index;
1941 }
1942
1943 /*
1944 * process_inner_partition
1945 * Try to assign given inner partition a merged partition, and return the
1946 * index of the merged partition if successful, -1 otherwise
1947 *
1948 * If the partition is newly created, *next_index is incremented. Also, if it
1949 * is the default partition of the join relation, *default_index is set to the
1950 * index if not already done.
1951 */
1952 static int
process_inner_partition(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_default,bool inner_has_default,int inner_index,int outer_default,JoinType jointype,int * next_index,int * default_index)1953 process_inner_partition(PartitionMap *outer_map,
1954 PartitionMap *inner_map,
1955 bool outer_has_default,
1956 bool inner_has_default,
1957 int inner_index,
1958 int outer_default,
1959 JoinType jointype,
1960 int *next_index,
1961 int *default_index)
1962 {
1963 int merged_index = -1;
1964
1965 Assert(inner_index >= 0);
1966
1967 /*
1968 * If the outer side has the default partition, a row from the inner
1969 * partition might find its join partner in the default partition; try
1970 * merging the inner partition with the default partition. Otherwise,
1971 * this should be a FULL join, in which case the inner partition has to be
1972 * scanned all the way anyway; merge the inner partition with a dummy
1973 * partition on the other side.
1974 */
1975 if (outer_has_default)
1976 {
1977 Assert(outer_default >= 0);
1978
1979 /*
1980 * If the inner side has the default partition as well, the default
1981 * partition on the outer side will have two matching partitions on
1982 * the other side: the inner partition and the default partition on
1983 * the inner side. Partitionwise join doesn't handle this scenario
1984 * yet.
1985 */
1986 if (inner_has_default)
1987 return -1;
1988
1989 merged_index = merge_matching_partitions(outer_map, inner_map,
1990 outer_default, inner_index,
1991 next_index);
1992 if (merged_index == -1)
1993 return -1;
1994
1995 /*
1996 * If this is an outer join, the default partition on the outer side
1997 * has to be scanned all the way anyway, so the resulting partition
1998 * will contain all key values from the default partition, which any
1999 * other partition of the join relation will not contain. Thus the
2000 * resulting partition will act as the default partition of the join
2001 * relation; record the index in *default_index if not already done.
2002 */
2003 if (IS_OUTER_JOIN(jointype))
2004 {
2005 Assert(jointype != JOIN_RIGHT);
2006 if (*default_index == -1)
2007 *default_index = merged_index;
2008 else
2009 Assert(*default_index == merged_index);
2010 }
2011 }
2012 else
2013 {
2014 Assert(jointype == JOIN_FULL);
2015
2016 /* If we have already assigned a partition, no need to do anything. */
2017 merged_index = inner_map->merged_indexes[inner_index];
2018 if (merged_index == -1)
2019 merged_index = merge_partition_with_dummy(inner_map, inner_index,
2020 next_index);
2021 }
2022 return merged_index;
2023 }
2024
2025 /*
2026 * merge_null_partitions
2027 * Merge the NULL partitions from a join's outer and inner sides.
2028 *
2029 * If the merged partition produced from them is the NULL partition of the join
2030 * relation, *null_index is set to the index of the merged partition.
2031 *
2032 * Note: We assume here that the join clause for a partitioned join is strict
2033 * because have_partkey_equi_join() requires that the corresponding operator
2034 * be mergejoinable, and we currently assume that mergejoinable operators are
2035 * strict (see MJEvalOuterValues()/MJEvalInnerValues()).
2036 */
2037 static void
merge_null_partitions(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_null,bool inner_has_null,int outer_null,int inner_null,JoinType jointype,int * next_index,int * null_index)2038 merge_null_partitions(PartitionMap *outer_map,
2039 PartitionMap *inner_map,
2040 bool outer_has_null,
2041 bool inner_has_null,
2042 int outer_null,
2043 int inner_null,
2044 JoinType jointype,
2045 int *next_index,
2046 int *null_index)
2047 {
2048 bool consider_outer_null = false;
2049 bool consider_inner_null = false;
2050
2051 Assert(outer_has_null || inner_has_null);
2052 Assert(*null_index == -1);
2053
2054 /*
2055 * Check whether the NULL partitions have already been merged and if so,
2056 * set the consider_outer_null/consider_inner_null flags.
2057 */
2058 if (outer_has_null)
2059 {
2060 Assert(outer_null >= 0 && outer_null < outer_map->nparts);
2061 if (outer_map->merged_indexes[outer_null] == -1)
2062 consider_outer_null = true;
2063 }
2064 if (inner_has_null)
2065 {
2066 Assert(inner_null >= 0 && inner_null < inner_map->nparts);
2067 if (inner_map->merged_indexes[inner_null] == -1)
2068 consider_inner_null = true;
2069 }
2070
2071 /* If both flags are set false, we don't need to do anything. */
2072 if (!consider_outer_null && !consider_inner_null)
2073 return;
2074
2075 if (consider_outer_null && !consider_inner_null)
2076 {
2077 Assert(outer_has_null);
2078
2079 /*
2080 * If this is an outer join, the NULL partition on the outer side has
2081 * to be scanned all the way anyway; merge the NULL partition with a
2082 * dummy partition on the other side. In that case
2083 * consider_outer_null means that the NULL partition only contains
2084 * NULL values as the key values, so the merged partition will do so;
2085 * treat it as the NULL partition of the join relation.
2086 */
2087 if (IS_OUTER_JOIN(jointype))
2088 {
2089 Assert(jointype != JOIN_RIGHT);
2090 *null_index = merge_partition_with_dummy(outer_map, outer_null,
2091 next_index);
2092 }
2093 }
2094 else if (!consider_outer_null && consider_inner_null)
2095 {
2096 Assert(inner_has_null);
2097
2098 /*
2099 * If this is a FULL join, the NULL partition on the inner side has to
2100 * be scanned all the way anyway; merge the NULL partition with a
2101 * dummy partition on the other side. In that case
2102 * consider_inner_null means that the NULL partition only contains
2103 * NULL values as the key values, so the merged partition will do so;
2104 * treat it as the NULL partition of the join relation.
2105 */
2106 if (jointype == JOIN_FULL)
2107 *null_index = merge_partition_with_dummy(inner_map, inner_null,
2108 next_index);
2109 }
2110 else
2111 {
2112 Assert(consider_outer_null && consider_inner_null);
2113 Assert(outer_has_null);
2114 Assert(inner_has_null);
2115
2116 /*
2117 * If this is an outer join, the NULL partition on the outer side (and
2118 * that on the inner side if this is a FULL join) have to be scanned
2119 * all the way anyway, so merge them. Note that each of the NULL
2120 * partitions isn't merged yet, so they should be merged successfully.
2121 * Like the above, each of the NULL partitions only contains NULL
2122 * values as the key values, so the merged partition will do so; treat
2123 * it as the NULL partition of the join relation.
2124 *
2125 * Note: if this an INNER/SEMI join, the join clause will never be
2126 * satisfied by two NULL values (see comments above), so both the NULL
2127 * partitions can be eliminated.
2128 */
2129 if (IS_OUTER_JOIN(jointype))
2130 {
2131 Assert(jointype != JOIN_RIGHT);
2132 *null_index = merge_matching_partitions(outer_map, inner_map,
2133 outer_null, inner_null,
2134 next_index);
2135 Assert(*null_index >= 0);
2136 }
2137 }
2138 }
2139
2140 /*
2141 * merge_default_partitions
2142 * Merge the default partitions from a join's outer and inner sides.
2143 *
2144 * If the merged partition produced from them is the default partition of the
2145 * join relation, *default_index is set to the index of the merged partition.
2146 */
2147 static void
merge_default_partitions(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_default,bool inner_has_default,int outer_default,int inner_default,JoinType jointype,int * next_index,int * default_index)2148 merge_default_partitions(PartitionMap *outer_map,
2149 PartitionMap *inner_map,
2150 bool outer_has_default,
2151 bool inner_has_default,
2152 int outer_default,
2153 int inner_default,
2154 JoinType jointype,
2155 int *next_index,
2156 int *default_index)
2157 {
2158 int outer_merged_index = -1;
2159 int inner_merged_index = -1;
2160
2161 Assert(outer_has_default || inner_has_default);
2162
2163 /* Get the merged partition indexes for the default partitions. */
2164 if (outer_has_default)
2165 {
2166 Assert(outer_default >= 0 && outer_default < outer_map->nparts);
2167 outer_merged_index = outer_map->merged_indexes[outer_default];
2168 }
2169 if (inner_has_default)
2170 {
2171 Assert(inner_default >= 0 && inner_default < inner_map->nparts);
2172 inner_merged_index = inner_map->merged_indexes[inner_default];
2173 }
2174
2175 if (outer_has_default && !inner_has_default)
2176 {
2177 /*
2178 * If this is an outer join, the default partition on the outer side
2179 * has to be scanned all the way anyway; if we have not yet assigned a
2180 * partition, merge the default partition with a dummy partition on
2181 * the other side. The merged partition will act as the default
2182 * partition of the join relation (see comments in
2183 * process_inner_partition()).
2184 */
2185 if (IS_OUTER_JOIN(jointype))
2186 {
2187 Assert(jointype != JOIN_RIGHT);
2188 if (outer_merged_index == -1)
2189 {
2190 Assert(*default_index == -1);
2191 *default_index = merge_partition_with_dummy(outer_map,
2192 outer_default,
2193 next_index);
2194 }
2195 else
2196 Assert(*default_index == outer_merged_index);
2197 }
2198 else
2199 Assert(*default_index == -1);
2200 }
2201 else if (!outer_has_default && inner_has_default)
2202 {
2203 /*
2204 * If this is a FULL join, the default partition on the inner side has
2205 * to be scanned all the way anyway; if we have not yet assigned a
2206 * partition, merge the default partition with a dummy partition on
2207 * the other side. The merged partition will act as the default
2208 * partition of the join relation (see comments in
2209 * process_outer_partition()).
2210 */
2211 if (jointype == JOIN_FULL)
2212 {
2213 if (inner_merged_index == -1)
2214 {
2215 Assert(*default_index == -1);
2216 *default_index = merge_partition_with_dummy(inner_map,
2217 inner_default,
2218 next_index);
2219 }
2220 else
2221 Assert(*default_index == inner_merged_index);
2222 }
2223 else
2224 Assert(*default_index == -1);
2225 }
2226 else
2227 {
2228 Assert(outer_has_default && inner_has_default);
2229
2230 /*
2231 * The default partitions have to be joined with each other, so merge
2232 * them. Note that each of the default partitions isn't merged yet
2233 * (see, process_outer_partition()/process_innerer_partition()), so
2234 * they should be merged successfully. The merged partition will act
2235 * as the default partition of the join relation.
2236 */
2237 Assert(outer_merged_index == -1);
2238 Assert(inner_merged_index == -1);
2239 Assert(*default_index == -1);
2240 *default_index = merge_matching_partitions(outer_map,
2241 inner_map,
2242 outer_default,
2243 inner_default,
2244 next_index);
2245 Assert(*default_index >= 0);
2246 }
2247 }
2248
2249 /*
2250 * merge_partition_with_dummy
2251 * Assign given partition a new partition of a join relation
2252 *
2253 * Note: The caller assumes that the given partition doesn't have a non-dummy
2254 * matching partition on the other side, but if the given partition finds the
2255 * matching partition later, we will adjust the assignment.
2256 */
2257 static int
merge_partition_with_dummy(PartitionMap * map,int index,int * next_index)2258 merge_partition_with_dummy(PartitionMap *map, int index, int *next_index)
2259 {
2260 int merged_index = *next_index;
2261
2262 Assert(index >= 0 && index < map->nparts);
2263 Assert(map->merged_indexes[index] == -1);
2264 Assert(!map->merged[index]);
2265 map->merged_indexes[index] = merged_index;
2266 /* Leave the merged flag alone! */
2267 *next_index = *next_index + 1;
2268 return merged_index;
2269 }
2270
2271 /*
2272 * fix_merged_indexes
2273 * Adjust merged indexes of re-merged partitions
2274 */
2275 static void
fix_merged_indexes(PartitionMap * outer_map,PartitionMap * inner_map,int nmerged,List * merged_indexes)2276 fix_merged_indexes(PartitionMap *outer_map, PartitionMap *inner_map,
2277 int nmerged, List *merged_indexes)
2278 {
2279 int *new_indexes;
2280 int merged_index;
2281 int i;
2282 ListCell *lc;
2283
2284 Assert(nmerged > 0);
2285
2286 new_indexes = (int *) palloc(sizeof(int) * nmerged);
2287 for (i = 0; i < nmerged; i++)
2288 new_indexes[i] = -1;
2289
2290 /* Build the mapping of old merged indexes to new merged indexes. */
2291 if (outer_map->did_remapping)
2292 {
2293 for (i = 0; i < outer_map->nparts; i++)
2294 {
2295 merged_index = outer_map->old_indexes[i];
2296 if (merged_index >= 0)
2297 new_indexes[merged_index] = outer_map->merged_indexes[i];
2298 }
2299 }
2300 if (inner_map->did_remapping)
2301 {
2302 for (i = 0; i < inner_map->nparts; i++)
2303 {
2304 merged_index = inner_map->old_indexes[i];
2305 if (merged_index >= 0)
2306 new_indexes[merged_index] = inner_map->merged_indexes[i];
2307 }
2308 }
2309
2310 /* Fix the merged_indexes list using the mapping. */
2311 foreach(lc, merged_indexes)
2312 {
2313 merged_index = lfirst_int(lc);
2314 Assert(merged_index >= 0);
2315 if (new_indexes[merged_index] >= 0)
2316 lfirst_int(lc) = new_indexes[merged_index];
2317 }
2318
2319 pfree(new_indexes);
2320 }
2321
2322 /*
2323 * generate_matching_part_pairs
2324 * Generate a pair of lists of partitions that produce merged partitions
2325 *
2326 * The lists of partitions are built in the order of merged partition indexes,
2327 * and returned in *outer_parts and *inner_parts.
2328 */
2329 static void
generate_matching_part_pairs(RelOptInfo * outer_rel,RelOptInfo * inner_rel,PartitionMap * outer_map,PartitionMap * inner_map,int nmerged,List ** outer_parts,List ** inner_parts)2330 generate_matching_part_pairs(RelOptInfo *outer_rel, RelOptInfo *inner_rel,
2331 PartitionMap *outer_map, PartitionMap *inner_map,
2332 int nmerged,
2333 List **outer_parts, List **inner_parts)
2334 {
2335 int outer_nparts = outer_map->nparts;
2336 int inner_nparts = inner_map->nparts;
2337 int *outer_indexes;
2338 int *inner_indexes;
2339 int max_nparts;
2340 int i;
2341
2342 Assert(nmerged > 0);
2343 Assert(*outer_parts == NIL);
2344 Assert(*inner_parts == NIL);
2345
2346 outer_indexes = (int *) palloc(sizeof(int) * nmerged);
2347 inner_indexes = (int *) palloc(sizeof(int) * nmerged);
2348 for (i = 0; i < nmerged; i++)
2349 outer_indexes[i] = inner_indexes[i] = -1;
2350
2351 /* Set pairs of matching partitions. */
2352 Assert(outer_nparts == outer_rel->nparts);
2353 Assert(inner_nparts == inner_rel->nparts);
2354 max_nparts = Max(outer_nparts, inner_nparts);
2355 for (i = 0; i < max_nparts; i++)
2356 {
2357 if (i < outer_nparts)
2358 {
2359 int merged_index = outer_map->merged_indexes[i];
2360
2361 if (merged_index >= 0)
2362 {
2363 Assert(merged_index < nmerged);
2364 outer_indexes[merged_index] = i;
2365 }
2366 }
2367 if (i < inner_nparts)
2368 {
2369 int merged_index = inner_map->merged_indexes[i];
2370
2371 if (merged_index >= 0)
2372 {
2373 Assert(merged_index < nmerged);
2374 inner_indexes[merged_index] = i;
2375 }
2376 }
2377 }
2378
2379 /* Build the list pairs. */
2380 for (i = 0; i < nmerged; i++)
2381 {
2382 int outer_index = outer_indexes[i];
2383 int inner_index = inner_indexes[i];
2384
2385 /*
2386 * If both partitions are dummy, it means the merged partition that
2387 * had been assigned to the outer/inner partition was removed when
2388 * re-merging the outer/inner partition in
2389 * merge_matching_partitions(); ignore the merged partition.
2390 */
2391 if (outer_index == -1 && inner_index == -1)
2392 continue;
2393
2394 *outer_parts = lappend(*outer_parts, outer_index >= 0 ?
2395 outer_rel->part_rels[outer_index] : NULL);
2396 *inner_parts = lappend(*inner_parts, inner_index >= 0 ?
2397 inner_rel->part_rels[inner_index] : NULL);
2398 }
2399
2400 pfree(outer_indexes);
2401 pfree(inner_indexes);
2402 }
2403
2404 /*
2405 * build_merged_partition_bounds
2406 * Create a PartitionBoundInfo struct from merged partition bounds
2407 */
2408 static PartitionBoundInfo
build_merged_partition_bounds(char strategy,List * merged_datums,List * merged_kinds,List * merged_indexes,int null_index,int default_index)2409 build_merged_partition_bounds(char strategy, List *merged_datums,
2410 List *merged_kinds, List *merged_indexes,
2411 int null_index, int default_index)
2412 {
2413 PartitionBoundInfo merged_bounds;
2414 int ndatums = list_length(merged_datums);
2415 int pos;
2416 ListCell *lc;
2417
2418 merged_bounds = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
2419 merged_bounds->strategy = strategy;
2420 merged_bounds->ndatums = ndatums;
2421
2422 merged_bounds->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
2423 pos = 0;
2424 foreach(lc, merged_datums)
2425 merged_bounds->datums[pos++] = (Datum *) lfirst(lc);
2426
2427 if (strategy == PARTITION_STRATEGY_RANGE)
2428 {
2429 Assert(list_length(merged_kinds) == ndatums);
2430 merged_bounds->kind = (PartitionRangeDatumKind **)
2431 palloc(sizeof(PartitionRangeDatumKind *) * ndatums);
2432 pos = 0;
2433 foreach(lc, merged_kinds)
2434 merged_bounds->kind[pos++] = (PartitionRangeDatumKind *) lfirst(lc);
2435
2436 /* There are ndatums+1 indexes in the case of range partitioning. */
2437 merged_indexes = lappend_int(merged_indexes, -1);
2438 ndatums++;
2439 }
2440 else
2441 {
2442 Assert(strategy == PARTITION_STRATEGY_LIST);
2443 Assert(merged_kinds == NIL);
2444 merged_bounds->kind = NULL;
2445 }
2446
2447 Assert(list_length(merged_indexes) == ndatums);
2448 merged_bounds->nindexes = ndatums;
2449 merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums);
2450 pos = 0;
2451 foreach(lc, merged_indexes)
2452 merged_bounds->indexes[pos++] = lfirst_int(lc);
2453
2454 merged_bounds->null_index = null_index;
2455 merged_bounds->default_index = default_index;
2456
2457 return merged_bounds;
2458 }
2459
2460 /*
2461 * get_range_partition
2462 * Get the next non-dummy partition of a range-partitioned relation,
2463 * returning the index of that partition
2464 *
2465 * *lb and *ub are set to the lower and upper bounds of that partition
2466 * respectively, and *lb_pos is advanced to the next lower bound, if any.
2467 */
2468 static int
get_range_partition(RelOptInfo * rel,PartitionBoundInfo bi,int * lb_pos,PartitionRangeBound * lb,PartitionRangeBound * ub)2469 get_range_partition(RelOptInfo *rel,
2470 PartitionBoundInfo bi,
2471 int *lb_pos,
2472 PartitionRangeBound *lb,
2473 PartitionRangeBound *ub)
2474 {
2475 int part_index;
2476
2477 Assert(bi->strategy == PARTITION_STRATEGY_RANGE);
2478
2479 do
2480 {
2481 part_index = get_range_partition_internal(bi, lb_pos, lb, ub);
2482 if (part_index == -1)
2483 return -1;
2484 } while (is_dummy_partition(rel, part_index));
2485
2486 return part_index;
2487 }
2488
2489 static int
get_range_partition_internal(PartitionBoundInfo bi,int * lb_pos,PartitionRangeBound * lb,PartitionRangeBound * ub)2490 get_range_partition_internal(PartitionBoundInfo bi,
2491 int *lb_pos,
2492 PartitionRangeBound *lb,
2493 PartitionRangeBound *ub)
2494 {
2495 /* Return the index as -1 if we've exhausted all lower bounds. */
2496 if (*lb_pos >= bi->ndatums)
2497 return -1;
2498
2499 /* A lower bound should have at least one more bound after it. */
2500 Assert(*lb_pos + 1 < bi->ndatums);
2501
2502 /* Set the lower bound. */
2503 lb->index = bi->indexes[*lb_pos];
2504 lb->datums = bi->datums[*lb_pos];
2505 lb->kind = bi->kind[*lb_pos];
2506 lb->lower = true;
2507 /* Set the upper bound. */
2508 ub->index = bi->indexes[*lb_pos + 1];
2509 ub->datums = bi->datums[*lb_pos + 1];
2510 ub->kind = bi->kind[*lb_pos + 1];
2511 ub->lower = false;
2512
2513 /* The index assigned to an upper bound should be valid. */
2514 Assert(ub->index >= 0);
2515
2516 /*
2517 * Advance the position to the next lower bound. If there are no bounds
2518 * left beyond the upper bound, we have reached the last lower bound.
2519 */
2520 if (*lb_pos + 2 >= bi->ndatums)
2521 *lb_pos = bi->ndatums;
2522 else
2523 {
2524 /*
2525 * If the index assigned to the bound next to the upper bound isn't
2526 * valid, that is the next lower bound; else, the upper bound is also
2527 * the lower bound of the next range partition.
2528 */
2529 if (bi->indexes[*lb_pos + 2] < 0)
2530 *lb_pos = *lb_pos + 2;
2531 else
2532 *lb_pos = *lb_pos + 1;
2533 }
2534
2535 return ub->index;
2536 }
2537
2538 /*
2539 * compare_range_partitions
2540 * Compare the bounds of two range partitions, and return true if the
2541 * two partitions overlap, false otherwise
2542 *
2543 * *lb_cmpval is set to -1, 0, or 1 if the outer partition's lower bound is
2544 * lower than, equal to, or higher than the inner partition's lower bound
2545 * respectively. Likewise, *ub_cmpval is set to -1, 0, or 1 if the outer
2546 * partition's upper bound is lower than, equal to, or higher than the inner
2547 * partition's upper bound respectively.
2548 */
2549 static bool
compare_range_partitions(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,PartitionRangeBound * outer_lb,PartitionRangeBound * outer_ub,PartitionRangeBound * inner_lb,PartitionRangeBound * inner_ub,int * lb_cmpval,int * ub_cmpval)2550 compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
2551 Oid *partcollations,
2552 PartitionRangeBound *outer_lb,
2553 PartitionRangeBound *outer_ub,
2554 PartitionRangeBound *inner_lb,
2555 PartitionRangeBound *inner_ub,
2556 int *lb_cmpval, int *ub_cmpval)
2557 {
2558 /*
2559 * Check if the outer partition's upper bound is lower than the inner
2560 * partition's lower bound; if so the partitions aren't overlapping.
2561 */
2562 if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2563 outer_ub, inner_lb) < 0)
2564 {
2565 *lb_cmpval = -1;
2566 *ub_cmpval = -1;
2567 return false;
2568 }
2569
2570 /*
2571 * Check if the outer partition's lower bound is higher than the inner
2572 * partition's upper bound; if so the partitions aren't overlapping.
2573 */
2574 if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2575 outer_lb, inner_ub) > 0)
2576 {
2577 *lb_cmpval = 1;
2578 *ub_cmpval = 1;
2579 return false;
2580 }
2581
2582 /* All other cases indicate overlapping partitions. */
2583 *lb_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2584 outer_lb, inner_lb);
2585 *ub_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2586 outer_ub, inner_ub);
2587 return true;
2588 }
2589
2590 /*
2591 * get_merged_range_bounds
2592 * Given the bounds of range partitions to be joined, determine the bounds
2593 * of a merged partition produced from the range partitions
2594 *
2595 * *merged_lb and *merged_ub are set to the lower and upper bounds of the
2596 * merged partition.
2597 */
2598 static void
get_merged_range_bounds(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,JoinType jointype,PartitionRangeBound * outer_lb,PartitionRangeBound * outer_ub,PartitionRangeBound * inner_lb,PartitionRangeBound * inner_ub,int lb_cmpval,int ub_cmpval,PartitionRangeBound * merged_lb,PartitionRangeBound * merged_ub)2599 get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2600 Oid *partcollations, JoinType jointype,
2601 PartitionRangeBound *outer_lb,
2602 PartitionRangeBound *outer_ub,
2603 PartitionRangeBound *inner_lb,
2604 PartitionRangeBound *inner_ub,
2605 int lb_cmpval, int ub_cmpval,
2606 PartitionRangeBound *merged_lb,
2607 PartitionRangeBound *merged_ub)
2608 {
2609 Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2610 outer_lb, inner_lb) == lb_cmpval);
2611 Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2612 outer_ub, inner_ub) == ub_cmpval);
2613
2614 switch (jointype)
2615 {
2616 case JOIN_INNER:
2617 case JOIN_SEMI:
2618
2619 /*
2620 * An INNER/SEMI join will have the rows that fit both sides, so
2621 * the lower bound of the merged partition will be the higher of
2622 * the two lower bounds, and the upper bound of the merged
2623 * partition will be the lower of the two upper bounds.
2624 */
2625 *merged_lb = (lb_cmpval > 0) ? *outer_lb : *inner_lb;
2626 *merged_ub = (ub_cmpval < 0) ? *outer_ub : *inner_ub;
2627 break;
2628
2629 case JOIN_LEFT:
2630 case JOIN_ANTI:
2631
2632 /*
2633 * A LEFT/ANTI join will have all the rows from the outer side, so
2634 * the bounds of the merged partition will be the same as the
2635 * outer bounds.
2636 */
2637 *merged_lb = *outer_lb;
2638 *merged_ub = *outer_ub;
2639 break;
2640
2641 case JOIN_FULL:
2642
2643 /*
2644 * A FULL join will have all the rows from both sides, so the
2645 * lower bound of the merged partition will be the lower of the
2646 * two lower bounds, and the upper bound of the merged partition
2647 * will be the higher of the two upper bounds.
2648 */
2649 *merged_lb = (lb_cmpval < 0) ? *outer_lb : *inner_lb;
2650 *merged_ub = (ub_cmpval > 0) ? *outer_ub : *inner_ub;
2651 break;
2652
2653 default:
2654 elog(ERROR, "unrecognized join type: %d", (int) jointype);
2655 }
2656 }
2657
2658 /*
2659 * add_merged_range_bounds
2660 * Add the bounds of a merged partition to the lists of range bounds
2661 */
2662 static void
add_merged_range_bounds(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,PartitionRangeBound * merged_lb,PartitionRangeBound * merged_ub,int merged_index,List ** merged_datums,List ** merged_kinds,List ** merged_indexes)2663 add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2664 Oid *partcollations,
2665 PartitionRangeBound *merged_lb,
2666 PartitionRangeBound *merged_ub,
2667 int merged_index,
2668 List **merged_datums,
2669 List **merged_kinds,
2670 List **merged_indexes)
2671 {
2672 int cmpval;
2673
2674 if (!*merged_datums)
2675 {
2676 /* First merged partition */
2677 Assert(!*merged_kinds);
2678 Assert(!*merged_indexes);
2679 cmpval = 1;
2680 }
2681 else
2682 {
2683 PartitionRangeBound prev_ub;
2684
2685 Assert(*merged_datums);
2686 Assert(*merged_kinds);
2687 Assert(*merged_indexes);
2688
2689 /* Get the last upper bound. */
2690 prev_ub.index = llast_int(*merged_indexes);
2691 prev_ub.datums = (Datum *) llast(*merged_datums);
2692 prev_ub.kind = (PartitionRangeDatumKind *) llast(*merged_kinds);
2693 prev_ub.lower = false;
2694
2695 /*
2696 * We pass to partition_rbound_cmp() lower1 as false to prevent it
2697 * from considering the last upper bound to be smaller than the lower
2698 * bound of the merged partition when the values of the two range
2699 * bounds compare equal.
2700 */
2701 cmpval = partition_rbound_cmp(partnatts, partsupfuncs, partcollations,
2702 merged_lb->datums, merged_lb->kind,
2703 false, &prev_ub);
2704 Assert(cmpval >= 0);
2705 }
2706
2707 /*
2708 * If the lower bound is higher than the last upper bound, add the lower
2709 * bound with the index as -1 indicating that that is a lower bound; else,
2710 * the last upper bound will be reused as the lower bound of the merged
2711 * partition, so skip this.
2712 */
2713 if (cmpval > 0)
2714 {
2715 *merged_datums = lappend(*merged_datums, merged_lb->datums);
2716 *merged_kinds = lappend(*merged_kinds, merged_lb->kind);
2717 *merged_indexes = lappend_int(*merged_indexes, -1);
2718 }
2719
2720 /* Add the upper bound and index of the merged partition. */
2721 *merged_datums = lappend(*merged_datums, merged_ub->datums);
2722 *merged_kinds = lappend(*merged_kinds, merged_ub->kind);
2723 *merged_indexes = lappend_int(*merged_indexes, merged_index);
2724 }
2725
2726 /*
2727 * partitions_are_ordered
2728 * Determine whether the partitions described by 'boundinfo' are ordered,
2729 * that is partitions appearing earlier in the PartitionDesc sequence
2730 * contain partition keys strictly less than those appearing later.
2731 * Also, if NULL values are possible, they must come in the last
2732 * partition defined in the PartitionDesc.
2733 *
2734 * If out of order, or there is insufficient info to know the order,
2735 * then we return false.
2736 */
2737 bool
partitions_are_ordered(PartitionBoundInfo boundinfo,int nparts)2738 partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts)
2739 {
2740 Assert(boundinfo != NULL);
2741
2742 switch (boundinfo->strategy)
2743 {
2744 case PARTITION_STRATEGY_RANGE:
2745
2746 /*
2747 * RANGE-type partitioning guarantees that the partitions can be
2748 * scanned in the order that they're defined in the PartitionDesc
2749 * to provide sequential, non-overlapping ranges of tuples.
2750 * However, if a DEFAULT partition exists then it doesn't work, as
2751 * that could contain tuples from either below or above the
2752 * defined range, or tuples belonging to gaps between partitions.
2753 */
2754 if (!partition_bound_has_default(boundinfo))
2755 return true;
2756 break;
2757
2758 case PARTITION_STRATEGY_LIST:
2759
2760 /*
2761 * LIST partitioning can also guarantee ordering, but only if the
2762 * partitions don't accept interleaved values. We could likely
2763 * check for this by looping over the PartitionBound's indexes
2764 * array to check that the indexes are in order. For now, let's
2765 * just keep it simple and just accept LIST partitioning when
2766 * there's no DEFAULT partition, exactly one value per partition,
2767 * and optionally a NULL partition that does not accept any other
2768 * values. Such a NULL partition will come last in the
2769 * PartitionDesc, and the other partitions will be properly
2770 * ordered. This is a cheap test to make as it does not require
2771 * any per-partition processing. Maybe we'd like to handle more
2772 * complex cases in the future.
2773 */
2774 if (partition_bound_has_default(boundinfo))
2775 return false;
2776
2777 if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo)
2778 == nparts)
2779 return true;
2780 break;
2781
2782 default:
2783 /* HASH, or some other strategy */
2784 break;
2785 }
2786
2787 return false;
2788 }
2789
2790 /*
2791 * check_new_partition_bound
2792 *
2793 * Checks if the new partition's bound overlaps any of the existing partitions
2794 * of parent. Also performs additional checks as necessary per strategy.
2795 */
2796 void
check_new_partition_bound(char * relname,Relation parent,PartitionBoundSpec * spec)2797 check_new_partition_bound(char *relname, Relation parent,
2798 PartitionBoundSpec *spec)
2799 {
2800 PartitionKey key = RelationGetPartitionKey(parent);
2801 PartitionDesc partdesc = RelationGetPartitionDesc(parent);
2802 PartitionBoundInfo boundinfo = partdesc->boundinfo;
2803 ParseState *pstate = make_parsestate(NULL);
2804 int with = -1;
2805 bool overlap = false;
2806
2807 if (spec->is_default)
2808 {
2809 /*
2810 * The default partition bound never conflicts with any other
2811 * partition's; if that's what we're attaching, the only possible
2812 * problem is that one already exists, so check for that and we're
2813 * done.
2814 */
2815 if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
2816 return;
2817
2818 /* Default partition already exists, error out. */
2819 ereport(ERROR,
2820 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2821 errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
2822 relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
2823 parser_errposition(pstate, spec->location)));
2824 }
2825
2826 switch (key->strategy)
2827 {
2828 case PARTITION_STRATEGY_HASH:
2829 {
2830 Assert(spec->strategy == PARTITION_STRATEGY_HASH);
2831 Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
2832
2833 if (partdesc->nparts > 0)
2834 {
2835 Datum **datums = boundinfo->datums;
2836 int ndatums = boundinfo->ndatums;
2837 int greatest_modulus;
2838 int remainder;
2839 int offset;
2840 bool valid_modulus = true;
2841 int prev_modulus, /* Previous largest modulus */
2842 next_modulus; /* Next largest modulus */
2843
2844 /*
2845 * Check rule that every modulus must be a factor of the
2846 * next larger modulus. For example, if you have a bunch
2847 * of partitions that all have modulus 5, you can add a
2848 * new partition with modulus 10 or a new partition with
2849 * modulus 15, but you cannot add both a partition with
2850 * modulus 10 and a partition with modulus 15, because 10
2851 * is not a factor of 15.
2852 *
2853 * Get the greatest (modulus, remainder) pair contained in
2854 * boundinfo->datums that is less than or equal to the
2855 * (spec->modulus, spec->remainder) pair.
2856 */
2857 offset = partition_hash_bsearch(boundinfo,
2858 spec->modulus,
2859 spec->remainder);
2860 if (offset < 0)
2861 {
2862 next_modulus = DatumGetInt32(datums[0][0]);
2863 valid_modulus = (next_modulus % spec->modulus) == 0;
2864 }
2865 else
2866 {
2867 prev_modulus = DatumGetInt32(datums[offset][0]);
2868 valid_modulus = (spec->modulus % prev_modulus) == 0;
2869
2870 if (valid_modulus && (offset + 1) < ndatums)
2871 {
2872 next_modulus = DatumGetInt32(datums[offset + 1][0]);
2873 valid_modulus = (next_modulus % spec->modulus) == 0;
2874 }
2875 }
2876
2877 if (!valid_modulus)
2878 ereport(ERROR,
2879 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2880 errmsg("every hash partition modulus must be a factor of the next larger modulus")));
2881
2882 greatest_modulus = boundinfo->nindexes;
2883 remainder = spec->remainder;
2884
2885 /*
2886 * Normally, the lowest remainder that could conflict with
2887 * the new partition is equal to the remainder specified
2888 * for the new partition, but when the new partition has a
2889 * modulus higher than any used so far, we need to adjust.
2890 */
2891 if (remainder >= greatest_modulus)
2892 remainder = remainder % greatest_modulus;
2893
2894 /* Check every potentially-conflicting remainder. */
2895 do
2896 {
2897 if (boundinfo->indexes[remainder] != -1)
2898 {
2899 overlap = true;
2900 with = boundinfo->indexes[remainder];
2901 break;
2902 }
2903 remainder += spec->modulus;
2904 } while (remainder < greatest_modulus);
2905 }
2906
2907 break;
2908 }
2909
2910 case PARTITION_STRATEGY_LIST:
2911 {
2912 Assert(spec->strategy == PARTITION_STRATEGY_LIST);
2913
2914 if (partdesc->nparts > 0)
2915 {
2916 ListCell *cell;
2917
2918 Assert(boundinfo &&
2919 boundinfo->strategy == PARTITION_STRATEGY_LIST &&
2920 (boundinfo->ndatums > 0 ||
2921 partition_bound_accepts_nulls(boundinfo) ||
2922 partition_bound_has_default(boundinfo)));
2923
2924 foreach(cell, spec->listdatums)
2925 {
2926 Const *val = castNode(Const, lfirst(cell));
2927
2928 if (!val->constisnull)
2929 {
2930 int offset;
2931 bool equal;
2932
2933 offset = partition_list_bsearch(&key->partsupfunc[0],
2934 key->partcollation,
2935 boundinfo,
2936 val->constvalue,
2937 &equal);
2938 if (offset >= 0 && equal)
2939 {
2940 overlap = true;
2941 with = boundinfo->indexes[offset];
2942 break;
2943 }
2944 }
2945 else if (partition_bound_accepts_nulls(boundinfo))
2946 {
2947 overlap = true;
2948 with = boundinfo->null_index;
2949 break;
2950 }
2951 }
2952 }
2953
2954 break;
2955 }
2956
2957 case PARTITION_STRATEGY_RANGE:
2958 {
2959 PartitionRangeBound *lower,
2960 *upper;
2961
2962 Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
2963 lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
2964 upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
2965
2966 /*
2967 * First check if the resulting range would be empty with
2968 * specified lower and upper bounds
2969 */
2970 if (partition_rbound_cmp(key->partnatts, key->partsupfunc,
2971 key->partcollation, lower->datums,
2972 lower->kind, true, upper) >= 0)
2973 {
2974 ereport(ERROR,
2975 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2976 errmsg("empty range bound specified for partition \"%s\"",
2977 relname),
2978 errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
2979 get_range_partbound_string(spec->lowerdatums),
2980 get_range_partbound_string(spec->upperdatums)),
2981 parser_errposition(pstate, spec->location)));
2982 }
2983
2984 if (partdesc->nparts > 0)
2985 {
2986 int offset;
2987 bool equal;
2988
2989 Assert(boundinfo &&
2990 boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
2991 (boundinfo->ndatums > 0 ||
2992 partition_bound_has_default(boundinfo)));
2993
2994 /*
2995 * Test whether the new lower bound (which is treated
2996 * inclusively as part of the new partition) lies inside
2997 * an existing partition, or in a gap.
2998 *
2999 * If it's inside an existing partition, the bound at
3000 * offset + 1 will be the upper bound of that partition,
3001 * and its index will be >= 0.
3002 *
3003 * If it's in a gap, the bound at offset + 1 will be the
3004 * lower bound of the next partition, and its index will
3005 * be -1. This is also true if there is no next partition,
3006 * since the index array is initialised with an extra -1
3007 * at the end.
3008 */
3009 offset = partition_range_bsearch(key->partnatts,
3010 key->partsupfunc,
3011 key->partcollation,
3012 boundinfo, lower,
3013 &equal);
3014
3015 if (boundinfo->indexes[offset + 1] < 0)
3016 {
3017 /*
3018 * Check that the new partition will fit in the gap.
3019 * For it to fit, the new upper bound must be less
3020 * than or equal to the lower bound of the next
3021 * partition, if there is one.
3022 */
3023 if (offset + 1 < boundinfo->ndatums)
3024 {
3025 int32 cmpval;
3026 Datum *datums;
3027 PartitionRangeDatumKind *kind;
3028 bool is_lower;
3029
3030 datums = boundinfo->datums[offset + 1];
3031 kind = boundinfo->kind[offset + 1];
3032 is_lower = (boundinfo->indexes[offset + 1] == -1);
3033
3034 cmpval = partition_rbound_cmp(key->partnatts,
3035 key->partsupfunc,
3036 key->partcollation,
3037 datums, kind,
3038 is_lower, upper);
3039 if (cmpval < 0)
3040 {
3041 /*
3042 * The new partition overlaps with the
3043 * existing partition between offset + 1 and
3044 * offset + 2.
3045 */
3046 overlap = true;
3047 with = boundinfo->indexes[offset + 2];
3048 }
3049 }
3050 }
3051 else
3052 {
3053 /*
3054 * The new partition overlaps with the existing
3055 * partition between offset and offset + 1.
3056 */
3057 overlap = true;
3058 with = boundinfo->indexes[offset + 1];
3059 }
3060 }
3061
3062 break;
3063 }
3064
3065 default:
3066 elog(ERROR, "unexpected partition strategy: %d",
3067 (int) key->strategy);
3068 }
3069
3070 if (overlap)
3071 {
3072 Assert(with >= 0);
3073 ereport(ERROR,
3074 (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3075 errmsg("partition \"%s\" would overlap partition \"%s\"",
3076 relname, get_rel_name(partdesc->oids[with])),
3077 parser_errposition(pstate, spec->location)));
3078 }
3079 }
3080
3081 /*
3082 * check_default_partition_contents
3083 *
3084 * This function checks if there exists a row in the default partition that
3085 * would properly belong to the new partition being added. If it finds one,
3086 * it throws an error.
3087 */
3088 void
check_default_partition_contents(Relation parent,Relation default_rel,PartitionBoundSpec * new_spec)3089 check_default_partition_contents(Relation parent, Relation default_rel,
3090 PartitionBoundSpec *new_spec)
3091 {
3092 List *new_part_constraints;
3093 List *def_part_constraints;
3094 List *all_parts;
3095 ListCell *lc;
3096
3097 new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
3098 ? get_qual_for_list(parent, new_spec)
3099 : get_qual_for_range(parent, new_spec, false);
3100 def_part_constraints =
3101 get_proposed_default_constraint(new_part_constraints);
3102
3103 /*
3104 * Map the Vars in the constraint expression from parent's attnos to
3105 * default_rel's.
3106 */
3107 def_part_constraints =
3108 map_partition_varattnos(def_part_constraints, 1, default_rel,
3109 parent);
3110
3111 /*
3112 * If the existing constraints on the default partition imply that it will
3113 * not contain any row that would belong to the new partition, we can
3114 * avoid scanning the default partition.
3115 */
3116 if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
3117 {
3118 ereport(DEBUG1,
3119 (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3120 RelationGetRelationName(default_rel))));
3121 return;
3122 }
3123
3124 /*
3125 * Scan the default partition and its subpartitions, and check for rows
3126 * that do not satisfy the revised partition constraints.
3127 */
3128 if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3129 all_parts = find_all_inheritors(RelationGetRelid(default_rel),
3130 AccessExclusiveLock, NULL);
3131 else
3132 all_parts = list_make1_oid(RelationGetRelid(default_rel));
3133
3134 foreach(lc, all_parts)
3135 {
3136 Oid part_relid = lfirst_oid(lc);
3137 Relation part_rel;
3138 Expr *partition_constraint;
3139 EState *estate;
3140 ExprState *partqualstate = NULL;
3141 Snapshot snapshot;
3142 ExprContext *econtext;
3143 TableScanDesc scan;
3144 MemoryContext oldCxt;
3145 TupleTableSlot *tupslot;
3146
3147 /* Lock already taken above. */
3148 if (part_relid != RelationGetRelid(default_rel))
3149 {
3150 part_rel = table_open(part_relid, NoLock);
3151
3152 /*
3153 * Map the Vars in the constraint expression from default_rel's
3154 * the sub-partition's.
3155 */
3156 partition_constraint = make_ands_explicit(def_part_constraints);
3157 partition_constraint = (Expr *)
3158 map_partition_varattnos((List *) partition_constraint, 1,
3159 part_rel, default_rel);
3160
3161 /*
3162 * If the partition constraints on default partition child imply
3163 * that it will not contain any row that would belong to the new
3164 * partition, we can avoid scanning the child table.
3165 */
3166 if (PartConstraintImpliedByRelConstraint(part_rel,
3167 def_part_constraints))
3168 {
3169 ereport(DEBUG1,
3170 (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3171 RelationGetRelationName(part_rel))));
3172
3173 table_close(part_rel, NoLock);
3174 continue;
3175 }
3176 }
3177 else
3178 {
3179 part_rel = default_rel;
3180 partition_constraint = make_ands_explicit(def_part_constraints);
3181 }
3182
3183 /*
3184 * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
3185 * scanned.
3186 */
3187 if (part_rel->rd_rel->relkind != RELKIND_RELATION)
3188 {
3189 if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
3190 ereport(WARNING,
3191 (errcode(ERRCODE_CHECK_VIOLATION),
3192 errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
3193 RelationGetRelationName(part_rel),
3194 RelationGetRelationName(default_rel))));
3195
3196 if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3197 table_close(part_rel, NoLock);
3198
3199 continue;
3200 }
3201
3202 estate = CreateExecutorState();
3203
3204 /* Build expression execution states for partition check quals */
3205 partqualstate = ExecPrepareExpr(partition_constraint, estate);
3206
3207 econtext = GetPerTupleExprContext(estate);
3208 snapshot = RegisterSnapshot(GetLatestSnapshot());
3209 tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
3210 scan = table_beginscan(part_rel, snapshot, 0, NULL);
3211
3212 /*
3213 * Switch to per-tuple memory context and reset it for each tuple
3214 * produced, so we don't leak memory.
3215 */
3216 oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
3217
3218 while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
3219 {
3220 econtext->ecxt_scantuple = tupslot;
3221
3222 if (!ExecCheck(partqualstate, econtext))
3223 ereport(ERROR,
3224 (errcode(ERRCODE_CHECK_VIOLATION),
3225 errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
3226 RelationGetRelationName(default_rel)),
3227 errtable(default_rel)));
3228
3229 ResetExprContext(econtext);
3230 CHECK_FOR_INTERRUPTS();
3231 }
3232
3233 MemoryContextSwitchTo(oldCxt);
3234 table_endscan(scan);
3235 UnregisterSnapshot(snapshot);
3236 ExecDropSingleTupleTableSlot(tupslot);
3237 FreeExecutorState(estate);
3238
3239 if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3240 table_close(part_rel, NoLock); /* keep the lock until commit */
3241 }
3242 }
3243
3244 /*
3245 * get_hash_partition_greatest_modulus
3246 *
3247 * Returns the greatest modulus of the hash partition bound.
3248 * This is no longer used in the core code, but we keep it around
3249 * in case external modules are using it.
3250 */
3251 int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)3252 get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
3253 {
3254 Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
3255 return bound->nindexes;
3256 }
3257
3258 /*
3259 * make_one_partition_rbound
3260 *
3261 * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
3262 * and a flag telling whether the bound is lower or not. Made into a function
3263 * because there are multiple sites that want to use this facility.
3264 */
3265 static PartitionRangeBound *
make_one_partition_rbound(PartitionKey key,int index,List * datums,bool lower)3266 make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
3267 {
3268 PartitionRangeBound *bound;
3269 ListCell *lc;
3270 int i;
3271
3272 Assert(datums != NIL);
3273
3274 bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
3275 bound->index = index;
3276 bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
3277 bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
3278 sizeof(PartitionRangeDatumKind));
3279 bound->lower = lower;
3280
3281 i = 0;
3282 foreach(lc, datums)
3283 {
3284 PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
3285
3286 /* What's contained in this range datum? */
3287 bound->kind[i] = datum->kind;
3288
3289 if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
3290 {
3291 Const *val = castNode(Const, datum->value);
3292
3293 if (val->constisnull)
3294 elog(ERROR, "invalid range bound datum");
3295 bound->datums[i] = val->constvalue;
3296 }
3297
3298 i++;
3299 }
3300
3301 return bound;
3302 }
3303
3304 /*
3305 * partition_rbound_cmp
3306 *
3307 * Return for two range bounds whether the 1st one (specified in datums1,
3308 * kind1, and lower1) is <, =, or > the bound specified in *b2.
3309 *
3310 * partnatts, partsupfunc and partcollation give the number of attributes in the
3311 * bounds to be compared, comparison function to be used and the collations of
3312 * attributes, respectively.
3313 *
3314 * Note that if the values of the two range bounds compare equal, then we take
3315 * into account whether they are upper or lower bounds, and an upper bound is
3316 * considered to be smaller than a lower bound. This is important to the way
3317 * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
3318 * structure, which only stores the upper bound of a common boundary between
3319 * two contiguous partitions.
3320 */
3321 static int32
partition_rbound_cmp(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * datums1,PartitionRangeDatumKind * kind1,bool lower1,PartitionRangeBound * b2)3322 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
3323 Oid *partcollation,
3324 Datum *datums1, PartitionRangeDatumKind *kind1,
3325 bool lower1, PartitionRangeBound *b2)
3326 {
3327 int32 cmpval = 0; /* placate compiler */
3328 int i;
3329 Datum *datums2 = b2->datums;
3330 PartitionRangeDatumKind *kind2 = b2->kind;
3331 bool lower2 = b2->lower;
3332
3333 for (i = 0; i < partnatts; i++)
3334 {
3335 /*
3336 * First, handle cases where the column is unbounded, which should not
3337 * invoke the comparison procedure, and should not consider any later
3338 * columns. Note that the PartitionRangeDatumKind enum elements
3339 * compare the same way as the values they represent.
3340 */
3341 if (kind1[i] < kind2[i])
3342 return -1;
3343 else if (kind1[i] > kind2[i])
3344 return 1;
3345 else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
3346
3347 /*
3348 * The column bounds are both MINVALUE or both MAXVALUE. No later
3349 * columns should be considered, but we still need to compare
3350 * whether they are upper or lower bounds.
3351 */
3352 break;
3353
3354 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3355 partcollation[i],
3356 datums1[i],
3357 datums2[i]));
3358 if (cmpval != 0)
3359 break;
3360 }
3361
3362 /*
3363 * If the comparison is anything other than equal, we're done. If they
3364 * compare equal though, we still have to consider whether the boundaries
3365 * are inclusive or exclusive. Exclusive one is considered smaller of the
3366 * two.
3367 */
3368 if (cmpval == 0 && lower1 != lower2)
3369 cmpval = lower1 ? 1 : -1;
3370
3371 return cmpval;
3372 }
3373
3374 /*
3375 * partition_rbound_datum_cmp
3376 *
3377 * Return whether range bound (specified in rb_datums and rb_kind)
3378 * is <, =, or > partition key of tuple (tuple_datums)
3379 *
3380 * n_tuple_datums, partsupfunc and partcollation give number of attributes in
3381 * the bounds to be compared, comparison function to be used and the collations
3382 * of attributes resp.
3383 *
3384 */
3385 int32
partition_rbound_datum_cmp(FmgrInfo * partsupfunc,Oid * partcollation,Datum * rb_datums,PartitionRangeDatumKind * rb_kind,Datum * tuple_datums,int n_tuple_datums)3386 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
3387 Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
3388 Datum *tuple_datums, int n_tuple_datums)
3389 {
3390 int i;
3391 int32 cmpval = -1;
3392
3393 for (i = 0; i < n_tuple_datums; i++)
3394 {
3395 if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
3396 return -1;
3397 else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
3398 return 1;
3399
3400 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3401 partcollation[i],
3402 rb_datums[i],
3403 tuple_datums[i]));
3404 if (cmpval != 0)
3405 break;
3406 }
3407
3408 return cmpval;
3409 }
3410
3411 /*
3412 * partition_hbound_cmp
3413 *
3414 * Compares modulus first, then remainder if modulus is equal.
3415 */
3416 static int32
partition_hbound_cmp(int modulus1,int remainder1,int modulus2,int remainder2)3417 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
3418 {
3419 if (modulus1 < modulus2)
3420 return -1;
3421 if (modulus1 > modulus2)
3422 return 1;
3423 if (modulus1 == modulus2 && remainder1 != remainder2)
3424 return (remainder1 > remainder2) ? 1 : -1;
3425 return 0;
3426 }
3427
3428 /*
3429 * partition_list_bsearch
3430 * Returns the index of the greatest bound datum that is less than equal
3431 * to the given value or -1 if all of the bound datums are greater
3432 *
3433 * *is_equal is set to true if the bound datum at the returned index is equal
3434 * to the input value.
3435 */
3436 int
partition_list_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,Datum value,bool * is_equal)3437 partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3438 PartitionBoundInfo boundinfo,
3439 Datum value, bool *is_equal)
3440 {
3441 int lo,
3442 hi,
3443 mid;
3444
3445 lo = -1;
3446 hi = boundinfo->ndatums - 1;
3447 while (lo < hi)
3448 {
3449 int32 cmpval;
3450
3451 mid = (lo + hi + 1) / 2;
3452 cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
3453 partcollation[0],
3454 boundinfo->datums[mid][0],
3455 value));
3456 if (cmpval <= 0)
3457 {
3458 lo = mid;
3459 *is_equal = (cmpval == 0);
3460 if (*is_equal)
3461 break;
3462 }
3463 else
3464 hi = mid - 1;
3465 }
3466
3467 return lo;
3468 }
3469
3470 /*
3471 * partition_range_bsearch
3472 * Returns the index of the greatest range bound that is less than or
3473 * equal to the given range bound or -1 if all of the range bounds are
3474 * greater
3475 *
3476 * *is_equal is set to true if the range bound at the returned index is equal
3477 * to the input range bound
3478 */
3479 static int
partition_range_bsearch(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,PartitionRangeBound * probe,bool * is_equal)3480 partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
3481 Oid *partcollation,
3482 PartitionBoundInfo boundinfo,
3483 PartitionRangeBound *probe, bool *is_equal)
3484 {
3485 int lo,
3486 hi,
3487 mid;
3488
3489 lo = -1;
3490 hi = boundinfo->ndatums - 1;
3491 while (lo < hi)
3492 {
3493 int32 cmpval;
3494
3495 mid = (lo + hi + 1) / 2;
3496 cmpval = partition_rbound_cmp(partnatts, partsupfunc,
3497 partcollation,
3498 boundinfo->datums[mid],
3499 boundinfo->kind[mid],
3500 (boundinfo->indexes[mid] == -1),
3501 probe);
3502 if (cmpval <= 0)
3503 {
3504 lo = mid;
3505 *is_equal = (cmpval == 0);
3506
3507 if (*is_equal)
3508 break;
3509 }
3510 else
3511 hi = mid - 1;
3512 }
3513
3514 return lo;
3515 }
3516
3517 /*
3518 * partition_range_bsearch
3519 * Returns the index of the greatest range bound that is less than or
3520 * equal to the given tuple or -1 if all of the range bounds are greater
3521 *
3522 * *is_equal is set to true if the range bound at the returned index is equal
3523 * to the input tuple.
3524 */
3525 int
partition_range_datum_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,int nvalues,Datum * values,bool * is_equal)3526 partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3527 PartitionBoundInfo boundinfo,
3528 int nvalues, Datum *values, bool *is_equal)
3529 {
3530 int lo,
3531 hi,
3532 mid;
3533
3534 lo = -1;
3535 hi = boundinfo->ndatums - 1;
3536 while (lo < hi)
3537 {
3538 int32 cmpval;
3539
3540 mid = (lo + hi + 1) / 2;
3541 cmpval = partition_rbound_datum_cmp(partsupfunc,
3542 partcollation,
3543 boundinfo->datums[mid],
3544 boundinfo->kind[mid],
3545 values,
3546 nvalues);
3547 if (cmpval <= 0)
3548 {
3549 lo = mid;
3550 *is_equal = (cmpval == 0);
3551
3552 if (*is_equal)
3553 break;
3554 }
3555 else
3556 hi = mid - 1;
3557 }
3558
3559 return lo;
3560 }
3561
3562 /*
3563 * partition_hash_bsearch
3564 * Returns the index of the greatest (modulus, remainder) pair that is
3565 * less than or equal to the given (modulus, remainder) pair or -1 if
3566 * all of them are greater
3567 */
3568 int
partition_hash_bsearch(PartitionBoundInfo boundinfo,int modulus,int remainder)3569 partition_hash_bsearch(PartitionBoundInfo boundinfo,
3570 int modulus, int remainder)
3571 {
3572 int lo,
3573 hi,
3574 mid;
3575
3576 lo = -1;
3577 hi = boundinfo->ndatums - 1;
3578 while (lo < hi)
3579 {
3580 int32 cmpval,
3581 bound_modulus,
3582 bound_remainder;
3583
3584 mid = (lo + hi + 1) / 2;
3585 bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
3586 bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
3587 cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
3588 modulus, remainder);
3589 if (cmpval <= 0)
3590 {
3591 lo = mid;
3592
3593 if (cmpval == 0)
3594 break;
3595 }
3596 else
3597 hi = mid - 1;
3598 }
3599
3600 return lo;
3601 }
3602
3603 /*
3604 * qsort_partition_hbound_cmp
3605 *
3606 * Hash bounds are sorted by modulus, then by remainder.
3607 */
3608 static int32
qsort_partition_hbound_cmp(const void * a,const void * b)3609 qsort_partition_hbound_cmp(const void *a, const void *b)
3610 {
3611 PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
3612 PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
3613
3614 return partition_hbound_cmp(h1->modulus, h1->remainder,
3615 h2->modulus, h2->remainder);
3616 }
3617
3618 /*
3619 * qsort_partition_list_value_cmp
3620 *
3621 * Compare two list partition bound datums.
3622 */
3623 static int32
qsort_partition_list_value_cmp(const void * a,const void * b,void * arg)3624 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
3625 {
3626 Datum val1 = (*(PartitionListValue *const *) a)->value,
3627 val2 = (*(PartitionListValue *const *) b)->value;
3628 PartitionKey key = (PartitionKey) arg;
3629
3630 return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
3631 key->partcollation[0],
3632 val1, val2));
3633 }
3634
3635 /*
3636 * qsort_partition_rbound_cmp
3637 *
3638 * Used when sorting range bounds across all range partitions.
3639 */
3640 static int32
qsort_partition_rbound_cmp(const void * a,const void * b,void * arg)3641 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
3642 {
3643 PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
3644 PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
3645 PartitionKey key = (PartitionKey) arg;
3646
3647 return partition_rbound_cmp(key->partnatts, key->partsupfunc,
3648 key->partcollation, b1->datums, b1->kind,
3649 b1->lower, b2);
3650 }
3651
3652 /*
3653 * get_partition_operator
3654 *
3655 * Return oid of the operator of the given strategy for the given partition
3656 * key column. It is assumed that the partitioning key is of the same type as
3657 * the chosen partitioning opclass, or at least binary-compatible. In the
3658 * latter case, *need_relabel is set to true if the opclass is not of a
3659 * polymorphic type (indicating a RelabelType node needed on top), otherwise
3660 * false.
3661 */
3662 static Oid
get_partition_operator(PartitionKey key,int col,StrategyNumber strategy,bool * need_relabel)3663 get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
3664 bool *need_relabel)
3665 {
3666 Oid operoid;
3667
3668 /*
3669 * Get the operator in the partitioning opfamily using the opclass'
3670 * declared input type as both left- and righttype.
3671 */
3672 operoid = get_opfamily_member(key->partopfamily[col],
3673 key->partopcintype[col],
3674 key->partopcintype[col],
3675 strategy);
3676 if (!OidIsValid(operoid))
3677 elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
3678 strategy, key->partopcintype[col], key->partopcintype[col],
3679 key->partopfamily[col]);
3680
3681 /*
3682 * If the partition key column is not of the same type as the operator
3683 * class and not polymorphic, tell caller to wrap the non-Const expression
3684 * in a RelabelType. This matches what parse_coerce.c does.
3685 */
3686 *need_relabel = (key->parttypid[col] != key->partopcintype[col] &&
3687 key->partopcintype[col] != RECORDOID &&
3688 !IsPolymorphicType(key->partopcintype[col]));
3689
3690 return operoid;
3691 }
3692
3693 /*
3694 * make_partition_op_expr
3695 * Returns an Expr for the given partition key column with arg1 and
3696 * arg2 as its leftop and rightop, respectively
3697 */
3698 static Expr *
make_partition_op_expr(PartitionKey key,int keynum,uint16 strategy,Expr * arg1,Expr * arg2)3699 make_partition_op_expr(PartitionKey key, int keynum,
3700 uint16 strategy, Expr *arg1, Expr *arg2)
3701 {
3702 Oid operoid;
3703 bool need_relabel = false;
3704 Expr *result = NULL;
3705
3706 /* Get the correct btree operator for this partitioning column */
3707 operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
3708
3709 /*
3710 * Chosen operator may be such that the non-Const operand needs to be
3711 * coerced, so apply the same; see the comment in
3712 * get_partition_operator().
3713 */
3714 if (!IsA(arg1, Const) &&
3715 (need_relabel ||
3716 key->partcollation[keynum] != key->parttypcoll[keynum]))
3717 arg1 = (Expr *) makeRelabelType(arg1,
3718 key->partopcintype[keynum],
3719 -1,
3720 key->partcollation[keynum],
3721 COERCE_EXPLICIT_CAST);
3722
3723 /* Generate the actual expression */
3724 switch (key->strategy)
3725 {
3726 case PARTITION_STRATEGY_LIST:
3727 {
3728 List *elems = (List *) arg2;
3729 int nelems = list_length(elems);
3730
3731 Assert(nelems >= 1);
3732 Assert(keynum == 0);
3733
3734 if (nelems > 1 &&
3735 !type_is_array(key->parttypid[keynum]))
3736 {
3737 ArrayExpr *arrexpr;
3738 ScalarArrayOpExpr *saopexpr;
3739
3740 /* Construct an ArrayExpr for the right-hand inputs */
3741 arrexpr = makeNode(ArrayExpr);
3742 arrexpr->array_typeid =
3743 get_array_type(key->parttypid[keynum]);
3744 arrexpr->array_collid = key->parttypcoll[keynum];
3745 arrexpr->element_typeid = key->parttypid[keynum];
3746 arrexpr->elements = elems;
3747 arrexpr->multidims = false;
3748 arrexpr->location = -1;
3749
3750 /* Build leftop = ANY (rightop) */
3751 saopexpr = makeNode(ScalarArrayOpExpr);
3752 saopexpr->opno = operoid;
3753 saopexpr->opfuncid = get_opcode(operoid);
3754 saopexpr->useOr = true;
3755 saopexpr->inputcollid = key->partcollation[keynum];
3756 saopexpr->args = list_make2(arg1, arrexpr);
3757 saopexpr->location = -1;
3758
3759 result = (Expr *) saopexpr;
3760 }
3761 else
3762 {
3763 List *elemops = NIL;
3764 ListCell *lc;
3765
3766 foreach(lc, elems)
3767 {
3768 Expr *elem = lfirst(lc),
3769 *elemop;
3770
3771 elemop = make_opclause(operoid,
3772 BOOLOID,
3773 false,
3774 arg1, elem,
3775 InvalidOid,
3776 key->partcollation[keynum]);
3777 elemops = lappend(elemops, elemop);
3778 }
3779
3780 result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
3781 }
3782 break;
3783 }
3784
3785 case PARTITION_STRATEGY_RANGE:
3786 result = make_opclause(operoid,
3787 BOOLOID,
3788 false,
3789 arg1, arg2,
3790 InvalidOid,
3791 key->partcollation[keynum]);
3792 break;
3793
3794 default:
3795 elog(ERROR, "invalid partitioning strategy");
3796 break;
3797 }
3798
3799 return result;
3800 }
3801
3802 /*
3803 * get_qual_for_hash
3804 *
3805 * Returns a CHECK constraint expression to use as a hash partition's
3806 * constraint, given the parent relation and partition bound structure.
3807 *
3808 * The partition constraint for a hash partition is always a call to the
3809 * built-in function satisfies_hash_partition().
3810 */
3811 static List *
get_qual_for_hash(Relation parent,PartitionBoundSpec * spec)3812 get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
3813 {
3814 PartitionKey key = RelationGetPartitionKey(parent);
3815 FuncExpr *fexpr;
3816 Node *relidConst;
3817 Node *modulusConst;
3818 Node *remainderConst;
3819 List *args;
3820 ListCell *partexprs_item;
3821 int i;
3822
3823 /* Fixed arguments. */
3824 relidConst = (Node *) makeConst(OIDOID,
3825 -1,
3826 InvalidOid,
3827 sizeof(Oid),
3828 ObjectIdGetDatum(RelationGetRelid(parent)),
3829 false,
3830 true);
3831
3832 modulusConst = (Node *) makeConst(INT4OID,
3833 -1,
3834 InvalidOid,
3835 sizeof(int32),
3836 Int32GetDatum(spec->modulus),
3837 false,
3838 true);
3839
3840 remainderConst = (Node *) makeConst(INT4OID,
3841 -1,
3842 InvalidOid,
3843 sizeof(int32),
3844 Int32GetDatum(spec->remainder),
3845 false,
3846 true);
3847
3848 args = list_make3(relidConst, modulusConst, remainderConst);
3849 partexprs_item = list_head(key->partexprs);
3850
3851 /* Add an argument for each key column. */
3852 for (i = 0; i < key->partnatts; i++)
3853 {
3854 Node *keyCol;
3855
3856 /* Left operand */
3857 if (key->partattrs[i] != 0)
3858 {
3859 keyCol = (Node *) makeVar(1,
3860 key->partattrs[i],
3861 key->parttypid[i],
3862 key->parttypmod[i],
3863 key->parttypcoll[i],
3864 0);
3865 }
3866 else
3867 {
3868 keyCol = (Node *) copyObject(lfirst(partexprs_item));
3869 partexprs_item = lnext(key->partexprs, partexprs_item);
3870 }
3871
3872 args = lappend(args, keyCol);
3873 }
3874
3875 fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
3876 BOOLOID,
3877 args,
3878 InvalidOid,
3879 InvalidOid,
3880 COERCE_EXPLICIT_CALL);
3881
3882 return list_make1(fexpr);
3883 }
3884
3885 /*
3886 * get_qual_for_list
3887 *
3888 * Returns an implicit-AND list of expressions to use as a list partition's
3889 * constraint, given the parent relation and partition bound structure.
3890 *
3891 * The function returns NIL for a default partition when it's the only
3892 * partition since in that case there is no constraint.
3893 */
3894 static List *
get_qual_for_list(Relation parent,PartitionBoundSpec * spec)3895 get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
3896 {
3897 PartitionKey key = RelationGetPartitionKey(parent);
3898 List *result;
3899 Expr *keyCol;
3900 Expr *opexpr;
3901 NullTest *nulltest;
3902 ListCell *cell;
3903 List *elems = NIL;
3904 bool list_has_null = false;
3905
3906 /*
3907 * Only single-column list partitioning is supported, so we are worried
3908 * only about the partition key with index 0.
3909 */
3910 Assert(key->partnatts == 1);
3911
3912 /* Construct Var or expression representing the partition column */
3913 if (key->partattrs[0] != 0)
3914 keyCol = (Expr *) makeVar(1,
3915 key->partattrs[0],
3916 key->parttypid[0],
3917 key->parttypmod[0],
3918 key->parttypcoll[0],
3919 0);
3920 else
3921 keyCol = (Expr *) copyObject(linitial(key->partexprs));
3922
3923 /*
3924 * For default list partition, collect datums for all the partitions. The
3925 * default partition constraint should check that the partition key is
3926 * equal to none of those.
3927 */
3928 if (spec->is_default)
3929 {
3930 int i;
3931 int ndatums = 0;
3932 PartitionDesc pdesc = RelationGetPartitionDesc(parent);
3933 PartitionBoundInfo boundinfo = pdesc->boundinfo;
3934
3935 if (boundinfo)
3936 {
3937 ndatums = boundinfo->ndatums;
3938
3939 if (partition_bound_accepts_nulls(boundinfo))
3940 list_has_null = true;
3941 }
3942
3943 /*
3944 * If default is the only partition, there need not be any partition
3945 * constraint on it.
3946 */
3947 if (ndatums == 0 && !list_has_null)
3948 return NIL;
3949
3950 for (i = 0; i < ndatums; i++)
3951 {
3952 Const *val;
3953
3954 /*
3955 * Construct Const from known-not-null datum. We must be careful
3956 * to copy the value, because our result has to be able to outlive
3957 * the relcache entry we're copying from.
3958 */
3959 val = makeConst(key->parttypid[0],
3960 key->parttypmod[0],
3961 key->parttypcoll[0],
3962 key->parttyplen[0],
3963 datumCopy(*boundinfo->datums[i],
3964 key->parttypbyval[0],
3965 key->parttyplen[0]),
3966 false, /* isnull */
3967 key->parttypbyval[0]);
3968
3969 elems = lappend(elems, val);
3970 }
3971 }
3972 else
3973 {
3974 /*
3975 * Create list of Consts for the allowed values, excluding any nulls.
3976 */
3977 foreach(cell, spec->listdatums)
3978 {
3979 Const *val = castNode(Const, lfirst(cell));
3980
3981 if (val->constisnull)
3982 list_has_null = true;
3983 else
3984 elems = lappend(elems, copyObject(val));
3985 }
3986 }
3987
3988 if (elems)
3989 {
3990 /*
3991 * Generate the operator expression from the non-null partition
3992 * values.
3993 */
3994 opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
3995 keyCol, (Expr *) elems);
3996 }
3997 else
3998 {
3999 /*
4000 * If there are no partition values, we don't need an operator
4001 * expression.
4002 */
4003 opexpr = NULL;
4004 }
4005
4006 if (!list_has_null)
4007 {
4008 /*
4009 * Gin up a "col IS NOT NULL" test that will be AND'd with the main
4010 * expression. This might seem redundant, but the partition routing
4011 * machinery needs it.
4012 */
4013 nulltest = makeNode(NullTest);
4014 nulltest->arg = keyCol;
4015 nulltest->nulltesttype = IS_NOT_NULL;
4016 nulltest->argisrow = false;
4017 nulltest->location = -1;
4018
4019 result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
4020 }
4021 else
4022 {
4023 /*
4024 * Gin up a "col IS NULL" test that will be OR'd with the main
4025 * expression.
4026 */
4027 nulltest = makeNode(NullTest);
4028 nulltest->arg = keyCol;
4029 nulltest->nulltesttype = IS_NULL;
4030 nulltest->argisrow = false;
4031 nulltest->location = -1;
4032
4033 if (opexpr)
4034 {
4035 Expr *or;
4036
4037 or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
4038 result = list_make1(or);
4039 }
4040 else
4041 result = list_make1(nulltest);
4042 }
4043
4044 /*
4045 * Note that, in general, applying NOT to a constraint expression doesn't
4046 * necessarily invert the set of rows it accepts, because NOT (NULL) is
4047 * NULL. However, the partition constraints we construct here never
4048 * evaluate to NULL, so applying NOT works as intended.
4049 */
4050 if (spec->is_default)
4051 {
4052 result = list_make1(make_ands_explicit(result));
4053 result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
4054 }
4055
4056 return result;
4057 }
4058
4059 /*
4060 * get_qual_for_range
4061 *
4062 * Returns an implicit-AND list of expressions to use as a range partition's
4063 * constraint, given the parent relation and partition bound structure.
4064 *
4065 * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
4066 * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
4067 * generate an expression tree of the following form:
4068 *
4069 * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4070 * AND
4071 * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
4072 * AND
4073 * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
4074 *
4075 * It is often the case that a prefix of lower and upper bound tuples contains
4076 * the same values, for example, (al = au), in which case, we will emit an
4077 * expression tree of the following form:
4078 *
4079 * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4080 * AND
4081 * (a = al)
4082 * AND
4083 * (b > bl OR (b = bl AND c >= cl))
4084 * AND
4085 * (b < bu OR (b = bu AND c < cu))
4086 *
4087 * If a bound datum is either MINVALUE or MAXVALUE, these expressions are
4088 * simplified using the fact that any value is greater than MINVALUE and less
4089 * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
4090 * true, and we need not emit any expression for it, and the last line becomes
4091 *
4092 * (b < bu) OR (b = bu), which is simplified to (b <= bu)
4093 *
4094 * In most common cases with only one partition column, say a, the following
4095 * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
4096 *
4097 * For default partition, it returns the negation of the constraints of all
4098 * the other partitions.
4099 *
4100 * External callers should pass for_default as false; we set it to true only
4101 * when recursing.
4102 */
4103 static List *
get_qual_for_range(Relation parent,PartitionBoundSpec * spec,bool for_default)4104 get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
4105 bool for_default)
4106 {
4107 List *result = NIL;
4108 ListCell *cell1,
4109 *cell2,
4110 *partexprs_item,
4111 *partexprs_item_saved;
4112 int i,
4113 j;
4114 PartitionRangeDatum *ldatum,
4115 *udatum;
4116 PartitionKey key = RelationGetPartitionKey(parent);
4117 Expr *keyCol;
4118 Const *lower_val,
4119 *upper_val;
4120 List *lower_or_arms,
4121 *upper_or_arms;
4122 int num_or_arms,
4123 current_or_arm;
4124 ListCell *lower_or_start_datum,
4125 *upper_or_start_datum;
4126 bool need_next_lower_arm,
4127 need_next_upper_arm;
4128
4129 if (spec->is_default)
4130 {
4131 List *or_expr_args = NIL;
4132 PartitionDesc pdesc = RelationGetPartitionDesc(parent);
4133 Oid *inhoids = pdesc->oids;
4134 int nparts = pdesc->nparts,
4135 i;
4136
4137 for (i = 0; i < nparts; i++)
4138 {
4139 Oid inhrelid = inhoids[i];
4140 HeapTuple tuple;
4141 Datum datum;
4142 bool isnull;
4143 PartitionBoundSpec *bspec;
4144
4145 tuple = SearchSysCache1(RELOID, inhrelid);
4146 if (!HeapTupleIsValid(tuple))
4147 elog(ERROR, "cache lookup failed for relation %u", inhrelid);
4148
4149 datum = SysCacheGetAttr(RELOID, tuple,
4150 Anum_pg_class_relpartbound,
4151 &isnull);
4152 if (isnull)
4153 elog(ERROR, "null relpartbound for relation %u", inhrelid);
4154
4155 bspec = (PartitionBoundSpec *)
4156 stringToNode(TextDatumGetCString(datum));
4157 if (!IsA(bspec, PartitionBoundSpec))
4158 elog(ERROR, "expected PartitionBoundSpec");
4159
4160 if (!bspec->is_default)
4161 {
4162 List *part_qual;
4163
4164 part_qual = get_qual_for_range(parent, bspec, true);
4165
4166 /*
4167 * AND the constraints of the partition and add to
4168 * or_expr_args
4169 */
4170 or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
4171 ? makeBoolExpr(AND_EXPR, part_qual, -1)
4172 : linitial(part_qual));
4173 }
4174 ReleaseSysCache(tuple);
4175 }
4176
4177 if (or_expr_args != NIL)
4178 {
4179 Expr *other_parts_constr;
4180
4181 /*
4182 * Combine the constraints obtained for non-default partitions
4183 * using OR. As requested, each of the OR's args doesn't include
4184 * the NOT NULL test for partition keys (which is to avoid its
4185 * useless repetition). Add the same now.
4186 */
4187 other_parts_constr =
4188 makeBoolExpr(AND_EXPR,
4189 lappend(get_range_nulltest(key),
4190 list_length(or_expr_args) > 1
4191 ? makeBoolExpr(OR_EXPR, or_expr_args,
4192 -1)
4193 : linitial(or_expr_args)),
4194 -1);
4195
4196 /*
4197 * Finally, the default partition contains everything *NOT*
4198 * contained in the non-default partitions.
4199 */
4200 result = list_make1(makeBoolExpr(NOT_EXPR,
4201 list_make1(other_parts_constr), -1));
4202 }
4203
4204 return result;
4205 }
4206
4207 lower_or_start_datum = list_head(spec->lowerdatums);
4208 upper_or_start_datum = list_head(spec->upperdatums);
4209 num_or_arms = key->partnatts;
4210
4211 /*
4212 * If it is the recursive call for default, we skip the get_range_nulltest
4213 * to avoid accumulating the NullTest on the same keys for each partition.
4214 */
4215 if (!for_default)
4216 result = get_range_nulltest(key);
4217
4218 /*
4219 * Iterate over the key columns and check if the corresponding lower and
4220 * upper datums are equal using the btree equality operator for the
4221 * column's type. If equal, we emit single keyCol = common_value
4222 * expression. Starting from the first column for which the corresponding
4223 * lower and upper bound datums are not equal, we generate OR expressions
4224 * as shown in the function's header comment.
4225 */
4226 i = 0;
4227 partexprs_item = list_head(key->partexprs);
4228 partexprs_item_saved = partexprs_item; /* placate compiler */
4229 forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
4230 {
4231 EState *estate;
4232 MemoryContext oldcxt;
4233 Expr *test_expr;
4234 ExprState *test_exprstate;
4235 Datum test_result;
4236 bool isNull;
4237
4238 ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
4239 udatum = castNode(PartitionRangeDatum, lfirst(cell2));
4240
4241 /*
4242 * Since get_range_key_properties() modifies partexprs_item, and we
4243 * might need to start over from the previous expression in the later
4244 * part of this function, save away the current value.
4245 */
4246 partexprs_item_saved = partexprs_item;
4247
4248 get_range_key_properties(key, i, ldatum, udatum,
4249 &partexprs_item,
4250 &keyCol,
4251 &lower_val, &upper_val);
4252
4253 /*
4254 * If either value is NULL, the corresponding partition bound is
4255 * either MINVALUE or MAXVALUE, and we treat them as unequal, because
4256 * even if they're the same, there is no common value to equate the
4257 * key column with.
4258 */
4259 if (!lower_val || !upper_val)
4260 break;
4261
4262 /* Create the test expression */
4263 estate = CreateExecutorState();
4264 oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
4265 test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
4266 (Expr *) lower_val,
4267 (Expr *) upper_val);
4268 fix_opfuncids((Node *) test_expr);
4269 test_exprstate = ExecInitExpr(test_expr, NULL);
4270 test_result = ExecEvalExprSwitchContext(test_exprstate,
4271 GetPerTupleExprContext(estate),
4272 &isNull);
4273 MemoryContextSwitchTo(oldcxt);
4274 FreeExecutorState(estate);
4275
4276 /* If not equal, go generate the OR expressions */
4277 if (!DatumGetBool(test_result))
4278 break;
4279
4280 /*
4281 * The bounds for the last key column can't be equal, because such a
4282 * range partition would never be allowed to be defined (it would have
4283 * an empty range otherwise).
4284 */
4285 if (i == key->partnatts - 1)
4286 elog(ERROR, "invalid range bound specification");
4287
4288 /* Equal, so generate keyCol = lower_val expression */
4289 result = lappend(result,
4290 make_partition_op_expr(key, i, BTEqualStrategyNumber,
4291 keyCol, (Expr *) lower_val));
4292
4293 i++;
4294 }
4295
4296 /* First pair of lower_val and upper_val that are not equal. */
4297 lower_or_start_datum = cell1;
4298 upper_or_start_datum = cell2;
4299
4300 /* OR will have as many arms as there are key columns left. */
4301 num_or_arms = key->partnatts - i;
4302 current_or_arm = 0;
4303 lower_or_arms = upper_or_arms = NIL;
4304 need_next_lower_arm = need_next_upper_arm = true;
4305 while (current_or_arm < num_or_arms)
4306 {
4307 List *lower_or_arm_args = NIL,
4308 *upper_or_arm_args = NIL;
4309
4310 /* Restart scan of columns from the i'th one */
4311 j = i;
4312 partexprs_item = partexprs_item_saved;
4313
4314 for_both_cell(cell1, spec->lowerdatums, lower_or_start_datum,
4315 cell2, spec->upperdatums, upper_or_start_datum)
4316 {
4317 PartitionRangeDatum *ldatum_next = NULL,
4318 *udatum_next = NULL;
4319
4320 ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
4321 if (lnext(spec->lowerdatums, cell1))
4322 ldatum_next = castNode(PartitionRangeDatum,
4323 lfirst(lnext(spec->lowerdatums, cell1)));
4324 udatum = castNode(PartitionRangeDatum, lfirst(cell2));
4325 if (lnext(spec->upperdatums, cell2))
4326 udatum_next = castNode(PartitionRangeDatum,
4327 lfirst(lnext(spec->upperdatums, cell2)));
4328 get_range_key_properties(key, j, ldatum, udatum,
4329 &partexprs_item,
4330 &keyCol,
4331 &lower_val, &upper_val);
4332
4333 if (need_next_lower_arm && lower_val)
4334 {
4335 uint16 strategy;
4336
4337 /*
4338 * For the non-last columns of this arm, use the EQ operator.
4339 * For the last column of this arm, use GT, unless this is the
4340 * last column of the whole bound check, or the next bound
4341 * datum is MINVALUE, in which case use GE.
4342 */
4343 if (j - i < current_or_arm)
4344 strategy = BTEqualStrategyNumber;
4345 else if (j == key->partnatts - 1 ||
4346 (ldatum_next &&
4347 ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
4348 strategy = BTGreaterEqualStrategyNumber;
4349 else
4350 strategy = BTGreaterStrategyNumber;
4351
4352 lower_or_arm_args = lappend(lower_or_arm_args,
4353 make_partition_op_expr(key, j,
4354 strategy,
4355 keyCol,
4356 (Expr *) lower_val));
4357 }
4358
4359 if (need_next_upper_arm && upper_val)
4360 {
4361 uint16 strategy;
4362
4363 /*
4364 * For the non-last columns of this arm, use the EQ operator.
4365 * For the last column of this arm, use LT, unless the next
4366 * bound datum is MAXVALUE, in which case use LE.
4367 */
4368 if (j - i < current_or_arm)
4369 strategy = BTEqualStrategyNumber;
4370 else if (udatum_next &&
4371 udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
4372 strategy = BTLessEqualStrategyNumber;
4373 else
4374 strategy = BTLessStrategyNumber;
4375
4376 upper_or_arm_args = lappend(upper_or_arm_args,
4377 make_partition_op_expr(key, j,
4378 strategy,
4379 keyCol,
4380 (Expr *) upper_val));
4381 }
4382
4383 /*
4384 * Did we generate enough of OR's arguments? First arm considers
4385 * the first of the remaining columns, second arm considers first
4386 * two of the remaining columns, and so on.
4387 */
4388 ++j;
4389 if (j - i > current_or_arm)
4390 {
4391 /*
4392 * We must not emit any more arms if the new column that will
4393 * be considered is unbounded, or this one was.
4394 */
4395 if (!lower_val || !ldatum_next ||
4396 ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4397 need_next_lower_arm = false;
4398 if (!upper_val || !udatum_next ||
4399 udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4400 need_next_upper_arm = false;
4401 break;
4402 }
4403 }
4404
4405 if (lower_or_arm_args != NIL)
4406 lower_or_arms = lappend(lower_or_arms,
4407 list_length(lower_or_arm_args) > 1
4408 ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
4409 : linitial(lower_or_arm_args));
4410
4411 if (upper_or_arm_args != NIL)
4412 upper_or_arms = lappend(upper_or_arms,
4413 list_length(upper_or_arm_args) > 1
4414 ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
4415 : linitial(upper_or_arm_args));
4416
4417 /* If no work to do in the next iteration, break away. */
4418 if (!need_next_lower_arm && !need_next_upper_arm)
4419 break;
4420
4421 ++current_or_arm;
4422 }
4423
4424 /*
4425 * Generate the OR expressions for each of lower and upper bounds (if
4426 * required), and append to the list of implicitly ANDed list of
4427 * expressions.
4428 */
4429 if (lower_or_arms != NIL)
4430 result = lappend(result,
4431 list_length(lower_or_arms) > 1
4432 ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
4433 : linitial(lower_or_arms));
4434 if (upper_or_arms != NIL)
4435 result = lappend(result,
4436 list_length(upper_or_arms) > 1
4437 ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
4438 : linitial(upper_or_arms));
4439
4440 /*
4441 * As noted above, for non-default, we return list with constant TRUE. If
4442 * the result is NIL during the recursive call for default, it implies
4443 * this is the only other partition which can hold every value of the key
4444 * except NULL. Hence we return the NullTest result skipped earlier.
4445 */
4446 if (result == NIL)
4447 result = for_default
4448 ? get_range_nulltest(key)
4449 : list_make1(makeBoolConst(true, false));
4450
4451 return result;
4452 }
4453
4454 /*
4455 * get_range_key_properties
4456 * Returns range partition key information for a given column
4457 *
4458 * This is a subroutine for get_qual_for_range, and its API is pretty
4459 * specialized to that caller.
4460 *
4461 * Constructs an Expr for the key column (returned in *keyCol) and Consts
4462 * for the lower and upper range limits (returned in *lower_val and
4463 * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
4464 * a Const. All of these structures are freshly palloc'd.
4465 *
4466 * *partexprs_item points to the cell containing the next expression in
4467 * the key->partexprs list, or NULL. It may be advanced upon return.
4468 */
4469 static void
get_range_key_properties(PartitionKey key,int keynum,PartitionRangeDatum * ldatum,PartitionRangeDatum * udatum,ListCell ** partexprs_item,Expr ** keyCol,Const ** lower_val,Const ** upper_val)4470 get_range_key_properties(PartitionKey key, int keynum,
4471 PartitionRangeDatum *ldatum,
4472 PartitionRangeDatum *udatum,
4473 ListCell **partexprs_item,
4474 Expr **keyCol,
4475 Const **lower_val, Const **upper_val)
4476 {
4477 /* Get partition key expression for this column */
4478 if (key->partattrs[keynum] != 0)
4479 {
4480 *keyCol = (Expr *) makeVar(1,
4481 key->partattrs[keynum],
4482 key->parttypid[keynum],
4483 key->parttypmod[keynum],
4484 key->parttypcoll[keynum],
4485 0);
4486 }
4487 else
4488 {
4489 if (*partexprs_item == NULL)
4490 elog(ERROR, "wrong number of partition key expressions");
4491 *keyCol = copyObject(lfirst(*partexprs_item));
4492 *partexprs_item = lnext(key->partexprs, *partexprs_item);
4493 }
4494
4495 /* Get appropriate Const nodes for the bounds */
4496 if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
4497 *lower_val = castNode(Const, copyObject(ldatum->value));
4498 else
4499 *lower_val = NULL;
4500
4501 if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
4502 *upper_val = castNode(Const, copyObject(udatum->value));
4503 else
4504 *upper_val = NULL;
4505 }
4506
4507 /*
4508 * get_range_nulltest
4509 *
4510 * A non-default range partition table does not currently allow partition
4511 * keys to be null, so emit an IS NOT NULL expression for each key column.
4512 */
4513 static List *
get_range_nulltest(PartitionKey key)4514 get_range_nulltest(PartitionKey key)
4515 {
4516 List *result = NIL;
4517 NullTest *nulltest;
4518 ListCell *partexprs_item;
4519 int i;
4520
4521 partexprs_item = list_head(key->partexprs);
4522 for (i = 0; i < key->partnatts; i++)
4523 {
4524 Expr *keyCol;
4525
4526 if (key->partattrs[i] != 0)
4527 {
4528 keyCol = (Expr *) makeVar(1,
4529 key->partattrs[i],
4530 key->parttypid[i],
4531 key->parttypmod[i],
4532 key->parttypcoll[i],
4533 0);
4534 }
4535 else
4536 {
4537 if (partexprs_item == NULL)
4538 elog(ERROR, "wrong number of partition key expressions");
4539 keyCol = copyObject(lfirst(partexprs_item));
4540 partexprs_item = lnext(key->partexprs, partexprs_item);
4541 }
4542
4543 nulltest = makeNode(NullTest);
4544 nulltest->arg = keyCol;
4545 nulltest->nulltesttype = IS_NOT_NULL;
4546 nulltest->argisrow = false;
4547 nulltest->location = -1;
4548 result = lappend(result, nulltest);
4549 }
4550
4551 return result;
4552 }
4553
4554 /*
4555 * compute_partition_hash_value
4556 *
4557 * Compute the hash value for given partition key values.
4558 */
4559 uint64
compute_partition_hash_value(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * values,bool * isnull)4560 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
4561 Datum *values, bool *isnull)
4562 {
4563 int i;
4564 uint64 rowHash = 0;
4565 Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
4566
4567 for (i = 0; i < partnatts; i++)
4568 {
4569 /* Nulls are just ignored */
4570 if (!isnull[i])
4571 {
4572 Datum hash;
4573
4574 Assert(OidIsValid(partsupfunc[i].fn_oid));
4575
4576 /*
4577 * Compute hash for each datum value by calling respective
4578 * datatype-specific hash functions of each partition key
4579 * attribute.
4580 */
4581 hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i],
4582 values[i], seed);
4583
4584 /* Form a single 64-bit hash value */
4585 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4586 }
4587 }
4588
4589 return rowHash;
4590 }
4591
4592 /*
4593 * satisfies_hash_partition
4594 *
4595 * This is an SQL-callable function for use in hash partition constraints.
4596 * The first three arguments are the parent table OID, modulus, and remainder.
4597 * The remaining arguments are the value of the partitioning columns (or
4598 * expressions); these are hashed and the results are combined into a single
4599 * hash value by calling hash_combine64.
4600 *
4601 * Returns true if remainder produced when this computed single hash value is
4602 * divided by the given modulus is equal to given remainder, otherwise false.
4603 * NB: it's important that this never return null, as the constraint machinery
4604 * would consider that to be a "pass".
4605 *
4606 * See get_qual_for_hash() for usage.
4607 */
4608 Datum
satisfies_hash_partition(PG_FUNCTION_ARGS)4609 satisfies_hash_partition(PG_FUNCTION_ARGS)
4610 {
4611 typedef struct ColumnsHashData
4612 {
4613 Oid relid;
4614 int nkeys;
4615 Oid variadic_type;
4616 int16 variadic_typlen;
4617 bool variadic_typbyval;
4618 char variadic_typalign;
4619 Oid partcollid[PARTITION_MAX_KEYS];
4620 FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER];
4621 } ColumnsHashData;
4622 Oid parentId;
4623 int modulus;
4624 int remainder;
4625 Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
4626 ColumnsHashData *my_extra;
4627 uint64 rowHash = 0;
4628
4629 /* Return false if the parent OID, modulus, or remainder is NULL. */
4630 if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
4631 PG_RETURN_BOOL(false);
4632 parentId = PG_GETARG_OID(0);
4633 modulus = PG_GETARG_INT32(1);
4634 remainder = PG_GETARG_INT32(2);
4635
4636 /* Sanity check modulus and remainder. */
4637 if (modulus <= 0)
4638 ereport(ERROR,
4639 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4640 errmsg("modulus for hash partition must be an integer value greater than zero")));
4641 if (remainder < 0)
4642 ereport(ERROR,
4643 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4644 errmsg("remainder for hash partition must be an integer value greater than or equal to zero")));
4645 if (remainder >= modulus)
4646 ereport(ERROR,
4647 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4648 errmsg("remainder for hash partition must be less than modulus")));
4649
4650 /*
4651 * Cache hash function information.
4652 */
4653 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4654 if (my_extra == NULL || my_extra->relid != parentId)
4655 {
4656 Relation parent;
4657 PartitionKey key;
4658 int j;
4659
4660 /* Open parent relation and fetch partition key info */
4661 parent = relation_open(parentId, AccessShareLock);
4662 key = RelationGetPartitionKey(parent);
4663
4664 /* Reject parent table that is not hash-partitioned. */
4665 if (key == NULL || key->strategy != PARTITION_STRATEGY_HASH)
4666 ereport(ERROR,
4667 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4668 errmsg("\"%s\" is not a hash partitioned table",
4669 get_rel_name(parentId))));
4670
4671 if (!get_fn_expr_variadic(fcinfo->flinfo))
4672 {
4673 int nargs = PG_NARGS() - 3;
4674
4675 /* complain if wrong number of column values */
4676 if (key->partnatts != nargs)
4677 ereport(ERROR,
4678 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4679 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4680 key->partnatts, nargs)));
4681
4682 /* allocate space for our cache */
4683 fcinfo->flinfo->fn_extra =
4684 MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4685 offsetof(ColumnsHashData, partsupfunc) +
4686 sizeof(FmgrInfo) * nargs);
4687 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4688 my_extra->relid = parentId;
4689 my_extra->nkeys = key->partnatts;
4690 memcpy(my_extra->partcollid, key->partcollation,
4691 key->partnatts * sizeof(Oid));
4692
4693 /* check argument types and save fmgr_infos */
4694 for (j = 0; j < key->partnatts; ++j)
4695 {
4696 Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
4697
4698 if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
4699 ereport(ERROR,
4700 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4701 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
4702 j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
4703
4704 fmgr_info_copy(&my_extra->partsupfunc[j],
4705 &key->partsupfunc[j],
4706 fcinfo->flinfo->fn_mcxt);
4707 }
4708 }
4709 else
4710 {
4711 ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4712
4713 /* allocate space for our cache -- just one FmgrInfo in this case */
4714 fcinfo->flinfo->fn_extra =
4715 MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4716 offsetof(ColumnsHashData, partsupfunc) +
4717 sizeof(FmgrInfo));
4718 my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4719 my_extra->relid = parentId;
4720 my_extra->nkeys = key->partnatts;
4721 my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
4722 get_typlenbyvalalign(my_extra->variadic_type,
4723 &my_extra->variadic_typlen,
4724 &my_extra->variadic_typbyval,
4725 &my_extra->variadic_typalign);
4726 my_extra->partcollid[0] = key->partcollation[0];
4727
4728 /* check argument types */
4729 for (j = 0; j < key->partnatts; ++j)
4730 if (key->parttypid[j] != my_extra->variadic_type)
4731 ereport(ERROR,
4732 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4733 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
4734 j + 1,
4735 format_type_be(key->parttypid[j]),
4736 format_type_be(my_extra->variadic_type))));
4737
4738 fmgr_info_copy(&my_extra->partsupfunc[0],
4739 &key->partsupfunc[0],
4740 fcinfo->flinfo->fn_mcxt);
4741 }
4742
4743 /* Hold lock until commit */
4744 relation_close(parent, NoLock);
4745 }
4746
4747 if (!OidIsValid(my_extra->variadic_type))
4748 {
4749 int nkeys = my_extra->nkeys;
4750 int i;
4751
4752 /*
4753 * For a non-variadic call, neither the number of arguments nor their
4754 * types can change across calls, so avoid the expense of rechecking
4755 * here.
4756 */
4757
4758 for (i = 0; i < nkeys; i++)
4759 {
4760 Datum hash;
4761
4762 /* keys start from fourth argument of function. */
4763 int argno = i + 3;
4764
4765 if (PG_ARGISNULL(argno))
4766 continue;
4767
4768 hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
4769 my_extra->partcollid[i],
4770 PG_GETARG_DATUM(argno),
4771 seed);
4772
4773 /* Form a single 64-bit hash value */
4774 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4775 }
4776 }
4777 else
4778 {
4779 ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4780 int i;
4781 int nelems;
4782 Datum *datum;
4783 bool *isnull;
4784
4785 deconstruct_array(variadic_array,
4786 my_extra->variadic_type,
4787 my_extra->variadic_typlen,
4788 my_extra->variadic_typbyval,
4789 my_extra->variadic_typalign,
4790 &datum, &isnull, &nelems);
4791
4792 /* complain if wrong number of column values */
4793 if (nelems != my_extra->nkeys)
4794 ereport(ERROR,
4795 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4796 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4797 my_extra->nkeys, nelems)));
4798
4799 for (i = 0; i < nelems; i++)
4800 {
4801 Datum hash;
4802
4803 if (isnull[i])
4804 continue;
4805
4806 hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
4807 my_extra->partcollid[0],
4808 datum[i],
4809 seed);
4810
4811 /* Form a single 64-bit hash value */
4812 rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4813 }
4814 }
4815
4816 PG_RETURN_BOOL(rowHash % modulus == remainder);
4817 }
4818