1 /*-------------------------------------------------------------------------
2  *
3  * partbounds.c
4  *		Support routines for manipulating partition bounds
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *		  src/backend/partitioning/partbounds.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/relation.h"
18 #include "access/table.h"
19 #include "access/tableam.h"
20 #include "catalog/partition.h"
21 #include "catalog/pg_inherits.h"
22 #include "catalog/pg_type.h"
23 #include "commands/tablecmds.h"
24 #include "common/hashfn.h"
25 #include "executor/executor.h"
26 #include "miscadmin.h"
27 #include "nodes/makefuncs.h"
28 #include "nodes/nodeFuncs.h"
29 #include "nodes/pathnodes.h"
30 #include "parser/parse_coerce.h"
31 #include "partitioning/partbounds.h"
32 #include "partitioning/partdesc.h"
33 #include "partitioning/partprune.h"
34 #include "utils/builtins.h"
35 #include "utils/datum.h"
36 #include "utils/fmgroids.h"
37 #include "utils/lsyscache.h"
38 #include "utils/partcache.h"
39 #include "utils/ruleutils.h"
40 #include "utils/snapmgr.h"
41 #include "utils/syscache.h"
42 
43 /*
44  * When qsort'ing partition bounds after reading from the catalog, each bound
45  * is represented with one of the following structs.
46  */
47 
48 /* One bound of a hash partition */
49 typedef struct PartitionHashBound
50 {
51 	int			modulus;
52 	int			remainder;
53 	int			index;
54 } PartitionHashBound;
55 
56 /* One value coming from some (index'th) list partition */
57 typedef struct PartitionListValue
58 {
59 	int			index;
60 	Datum		value;
61 } PartitionListValue;
62 
63 /* One bound of a range partition */
64 typedef struct PartitionRangeBound
65 {
66 	int			index;
67 	Datum	   *datums;			/* range bound datums */
68 	PartitionRangeDatumKind *kind;	/* the kind of each datum */
69 	bool		lower;			/* this is the lower (vs upper) bound */
70 } PartitionRangeBound;
71 
72 /*
73  * Mapping from partitions of a joining relation to partitions of a join
74  * relation being computed (a.k.a merged partitions)
75  */
76 typedef struct PartitionMap
77 {
78 	int			nparts;			/* number of partitions */
79 	int		   *merged_indexes; /* indexes of merged partitions */
80 	bool	   *merged;			/* flags to indicate whether partitions are
81 								 * merged with non-dummy partitions */
82 	bool		did_remapping;	/* did we re-map partitions? */
83 	int		   *old_indexes;	/* old indexes of merged partitions if
84 								 * did_remapping */
85 } PartitionMap;
86 
87 /* Macro for comparing two range bounds */
88 #define compare_range_bounds(partnatts, partsupfunc, partcollations, \
89 							 bound1, bound2) \
90 	(partition_rbound_cmp(partnatts, partsupfunc, partcollations, \
91 						  (bound1)->datums, (bound1)->kind, (bound1)->lower, \
92 						  bound2))
93 
94 static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
95 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
96 											void *arg);
97 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
98 										void *arg);
99 static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs,
100 											 int nparts, PartitionKey key, int **mapping);
101 static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs,
102 											 int nparts, PartitionKey key, int **mapping);
103 static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs,
104 											  int nparts, PartitionKey key, int **mapping);
105 static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc,
106 											Oid *collations,
107 											RelOptInfo *outer_rel,
108 											RelOptInfo *inner_rel,
109 											JoinType jointype,
110 											List **outer_parts,
111 											List **inner_parts);
112 static PartitionBoundInfo merge_range_bounds(int partnatts,
113 											 FmgrInfo *partsupfuncs,
114 											 Oid *partcollations,
115 											 RelOptInfo *outer_rel,
116 											 RelOptInfo *inner_rel,
117 											 JoinType jointype,
118 											 List **outer_parts,
119 											 List **inner_parts);
120 static void init_partition_map(RelOptInfo *rel, PartitionMap *map);
121 static void free_partition_map(PartitionMap *map);
122 static bool is_dummy_partition(RelOptInfo *rel, int part_index);
123 static int	merge_matching_partitions(PartitionMap *outer_map,
124 									  PartitionMap *inner_map,
125 									  int outer_part,
126 									  int inner_part,
127 									  int *next_index);
128 static int	process_outer_partition(PartitionMap *outer_map,
129 									PartitionMap *inner_map,
130 									bool outer_has_default,
131 									bool inner_has_default,
132 									int outer_index,
133 									int inner_default,
134 									JoinType jointype,
135 									int *next_index,
136 									int *default_index);
137 static int	process_inner_partition(PartitionMap *outer_map,
138 									PartitionMap *inner_map,
139 									bool outer_has_default,
140 									bool inner_has_default,
141 									int inner_index,
142 									int outer_default,
143 									JoinType jointype,
144 									int *next_index,
145 									int *default_index);
146 static void merge_null_partitions(PartitionMap *outer_map,
147 								  PartitionMap *inner_map,
148 								  bool outer_has_null,
149 								  bool inner_has_null,
150 								  int outer_null,
151 								  int inner_null,
152 								  JoinType jointype,
153 								  int *next_index,
154 								  int *null_index);
155 static void merge_default_partitions(PartitionMap *outer_map,
156 									 PartitionMap *inner_map,
157 									 bool outer_has_default,
158 									 bool inner_has_default,
159 									 int outer_default,
160 									 int inner_default,
161 									 JoinType jointype,
162 									 int *next_index,
163 									 int *default_index);
164 static int	merge_partition_with_dummy(PartitionMap *map, int index,
165 									   int *next_index);
166 static void fix_merged_indexes(PartitionMap *outer_map,
167 							   PartitionMap *inner_map,
168 							   int nmerged, List *merged_indexes);
169 static void generate_matching_part_pairs(RelOptInfo *outer_rel,
170 										 RelOptInfo *inner_rel,
171 										 PartitionMap *outer_map,
172 										 PartitionMap *inner_map,
173 										 int nmerged,
174 										 List **outer_parts,
175 										 List **inner_parts);
176 static PartitionBoundInfo build_merged_partition_bounds(char strategy,
177 														List *merged_datums,
178 														List *merged_kinds,
179 														List *merged_indexes,
180 														int null_index,
181 														int default_index);
182 static int	get_range_partition(RelOptInfo *rel,
183 								PartitionBoundInfo bi,
184 								int *lb_pos,
185 								PartitionRangeBound *lb,
186 								PartitionRangeBound *ub);
187 static int	get_range_partition_internal(PartitionBoundInfo bi,
188 										 int *lb_pos,
189 										 PartitionRangeBound *lb,
190 										 PartitionRangeBound *ub);
191 static bool compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
192 									 Oid *partcollations,
193 									 PartitionRangeBound *outer_lb,
194 									 PartitionRangeBound *outer_ub,
195 									 PartitionRangeBound *inner_lb,
196 									 PartitionRangeBound *inner_ub,
197 									 int *lb_cmpval, int *ub_cmpval);
198 static void get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
199 									Oid *partcollations, JoinType jointype,
200 									PartitionRangeBound *outer_lb,
201 									PartitionRangeBound *outer_ub,
202 									PartitionRangeBound *inner_lb,
203 									PartitionRangeBound *inner_ub,
204 									int lb_cmpval, int ub_cmpval,
205 									PartitionRangeBound *merged_lb,
206 									PartitionRangeBound *merged_ub);
207 static void add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
208 									Oid *partcollations,
209 									PartitionRangeBound *merged_lb,
210 									PartitionRangeBound *merged_ub,
211 									int merged_index,
212 									List **merged_datums,
213 									List **merged_kinds,
214 									List **merged_indexes);
215 static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index,
216 													  List *datums, bool lower);
217 static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
218 								  int remainder2);
219 static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
220 								  Oid *partcollation, Datum *datums1,
221 								  PartitionRangeDatumKind *kind1, bool lower1,
222 								  PartitionRangeBound *b2);
223 static int	partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
224 									Oid *partcollation,
225 									PartitionBoundInfo boundinfo,
226 									PartitionRangeBound *probe, bool *is_equal);
227 static Expr *make_partition_op_expr(PartitionKey key, int keynum,
228 									uint16 strategy, Expr *arg1, Expr *arg2);
229 static Oid	get_partition_operator(PartitionKey key, int col,
230 								   StrategyNumber strategy, bool *need_relabel);
231 static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
232 static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
233 static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
234 								bool for_default);
235 static void get_range_key_properties(PartitionKey key, int keynum,
236 									 PartitionRangeDatum *ldatum,
237 									 PartitionRangeDatum *udatum,
238 									 ListCell **partexprs_item,
239 									 Expr **keyCol,
240 									 Const **lower_val, Const **upper_val);
241 static List *get_range_nulltest(PartitionKey key);
242 
243 /*
244  * get_qual_from_partbound
245  *		Given a parser node for partition bound, return the list of executable
246  *		expressions as partition constraint
247  */
248 List *
get_qual_from_partbound(Relation rel,Relation parent,PartitionBoundSpec * spec)249 get_qual_from_partbound(Relation rel, Relation parent,
250 						PartitionBoundSpec *spec)
251 {
252 	PartitionKey key = RelationGetPartitionKey(parent);
253 	List	   *my_qual = NIL;
254 
255 	Assert(key != NULL);
256 
257 	switch (key->strategy)
258 	{
259 		case PARTITION_STRATEGY_HASH:
260 			Assert(spec->strategy == PARTITION_STRATEGY_HASH);
261 			my_qual = get_qual_for_hash(parent, spec);
262 			break;
263 
264 		case PARTITION_STRATEGY_LIST:
265 			Assert(spec->strategy == PARTITION_STRATEGY_LIST);
266 			my_qual = get_qual_for_list(parent, spec);
267 			break;
268 
269 		case PARTITION_STRATEGY_RANGE:
270 			Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
271 			my_qual = get_qual_for_range(parent, spec, false);
272 			break;
273 
274 		default:
275 			elog(ERROR, "unexpected partition strategy: %d",
276 				 (int) key->strategy);
277 	}
278 
279 	return my_qual;
280 }
281 
282 /*
283  *	partition_bounds_create
284  *		Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
285  *		nodes
286  *
287  * This function creates a PartitionBoundInfo and fills the values of its
288  * various members based on the input list.  Importantly, 'datums' array will
289  * contain Datum representation of individual bounds (possibly after
290  * de-duplication as in case of range bounds), sorted in a canonical order
291  * defined by qsort_partition_* functions of respective partitioning methods.
292  * 'indexes' array will contain as many elements as there are bounds (specific
293  * exceptions to this rule are listed in the function body), which represent
294  * the 0-based canonical positions of partitions.
295  *
296  * Upon return from this function, *mapping is set to an array of
297  * list_length(boundspecs) elements, each of which maps the original index of
298  * a partition to its canonical index.
299  *
300  * Note: The objects returned by this function are wholly allocated in the
301  * current memory context.
302  */
303 PartitionBoundInfo
partition_bounds_create(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)304 partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts,
305 						PartitionKey key, int **mapping)
306 {
307 	int			i;
308 
309 	Assert(nparts > 0);
310 
311 	/*
312 	 * For each partitioning method, we first convert the partition bounds
313 	 * from their parser node representation to the internal representation,
314 	 * along with any additional preprocessing (such as de-duplicating range
315 	 * bounds).  Resulting bound datums are then added to the 'datums' array
316 	 * in PartitionBoundInfo.  For each datum added, an integer indicating the
317 	 * canonical partition index is added to the 'indexes' array.
318 	 *
319 	 * For each bound, we remember its partition's position (0-based) in the
320 	 * original list to later map it to the canonical index.
321 	 */
322 
323 	/*
324 	 * Initialize mapping array with invalid values, this is filled within
325 	 * each sub-routine below depending on the bound type.
326 	 */
327 	*mapping = (int *) palloc(sizeof(int) * nparts);
328 	for (i = 0; i < nparts; i++)
329 		(*mapping)[i] = -1;
330 
331 	switch (key->strategy)
332 	{
333 		case PARTITION_STRATEGY_HASH:
334 			return create_hash_bounds(boundspecs, nparts, key, mapping);
335 
336 		case PARTITION_STRATEGY_LIST:
337 			return create_list_bounds(boundspecs, nparts, key, mapping);
338 
339 		case PARTITION_STRATEGY_RANGE:
340 			return create_range_bounds(boundspecs, nparts, key, mapping);
341 
342 		default:
343 			elog(ERROR, "unexpected partition strategy: %d",
344 				 (int) key->strategy);
345 			break;
346 	}
347 
348 	Assert(false);
349 	return NULL;				/* keep compiler quiet */
350 }
351 
352 /*
353  * create_hash_bounds
354  *		Create a PartitionBoundInfo for a hash partitioned table
355  */
356 static PartitionBoundInfo
create_hash_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)357 create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
358 				   PartitionKey key, int **mapping)
359 {
360 	PartitionBoundInfo boundinfo;
361 	PartitionHashBound **hbounds = NULL;
362 	int			i;
363 	int			ndatums = 0;
364 	int			greatest_modulus;
365 
366 	boundinfo = (PartitionBoundInfoData *)
367 		palloc0(sizeof(PartitionBoundInfoData));
368 	boundinfo->strategy = key->strategy;
369 	/* No special hash partitions. */
370 	boundinfo->null_index = -1;
371 	boundinfo->default_index = -1;
372 
373 	ndatums = nparts;
374 	hbounds = (PartitionHashBound **)
375 		palloc(nparts * sizeof(PartitionHashBound *));
376 
377 	/* Convert from node to the internal representation */
378 	for (i = 0; i < nparts; i++)
379 	{
380 		PartitionBoundSpec *spec = boundspecs[i];
381 
382 		if (spec->strategy != PARTITION_STRATEGY_HASH)
383 			elog(ERROR, "invalid strategy in partition bound spec");
384 
385 		hbounds[i] = (PartitionHashBound *) palloc(sizeof(PartitionHashBound));
386 		hbounds[i]->modulus = spec->modulus;
387 		hbounds[i]->remainder = spec->remainder;
388 		hbounds[i]->index = i;
389 	}
390 
391 	/* Sort all the bounds in ascending order */
392 	qsort(hbounds, nparts, sizeof(PartitionHashBound *),
393 		  qsort_partition_hbound_cmp);
394 
395 	/* After sorting, moduli are now stored in ascending order. */
396 	greatest_modulus = hbounds[ndatums - 1]->modulus;
397 
398 	boundinfo->ndatums = ndatums;
399 	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
400 	boundinfo->nindexes = greatest_modulus;
401 	boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
402 	for (i = 0; i < greatest_modulus; i++)
403 		boundinfo->indexes[i] = -1;
404 
405 	/*
406 	 * For hash partitioning, there are as many datums (modulus and remainder
407 	 * pairs) as there are partitions.  Indexes are simply values ranging from
408 	 * 0 to (nparts - 1).
409 	 */
410 	for (i = 0; i < nparts; i++)
411 	{
412 		int			modulus = hbounds[i]->modulus;
413 		int			remainder = hbounds[i]->remainder;
414 
415 		boundinfo->datums[i] = (Datum *) palloc(2 * sizeof(Datum));
416 		boundinfo->datums[i][0] = Int32GetDatum(modulus);
417 		boundinfo->datums[i][1] = Int32GetDatum(remainder);
418 
419 		while (remainder < greatest_modulus)
420 		{
421 			/* overlap? */
422 			Assert(boundinfo->indexes[remainder] == -1);
423 			boundinfo->indexes[remainder] = i;
424 			remainder += modulus;
425 		}
426 
427 		(*mapping)[hbounds[i]->index] = i;
428 		pfree(hbounds[i]);
429 	}
430 	pfree(hbounds);
431 
432 	return boundinfo;
433 }
434 
435 /*
436  * create_list_bounds
437  *		Create a PartitionBoundInfo for a list partitioned table
438  */
439 static PartitionBoundInfo
create_list_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)440 create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
441 				   PartitionKey key, int **mapping)
442 {
443 	PartitionBoundInfo boundinfo;
444 	PartitionListValue **all_values = NULL;
445 	ListCell   *cell;
446 	int			i = 0;
447 	int			ndatums = 0;
448 	int			next_index = 0;
449 	int			default_index = -1;
450 	int			null_index = -1;
451 	List	   *non_null_values = NIL;
452 
453 	boundinfo = (PartitionBoundInfoData *)
454 		palloc0(sizeof(PartitionBoundInfoData));
455 	boundinfo->strategy = key->strategy;
456 	/* Will be set correctly below. */
457 	boundinfo->null_index = -1;
458 	boundinfo->default_index = -1;
459 
460 	/* Create a unified list of non-null values across all partitions. */
461 	for (i = 0; i < nparts; i++)
462 	{
463 		PartitionBoundSpec *spec = boundspecs[i];
464 		ListCell   *c;
465 
466 		if (spec->strategy != PARTITION_STRATEGY_LIST)
467 			elog(ERROR, "invalid strategy in partition bound spec");
468 
469 		/*
470 		 * Note the index of the partition bound spec for the default
471 		 * partition.  There's no datum to add to the list on non-null datums
472 		 * for this partition.
473 		 */
474 		if (spec->is_default)
475 		{
476 			default_index = i;
477 			continue;
478 		}
479 
480 		foreach(c, spec->listdatums)
481 		{
482 			Const	   *val = castNode(Const, lfirst(c));
483 			PartitionListValue *list_value = NULL;
484 
485 			if (!val->constisnull)
486 			{
487 				list_value = (PartitionListValue *)
488 					palloc0(sizeof(PartitionListValue));
489 				list_value->index = i;
490 				list_value->value = val->constvalue;
491 			}
492 			else
493 			{
494 				/*
495 				 * Never put a null into the values array; save the index of
496 				 * the partition that stores nulls, instead.
497 				 */
498 				if (null_index != -1)
499 					elog(ERROR, "found null more than once");
500 				null_index = i;
501 			}
502 
503 			if (list_value)
504 				non_null_values = lappend(non_null_values, list_value);
505 		}
506 	}
507 
508 	ndatums = list_length(non_null_values);
509 
510 	/*
511 	 * Collect all list values in one array. Alongside the value, we also save
512 	 * the index of partition the value comes from.
513 	 */
514 	all_values = (PartitionListValue **)
515 		palloc(ndatums * sizeof(PartitionListValue *));
516 	i = 0;
517 	foreach(cell, non_null_values)
518 	{
519 		PartitionListValue *src = lfirst(cell);
520 
521 		all_values[i] = (PartitionListValue *)
522 			palloc(sizeof(PartitionListValue));
523 		all_values[i]->value = src->value;
524 		all_values[i]->index = src->index;
525 		i++;
526 	}
527 
528 	qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
529 			  qsort_partition_list_value_cmp, (void *) key);
530 
531 	boundinfo->ndatums = ndatums;
532 	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
533 	boundinfo->nindexes = ndatums;
534 	boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
535 
536 	/*
537 	 * Copy values.  Canonical indexes are values ranging from 0 to (nparts -
538 	 * 1) assigned to each partition such that all datums of a given partition
539 	 * receive the same value. The value for a given partition is the index of
540 	 * that partition's smallest datum in the all_values[] array.
541 	 */
542 	for (i = 0; i < ndatums; i++)
543 	{
544 		int			orig_index = all_values[i]->index;
545 
546 		boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
547 		boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
548 											key->parttypbyval[0],
549 											key->parttyplen[0]);
550 
551 		/* If the old index has no mapping, assign one */
552 		if ((*mapping)[orig_index] == -1)
553 			(*mapping)[orig_index] = next_index++;
554 
555 		boundinfo->indexes[i] = (*mapping)[orig_index];
556 	}
557 
558 	/*
559 	 * Set the canonical value for null_index, if any.
560 	 *
561 	 * It is possible that the null-accepting partition has not been assigned
562 	 * an index yet, which could happen if such partition accepts only null
563 	 * and hence not handled in the above loop which only looked at non-null
564 	 * values.
565 	 */
566 	if (null_index != -1)
567 	{
568 		Assert(null_index >= 0);
569 		if ((*mapping)[null_index] == -1)
570 			(*mapping)[null_index] = next_index++;
571 		boundinfo->null_index = (*mapping)[null_index];
572 	}
573 
574 	/* Set the canonical value for default_index, if any. */
575 	if (default_index != -1)
576 	{
577 		/*
578 		 * The default partition accepts any value not specified in the lists
579 		 * of other partitions, hence it should not get mapped index while
580 		 * assigning those for non-null datums.
581 		 */
582 		Assert(default_index >= 0);
583 		Assert((*mapping)[default_index] == -1);
584 		(*mapping)[default_index] = next_index++;
585 		boundinfo->default_index = (*mapping)[default_index];
586 	}
587 
588 	/* All partitions must now have been assigned canonical indexes. */
589 	Assert(next_index == nparts);
590 	return boundinfo;
591 }
592 
593 /*
594  * create_range_bounds
595  *		Create a PartitionBoundInfo for a range partitioned table
596  */
597 static PartitionBoundInfo
create_range_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)598 create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
599 					PartitionKey key, int **mapping)
600 {
601 	PartitionBoundInfo boundinfo;
602 	PartitionRangeBound **rbounds = NULL;
603 	PartitionRangeBound **all_bounds,
604 			   *prev;
605 	int			i,
606 				k;
607 	int			ndatums = 0;
608 	int			default_index = -1;
609 	int			next_index = 0;
610 
611 	boundinfo = (PartitionBoundInfoData *)
612 		palloc0(sizeof(PartitionBoundInfoData));
613 	boundinfo->strategy = key->strategy;
614 	/* There is no special null-accepting range partition. */
615 	boundinfo->null_index = -1;
616 	/* Will be set correctly below. */
617 	boundinfo->default_index = -1;
618 
619 	all_bounds = (PartitionRangeBound **)
620 		palloc0(2 * nparts * sizeof(PartitionRangeBound *));
621 
622 	/* Create a unified list of range bounds across all the partitions. */
623 	ndatums = 0;
624 	for (i = 0; i < nparts; i++)
625 	{
626 		PartitionBoundSpec *spec = boundspecs[i];
627 		PartitionRangeBound *lower,
628 				   *upper;
629 
630 		if (spec->strategy != PARTITION_STRATEGY_RANGE)
631 			elog(ERROR, "invalid strategy in partition bound spec");
632 
633 		/*
634 		 * Note the index of the partition bound spec for the default
635 		 * partition.  There's no datum to add to the all_bounds array for
636 		 * this partition.
637 		 */
638 		if (spec->is_default)
639 		{
640 			default_index = i;
641 			continue;
642 		}
643 
644 		lower = make_one_partition_rbound(key, i, spec->lowerdatums, true);
645 		upper = make_one_partition_rbound(key, i, spec->upperdatums, false);
646 		all_bounds[ndatums++] = lower;
647 		all_bounds[ndatums++] = upper;
648 	}
649 
650 	Assert(ndatums == nparts * 2 ||
651 		   (default_index != -1 && ndatums == (nparts - 1) * 2));
652 
653 	/* Sort all the bounds in ascending order */
654 	qsort_arg(all_bounds, ndatums,
655 			  sizeof(PartitionRangeBound *),
656 			  qsort_partition_rbound_cmp,
657 			  (void *) key);
658 
659 	/* Save distinct bounds from all_bounds into rbounds. */
660 	rbounds = (PartitionRangeBound **)
661 		palloc(ndatums * sizeof(PartitionRangeBound *));
662 	k = 0;
663 	prev = NULL;
664 	for (i = 0; i < ndatums; i++)
665 	{
666 		PartitionRangeBound *cur = all_bounds[i];
667 		bool		is_distinct = false;
668 		int			j;
669 
670 		/* Is the current bound distinct from the previous one? */
671 		for (j = 0; j < key->partnatts; j++)
672 		{
673 			Datum		cmpval;
674 
675 			if (prev == NULL || cur->kind[j] != prev->kind[j])
676 			{
677 				is_distinct = true;
678 				break;
679 			}
680 
681 			/*
682 			 * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
683 			 * them as equal, since any values after this point must be
684 			 * ignored.
685 			 */
686 			if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
687 				break;
688 
689 			cmpval = FunctionCall2Coll(&key->partsupfunc[j],
690 									   key->partcollation[j],
691 									   cur->datums[j],
692 									   prev->datums[j]);
693 			if (DatumGetInt32(cmpval) != 0)
694 			{
695 				is_distinct = true;
696 				break;
697 			}
698 		}
699 
700 		/*
701 		 * Only if the bound is distinct save it into a temporary array, i.e,
702 		 * rbounds which is later copied into boundinfo datums array.
703 		 */
704 		if (is_distinct)
705 			rbounds[k++] = all_bounds[i];
706 
707 		prev = cur;
708 	}
709 
710 	/* Update ndatums to hold the count of distinct datums. */
711 	ndatums = k;
712 
713 	/*
714 	 * Add datums to boundinfo.  Canonical indexes are values ranging from 0
715 	 * to nparts - 1, assigned in that order to each partition's upper bound.
716 	 * For 'datums' elements that are lower bounds, there is -1 in the
717 	 * 'indexes' array to signify that no partition exists for the values less
718 	 * than such a bound and greater than or equal to the previous upper
719 	 * bound.
720 	 */
721 	boundinfo->ndatums = ndatums;
722 	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
723 	boundinfo->kind = (PartitionRangeDatumKind **)
724 		palloc(ndatums *
725 			   sizeof(PartitionRangeDatumKind *));
726 
727 	/*
728 	 * For range partitioning, an additional value of -1 is stored as the last
729 	 * element of the indexes[] array.
730 	 */
731 	boundinfo->nindexes = ndatums + 1;
732 	boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
733 
734 	for (i = 0; i < ndatums; i++)
735 	{
736 		int			j;
737 
738 		boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
739 												sizeof(Datum));
740 		boundinfo->kind[i] = (PartitionRangeDatumKind *)
741 			palloc(key->partnatts *
742 				   sizeof(PartitionRangeDatumKind));
743 		for (j = 0; j < key->partnatts; j++)
744 		{
745 			if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
746 				boundinfo->datums[i][j] =
747 					datumCopy(rbounds[i]->datums[j],
748 							  key->parttypbyval[j],
749 							  key->parttyplen[j]);
750 			boundinfo->kind[i][j] = rbounds[i]->kind[j];
751 		}
752 
753 		/*
754 		 * There is no mapping for invalid indexes.
755 		 *
756 		 * Any lower bounds in the rbounds array have invalid indexes
757 		 * assigned, because the values between the previous bound (if there
758 		 * is one) and this (lower) bound are not part of the range of any
759 		 * existing partition.
760 		 */
761 		if (rbounds[i]->lower)
762 			boundinfo->indexes[i] = -1;
763 		else
764 		{
765 			int			orig_index = rbounds[i]->index;
766 
767 			/* If the old index has no mapping, assign one */
768 			if ((*mapping)[orig_index] == -1)
769 				(*mapping)[orig_index] = next_index++;
770 
771 			boundinfo->indexes[i] = (*mapping)[orig_index];
772 		}
773 	}
774 
775 	/* Set the canonical value for default_index, if any. */
776 	if (default_index != -1)
777 	{
778 		Assert(default_index >= 0 && (*mapping)[default_index] == -1);
779 		(*mapping)[default_index] = next_index++;
780 		boundinfo->default_index = (*mapping)[default_index];
781 	}
782 
783 	/* The extra -1 element. */
784 	Assert(i == ndatums);
785 	boundinfo->indexes[i] = -1;
786 
787 	/* All partitions must now have been assigned canonical indexes. */
788 	Assert(next_index == nparts);
789 	return boundinfo;
790 }
791 
792 /*
793  * Are two partition bound collections logically equal?
794  *
795  * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
796  * This is also useful when b1 and b2 are bound collections of two separate
797  * relations, respectively, because PartitionBoundInfo is a canonical
798  * representation of partition bounds.
799  */
800 bool
partition_bounds_equal(int partnatts,int16 * parttyplen,bool * parttypbyval,PartitionBoundInfo b1,PartitionBoundInfo b2)801 partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
802 					   PartitionBoundInfo b1, PartitionBoundInfo b2)
803 {
804 	int			i;
805 
806 	if (b1->strategy != b2->strategy)
807 		return false;
808 
809 	if (b1->ndatums != b2->ndatums)
810 		return false;
811 
812 	if (b1->nindexes != b2->nindexes)
813 		return false;
814 
815 	if (b1->null_index != b2->null_index)
816 		return false;
817 
818 	if (b1->default_index != b2->default_index)
819 		return false;
820 
821 	/* For all partition strategies, the indexes[] arrays have to match */
822 	for (i = 0; i < b1->nindexes; i++)
823 	{
824 		if (b1->indexes[i] != b2->indexes[i])
825 			return false;
826 	}
827 
828 	/* Finally, compare the datums[] arrays */
829 	if (b1->strategy == PARTITION_STRATEGY_HASH)
830 	{
831 		/*
832 		 * We arrange the partitions in the ascending order of their moduli
833 		 * and remainders.  Also every modulus is factor of next larger
834 		 * modulus.  Therefore we can safely store index of a given partition
835 		 * in indexes array at remainder of that partition.  Also entries at
836 		 * (remainder + N * modulus) positions in indexes array are all same
837 		 * for (modulus, remainder) specification for any partition.  Thus the
838 		 * datums arrays from the given bounds are the same, if and only if
839 		 * their indexes arrays are the same.  So, it suffices to compare the
840 		 * indexes arrays.
841 		 *
842 		 * Nonetheless make sure that the bounds are indeed the same when the
843 		 * indexes match.  Hash partition bound stores modulus and remainder
844 		 * at b1->datums[i][0] and b1->datums[i][1] position respectively.
845 		 */
846 #ifdef USE_ASSERT_CHECKING
847 		for (i = 0; i < b1->ndatums; i++)
848 			Assert((b1->datums[i][0] == b2->datums[i][0] &&
849 					b1->datums[i][1] == b2->datums[i][1]));
850 #endif
851 	}
852 	else
853 	{
854 		for (i = 0; i < b1->ndatums; i++)
855 		{
856 			int			j;
857 
858 			for (j = 0; j < partnatts; j++)
859 			{
860 				/* For range partitions, the bounds might not be finite. */
861 				if (b1->kind != NULL)
862 				{
863 					/* The different kinds of bound all differ from each other */
864 					if (b1->kind[i][j] != b2->kind[i][j])
865 						return false;
866 
867 					/*
868 					 * Non-finite bounds are equal without further
869 					 * examination.
870 					 */
871 					if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
872 						continue;
873 				}
874 
875 				/*
876 				 * Compare the actual values. Note that it would be both
877 				 * incorrect and unsafe to invoke the comparison operator
878 				 * derived from the partitioning specification here.  It would
879 				 * be incorrect because we want the relcache entry to be
880 				 * updated for ANY change to the partition bounds, not just
881 				 * those that the partitioning operator thinks are
882 				 * significant.  It would be unsafe because we might reach
883 				 * this code in the context of an aborted transaction, and an
884 				 * arbitrary partitioning operator might not be safe in that
885 				 * context.  datumIsEqual() should be simple enough to be
886 				 * safe.
887 				 */
888 				if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
889 								  parttypbyval[j], parttyplen[j]))
890 					return false;
891 			}
892 		}
893 	}
894 	return true;
895 }
896 
897 /*
898  * Return a copy of given PartitionBoundInfo structure. The data types of bounds
899  * are described by given partition key specification.
900  *
901  * Note: it's important that this function and its callees not do any catalog
902  * access, nor anything else that would result in allocating memory other than
903  * the returned data structure.  Since this is called in a long-lived context,
904  * that would result in unwanted memory leaks.
905  */
906 PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,PartitionKey key)907 partition_bounds_copy(PartitionBoundInfo src,
908 					  PartitionKey key)
909 {
910 	PartitionBoundInfo dest;
911 	int			i;
912 	int			ndatums;
913 	int			nindexes;
914 	int			partnatts;
915 	bool		hash_part;
916 	int			natts;
917 
918 	dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
919 
920 	dest->strategy = src->strategy;
921 	ndatums = dest->ndatums = src->ndatums;
922 	nindexes = dest->nindexes = src->nindexes;
923 	partnatts = key->partnatts;
924 
925 	/* List partitioned tables have only a single partition key. */
926 	Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
927 
928 	dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
929 
930 	if (src->kind != NULL)
931 	{
932 		dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
933 														 sizeof(PartitionRangeDatumKind *));
934 		for (i = 0; i < ndatums; i++)
935 		{
936 			dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts *
937 															   sizeof(PartitionRangeDatumKind));
938 
939 			memcpy(dest->kind[i], src->kind[i],
940 				   sizeof(PartitionRangeDatumKind) * key->partnatts);
941 		}
942 	}
943 	else
944 		dest->kind = NULL;
945 
946 	/*
947 	 * For hash partitioning, datums array will have two elements - modulus
948 	 * and remainder.
949 	 */
950 	hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
951 	natts = hash_part ? 2 : partnatts;
952 
953 	for (i = 0; i < ndatums; i++)
954 	{
955 		int			j;
956 
957 		dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts);
958 
959 		for (j = 0; j < natts; j++)
960 		{
961 			bool		byval;
962 			int			typlen;
963 
964 			if (hash_part)
965 			{
966 				typlen = sizeof(int32); /* Always int4 */
967 				byval = true;	/* int4 is pass-by-value */
968 			}
969 			else
970 			{
971 				byval = key->parttypbyval[j];
972 				typlen = key->parttyplen[j];
973 			}
974 
975 			if (dest->kind == NULL ||
976 				dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
977 				dest->datums[i][j] = datumCopy(src->datums[i][j],
978 											   byval, typlen);
979 		}
980 	}
981 
982 	dest->indexes = (int *) palloc(sizeof(int) * nindexes);
983 	memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);
984 
985 	dest->null_index = src->null_index;
986 	dest->default_index = src->default_index;
987 
988 	return dest;
989 }
990 
991 /*
992  * partition_bounds_merge
993  *		Check to see whether every partition of 'outer_rel' matches/overlaps
994  *		one partition of 'inner_rel' at most, and vice versa; and if so, build
995  *		and return the partition bounds for a join relation between the rels,
996  *		generating two lists of the matching/overlapping partitions, which are
997  *		returned to *outer_parts and *inner_parts respectively.
998  *
999  * The lists contain the same number of partitions, and the partitions at the
1000  * same positions in the lists indicate join pairs used for partitioned join.
1001  * If a partition on one side matches/overlaps multiple partitions on the other
1002  * side, this function returns NULL, setting *outer_parts and *inner_parts to
1003  * NIL.
1004  */
1005 PartitionBoundInfo
partition_bounds_merge(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,RelOptInfo * outer_rel,RelOptInfo * inner_rel,JoinType jointype,List ** outer_parts,List ** inner_parts)1006 partition_bounds_merge(int partnatts,
1007 					   FmgrInfo *partsupfunc, Oid *partcollation,
1008 					   RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1009 					   JoinType jointype,
1010 					   List **outer_parts, List **inner_parts)
1011 {
1012 	/*
1013 	 * Currently, this function is called only from try_partitionwise_join(),
1014 	 * so the join type should be INNER, LEFT, FULL, SEMI, or ANTI.
1015 	 */
1016 	Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT ||
1017 		   jointype == JOIN_FULL || jointype == JOIN_SEMI ||
1018 		   jointype == JOIN_ANTI);
1019 
1020 	/* The partitioning strategies should be the same. */
1021 	Assert(outer_rel->boundinfo->strategy == inner_rel->boundinfo->strategy);
1022 
1023 	*outer_parts = *inner_parts = NIL;
1024 	switch (outer_rel->boundinfo->strategy)
1025 	{
1026 		case PARTITION_STRATEGY_HASH:
1027 
1028 			/*
1029 			 * For hash partitioned tables, we currently support partitioned
1030 			 * join only when they have exactly the same partition bounds.
1031 			 *
1032 			 * XXX: it might be possible to relax the restriction to support
1033 			 * cases where hash partitioned tables have missing partitions
1034 			 * and/or different moduli, but it's not clear if it would be
1035 			 * useful to support the former case since it's unusual to have
1036 			 * missing partitions.  On the other hand, it would be useful to
1037 			 * support the latter case, but in that case, there is a high
1038 			 * probability that a partition on one side will match multiple
1039 			 * partitions on the other side, which is the scenario the current
1040 			 * implementation of partitioned join can't handle.
1041 			 */
1042 			return NULL;
1043 
1044 		case PARTITION_STRATEGY_LIST:
1045 			return merge_list_bounds(partsupfunc,
1046 									 partcollation,
1047 									 outer_rel,
1048 									 inner_rel,
1049 									 jointype,
1050 									 outer_parts,
1051 									 inner_parts);
1052 
1053 		case PARTITION_STRATEGY_RANGE:
1054 			return merge_range_bounds(partnatts,
1055 									  partsupfunc,
1056 									  partcollation,
1057 									  outer_rel,
1058 									  inner_rel,
1059 									  jointype,
1060 									  outer_parts,
1061 									  inner_parts);
1062 
1063 		default:
1064 			elog(ERROR, "unexpected partition strategy: %d",
1065 				 (int) outer_rel->boundinfo->strategy);
1066 			return NULL;		/* keep compiler quiet */
1067 	}
1068 }
1069 
1070 /*
1071  * merge_list_bounds
1072  *		Create the partition bounds for a join relation between list
1073  *		partitioned tables, if possible
1074  *
1075  * In this function we try to find sets of matching partitions from both sides
1076  * by comparing list values stored in their partition bounds.  Since the list
1077  * values appear in the ascending order, an algorithm similar to merge join is
1078  * used for that.  If a partition on one side doesn't have a matching
1079  * partition on the other side, the algorithm tries to match it with the
1080  * default partition on the other side if any; if not, the algorithm tries to
1081  * match it with a dummy partition on the other side if it's on the
1082  * non-nullable side of an outer join.  Also, if both sides have the default
1083  * partitions, the algorithm tries to match them with each other.  We give up
1084  * if the algorithm finds a partition matching multiple partitions on the
1085  * other side, which is the scenario the current implementation of partitioned
1086  * join can't handle.
1087  */
1088 static PartitionBoundInfo
merge_list_bounds(FmgrInfo * partsupfunc,Oid * partcollation,RelOptInfo * outer_rel,RelOptInfo * inner_rel,JoinType jointype,List ** outer_parts,List ** inner_parts)1089 merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation,
1090 				  RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1091 				  JoinType jointype,
1092 				  List **outer_parts, List **inner_parts)
1093 {
1094 	PartitionBoundInfo merged_bounds = NULL;
1095 	PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1096 	PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1097 	bool		outer_has_default = partition_bound_has_default(outer_bi);
1098 	bool		inner_has_default = partition_bound_has_default(inner_bi);
1099 	int			outer_default = outer_bi->default_index;
1100 	int			inner_default = inner_bi->default_index;
1101 	bool		outer_has_null = partition_bound_accepts_nulls(outer_bi);
1102 	bool		inner_has_null = partition_bound_accepts_nulls(inner_bi);
1103 	PartitionMap outer_map;
1104 	PartitionMap inner_map;
1105 	int			outer_pos;
1106 	int			inner_pos;
1107 	int			next_index = 0;
1108 	int			null_index = -1;
1109 	int			default_index = -1;
1110 	List	   *merged_datums = NIL;
1111 	List	   *merged_indexes = NIL;
1112 
1113 	Assert(*outer_parts == NIL);
1114 	Assert(*inner_parts == NIL);
1115 	Assert(outer_bi->strategy == inner_bi->strategy &&
1116 		   outer_bi->strategy == PARTITION_STRATEGY_LIST);
1117 	/* List partitioning doesn't require kinds. */
1118 	Assert(!outer_bi->kind && !inner_bi->kind);
1119 
1120 	init_partition_map(outer_rel, &outer_map);
1121 	init_partition_map(inner_rel, &inner_map);
1122 
1123 	/*
1124 	 * If the default partitions (if any) have been proven empty, deem them
1125 	 * non-existent.
1126 	 */
1127 	if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1128 		outer_has_default = false;
1129 	if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1130 		inner_has_default = false;
1131 
1132 	/*
1133 	 * Merge partitions from both sides.  In each iteration we compare a pair
1134 	 * of list values, one from each side, and decide whether the
1135 	 * corresponding partitions match or not.  If the two values match
1136 	 * exactly, move to the next pair of list values, otherwise move to the
1137 	 * next list value on the side with a smaller list value.
1138 	 */
1139 	outer_pos = inner_pos = 0;
1140 	while (outer_pos < outer_bi->ndatums || inner_pos < inner_bi->ndatums)
1141 	{
1142 		int			outer_index = -1;
1143 		int			inner_index = -1;
1144 		Datum	   *outer_datums;
1145 		Datum	   *inner_datums;
1146 		int			cmpval;
1147 		Datum	   *merged_datum = NULL;
1148 		int			merged_index = -1;
1149 
1150 		if (outer_pos < outer_bi->ndatums)
1151 		{
1152 			/*
1153 			 * If the partition on the outer side has been proven empty,
1154 			 * ignore it and move to the next datum on the outer side.
1155 			 */
1156 			outer_index = outer_bi->indexes[outer_pos];
1157 			if (is_dummy_partition(outer_rel, outer_index))
1158 			{
1159 				outer_pos++;
1160 				continue;
1161 			}
1162 		}
1163 		if (inner_pos < inner_bi->ndatums)
1164 		{
1165 			/*
1166 			 * If the partition on the inner side has been proven empty,
1167 			 * ignore it and move to the next datum on the inner side.
1168 			 */
1169 			inner_index = inner_bi->indexes[inner_pos];
1170 			if (is_dummy_partition(inner_rel, inner_index))
1171 			{
1172 				inner_pos++;
1173 				continue;
1174 			}
1175 		}
1176 
1177 		/* Get the list values. */
1178 		outer_datums = outer_pos < outer_bi->ndatums ?
1179 			outer_bi->datums[outer_pos] : NULL;
1180 		inner_datums = inner_pos < inner_bi->ndatums ?
1181 			inner_bi->datums[inner_pos] : NULL;
1182 
1183 		/*
1184 		 * We run this loop till both sides finish.  This allows us to avoid
1185 		 * duplicating code to handle the remaining values on the side which
1186 		 * finishes later.  For that we set the comparison parameter cmpval in
1187 		 * such a way that it appears as if the side which finishes earlier
1188 		 * has an extra value higher than any other value on the unfinished
1189 		 * side. That way we advance the values on the unfinished side till
1190 		 * all of its values are exhausted.
1191 		 */
1192 		if (outer_pos >= outer_bi->ndatums)
1193 			cmpval = 1;
1194 		else if (inner_pos >= inner_bi->ndatums)
1195 			cmpval = -1;
1196 		else
1197 		{
1198 			Assert(outer_datums != NULL && inner_datums != NULL);
1199 			cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
1200 													 partcollation[0],
1201 													 outer_datums[0],
1202 													 inner_datums[0]));
1203 		}
1204 
1205 		if (cmpval == 0)
1206 		{
1207 			/* Two list values match exactly. */
1208 			Assert(outer_pos < outer_bi->ndatums);
1209 			Assert(inner_pos < inner_bi->ndatums);
1210 			Assert(outer_index >= 0);
1211 			Assert(inner_index >= 0);
1212 
1213 			/*
1214 			 * Try merging both partitions.  If successful, add the list value
1215 			 * and index of the merged partition below.
1216 			 */
1217 			merged_index = merge_matching_partitions(&outer_map, &inner_map,
1218 													 outer_index, inner_index,
1219 													 &next_index);
1220 			if (merged_index == -1)
1221 				goto cleanup;
1222 
1223 			merged_datum = outer_datums;
1224 
1225 			/* Move to the next pair of list values. */
1226 			outer_pos++;
1227 			inner_pos++;
1228 		}
1229 		else if (cmpval < 0)
1230 		{
1231 			/* A list value missing from the inner side. */
1232 			Assert(outer_pos < outer_bi->ndatums);
1233 
1234 			/*
1235 			 * If the inner side has the default partition, or this is an
1236 			 * outer join, try to assign a merged partition to the outer
1237 			 * partition (see process_outer_partition()).  Otherwise, the
1238 			 * outer partition will not contribute to the result.
1239 			 */
1240 			if (inner_has_default || IS_OUTER_JOIN(jointype))
1241 			{
1242 				/* Get the outer partition. */
1243 				outer_index = outer_bi->indexes[outer_pos];
1244 				Assert(outer_index >= 0);
1245 				merged_index = process_outer_partition(&outer_map,
1246 													   &inner_map,
1247 													   outer_has_default,
1248 													   inner_has_default,
1249 													   outer_index,
1250 													   inner_default,
1251 													   jointype,
1252 													   &next_index,
1253 													   &default_index);
1254 				if (merged_index == -1)
1255 					goto cleanup;
1256 				merged_datum = outer_datums;
1257 			}
1258 
1259 			/* Move to the next list value on the outer side. */
1260 			outer_pos++;
1261 		}
1262 		else
1263 		{
1264 			/* A list value missing from the outer side. */
1265 			Assert(cmpval > 0);
1266 			Assert(inner_pos < inner_bi->ndatums);
1267 
1268 			/*
1269 			 * If the outer side has the default partition, or this is a FULL
1270 			 * join, try to assign a merged partition to the inner partition
1271 			 * (see process_inner_partition()).  Otherwise, the inner
1272 			 * partition will not contribute to the result.
1273 			 */
1274 			if (outer_has_default || jointype == JOIN_FULL)
1275 			{
1276 				/* Get the inner partition. */
1277 				inner_index = inner_bi->indexes[inner_pos];
1278 				Assert(inner_index >= 0);
1279 				merged_index = process_inner_partition(&outer_map,
1280 													   &inner_map,
1281 													   outer_has_default,
1282 													   inner_has_default,
1283 													   inner_index,
1284 													   outer_default,
1285 													   jointype,
1286 													   &next_index,
1287 													   &default_index);
1288 				if (merged_index == -1)
1289 					goto cleanup;
1290 				merged_datum = inner_datums;
1291 			}
1292 
1293 			/* Move to the next list value on the inner side. */
1294 			inner_pos++;
1295 		}
1296 
1297 		/*
1298 		 * If we assigned a merged partition, add the list value and index of
1299 		 * the merged partition if appropriate.
1300 		 */
1301 		if (merged_index >= 0 && merged_index != default_index)
1302 		{
1303 			merged_datums = lappend(merged_datums, merged_datum);
1304 			merged_indexes = lappend_int(merged_indexes, merged_index);
1305 		}
1306 	}
1307 
1308 	/*
1309 	 * If the NULL partitions (if any) have been proven empty, deem them
1310 	 * non-existent.
1311 	 */
1312 	if (outer_has_null &&
1313 		is_dummy_partition(outer_rel, outer_bi->null_index))
1314 		outer_has_null = false;
1315 	if (inner_has_null &&
1316 		is_dummy_partition(inner_rel, inner_bi->null_index))
1317 		inner_has_null = false;
1318 
1319 	/* Merge the NULL partitions if any. */
1320 	if (outer_has_null || inner_has_null)
1321 		merge_null_partitions(&outer_map, &inner_map,
1322 							  outer_has_null, inner_has_null,
1323 							  outer_bi->null_index, inner_bi->null_index,
1324 							  jointype, &next_index, &null_index);
1325 	else
1326 		Assert(null_index == -1);
1327 
1328 	/* Merge the default partitions if any. */
1329 	if (outer_has_default || inner_has_default)
1330 		merge_default_partitions(&outer_map, &inner_map,
1331 								 outer_has_default, inner_has_default,
1332 								 outer_default, inner_default,
1333 								 jointype, &next_index, &default_index);
1334 	else
1335 		Assert(default_index == -1);
1336 
1337 	/* If we have merged partitions, create the partition bounds. */
1338 	if (next_index > 0)
1339 	{
1340 		/* Fix the merged_indexes list if necessary. */
1341 		if (outer_map.did_remapping || inner_map.did_remapping)
1342 		{
1343 			Assert(jointype == JOIN_FULL);
1344 			fix_merged_indexes(&outer_map, &inner_map,
1345 							   next_index, merged_indexes);
1346 		}
1347 
1348 		/* Use maps to match partitions from inputs. */
1349 		generate_matching_part_pairs(outer_rel, inner_rel,
1350 									 &outer_map, &inner_map,
1351 									 next_index,
1352 									 outer_parts, inner_parts);
1353 		Assert(*outer_parts != NIL);
1354 		Assert(*inner_parts != NIL);
1355 		Assert(list_length(*outer_parts) == list_length(*inner_parts));
1356 		Assert(list_length(*outer_parts) <= next_index);
1357 
1358 		/* Make a PartitionBoundInfo struct to return. */
1359 		merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1360 													  merged_datums,
1361 													  NIL,
1362 													  merged_indexes,
1363 													  null_index,
1364 													  default_index);
1365 		Assert(merged_bounds);
1366 	}
1367 
1368 cleanup:
1369 	/* Free local memory before returning. */
1370 	list_free(merged_datums);
1371 	list_free(merged_indexes);
1372 	free_partition_map(&outer_map);
1373 	free_partition_map(&inner_map);
1374 
1375 	return merged_bounds;
1376 }
1377 
1378 /*
1379  * merge_range_bounds
1380  *		Create the partition bounds for a join relation between range
1381  *		partitioned tables, if possible
1382  *
1383  * In this function we try to find sets of overlapping partitions from both
1384  * sides by comparing ranges stored in their partition bounds.  Since the
1385  * ranges appear in the ascending order, an algorithm similar to merge join is
1386  * used for that.  If a partition on one side doesn't have an overlapping
1387  * partition on the other side, the algorithm tries to match it with the
1388  * default partition on the other side if any; if not, the algorithm tries to
1389  * match it with a dummy partition on the other side if it's on the
1390  * non-nullable side of an outer join.  Also, if both sides have the default
1391  * partitions, the algorithm tries to match them with each other.  We give up
1392  * if the algorithm finds a partition overlapping multiple partitions on the
1393  * other side, which is the scenario the current implementation of partitioned
1394  * join can't handle.
1395  */
1396 static PartitionBoundInfo
merge_range_bounds(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,RelOptInfo * outer_rel,RelOptInfo * inner_rel,JoinType jointype,List ** outer_parts,List ** inner_parts)1397 merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
1398 				   Oid *partcollations,
1399 				   RelOptInfo *outer_rel, RelOptInfo *inner_rel,
1400 				   JoinType jointype,
1401 				   List **outer_parts, List **inner_parts)
1402 {
1403 	PartitionBoundInfo merged_bounds = NULL;
1404 	PartitionBoundInfo outer_bi = outer_rel->boundinfo;
1405 	PartitionBoundInfo inner_bi = inner_rel->boundinfo;
1406 	bool		outer_has_default = partition_bound_has_default(outer_bi);
1407 	bool		inner_has_default = partition_bound_has_default(inner_bi);
1408 	int			outer_default = outer_bi->default_index;
1409 	int			inner_default = inner_bi->default_index;
1410 	PartitionMap outer_map;
1411 	PartitionMap inner_map;
1412 	int			outer_index;
1413 	int			inner_index;
1414 	int			outer_lb_pos;
1415 	int			inner_lb_pos;
1416 	PartitionRangeBound outer_lb;
1417 	PartitionRangeBound outer_ub;
1418 	PartitionRangeBound inner_lb;
1419 	PartitionRangeBound inner_ub;
1420 	int			next_index = 0;
1421 	int			default_index = -1;
1422 	List	   *merged_datums = NIL;
1423 	List	   *merged_kinds = NIL;
1424 	List	   *merged_indexes = NIL;
1425 
1426 	Assert(*outer_parts == NIL);
1427 	Assert(*inner_parts == NIL);
1428 	Assert(outer_bi->strategy == inner_bi->strategy &&
1429 		   outer_bi->strategy == PARTITION_STRATEGY_RANGE);
1430 
1431 	init_partition_map(outer_rel, &outer_map);
1432 	init_partition_map(inner_rel, &inner_map);
1433 
1434 	/*
1435 	 * If the default partitions (if any) have been proven empty, deem them
1436 	 * non-existent.
1437 	 */
1438 	if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
1439 		outer_has_default = false;
1440 	if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
1441 		inner_has_default = false;
1442 
1443 	/*
1444 	 * Merge partitions from both sides.  In each iteration we compare a pair
1445 	 * of ranges, one from each side, and decide whether the corresponding
1446 	 * partitions match or not.  If the two ranges overlap, move to the next
1447 	 * pair of ranges, otherwise move to the next range on the side with a
1448 	 * lower range.  outer_lb_pos/inner_lb_pos keep track of the positions of
1449 	 * lower bounds in the datums arrays in the outer/inner
1450 	 * PartitionBoundInfos respectively.
1451 	 */
1452 	outer_lb_pos = inner_lb_pos = 0;
1453 	outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1454 									  &outer_lb, &outer_ub);
1455 	inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1456 									  &inner_lb, &inner_ub);
1457 	while (outer_index >= 0 || inner_index >= 0)
1458 	{
1459 		bool		overlap;
1460 		int			ub_cmpval;
1461 		int			lb_cmpval;
1462 		PartitionRangeBound merged_lb = {-1, NULL, NULL, true};
1463 		PartitionRangeBound merged_ub = {-1, NULL, NULL, false};
1464 		int			merged_index = -1;
1465 
1466 		/*
1467 		 * We run this loop till both sides finish.  This allows us to avoid
1468 		 * duplicating code to handle the remaining ranges on the side which
1469 		 * finishes later.  For that we set the comparison parameter cmpval in
1470 		 * such a way that it appears as if the side which finishes earlier
1471 		 * has an extra range higher than any other range on the unfinished
1472 		 * side. That way we advance the ranges on the unfinished side till
1473 		 * all of its ranges are exhausted.
1474 		 */
1475 		if (outer_index == -1)
1476 		{
1477 			overlap = false;
1478 			lb_cmpval = 1;
1479 			ub_cmpval = 1;
1480 		}
1481 		else if (inner_index == -1)
1482 		{
1483 			overlap = false;
1484 			lb_cmpval = -1;
1485 			ub_cmpval = -1;
1486 		}
1487 		else
1488 			overlap = compare_range_partitions(partnatts, partsupfuncs,
1489 											   partcollations,
1490 											   &outer_lb, &outer_ub,
1491 											   &inner_lb, &inner_ub,
1492 											   &lb_cmpval, &ub_cmpval);
1493 
1494 		if (overlap)
1495 		{
1496 			/* Two ranges overlap; form a join pair. */
1497 
1498 			PartitionRangeBound save_outer_ub;
1499 			PartitionRangeBound save_inner_ub;
1500 
1501 			/* Both partitions should not have been merged yet. */
1502 			Assert(outer_index >= 0);
1503 			Assert(outer_map.merged_indexes[outer_index] == -1 &&
1504 				   outer_map.merged[outer_index] == false);
1505 			Assert(inner_index >= 0);
1506 			Assert(inner_map.merged_indexes[inner_index] == -1 &&
1507 				   inner_map.merged[inner_index] == false);
1508 
1509 			/*
1510 			 * Get the index of the merged partition.  Both partitions aren't
1511 			 * merged yet, so the partitions should be merged successfully.
1512 			 */
1513 			merged_index = merge_matching_partitions(&outer_map, &inner_map,
1514 													 outer_index, inner_index,
1515 													 &next_index);
1516 			Assert(merged_index >= 0);
1517 
1518 			/* Get the range bounds of the merged partition. */
1519 			get_merged_range_bounds(partnatts, partsupfuncs,
1520 									partcollations, jointype,
1521 									&outer_lb, &outer_ub,
1522 									&inner_lb, &inner_ub,
1523 									lb_cmpval, ub_cmpval,
1524 									&merged_lb, &merged_ub);
1525 
1526 			/* Save the upper bounds of both partitions for use below. */
1527 			save_outer_ub = outer_ub;
1528 			save_inner_ub = inner_ub;
1529 
1530 			/* Move to the next pair of ranges. */
1531 			outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1532 											  &outer_lb, &outer_ub);
1533 			inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1534 											  &inner_lb, &inner_ub);
1535 
1536 			/*
1537 			 * If the range of a partition on one side overlaps the range of
1538 			 * the next partition on the other side, that will cause the
1539 			 * partition on one side to match at least two partitions on the
1540 			 * other side, which is the case that we currently don't support
1541 			 * partitioned join for; give up.
1542 			 */
1543 			if (ub_cmpval > 0 && inner_index >= 0 &&
1544 				compare_range_bounds(partnatts, partsupfuncs, partcollations,
1545 									 &save_outer_ub, &inner_lb) > 0)
1546 				goto cleanup;
1547 			if (ub_cmpval < 0 && outer_index >= 0 &&
1548 				compare_range_bounds(partnatts, partsupfuncs, partcollations,
1549 									 &outer_lb, &save_inner_ub) < 0)
1550 				goto cleanup;
1551 
1552 			/*
1553 			 * A row from a non-overlapping portion (if any) of a partition on
1554 			 * one side might find its join partner in the default partition
1555 			 * (if any) on the other side, causing the same situation as
1556 			 * above; give up in that case.
1557 			 */
1558 			if ((outer_has_default && (lb_cmpval > 0 || ub_cmpval < 0)) ||
1559 				(inner_has_default && (lb_cmpval < 0 || ub_cmpval > 0)))
1560 				goto cleanup;
1561 		}
1562 		else if (ub_cmpval < 0)
1563 		{
1564 			/* A non-overlapping outer range. */
1565 
1566 			/* The outer partition should not have been merged yet. */
1567 			Assert(outer_index >= 0);
1568 			Assert(outer_map.merged_indexes[outer_index] == -1 &&
1569 				   outer_map.merged[outer_index] == false);
1570 
1571 			/*
1572 			 * If the inner side has the default partition, or this is an
1573 			 * outer join, try to assign a merged partition to the outer
1574 			 * partition (see process_outer_partition()).  Otherwise, the
1575 			 * outer partition will not contribute to the result.
1576 			 */
1577 			if (inner_has_default || IS_OUTER_JOIN(jointype))
1578 			{
1579 				merged_index = process_outer_partition(&outer_map,
1580 													   &inner_map,
1581 													   outer_has_default,
1582 													   inner_has_default,
1583 													   outer_index,
1584 													   inner_default,
1585 													   jointype,
1586 													   &next_index,
1587 													   &default_index);
1588 				if (merged_index == -1)
1589 					goto cleanup;
1590 				merged_lb = outer_lb;
1591 				merged_ub = outer_ub;
1592 			}
1593 
1594 			/* Move to the next range on the outer side. */
1595 			outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
1596 											  &outer_lb, &outer_ub);
1597 		}
1598 		else
1599 		{
1600 			/* A non-overlapping inner range. */
1601 			Assert(ub_cmpval > 0);
1602 
1603 			/* The inner partition should not have been merged yet. */
1604 			Assert(inner_index >= 0);
1605 			Assert(inner_map.merged_indexes[inner_index] == -1 &&
1606 				   inner_map.merged[inner_index] == false);
1607 
1608 			/*
1609 			 * If the outer side has the default partition, or this is a FULL
1610 			 * join, try to assign a merged partition to the inner partition
1611 			 * (see process_inner_partition()).  Otherwise, the inner
1612 			 * partition will not contribute to the result.
1613 			 */
1614 			if (outer_has_default || jointype == JOIN_FULL)
1615 			{
1616 				merged_index = process_inner_partition(&outer_map,
1617 													   &inner_map,
1618 													   outer_has_default,
1619 													   inner_has_default,
1620 													   inner_index,
1621 													   outer_default,
1622 													   jointype,
1623 													   &next_index,
1624 													   &default_index);
1625 				if (merged_index == -1)
1626 					goto cleanup;
1627 				merged_lb = inner_lb;
1628 				merged_ub = inner_ub;
1629 			}
1630 
1631 			/* Move to the next range on the inner side. */
1632 			inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
1633 											  &inner_lb, &inner_ub);
1634 		}
1635 
1636 		/*
1637 		 * If we assigned a merged partition, add the range bounds and index
1638 		 * of the merged partition if appropriate.
1639 		 */
1640 		if (merged_index >= 0 && merged_index != default_index)
1641 			add_merged_range_bounds(partnatts, partsupfuncs, partcollations,
1642 									&merged_lb, &merged_ub, merged_index,
1643 									&merged_datums, &merged_kinds,
1644 									&merged_indexes);
1645 	}
1646 
1647 	/* Merge the default partitions if any. */
1648 	if (outer_has_default || inner_has_default)
1649 		merge_default_partitions(&outer_map, &inner_map,
1650 								 outer_has_default, inner_has_default,
1651 								 outer_default, inner_default,
1652 								 jointype, &next_index, &default_index);
1653 	else
1654 		Assert(default_index == -1);
1655 
1656 	/* If we have merged partitions, create the partition bounds. */
1657 	if (next_index > 0)
1658 	{
1659 		/*
1660 		 * Unlike the case of list partitioning, we wouldn't have re-merged
1661 		 * partitions, so did_remapping should be left alone.
1662 		 */
1663 		Assert(!outer_map.did_remapping);
1664 		Assert(!inner_map.did_remapping);
1665 
1666 		/* Use maps to match partitions from inputs. */
1667 		generate_matching_part_pairs(outer_rel, inner_rel,
1668 									 &outer_map, &inner_map,
1669 									 next_index,
1670 									 outer_parts, inner_parts);
1671 		Assert(*outer_parts != NIL);
1672 		Assert(*inner_parts != NIL);
1673 		Assert(list_length(*outer_parts) == list_length(*inner_parts));
1674 		Assert(list_length(*outer_parts) == next_index);
1675 
1676 		/* Make a PartitionBoundInfo struct to return. */
1677 		merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
1678 													  merged_datums,
1679 													  merged_kinds,
1680 													  merged_indexes,
1681 													  -1,
1682 													  default_index);
1683 		Assert(merged_bounds);
1684 	}
1685 
1686 cleanup:
1687 	/* Free local memory before returning. */
1688 	list_free(merged_datums);
1689 	list_free(merged_kinds);
1690 	list_free(merged_indexes);
1691 	free_partition_map(&outer_map);
1692 	free_partition_map(&inner_map);
1693 
1694 	return merged_bounds;
1695 }
1696 
1697 /*
1698  * init_partition_map
1699  *		Initialize a PartitionMap struct for given relation
1700  */
1701 static void
init_partition_map(RelOptInfo * rel,PartitionMap * map)1702 init_partition_map(RelOptInfo *rel, PartitionMap *map)
1703 {
1704 	int			nparts = rel->nparts;
1705 	int			i;
1706 
1707 	map->nparts = nparts;
1708 	map->merged_indexes = (int *) palloc(sizeof(int) * nparts);
1709 	map->merged = (bool *) palloc(sizeof(bool) * nparts);
1710 	map->did_remapping = false;
1711 	map->old_indexes = (int *) palloc(sizeof(int) * nparts);
1712 	for (i = 0; i < nparts; i++)
1713 	{
1714 		map->merged_indexes[i] = map->old_indexes[i] = -1;
1715 		map->merged[i] = false;
1716 	}
1717 }
1718 
1719 /*
1720  * free_partition_map
1721  */
1722 static void
free_partition_map(PartitionMap * map)1723 free_partition_map(PartitionMap *map)
1724 {
1725 	pfree(map->merged_indexes);
1726 	pfree(map->merged);
1727 	pfree(map->old_indexes);
1728 }
1729 
1730 /*
1731  * is_dummy_partition --- has partition been proven empty?
1732  */
1733 static bool
is_dummy_partition(RelOptInfo * rel,int part_index)1734 is_dummy_partition(RelOptInfo *rel, int part_index)
1735 {
1736 	RelOptInfo *part_rel;
1737 
1738 	Assert(part_index >= 0);
1739 	part_rel = rel->part_rels[part_index];
1740 	if (part_rel == NULL || IS_DUMMY_REL(part_rel))
1741 		return true;
1742 	return false;
1743 }
1744 
1745 /*
1746  * merge_matching_partitions
1747  *		Try to merge given outer/inner partitions, and return the index of a
1748  *		merged partition produced from them if successful, -1 otherwise
1749  *
1750  * If the merged partition is newly created, *next_index is incremented.
1751  */
1752 static int
merge_matching_partitions(PartitionMap * outer_map,PartitionMap * inner_map,int outer_index,int inner_index,int * next_index)1753 merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map,
1754 						  int outer_index, int inner_index, int *next_index)
1755 {
1756 	int			outer_merged_index;
1757 	int			inner_merged_index;
1758 	bool		outer_merged;
1759 	bool		inner_merged;
1760 
1761 	Assert(outer_index >= 0 && outer_index < outer_map->nparts);
1762 	outer_merged_index = outer_map->merged_indexes[outer_index];
1763 	outer_merged = outer_map->merged[outer_index];
1764 	Assert(inner_index >= 0 && inner_index < inner_map->nparts);
1765 	inner_merged_index = inner_map->merged_indexes[inner_index];
1766 	inner_merged = inner_map->merged[inner_index];
1767 
1768 	/*
1769 	 * Handle cases where we have already assigned a merged partition to each
1770 	 * of the given partitions.
1771 	 */
1772 	if (outer_merged_index >= 0 && inner_merged_index >= 0)
1773 	{
1774 		/*
1775 		 * If the mereged partitions are the same, no need to do anything;
1776 		 * return the index of the merged partitions.  Otherwise, if each of
1777 		 * the given partitions has been merged with a dummy partition on the
1778 		 * other side, re-map them to either of the two merged partitions.
1779 		 * Otherwise, they can't be merged, so return -1.
1780 		 */
1781 		if (outer_merged_index == inner_merged_index)
1782 		{
1783 			Assert(outer_merged);
1784 			Assert(inner_merged);
1785 			return outer_merged_index;
1786 		}
1787 		if (!outer_merged && !inner_merged)
1788 		{
1789 			/*
1790 			 * This can only happen for a list-partitioning case.  We re-map
1791 			 * them to the merged partition with the smaller of the two merged
1792 			 * indexes to preserve the property that the canonical order of
1793 			 * list partitions is determined by the indexes assigned to the
1794 			 * smallest list value of each partition.
1795 			 */
1796 			if (outer_merged_index < inner_merged_index)
1797 			{
1798 				outer_map->merged[outer_index] = true;
1799 				inner_map->merged_indexes[inner_index] = outer_merged_index;
1800 				inner_map->merged[inner_index] = true;
1801 				inner_map->did_remapping = true;
1802 				inner_map->old_indexes[inner_index] = inner_merged_index;
1803 				return outer_merged_index;
1804 			}
1805 			else
1806 			{
1807 				inner_map->merged[inner_index] = true;
1808 				outer_map->merged_indexes[outer_index] = inner_merged_index;
1809 				outer_map->merged[outer_index] = true;
1810 				outer_map->did_remapping = true;
1811 				outer_map->old_indexes[outer_index] = outer_merged_index;
1812 				return inner_merged_index;
1813 			}
1814 		}
1815 		return -1;
1816 	}
1817 
1818 	/* At least one of the given partitions should not have yet been merged. */
1819 	Assert(outer_merged_index == -1 || inner_merged_index == -1);
1820 
1821 	/*
1822 	 * If neither of them has been merged, merge them.  Otherwise, if one has
1823 	 * been merged with a dummy partition on the other side (and the other
1824 	 * hasn't yet been merged with anything), re-merge them.  Otherwise, they
1825 	 * can't be merged, so return -1.
1826 	 */
1827 	if (outer_merged_index == -1 && inner_merged_index == -1)
1828 	{
1829 		int			merged_index = *next_index;
1830 
1831 		Assert(!outer_merged);
1832 		Assert(!inner_merged);
1833 		outer_map->merged_indexes[outer_index] = merged_index;
1834 		outer_map->merged[outer_index] = true;
1835 		inner_map->merged_indexes[inner_index] = merged_index;
1836 		inner_map->merged[inner_index] = true;
1837 		*next_index = *next_index + 1;
1838 		return merged_index;
1839 	}
1840 	if (outer_merged_index >= 0 && !outer_map->merged[outer_index])
1841 	{
1842 		Assert(inner_merged_index == -1);
1843 		Assert(!inner_merged);
1844 		inner_map->merged_indexes[inner_index] = outer_merged_index;
1845 		inner_map->merged[inner_index] = true;
1846 		outer_map->merged[outer_index] = true;
1847 		return outer_merged_index;
1848 	}
1849 	if (inner_merged_index >= 0 && !inner_map->merged[inner_index])
1850 	{
1851 		Assert(outer_merged_index == -1);
1852 		Assert(!outer_merged);
1853 		outer_map->merged_indexes[outer_index] = inner_merged_index;
1854 		outer_map->merged[outer_index] = true;
1855 		inner_map->merged[inner_index] = true;
1856 		return inner_merged_index;
1857 	}
1858 	return -1;
1859 }
1860 
1861 /*
1862  * process_outer_partition
1863  *		Try to assign given outer partition a merged partition, and return the
1864  *		index of the merged partition if successful, -1 otherwise
1865  *
1866  * If the partition is newly created, *next_index is incremented.  Also, if it
1867  * is the default partition of the join relation, *default_index is set to the
1868  * index if not already done.
1869  */
1870 static int
process_outer_partition(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_default,bool inner_has_default,int outer_index,int inner_default,JoinType jointype,int * next_index,int * default_index)1871 process_outer_partition(PartitionMap *outer_map,
1872 						PartitionMap *inner_map,
1873 						bool outer_has_default,
1874 						bool inner_has_default,
1875 						int outer_index,
1876 						int inner_default,
1877 						JoinType jointype,
1878 						int *next_index,
1879 						int *default_index)
1880 {
1881 	int			merged_index = -1;
1882 
1883 	Assert(outer_index >= 0);
1884 
1885 	/*
1886 	 * If the inner side has the default partition, a row from the outer
1887 	 * partition might find its join partner in the default partition; try
1888 	 * merging the outer partition with the default partition.  Otherwise,
1889 	 * this should be an outer join, in which case the outer partition has to
1890 	 * be scanned all the way anyway; merge the outer partition with a dummy
1891 	 * partition on the other side.
1892 	 */
1893 	if (inner_has_default)
1894 	{
1895 		Assert(inner_default >= 0);
1896 
1897 		/*
1898 		 * If the outer side has the default partition as well, the default
1899 		 * partition on the inner side will have two matching partitions on
1900 		 * the other side: the outer partition and the default partition on
1901 		 * the outer side.  Partitionwise join doesn't handle this scenario
1902 		 * yet.
1903 		 */
1904 		if (outer_has_default)
1905 			return -1;
1906 
1907 		merged_index = merge_matching_partitions(outer_map, inner_map,
1908 												 outer_index, inner_default,
1909 												 next_index);
1910 		if (merged_index == -1)
1911 			return -1;
1912 
1913 		/*
1914 		 * If this is a FULL join, the default partition on the inner side has
1915 		 * to be scanned all the way anyway, so the resulting partition will
1916 		 * contain all key values from the default partition, which any other
1917 		 * partition of the join relation will not contain.  Thus the
1918 		 * resulting partition will act as the default partition of the join
1919 		 * relation; record the index in *default_index if not already done.
1920 		 */
1921 		if (jointype == JOIN_FULL)
1922 		{
1923 			if (*default_index == -1)
1924 				*default_index = merged_index;
1925 			else
1926 				Assert(*default_index == merged_index);
1927 		}
1928 	}
1929 	else
1930 	{
1931 		Assert(IS_OUTER_JOIN(jointype));
1932 		Assert(jointype != JOIN_RIGHT);
1933 
1934 		/* If we have already assigned a partition, no need to do anything. */
1935 		merged_index = outer_map->merged_indexes[outer_index];
1936 		if (merged_index == -1)
1937 			merged_index = merge_partition_with_dummy(outer_map, outer_index,
1938 													  next_index);
1939 	}
1940 	return merged_index;
1941 }
1942 
1943 /*
1944  * process_inner_partition
1945  *		Try to assign given inner partition a merged partition, and return the
1946  *		index of the merged partition if successful, -1 otherwise
1947  *
1948  * If the partition is newly created, *next_index is incremented.  Also, if it
1949  * is the default partition of the join relation, *default_index is set to the
1950  * index if not already done.
1951  */
1952 static int
process_inner_partition(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_default,bool inner_has_default,int inner_index,int outer_default,JoinType jointype,int * next_index,int * default_index)1953 process_inner_partition(PartitionMap *outer_map,
1954 						PartitionMap *inner_map,
1955 						bool outer_has_default,
1956 						bool inner_has_default,
1957 						int inner_index,
1958 						int outer_default,
1959 						JoinType jointype,
1960 						int *next_index,
1961 						int *default_index)
1962 {
1963 	int			merged_index = -1;
1964 
1965 	Assert(inner_index >= 0);
1966 
1967 	/*
1968 	 * If the outer side has the default partition, a row from the inner
1969 	 * partition might find its join partner in the default partition; try
1970 	 * merging the inner partition with the default partition.  Otherwise,
1971 	 * this should be a FULL join, in which case the inner partition has to be
1972 	 * scanned all the way anyway; merge the inner partition with a dummy
1973 	 * partition on the other side.
1974 	 */
1975 	if (outer_has_default)
1976 	{
1977 		Assert(outer_default >= 0);
1978 
1979 		/*
1980 		 * If the inner side has the default partition as well, the default
1981 		 * partition on the outer side will have two matching partitions on
1982 		 * the other side: the inner partition and the default partition on
1983 		 * the inner side.  Partitionwise join doesn't handle this scenario
1984 		 * yet.
1985 		 */
1986 		if (inner_has_default)
1987 			return -1;
1988 
1989 		merged_index = merge_matching_partitions(outer_map, inner_map,
1990 												 outer_default, inner_index,
1991 												 next_index);
1992 		if (merged_index == -1)
1993 			return -1;
1994 
1995 		/*
1996 		 * If this is an outer join, the default partition on the outer side
1997 		 * has to be scanned all the way anyway, so the resulting partition
1998 		 * will contain all key values from the default partition, which any
1999 		 * other partition of the join relation will not contain.  Thus the
2000 		 * resulting partition will act as the default partition of the join
2001 		 * relation; record the index in *default_index if not already done.
2002 		 */
2003 		if (IS_OUTER_JOIN(jointype))
2004 		{
2005 			Assert(jointype != JOIN_RIGHT);
2006 			if (*default_index == -1)
2007 				*default_index = merged_index;
2008 			else
2009 				Assert(*default_index == merged_index);
2010 		}
2011 	}
2012 	else
2013 	{
2014 		Assert(jointype == JOIN_FULL);
2015 
2016 		/* If we have already assigned a partition, no need to do anything. */
2017 		merged_index = inner_map->merged_indexes[inner_index];
2018 		if (merged_index == -1)
2019 			merged_index = merge_partition_with_dummy(inner_map, inner_index,
2020 													  next_index);
2021 	}
2022 	return merged_index;
2023 }
2024 
2025 /*
2026  * merge_null_partitions
2027  *		Merge the NULL partitions from a join's outer and inner sides.
2028  *
2029  * If the merged partition produced from them is the NULL partition of the join
2030  * relation, *null_index is set to the index of the merged partition.
2031  *
2032  * Note: We assume here that the join clause for a partitioned join is strict
2033  * because have_partkey_equi_join() requires that the corresponding operator
2034  * be mergejoinable, and we currently assume that mergejoinable operators are
2035  * strict (see MJEvalOuterValues()/MJEvalInnerValues()).
2036  */
2037 static void
merge_null_partitions(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_null,bool inner_has_null,int outer_null,int inner_null,JoinType jointype,int * next_index,int * null_index)2038 merge_null_partitions(PartitionMap *outer_map,
2039 					  PartitionMap *inner_map,
2040 					  bool outer_has_null,
2041 					  bool inner_has_null,
2042 					  int outer_null,
2043 					  int inner_null,
2044 					  JoinType jointype,
2045 					  int *next_index,
2046 					  int *null_index)
2047 {
2048 	bool		consider_outer_null = false;
2049 	bool		consider_inner_null = false;
2050 
2051 	Assert(outer_has_null || inner_has_null);
2052 	Assert(*null_index == -1);
2053 
2054 	/*
2055 	 * Check whether the NULL partitions have already been merged and if so,
2056 	 * set the consider_outer_null/consider_inner_null flags.
2057 	 */
2058 	if (outer_has_null)
2059 	{
2060 		Assert(outer_null >= 0 && outer_null < outer_map->nparts);
2061 		if (outer_map->merged_indexes[outer_null] == -1)
2062 			consider_outer_null = true;
2063 	}
2064 	if (inner_has_null)
2065 	{
2066 		Assert(inner_null >= 0 && inner_null < inner_map->nparts);
2067 		if (inner_map->merged_indexes[inner_null] == -1)
2068 			consider_inner_null = true;
2069 	}
2070 
2071 	/* If both flags are set false, we don't need to do anything. */
2072 	if (!consider_outer_null && !consider_inner_null)
2073 		return;
2074 
2075 	if (consider_outer_null && !consider_inner_null)
2076 	{
2077 		Assert(outer_has_null);
2078 
2079 		/*
2080 		 * If this is an outer join, the NULL partition on the outer side has
2081 		 * to be scanned all the way anyway; merge the NULL partition with a
2082 		 * dummy partition on the other side.  In that case
2083 		 * consider_outer_null means that the NULL partition only contains
2084 		 * NULL values as the key values, so the merged partition will do so;
2085 		 * treat it as the NULL partition of the join relation.
2086 		 */
2087 		if (IS_OUTER_JOIN(jointype))
2088 		{
2089 			Assert(jointype != JOIN_RIGHT);
2090 			*null_index = merge_partition_with_dummy(outer_map, outer_null,
2091 													 next_index);
2092 		}
2093 	}
2094 	else if (!consider_outer_null && consider_inner_null)
2095 	{
2096 		Assert(inner_has_null);
2097 
2098 		/*
2099 		 * If this is a FULL join, the NULL partition on the inner side has to
2100 		 * be scanned all the way anyway; merge the NULL partition with a
2101 		 * dummy partition on the other side.  In that case
2102 		 * consider_inner_null means that the NULL partition only contains
2103 		 * NULL values as the key values, so the merged partition will do so;
2104 		 * treat it as the NULL partition of the join relation.
2105 		 */
2106 		if (jointype == JOIN_FULL)
2107 			*null_index = merge_partition_with_dummy(inner_map, inner_null,
2108 													 next_index);
2109 	}
2110 	else
2111 	{
2112 		Assert(consider_outer_null && consider_inner_null);
2113 		Assert(outer_has_null);
2114 		Assert(inner_has_null);
2115 
2116 		/*
2117 		 * If this is an outer join, the NULL partition on the outer side (and
2118 		 * that on the inner side if this is a FULL join) have to be scanned
2119 		 * all the way anyway, so merge them.  Note that each of the NULL
2120 		 * partitions isn't merged yet, so they should be merged successfully.
2121 		 * Like the above, each of the NULL partitions only contains NULL
2122 		 * values as the key values, so the merged partition will do so; treat
2123 		 * it as the NULL partition of the join relation.
2124 		 *
2125 		 * Note: if this an INNER/SEMI join, the join clause will never be
2126 		 * satisfied by two NULL values (see comments above), so both the NULL
2127 		 * partitions can be eliminated.
2128 		 */
2129 		if (IS_OUTER_JOIN(jointype))
2130 		{
2131 			Assert(jointype != JOIN_RIGHT);
2132 			*null_index = merge_matching_partitions(outer_map, inner_map,
2133 													outer_null, inner_null,
2134 													next_index);
2135 			Assert(*null_index >= 0);
2136 		}
2137 	}
2138 }
2139 
2140 /*
2141  * merge_default_partitions
2142  *		Merge the default partitions from a join's outer and inner sides.
2143  *
2144  * If the merged partition produced from them is the default partition of the
2145  * join relation, *default_index is set to the index of the merged partition.
2146  */
2147 static void
merge_default_partitions(PartitionMap * outer_map,PartitionMap * inner_map,bool outer_has_default,bool inner_has_default,int outer_default,int inner_default,JoinType jointype,int * next_index,int * default_index)2148 merge_default_partitions(PartitionMap *outer_map,
2149 						 PartitionMap *inner_map,
2150 						 bool outer_has_default,
2151 						 bool inner_has_default,
2152 						 int outer_default,
2153 						 int inner_default,
2154 						 JoinType jointype,
2155 						 int *next_index,
2156 						 int *default_index)
2157 {
2158 	int			outer_merged_index = -1;
2159 	int			inner_merged_index = -1;
2160 
2161 	Assert(outer_has_default || inner_has_default);
2162 
2163 	/* Get the merged partition indexes for the default partitions. */
2164 	if (outer_has_default)
2165 	{
2166 		Assert(outer_default >= 0 && outer_default < outer_map->nparts);
2167 		outer_merged_index = outer_map->merged_indexes[outer_default];
2168 	}
2169 	if (inner_has_default)
2170 	{
2171 		Assert(inner_default >= 0 && inner_default < inner_map->nparts);
2172 		inner_merged_index = inner_map->merged_indexes[inner_default];
2173 	}
2174 
2175 	if (outer_has_default && !inner_has_default)
2176 	{
2177 		/*
2178 		 * If this is an outer join, the default partition on the outer side
2179 		 * has to be scanned all the way anyway; if we have not yet assigned a
2180 		 * partition, merge the default partition with a dummy partition on
2181 		 * the other side.  The merged partition will act as the default
2182 		 * partition of the join relation (see comments in
2183 		 * process_inner_partition()).
2184 		 */
2185 		if (IS_OUTER_JOIN(jointype))
2186 		{
2187 			Assert(jointype != JOIN_RIGHT);
2188 			if (outer_merged_index == -1)
2189 			{
2190 				Assert(*default_index == -1);
2191 				*default_index = merge_partition_with_dummy(outer_map,
2192 															outer_default,
2193 															next_index);
2194 			}
2195 			else
2196 				Assert(*default_index == outer_merged_index);
2197 		}
2198 		else
2199 			Assert(*default_index == -1);
2200 	}
2201 	else if (!outer_has_default && inner_has_default)
2202 	{
2203 		/*
2204 		 * If this is a FULL join, the default partition on the inner side has
2205 		 * to be scanned all the way anyway; if we have not yet assigned a
2206 		 * partition, merge the default partition with a dummy partition on
2207 		 * the other side.  The merged partition will act as the default
2208 		 * partition of the join relation (see comments in
2209 		 * process_outer_partition()).
2210 		 */
2211 		if (jointype == JOIN_FULL)
2212 		{
2213 			if (inner_merged_index == -1)
2214 			{
2215 				Assert(*default_index == -1);
2216 				*default_index = merge_partition_with_dummy(inner_map,
2217 															inner_default,
2218 															next_index);
2219 			}
2220 			else
2221 				Assert(*default_index == inner_merged_index);
2222 		}
2223 		else
2224 			Assert(*default_index == -1);
2225 	}
2226 	else
2227 	{
2228 		Assert(outer_has_default && inner_has_default);
2229 
2230 		/*
2231 		 * The default partitions have to be joined with each other, so merge
2232 		 * them.  Note that each of the default partitions isn't merged yet
2233 		 * (see, process_outer_partition()/process_innerer_partition()), so
2234 		 * they should be merged successfully.  The merged partition will act
2235 		 * as the default partition of the join relation.
2236 		 */
2237 		Assert(outer_merged_index == -1);
2238 		Assert(inner_merged_index == -1);
2239 		Assert(*default_index == -1);
2240 		*default_index = merge_matching_partitions(outer_map,
2241 												   inner_map,
2242 												   outer_default,
2243 												   inner_default,
2244 												   next_index);
2245 		Assert(*default_index >= 0);
2246 	}
2247 }
2248 
2249 /*
2250  * merge_partition_with_dummy
2251  *		Assign given partition a new partition of a join relation
2252  *
2253  * Note: The caller assumes that the given partition doesn't have a non-dummy
2254  * matching partition on the other side, but if the given partition finds the
2255  * matching partition later, we will adjust the assignment.
2256  */
2257 static int
merge_partition_with_dummy(PartitionMap * map,int index,int * next_index)2258 merge_partition_with_dummy(PartitionMap *map, int index, int *next_index)
2259 {
2260 	int			merged_index = *next_index;
2261 
2262 	Assert(index >= 0 && index < map->nparts);
2263 	Assert(map->merged_indexes[index] == -1);
2264 	Assert(!map->merged[index]);
2265 	map->merged_indexes[index] = merged_index;
2266 	/* Leave the merged flag alone! */
2267 	*next_index = *next_index + 1;
2268 	return merged_index;
2269 }
2270 
2271 /*
2272  * fix_merged_indexes
2273  *		Adjust merged indexes of re-merged partitions
2274  */
2275 static void
fix_merged_indexes(PartitionMap * outer_map,PartitionMap * inner_map,int nmerged,List * merged_indexes)2276 fix_merged_indexes(PartitionMap *outer_map, PartitionMap *inner_map,
2277 				   int nmerged, List *merged_indexes)
2278 {
2279 	int		   *new_indexes;
2280 	int			merged_index;
2281 	int			i;
2282 	ListCell   *lc;
2283 
2284 	Assert(nmerged > 0);
2285 
2286 	new_indexes = (int *) palloc(sizeof(int) * nmerged);
2287 	for (i = 0; i < nmerged; i++)
2288 		new_indexes[i] = -1;
2289 
2290 	/* Build the mapping of old merged indexes to new merged indexes. */
2291 	if (outer_map->did_remapping)
2292 	{
2293 		for (i = 0; i < outer_map->nparts; i++)
2294 		{
2295 			merged_index = outer_map->old_indexes[i];
2296 			if (merged_index >= 0)
2297 				new_indexes[merged_index] = outer_map->merged_indexes[i];
2298 		}
2299 	}
2300 	if (inner_map->did_remapping)
2301 	{
2302 		for (i = 0; i < inner_map->nparts; i++)
2303 		{
2304 			merged_index = inner_map->old_indexes[i];
2305 			if (merged_index >= 0)
2306 				new_indexes[merged_index] = inner_map->merged_indexes[i];
2307 		}
2308 	}
2309 
2310 	/* Fix the merged_indexes list using the mapping. */
2311 	foreach(lc, merged_indexes)
2312 	{
2313 		merged_index = lfirst_int(lc);
2314 		Assert(merged_index >= 0);
2315 		if (new_indexes[merged_index] >= 0)
2316 			lfirst_int(lc) = new_indexes[merged_index];
2317 	}
2318 
2319 	pfree(new_indexes);
2320 }
2321 
2322 /*
2323  * generate_matching_part_pairs
2324  *		Generate a pair of lists of partitions that produce merged partitions
2325  *
2326  * The lists of partitions are built in the order of merged partition indexes,
2327  * and returned in *outer_parts and *inner_parts.
2328  */
2329 static void
generate_matching_part_pairs(RelOptInfo * outer_rel,RelOptInfo * inner_rel,PartitionMap * outer_map,PartitionMap * inner_map,int nmerged,List ** outer_parts,List ** inner_parts)2330 generate_matching_part_pairs(RelOptInfo *outer_rel, RelOptInfo *inner_rel,
2331 							 PartitionMap *outer_map, PartitionMap *inner_map,
2332 							 int nmerged,
2333 							 List **outer_parts, List **inner_parts)
2334 {
2335 	int			outer_nparts = outer_map->nparts;
2336 	int			inner_nparts = inner_map->nparts;
2337 	int		   *outer_indexes;
2338 	int		   *inner_indexes;
2339 	int			max_nparts;
2340 	int			i;
2341 
2342 	Assert(nmerged > 0);
2343 	Assert(*outer_parts == NIL);
2344 	Assert(*inner_parts == NIL);
2345 
2346 	outer_indexes = (int *) palloc(sizeof(int) * nmerged);
2347 	inner_indexes = (int *) palloc(sizeof(int) * nmerged);
2348 	for (i = 0; i < nmerged; i++)
2349 		outer_indexes[i] = inner_indexes[i] = -1;
2350 
2351 	/* Set pairs of matching partitions. */
2352 	Assert(outer_nparts == outer_rel->nparts);
2353 	Assert(inner_nparts == inner_rel->nparts);
2354 	max_nparts = Max(outer_nparts, inner_nparts);
2355 	for (i = 0; i < max_nparts; i++)
2356 	{
2357 		if (i < outer_nparts)
2358 		{
2359 			int			merged_index = outer_map->merged_indexes[i];
2360 
2361 			if (merged_index >= 0)
2362 			{
2363 				Assert(merged_index < nmerged);
2364 				outer_indexes[merged_index] = i;
2365 			}
2366 		}
2367 		if (i < inner_nparts)
2368 		{
2369 			int			merged_index = inner_map->merged_indexes[i];
2370 
2371 			if (merged_index >= 0)
2372 			{
2373 				Assert(merged_index < nmerged);
2374 				inner_indexes[merged_index] = i;
2375 			}
2376 		}
2377 	}
2378 
2379 	/* Build the list pairs. */
2380 	for (i = 0; i < nmerged; i++)
2381 	{
2382 		int			outer_index = outer_indexes[i];
2383 		int			inner_index = inner_indexes[i];
2384 
2385 		/*
2386 		 * If both partitions are dummy, it means the merged partition that
2387 		 * had been assigned to the outer/inner partition was removed when
2388 		 * re-merging the outer/inner partition in
2389 		 * merge_matching_partitions(); ignore the merged partition.
2390 		 */
2391 		if (outer_index == -1 && inner_index == -1)
2392 			continue;
2393 
2394 		*outer_parts = lappend(*outer_parts, outer_index >= 0 ?
2395 							   outer_rel->part_rels[outer_index] : NULL);
2396 		*inner_parts = lappend(*inner_parts, inner_index >= 0 ?
2397 							   inner_rel->part_rels[inner_index] : NULL);
2398 	}
2399 
2400 	pfree(outer_indexes);
2401 	pfree(inner_indexes);
2402 }
2403 
2404 /*
2405  * build_merged_partition_bounds
2406  *		Create a PartitionBoundInfo struct from merged partition bounds
2407  */
2408 static PartitionBoundInfo
build_merged_partition_bounds(char strategy,List * merged_datums,List * merged_kinds,List * merged_indexes,int null_index,int default_index)2409 build_merged_partition_bounds(char strategy, List *merged_datums,
2410 							  List *merged_kinds, List *merged_indexes,
2411 							  int null_index, int default_index)
2412 {
2413 	PartitionBoundInfo merged_bounds;
2414 	int			ndatums = list_length(merged_datums);
2415 	int			pos;
2416 	ListCell   *lc;
2417 
2418 	merged_bounds = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
2419 	merged_bounds->strategy = strategy;
2420 	merged_bounds->ndatums = ndatums;
2421 
2422 	merged_bounds->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
2423 	pos = 0;
2424 	foreach(lc, merged_datums)
2425 		merged_bounds->datums[pos++] = (Datum *) lfirst(lc);
2426 
2427 	if (strategy == PARTITION_STRATEGY_RANGE)
2428 	{
2429 		Assert(list_length(merged_kinds) == ndatums);
2430 		merged_bounds->kind = (PartitionRangeDatumKind **)
2431 			palloc(sizeof(PartitionRangeDatumKind *) * ndatums);
2432 		pos = 0;
2433 		foreach(lc, merged_kinds)
2434 			merged_bounds->kind[pos++] = (PartitionRangeDatumKind *) lfirst(lc);
2435 
2436 		/* There are ndatums+1 indexes in the case of range partitioning. */
2437 		merged_indexes = lappend_int(merged_indexes, -1);
2438 		ndatums++;
2439 	}
2440 	else
2441 	{
2442 		Assert(strategy == PARTITION_STRATEGY_LIST);
2443 		Assert(merged_kinds == NIL);
2444 		merged_bounds->kind = NULL;
2445 	}
2446 
2447 	Assert(list_length(merged_indexes) == ndatums);
2448 	merged_bounds->nindexes = ndatums;
2449 	merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums);
2450 	pos = 0;
2451 	foreach(lc, merged_indexes)
2452 		merged_bounds->indexes[pos++] = lfirst_int(lc);
2453 
2454 	merged_bounds->null_index = null_index;
2455 	merged_bounds->default_index = default_index;
2456 
2457 	return merged_bounds;
2458 }
2459 
2460 /*
2461  * get_range_partition
2462  *		Get the next non-dummy partition of a range-partitioned relation,
2463  *		returning the index of that partition
2464  *
2465  * *lb and *ub are set to the lower and upper bounds of that partition
2466  * respectively, and *lb_pos is advanced to the next lower bound, if any.
2467  */
2468 static int
get_range_partition(RelOptInfo * rel,PartitionBoundInfo bi,int * lb_pos,PartitionRangeBound * lb,PartitionRangeBound * ub)2469 get_range_partition(RelOptInfo *rel,
2470 					PartitionBoundInfo bi,
2471 					int *lb_pos,
2472 					PartitionRangeBound *lb,
2473 					PartitionRangeBound *ub)
2474 {
2475 	int			part_index;
2476 
2477 	Assert(bi->strategy == PARTITION_STRATEGY_RANGE);
2478 
2479 	do
2480 	{
2481 		part_index = get_range_partition_internal(bi, lb_pos, lb, ub);
2482 		if (part_index == -1)
2483 			return -1;
2484 	} while (is_dummy_partition(rel, part_index));
2485 
2486 	return part_index;
2487 }
2488 
2489 static int
get_range_partition_internal(PartitionBoundInfo bi,int * lb_pos,PartitionRangeBound * lb,PartitionRangeBound * ub)2490 get_range_partition_internal(PartitionBoundInfo bi,
2491 							 int *lb_pos,
2492 							 PartitionRangeBound *lb,
2493 							 PartitionRangeBound *ub)
2494 {
2495 	/* Return the index as -1 if we've exhausted all lower bounds. */
2496 	if (*lb_pos >= bi->ndatums)
2497 		return -1;
2498 
2499 	/* A lower bound should have at least one more bound after it. */
2500 	Assert(*lb_pos + 1 < bi->ndatums);
2501 
2502 	/* Set the lower bound. */
2503 	lb->index = bi->indexes[*lb_pos];
2504 	lb->datums = bi->datums[*lb_pos];
2505 	lb->kind = bi->kind[*lb_pos];
2506 	lb->lower = true;
2507 	/* Set the upper bound. */
2508 	ub->index = bi->indexes[*lb_pos + 1];
2509 	ub->datums = bi->datums[*lb_pos + 1];
2510 	ub->kind = bi->kind[*lb_pos + 1];
2511 	ub->lower = false;
2512 
2513 	/* The index assigned to an upper bound should be valid. */
2514 	Assert(ub->index >= 0);
2515 
2516 	/*
2517 	 * Advance the position to the next lower bound.  If there are no bounds
2518 	 * left beyond the upper bound, we have reached the last lower bound.
2519 	 */
2520 	if (*lb_pos + 2 >= bi->ndatums)
2521 		*lb_pos = bi->ndatums;
2522 	else
2523 	{
2524 		/*
2525 		 * If the index assigned to the bound next to the upper bound isn't
2526 		 * valid, that is the next lower bound; else, the upper bound is also
2527 		 * the lower bound of the next range partition.
2528 		 */
2529 		if (bi->indexes[*lb_pos + 2] < 0)
2530 			*lb_pos = *lb_pos + 2;
2531 		else
2532 			*lb_pos = *lb_pos + 1;
2533 	}
2534 
2535 	return ub->index;
2536 }
2537 
2538 /*
2539  * compare_range_partitions
2540  *		Compare the bounds of two range partitions, and return true if the
2541  *		two partitions overlap, false otherwise
2542  *
2543  * *lb_cmpval is set to -1, 0, or 1 if the outer partition's lower bound is
2544  * lower than, equal to, or higher than the inner partition's lower bound
2545  * respectively.  Likewise, *ub_cmpval is set to -1, 0, or 1 if the outer
2546  * partition's upper bound is lower than, equal to, or higher than the inner
2547  * partition's upper bound respectively.
2548  */
2549 static bool
compare_range_partitions(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,PartitionRangeBound * outer_lb,PartitionRangeBound * outer_ub,PartitionRangeBound * inner_lb,PartitionRangeBound * inner_ub,int * lb_cmpval,int * ub_cmpval)2550 compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
2551 						 Oid *partcollations,
2552 						 PartitionRangeBound *outer_lb,
2553 						 PartitionRangeBound *outer_ub,
2554 						 PartitionRangeBound *inner_lb,
2555 						 PartitionRangeBound *inner_ub,
2556 						 int *lb_cmpval, int *ub_cmpval)
2557 {
2558 	/*
2559 	 * Check if the outer partition's upper bound is lower than the inner
2560 	 * partition's lower bound; if so the partitions aren't overlapping.
2561 	 */
2562 	if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2563 							 outer_ub, inner_lb) < 0)
2564 	{
2565 		*lb_cmpval = -1;
2566 		*ub_cmpval = -1;
2567 		return false;
2568 	}
2569 
2570 	/*
2571 	 * Check if the outer partition's lower bound is higher than the inner
2572 	 * partition's upper bound; if so the partitions aren't overlapping.
2573 	 */
2574 	if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
2575 							 outer_lb, inner_ub) > 0)
2576 	{
2577 		*lb_cmpval = 1;
2578 		*ub_cmpval = 1;
2579 		return false;
2580 	}
2581 
2582 	/* All other cases indicate overlapping partitions. */
2583 	*lb_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2584 									  outer_lb, inner_lb);
2585 	*ub_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
2586 									  outer_ub, inner_ub);
2587 	return true;
2588 }
2589 
2590 /*
2591  * get_merged_range_bounds
2592  *		Given the bounds of range partitions to be joined, determine the bounds
2593  *		of a merged partition produced from the range partitions
2594  *
2595  * *merged_lb and *merged_ub are set to the lower and upper bounds of the
2596  * merged partition.
2597  */
2598 static void
get_merged_range_bounds(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,JoinType jointype,PartitionRangeBound * outer_lb,PartitionRangeBound * outer_ub,PartitionRangeBound * inner_lb,PartitionRangeBound * inner_ub,int lb_cmpval,int ub_cmpval,PartitionRangeBound * merged_lb,PartitionRangeBound * merged_ub)2599 get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2600 						Oid *partcollations, JoinType jointype,
2601 						PartitionRangeBound *outer_lb,
2602 						PartitionRangeBound *outer_ub,
2603 						PartitionRangeBound *inner_lb,
2604 						PartitionRangeBound *inner_ub,
2605 						int lb_cmpval, int ub_cmpval,
2606 						PartitionRangeBound *merged_lb,
2607 						PartitionRangeBound *merged_ub)
2608 {
2609 	Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2610 								outer_lb, inner_lb) == lb_cmpval);
2611 	Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
2612 								outer_ub, inner_ub) == ub_cmpval);
2613 
2614 	switch (jointype)
2615 	{
2616 		case JOIN_INNER:
2617 		case JOIN_SEMI:
2618 
2619 			/*
2620 			 * An INNER/SEMI join will have the rows that fit both sides, so
2621 			 * the lower bound of the merged partition will be the higher of
2622 			 * the two lower bounds, and the upper bound of the merged
2623 			 * partition will be the lower of the two upper bounds.
2624 			 */
2625 			*merged_lb = (lb_cmpval > 0) ? *outer_lb : *inner_lb;
2626 			*merged_ub = (ub_cmpval < 0) ? *outer_ub : *inner_ub;
2627 			break;
2628 
2629 		case JOIN_LEFT:
2630 		case JOIN_ANTI:
2631 
2632 			/*
2633 			 * A LEFT/ANTI join will have all the rows from the outer side, so
2634 			 * the bounds of the merged partition will be the same as the
2635 			 * outer bounds.
2636 			 */
2637 			*merged_lb = *outer_lb;
2638 			*merged_ub = *outer_ub;
2639 			break;
2640 
2641 		case JOIN_FULL:
2642 
2643 			/*
2644 			 * A FULL join will have all the rows from both sides, so the
2645 			 * lower bound of the merged partition will be the lower of the
2646 			 * two lower bounds, and the upper bound of the merged partition
2647 			 * will be the higher of the two upper bounds.
2648 			 */
2649 			*merged_lb = (lb_cmpval < 0) ? *outer_lb : *inner_lb;
2650 			*merged_ub = (ub_cmpval > 0) ? *outer_ub : *inner_ub;
2651 			break;
2652 
2653 		default:
2654 			elog(ERROR, "unrecognized join type: %d", (int) jointype);
2655 	}
2656 }
2657 
2658 /*
2659  * add_merged_range_bounds
2660  *		Add the bounds of a merged partition to the lists of range bounds
2661  */
2662 static void
add_merged_range_bounds(int partnatts,FmgrInfo * partsupfuncs,Oid * partcollations,PartitionRangeBound * merged_lb,PartitionRangeBound * merged_ub,int merged_index,List ** merged_datums,List ** merged_kinds,List ** merged_indexes)2663 add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
2664 						Oid *partcollations,
2665 						PartitionRangeBound *merged_lb,
2666 						PartitionRangeBound *merged_ub,
2667 						int merged_index,
2668 						List **merged_datums,
2669 						List **merged_kinds,
2670 						List **merged_indexes)
2671 {
2672 	int			cmpval;
2673 
2674 	if (!*merged_datums)
2675 	{
2676 		/* First merged partition */
2677 		Assert(!*merged_kinds);
2678 		Assert(!*merged_indexes);
2679 		cmpval = 1;
2680 	}
2681 	else
2682 	{
2683 		PartitionRangeBound prev_ub;
2684 
2685 		Assert(*merged_datums);
2686 		Assert(*merged_kinds);
2687 		Assert(*merged_indexes);
2688 
2689 		/* Get the last upper bound. */
2690 		prev_ub.index = llast_int(*merged_indexes);
2691 		prev_ub.datums = (Datum *) llast(*merged_datums);
2692 		prev_ub.kind = (PartitionRangeDatumKind *) llast(*merged_kinds);
2693 		prev_ub.lower = false;
2694 
2695 		/*
2696 		 * We pass to partition_rbound_cmp() lower1 as false to prevent it
2697 		 * from considering the last upper bound to be smaller than the lower
2698 		 * bound of the merged partition when the values of the two range
2699 		 * bounds compare equal.
2700 		 */
2701 		cmpval = partition_rbound_cmp(partnatts, partsupfuncs, partcollations,
2702 									  merged_lb->datums, merged_lb->kind,
2703 									  false, &prev_ub);
2704 		Assert(cmpval >= 0);
2705 	}
2706 
2707 	/*
2708 	 * If the lower bound is higher than the last upper bound, add the lower
2709 	 * bound with the index as -1 indicating that that is a lower bound; else,
2710 	 * the last upper bound will be reused as the lower bound of the merged
2711 	 * partition, so skip this.
2712 	 */
2713 	if (cmpval > 0)
2714 	{
2715 		*merged_datums = lappend(*merged_datums, merged_lb->datums);
2716 		*merged_kinds = lappend(*merged_kinds, merged_lb->kind);
2717 		*merged_indexes = lappend_int(*merged_indexes, -1);
2718 	}
2719 
2720 	/* Add the upper bound and index of the merged partition. */
2721 	*merged_datums = lappend(*merged_datums, merged_ub->datums);
2722 	*merged_kinds = lappend(*merged_kinds, merged_ub->kind);
2723 	*merged_indexes = lappend_int(*merged_indexes, merged_index);
2724 }
2725 
2726 /*
2727  * partitions_are_ordered
2728  *		Determine whether the partitions described by 'boundinfo' are ordered,
2729  *		that is partitions appearing earlier in the PartitionDesc sequence
2730  *		contain partition keys strictly less than those appearing later.
2731  *		Also, if NULL values are possible, they must come in the last
2732  *		partition defined in the PartitionDesc.
2733  *
2734  * If out of order, or there is insufficient info to know the order,
2735  * then we return false.
2736  */
2737 bool
partitions_are_ordered(PartitionBoundInfo boundinfo,int nparts)2738 partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts)
2739 {
2740 	Assert(boundinfo != NULL);
2741 
2742 	switch (boundinfo->strategy)
2743 	{
2744 		case PARTITION_STRATEGY_RANGE:
2745 
2746 			/*
2747 			 * RANGE-type partitioning guarantees that the partitions can be
2748 			 * scanned in the order that they're defined in the PartitionDesc
2749 			 * to provide sequential, non-overlapping ranges of tuples.
2750 			 * However, if a DEFAULT partition exists then it doesn't work, as
2751 			 * that could contain tuples from either below or above the
2752 			 * defined range, or tuples belonging to gaps between partitions.
2753 			 */
2754 			if (!partition_bound_has_default(boundinfo))
2755 				return true;
2756 			break;
2757 
2758 		case PARTITION_STRATEGY_LIST:
2759 
2760 			/*
2761 			 * LIST partitioning can also guarantee ordering, but only if the
2762 			 * partitions don't accept interleaved values.  We could likely
2763 			 * check for this by looping over the PartitionBound's indexes
2764 			 * array to check that the indexes are in order.  For now, let's
2765 			 * just keep it simple and just accept LIST partitioning when
2766 			 * there's no DEFAULT partition, exactly one value per partition,
2767 			 * and optionally a NULL partition that does not accept any other
2768 			 * values.  Such a NULL partition will come last in the
2769 			 * PartitionDesc, and the other partitions will be properly
2770 			 * ordered.  This is a cheap test to make as it does not require
2771 			 * any per-partition processing.  Maybe we'd like to handle more
2772 			 * complex cases in the future.
2773 			 */
2774 			if (partition_bound_has_default(boundinfo))
2775 				return false;
2776 
2777 			if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo)
2778 				== nparts)
2779 				return true;
2780 			break;
2781 
2782 		default:
2783 			/* HASH, or some other strategy */
2784 			break;
2785 	}
2786 
2787 	return false;
2788 }
2789 
2790 /*
2791  * check_new_partition_bound
2792  *
2793  * Checks if the new partition's bound overlaps any of the existing partitions
2794  * of parent.  Also performs additional checks as necessary per strategy.
2795  */
2796 void
check_new_partition_bound(char * relname,Relation parent,PartitionBoundSpec * spec)2797 check_new_partition_bound(char *relname, Relation parent,
2798 						  PartitionBoundSpec *spec)
2799 {
2800 	PartitionKey key = RelationGetPartitionKey(parent);
2801 	PartitionDesc partdesc = RelationGetPartitionDesc(parent);
2802 	PartitionBoundInfo boundinfo = partdesc->boundinfo;
2803 	ParseState *pstate = make_parsestate(NULL);
2804 	int			with = -1;
2805 	bool		overlap = false;
2806 
2807 	if (spec->is_default)
2808 	{
2809 		/*
2810 		 * The default partition bound never conflicts with any other
2811 		 * partition's; if that's what we're attaching, the only possible
2812 		 * problem is that one already exists, so check for that and we're
2813 		 * done.
2814 		 */
2815 		if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
2816 			return;
2817 
2818 		/* Default partition already exists, error out. */
2819 		ereport(ERROR,
2820 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2821 				 errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
2822 						relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
2823 				 parser_errposition(pstate, spec->location)));
2824 	}
2825 
2826 	switch (key->strategy)
2827 	{
2828 		case PARTITION_STRATEGY_HASH:
2829 			{
2830 				Assert(spec->strategy == PARTITION_STRATEGY_HASH);
2831 				Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
2832 
2833 				if (partdesc->nparts > 0)
2834 				{
2835 					Datum	  **datums = boundinfo->datums;
2836 					int			ndatums = boundinfo->ndatums;
2837 					int			greatest_modulus;
2838 					int			remainder;
2839 					int			offset;
2840 					bool		valid_modulus = true;
2841 					int			prev_modulus,	/* Previous largest modulus */
2842 								next_modulus;	/* Next largest modulus */
2843 
2844 					/*
2845 					 * Check rule that every modulus must be a factor of the
2846 					 * next larger modulus.  For example, if you have a bunch
2847 					 * of partitions that all have modulus 5, you can add a
2848 					 * new partition with modulus 10 or a new partition with
2849 					 * modulus 15, but you cannot add both a partition with
2850 					 * modulus 10 and a partition with modulus 15, because 10
2851 					 * is not a factor of 15.
2852 					 *
2853 					 * Get the greatest (modulus, remainder) pair contained in
2854 					 * boundinfo->datums that is less than or equal to the
2855 					 * (spec->modulus, spec->remainder) pair.
2856 					 */
2857 					offset = partition_hash_bsearch(boundinfo,
2858 													spec->modulus,
2859 													spec->remainder);
2860 					if (offset < 0)
2861 					{
2862 						next_modulus = DatumGetInt32(datums[0][0]);
2863 						valid_modulus = (next_modulus % spec->modulus) == 0;
2864 					}
2865 					else
2866 					{
2867 						prev_modulus = DatumGetInt32(datums[offset][0]);
2868 						valid_modulus = (spec->modulus % prev_modulus) == 0;
2869 
2870 						if (valid_modulus && (offset + 1) < ndatums)
2871 						{
2872 							next_modulus = DatumGetInt32(datums[offset + 1][0]);
2873 							valid_modulus = (next_modulus % spec->modulus) == 0;
2874 						}
2875 					}
2876 
2877 					if (!valid_modulus)
2878 						ereport(ERROR,
2879 								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2880 								 errmsg("every hash partition modulus must be a factor of the next larger modulus")));
2881 
2882 					greatest_modulus = boundinfo->nindexes;
2883 					remainder = spec->remainder;
2884 
2885 					/*
2886 					 * Normally, the lowest remainder that could conflict with
2887 					 * the new partition is equal to the remainder specified
2888 					 * for the new partition, but when the new partition has a
2889 					 * modulus higher than any used so far, we need to adjust.
2890 					 */
2891 					if (remainder >= greatest_modulus)
2892 						remainder = remainder % greatest_modulus;
2893 
2894 					/* Check every potentially-conflicting remainder. */
2895 					do
2896 					{
2897 						if (boundinfo->indexes[remainder] != -1)
2898 						{
2899 							overlap = true;
2900 							with = boundinfo->indexes[remainder];
2901 							break;
2902 						}
2903 						remainder += spec->modulus;
2904 					} while (remainder < greatest_modulus);
2905 				}
2906 
2907 				break;
2908 			}
2909 
2910 		case PARTITION_STRATEGY_LIST:
2911 			{
2912 				Assert(spec->strategy == PARTITION_STRATEGY_LIST);
2913 
2914 				if (partdesc->nparts > 0)
2915 				{
2916 					ListCell   *cell;
2917 
2918 					Assert(boundinfo &&
2919 						   boundinfo->strategy == PARTITION_STRATEGY_LIST &&
2920 						   (boundinfo->ndatums > 0 ||
2921 							partition_bound_accepts_nulls(boundinfo) ||
2922 							partition_bound_has_default(boundinfo)));
2923 
2924 					foreach(cell, spec->listdatums)
2925 					{
2926 						Const	   *val = castNode(Const, lfirst(cell));
2927 
2928 						if (!val->constisnull)
2929 						{
2930 							int			offset;
2931 							bool		equal;
2932 
2933 							offset = partition_list_bsearch(&key->partsupfunc[0],
2934 															key->partcollation,
2935 															boundinfo,
2936 															val->constvalue,
2937 															&equal);
2938 							if (offset >= 0 && equal)
2939 							{
2940 								overlap = true;
2941 								with = boundinfo->indexes[offset];
2942 								break;
2943 							}
2944 						}
2945 						else if (partition_bound_accepts_nulls(boundinfo))
2946 						{
2947 							overlap = true;
2948 							with = boundinfo->null_index;
2949 							break;
2950 						}
2951 					}
2952 				}
2953 
2954 				break;
2955 			}
2956 
2957 		case PARTITION_STRATEGY_RANGE:
2958 			{
2959 				PartitionRangeBound *lower,
2960 						   *upper;
2961 
2962 				Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
2963 				lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
2964 				upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
2965 
2966 				/*
2967 				 * First check if the resulting range would be empty with
2968 				 * specified lower and upper bounds
2969 				 */
2970 				if (partition_rbound_cmp(key->partnatts, key->partsupfunc,
2971 										 key->partcollation, lower->datums,
2972 										 lower->kind, true, upper) >= 0)
2973 				{
2974 					ereport(ERROR,
2975 							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2976 							 errmsg("empty range bound specified for partition \"%s\"",
2977 									relname),
2978 							 errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
2979 									   get_range_partbound_string(spec->lowerdatums),
2980 									   get_range_partbound_string(spec->upperdatums)),
2981 							 parser_errposition(pstate, spec->location)));
2982 				}
2983 
2984 				if (partdesc->nparts > 0)
2985 				{
2986 					int			offset;
2987 					bool		equal;
2988 
2989 					Assert(boundinfo &&
2990 						   boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
2991 						   (boundinfo->ndatums > 0 ||
2992 							partition_bound_has_default(boundinfo)));
2993 
2994 					/*
2995 					 * Test whether the new lower bound (which is treated
2996 					 * inclusively as part of the new partition) lies inside
2997 					 * an existing partition, or in a gap.
2998 					 *
2999 					 * If it's inside an existing partition, the bound at
3000 					 * offset + 1 will be the upper bound of that partition,
3001 					 * and its index will be >= 0.
3002 					 *
3003 					 * If it's in a gap, the bound at offset + 1 will be the
3004 					 * lower bound of the next partition, and its index will
3005 					 * be -1. This is also true if there is no next partition,
3006 					 * since the index array is initialised with an extra -1
3007 					 * at the end.
3008 					 */
3009 					offset = partition_range_bsearch(key->partnatts,
3010 													 key->partsupfunc,
3011 													 key->partcollation,
3012 													 boundinfo, lower,
3013 													 &equal);
3014 
3015 					if (boundinfo->indexes[offset + 1] < 0)
3016 					{
3017 						/*
3018 						 * Check that the new partition will fit in the gap.
3019 						 * For it to fit, the new upper bound must be less
3020 						 * than or equal to the lower bound of the next
3021 						 * partition, if there is one.
3022 						 */
3023 						if (offset + 1 < boundinfo->ndatums)
3024 						{
3025 							int32		cmpval;
3026 							Datum	   *datums;
3027 							PartitionRangeDatumKind *kind;
3028 							bool		is_lower;
3029 
3030 							datums = boundinfo->datums[offset + 1];
3031 							kind = boundinfo->kind[offset + 1];
3032 							is_lower = (boundinfo->indexes[offset + 1] == -1);
3033 
3034 							cmpval = partition_rbound_cmp(key->partnatts,
3035 														  key->partsupfunc,
3036 														  key->partcollation,
3037 														  datums, kind,
3038 														  is_lower, upper);
3039 							if (cmpval < 0)
3040 							{
3041 								/*
3042 								 * The new partition overlaps with the
3043 								 * existing partition between offset + 1 and
3044 								 * offset + 2.
3045 								 */
3046 								overlap = true;
3047 								with = boundinfo->indexes[offset + 2];
3048 							}
3049 						}
3050 					}
3051 					else
3052 					{
3053 						/*
3054 						 * The new partition overlaps with the existing
3055 						 * partition between offset and offset + 1.
3056 						 */
3057 						overlap = true;
3058 						with = boundinfo->indexes[offset + 1];
3059 					}
3060 				}
3061 
3062 				break;
3063 			}
3064 
3065 		default:
3066 			elog(ERROR, "unexpected partition strategy: %d",
3067 				 (int) key->strategy);
3068 	}
3069 
3070 	if (overlap)
3071 	{
3072 		Assert(with >= 0);
3073 		ereport(ERROR,
3074 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
3075 				 errmsg("partition \"%s\" would overlap partition \"%s\"",
3076 						relname, get_rel_name(partdesc->oids[with])),
3077 				 parser_errposition(pstate, spec->location)));
3078 	}
3079 }
3080 
3081 /*
3082  * check_default_partition_contents
3083  *
3084  * This function checks if there exists a row in the default partition that
3085  * would properly belong to the new partition being added.  If it finds one,
3086  * it throws an error.
3087  */
3088 void
check_default_partition_contents(Relation parent,Relation default_rel,PartitionBoundSpec * new_spec)3089 check_default_partition_contents(Relation parent, Relation default_rel,
3090 								 PartitionBoundSpec *new_spec)
3091 {
3092 	List	   *new_part_constraints;
3093 	List	   *def_part_constraints;
3094 	List	   *all_parts;
3095 	ListCell   *lc;
3096 
3097 	new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
3098 		? get_qual_for_list(parent, new_spec)
3099 		: get_qual_for_range(parent, new_spec, false);
3100 	def_part_constraints =
3101 		get_proposed_default_constraint(new_part_constraints);
3102 
3103 	/*
3104 	 * Map the Vars in the constraint expression from parent's attnos to
3105 	 * default_rel's.
3106 	 */
3107 	def_part_constraints =
3108 		map_partition_varattnos(def_part_constraints, 1, default_rel,
3109 								parent);
3110 
3111 	/*
3112 	 * If the existing constraints on the default partition imply that it will
3113 	 * not contain any row that would belong to the new partition, we can
3114 	 * avoid scanning the default partition.
3115 	 */
3116 	if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
3117 	{
3118 		ereport(DEBUG1,
3119 				(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3120 						RelationGetRelationName(default_rel))));
3121 		return;
3122 	}
3123 
3124 	/*
3125 	 * Scan the default partition and its subpartitions, and check for rows
3126 	 * that do not satisfy the revised partition constraints.
3127 	 */
3128 	if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3129 		all_parts = find_all_inheritors(RelationGetRelid(default_rel),
3130 										AccessExclusiveLock, NULL);
3131 	else
3132 		all_parts = list_make1_oid(RelationGetRelid(default_rel));
3133 
3134 	foreach(lc, all_parts)
3135 	{
3136 		Oid			part_relid = lfirst_oid(lc);
3137 		Relation	part_rel;
3138 		Expr	   *partition_constraint;
3139 		EState	   *estate;
3140 		ExprState  *partqualstate = NULL;
3141 		Snapshot	snapshot;
3142 		ExprContext *econtext;
3143 		TableScanDesc scan;
3144 		MemoryContext oldCxt;
3145 		TupleTableSlot *tupslot;
3146 
3147 		/* Lock already taken above. */
3148 		if (part_relid != RelationGetRelid(default_rel))
3149 		{
3150 			part_rel = table_open(part_relid, NoLock);
3151 
3152 			/*
3153 			 * Map the Vars in the constraint expression from default_rel's
3154 			 * the sub-partition's.
3155 			 */
3156 			partition_constraint = make_ands_explicit(def_part_constraints);
3157 			partition_constraint = (Expr *)
3158 				map_partition_varattnos((List *) partition_constraint, 1,
3159 										part_rel, default_rel);
3160 
3161 			/*
3162 			 * If the partition constraints on default partition child imply
3163 			 * that it will not contain any row that would belong to the new
3164 			 * partition, we can avoid scanning the child table.
3165 			 */
3166 			if (PartConstraintImpliedByRelConstraint(part_rel,
3167 													 def_part_constraints))
3168 			{
3169 				ereport(DEBUG1,
3170 						(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
3171 								RelationGetRelationName(part_rel))));
3172 
3173 				table_close(part_rel, NoLock);
3174 				continue;
3175 			}
3176 		}
3177 		else
3178 		{
3179 			part_rel = default_rel;
3180 			partition_constraint = make_ands_explicit(def_part_constraints);
3181 		}
3182 
3183 		/*
3184 		 * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
3185 		 * scanned.
3186 		 */
3187 		if (part_rel->rd_rel->relkind != RELKIND_RELATION)
3188 		{
3189 			if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
3190 				ereport(WARNING,
3191 						(errcode(ERRCODE_CHECK_VIOLATION),
3192 						 errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
3193 								RelationGetRelationName(part_rel),
3194 								RelationGetRelationName(default_rel))));
3195 
3196 			if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3197 				table_close(part_rel, NoLock);
3198 
3199 			continue;
3200 		}
3201 
3202 		estate = CreateExecutorState();
3203 
3204 		/* Build expression execution states for partition check quals */
3205 		partqualstate = ExecPrepareExpr(partition_constraint, estate);
3206 
3207 		econtext = GetPerTupleExprContext(estate);
3208 		snapshot = RegisterSnapshot(GetLatestSnapshot());
3209 		tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
3210 		scan = table_beginscan(part_rel, snapshot, 0, NULL);
3211 
3212 		/*
3213 		 * Switch to per-tuple memory context and reset it for each tuple
3214 		 * produced, so we don't leak memory.
3215 		 */
3216 		oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
3217 
3218 		while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
3219 		{
3220 			econtext->ecxt_scantuple = tupslot;
3221 
3222 			if (!ExecCheck(partqualstate, econtext))
3223 				ereport(ERROR,
3224 						(errcode(ERRCODE_CHECK_VIOLATION),
3225 						 errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
3226 								RelationGetRelationName(default_rel)),
3227 						 errtable(default_rel)));
3228 
3229 			ResetExprContext(econtext);
3230 			CHECK_FOR_INTERRUPTS();
3231 		}
3232 
3233 		MemoryContextSwitchTo(oldCxt);
3234 		table_endscan(scan);
3235 		UnregisterSnapshot(snapshot);
3236 		ExecDropSingleTupleTableSlot(tupslot);
3237 		FreeExecutorState(estate);
3238 
3239 		if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
3240 			table_close(part_rel, NoLock);	/* keep the lock until commit */
3241 	}
3242 }
3243 
3244 /*
3245  * get_hash_partition_greatest_modulus
3246  *
3247  * Returns the greatest modulus of the hash partition bound.
3248  * This is no longer used in the core code, but we keep it around
3249  * in case external modules are using it.
3250  */
3251 int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)3252 get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
3253 {
3254 	Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
3255 	return bound->nindexes;
3256 }
3257 
3258 /*
3259  * make_one_partition_rbound
3260  *
3261  * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
3262  * and a flag telling whether the bound is lower or not.  Made into a function
3263  * because there are multiple sites that want to use this facility.
3264  */
3265 static PartitionRangeBound *
make_one_partition_rbound(PartitionKey key,int index,List * datums,bool lower)3266 make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
3267 {
3268 	PartitionRangeBound *bound;
3269 	ListCell   *lc;
3270 	int			i;
3271 
3272 	Assert(datums != NIL);
3273 
3274 	bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
3275 	bound->index = index;
3276 	bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
3277 	bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
3278 													  sizeof(PartitionRangeDatumKind));
3279 	bound->lower = lower;
3280 
3281 	i = 0;
3282 	foreach(lc, datums)
3283 	{
3284 		PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
3285 
3286 		/* What's contained in this range datum? */
3287 		bound->kind[i] = datum->kind;
3288 
3289 		if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
3290 		{
3291 			Const	   *val = castNode(Const, datum->value);
3292 
3293 			if (val->constisnull)
3294 				elog(ERROR, "invalid range bound datum");
3295 			bound->datums[i] = val->constvalue;
3296 		}
3297 
3298 		i++;
3299 	}
3300 
3301 	return bound;
3302 }
3303 
3304 /*
3305  * partition_rbound_cmp
3306  *
3307  * Return for two range bounds whether the 1st one (specified in datums1,
3308  * kind1, and lower1) is <, =, or > the bound specified in *b2.
3309  *
3310  * partnatts, partsupfunc and partcollation give the number of attributes in the
3311  * bounds to be compared, comparison function to be used and the collations of
3312  * attributes, respectively.
3313  *
3314  * Note that if the values of the two range bounds compare equal, then we take
3315  * into account whether they are upper or lower bounds, and an upper bound is
3316  * considered to be smaller than a lower bound. This is important to the way
3317  * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
3318  * structure, which only stores the upper bound of a common boundary between
3319  * two contiguous partitions.
3320  */
3321 static int32
partition_rbound_cmp(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * datums1,PartitionRangeDatumKind * kind1,bool lower1,PartitionRangeBound * b2)3322 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
3323 					 Oid *partcollation,
3324 					 Datum *datums1, PartitionRangeDatumKind *kind1,
3325 					 bool lower1, PartitionRangeBound *b2)
3326 {
3327 	int32		cmpval = 0;		/* placate compiler */
3328 	int			i;
3329 	Datum	   *datums2 = b2->datums;
3330 	PartitionRangeDatumKind *kind2 = b2->kind;
3331 	bool		lower2 = b2->lower;
3332 
3333 	for (i = 0; i < partnatts; i++)
3334 	{
3335 		/*
3336 		 * First, handle cases where the column is unbounded, which should not
3337 		 * invoke the comparison procedure, and should not consider any later
3338 		 * columns. Note that the PartitionRangeDatumKind enum elements
3339 		 * compare the same way as the values they represent.
3340 		 */
3341 		if (kind1[i] < kind2[i])
3342 			return -1;
3343 		else if (kind1[i] > kind2[i])
3344 			return 1;
3345 		else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
3346 
3347 			/*
3348 			 * The column bounds are both MINVALUE or both MAXVALUE. No later
3349 			 * columns should be considered, but we still need to compare
3350 			 * whether they are upper or lower bounds.
3351 			 */
3352 			break;
3353 
3354 		cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3355 												 partcollation[i],
3356 												 datums1[i],
3357 												 datums2[i]));
3358 		if (cmpval != 0)
3359 			break;
3360 	}
3361 
3362 	/*
3363 	 * If the comparison is anything other than equal, we're done. If they
3364 	 * compare equal though, we still have to consider whether the boundaries
3365 	 * are inclusive or exclusive.  Exclusive one is considered smaller of the
3366 	 * two.
3367 	 */
3368 	if (cmpval == 0 && lower1 != lower2)
3369 		cmpval = lower1 ? 1 : -1;
3370 
3371 	return cmpval;
3372 }
3373 
3374 /*
3375  * partition_rbound_datum_cmp
3376  *
3377  * Return whether range bound (specified in rb_datums and rb_kind)
3378  * is <, =, or > partition key of tuple (tuple_datums)
3379  *
3380  * n_tuple_datums, partsupfunc and partcollation give number of attributes in
3381  * the bounds to be compared, comparison function to be used and the collations
3382  * of attributes resp.
3383  *
3384  */
3385 int32
partition_rbound_datum_cmp(FmgrInfo * partsupfunc,Oid * partcollation,Datum * rb_datums,PartitionRangeDatumKind * rb_kind,Datum * tuple_datums,int n_tuple_datums)3386 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
3387 						   Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
3388 						   Datum *tuple_datums, int n_tuple_datums)
3389 {
3390 	int			i;
3391 	int32		cmpval = -1;
3392 
3393 	for (i = 0; i < n_tuple_datums; i++)
3394 	{
3395 		if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
3396 			return -1;
3397 		else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
3398 			return 1;
3399 
3400 		cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
3401 												 partcollation[i],
3402 												 rb_datums[i],
3403 												 tuple_datums[i]));
3404 		if (cmpval != 0)
3405 			break;
3406 	}
3407 
3408 	return cmpval;
3409 }
3410 
3411 /*
3412  * partition_hbound_cmp
3413  *
3414  * Compares modulus first, then remainder if modulus is equal.
3415  */
3416 static int32
partition_hbound_cmp(int modulus1,int remainder1,int modulus2,int remainder2)3417 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
3418 {
3419 	if (modulus1 < modulus2)
3420 		return -1;
3421 	if (modulus1 > modulus2)
3422 		return 1;
3423 	if (modulus1 == modulus2 && remainder1 != remainder2)
3424 		return (remainder1 > remainder2) ? 1 : -1;
3425 	return 0;
3426 }
3427 
3428 /*
3429  * partition_list_bsearch
3430  *		Returns the index of the greatest bound datum that is less than equal
3431  * 		to the given value or -1 if all of the bound datums are greater
3432  *
3433  * *is_equal is set to true if the bound datum at the returned index is equal
3434  * to the input value.
3435  */
3436 int
partition_list_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,Datum value,bool * is_equal)3437 partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3438 					   PartitionBoundInfo boundinfo,
3439 					   Datum value, bool *is_equal)
3440 {
3441 	int			lo,
3442 				hi,
3443 				mid;
3444 
3445 	lo = -1;
3446 	hi = boundinfo->ndatums - 1;
3447 	while (lo < hi)
3448 	{
3449 		int32		cmpval;
3450 
3451 		mid = (lo + hi + 1) / 2;
3452 		cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
3453 												 partcollation[0],
3454 												 boundinfo->datums[mid][0],
3455 												 value));
3456 		if (cmpval <= 0)
3457 		{
3458 			lo = mid;
3459 			*is_equal = (cmpval == 0);
3460 			if (*is_equal)
3461 				break;
3462 		}
3463 		else
3464 			hi = mid - 1;
3465 	}
3466 
3467 	return lo;
3468 }
3469 
3470 /*
3471  * partition_range_bsearch
3472  *		Returns the index of the greatest range bound that is less than or
3473  *		equal to the given range bound or -1 if all of the range bounds are
3474  *		greater
3475  *
3476  * *is_equal is set to true if the range bound at the returned index is equal
3477  * to the input range bound
3478  */
3479 static int
partition_range_bsearch(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,PartitionRangeBound * probe,bool * is_equal)3480 partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
3481 						Oid *partcollation,
3482 						PartitionBoundInfo boundinfo,
3483 						PartitionRangeBound *probe, bool *is_equal)
3484 {
3485 	int			lo,
3486 				hi,
3487 				mid;
3488 
3489 	lo = -1;
3490 	hi = boundinfo->ndatums - 1;
3491 	while (lo < hi)
3492 	{
3493 		int32		cmpval;
3494 
3495 		mid = (lo + hi + 1) / 2;
3496 		cmpval = partition_rbound_cmp(partnatts, partsupfunc,
3497 									  partcollation,
3498 									  boundinfo->datums[mid],
3499 									  boundinfo->kind[mid],
3500 									  (boundinfo->indexes[mid] == -1),
3501 									  probe);
3502 		if (cmpval <= 0)
3503 		{
3504 			lo = mid;
3505 			*is_equal = (cmpval == 0);
3506 
3507 			if (*is_equal)
3508 				break;
3509 		}
3510 		else
3511 			hi = mid - 1;
3512 	}
3513 
3514 	return lo;
3515 }
3516 
3517 /*
3518  * partition_range_bsearch
3519  *		Returns the index of the greatest range bound that is less than or
3520  *		equal to the given tuple or -1 if all of the range bounds are greater
3521  *
3522  * *is_equal is set to true if the range bound at the returned index is equal
3523  * to the input tuple.
3524  */
3525 int
partition_range_datum_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,int nvalues,Datum * values,bool * is_equal)3526 partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
3527 							  PartitionBoundInfo boundinfo,
3528 							  int nvalues, Datum *values, bool *is_equal)
3529 {
3530 	int			lo,
3531 				hi,
3532 				mid;
3533 
3534 	lo = -1;
3535 	hi = boundinfo->ndatums - 1;
3536 	while (lo < hi)
3537 	{
3538 		int32		cmpval;
3539 
3540 		mid = (lo + hi + 1) / 2;
3541 		cmpval = partition_rbound_datum_cmp(partsupfunc,
3542 											partcollation,
3543 											boundinfo->datums[mid],
3544 											boundinfo->kind[mid],
3545 											values,
3546 											nvalues);
3547 		if (cmpval <= 0)
3548 		{
3549 			lo = mid;
3550 			*is_equal = (cmpval == 0);
3551 
3552 			if (*is_equal)
3553 				break;
3554 		}
3555 		else
3556 			hi = mid - 1;
3557 	}
3558 
3559 	return lo;
3560 }
3561 
3562 /*
3563  * partition_hash_bsearch
3564  *		Returns the index of the greatest (modulus, remainder) pair that is
3565  *		less than or equal to the given (modulus, remainder) pair or -1 if
3566  *		all of them are greater
3567  */
3568 int
partition_hash_bsearch(PartitionBoundInfo boundinfo,int modulus,int remainder)3569 partition_hash_bsearch(PartitionBoundInfo boundinfo,
3570 					   int modulus, int remainder)
3571 {
3572 	int			lo,
3573 				hi,
3574 				mid;
3575 
3576 	lo = -1;
3577 	hi = boundinfo->ndatums - 1;
3578 	while (lo < hi)
3579 	{
3580 		int32		cmpval,
3581 					bound_modulus,
3582 					bound_remainder;
3583 
3584 		mid = (lo + hi + 1) / 2;
3585 		bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
3586 		bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
3587 		cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
3588 									  modulus, remainder);
3589 		if (cmpval <= 0)
3590 		{
3591 			lo = mid;
3592 
3593 			if (cmpval == 0)
3594 				break;
3595 		}
3596 		else
3597 			hi = mid - 1;
3598 	}
3599 
3600 	return lo;
3601 }
3602 
3603 /*
3604  * qsort_partition_hbound_cmp
3605  *
3606  * Hash bounds are sorted by modulus, then by remainder.
3607  */
3608 static int32
qsort_partition_hbound_cmp(const void * a,const void * b)3609 qsort_partition_hbound_cmp(const void *a, const void *b)
3610 {
3611 	PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
3612 	PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
3613 
3614 	return partition_hbound_cmp(h1->modulus, h1->remainder,
3615 								h2->modulus, h2->remainder);
3616 }
3617 
3618 /*
3619  * qsort_partition_list_value_cmp
3620  *
3621  * Compare two list partition bound datums.
3622  */
3623 static int32
qsort_partition_list_value_cmp(const void * a,const void * b,void * arg)3624 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
3625 {
3626 	Datum		val1 = (*(PartitionListValue *const *) a)->value,
3627 				val2 = (*(PartitionListValue *const *) b)->value;
3628 	PartitionKey key = (PartitionKey) arg;
3629 
3630 	return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
3631 										   key->partcollation[0],
3632 										   val1, val2));
3633 }
3634 
3635 /*
3636  * qsort_partition_rbound_cmp
3637  *
3638  * Used when sorting range bounds across all range partitions.
3639  */
3640 static int32
qsort_partition_rbound_cmp(const void * a,const void * b,void * arg)3641 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
3642 {
3643 	PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
3644 	PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
3645 	PartitionKey key = (PartitionKey) arg;
3646 
3647 	return partition_rbound_cmp(key->partnatts, key->partsupfunc,
3648 								key->partcollation, b1->datums, b1->kind,
3649 								b1->lower, b2);
3650 }
3651 
3652 /*
3653  * get_partition_operator
3654  *
3655  * Return oid of the operator of the given strategy for the given partition
3656  * key column.  It is assumed that the partitioning key is of the same type as
3657  * the chosen partitioning opclass, or at least binary-compatible.  In the
3658  * latter case, *need_relabel is set to true if the opclass is not of a
3659  * polymorphic type (indicating a RelabelType node needed on top), otherwise
3660  * false.
3661  */
3662 static Oid
get_partition_operator(PartitionKey key,int col,StrategyNumber strategy,bool * need_relabel)3663 get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
3664 					   bool *need_relabel)
3665 {
3666 	Oid			operoid;
3667 
3668 	/*
3669 	 * Get the operator in the partitioning opfamily using the opclass'
3670 	 * declared input type as both left- and righttype.
3671 	 */
3672 	operoid = get_opfamily_member(key->partopfamily[col],
3673 								  key->partopcintype[col],
3674 								  key->partopcintype[col],
3675 								  strategy);
3676 	if (!OidIsValid(operoid))
3677 		elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
3678 			 strategy, key->partopcintype[col], key->partopcintype[col],
3679 			 key->partopfamily[col]);
3680 
3681 	/*
3682 	 * If the partition key column is not of the same type as the operator
3683 	 * class and not polymorphic, tell caller to wrap the non-Const expression
3684 	 * in a RelabelType.  This matches what parse_coerce.c does.
3685 	 */
3686 	*need_relabel = (key->parttypid[col] != key->partopcintype[col] &&
3687 					 key->partopcintype[col] != RECORDOID &&
3688 					 !IsPolymorphicType(key->partopcintype[col]));
3689 
3690 	return operoid;
3691 }
3692 
3693 /*
3694  * make_partition_op_expr
3695  *		Returns an Expr for the given partition key column with arg1 and
3696  *		arg2 as its leftop and rightop, respectively
3697  */
3698 static Expr *
make_partition_op_expr(PartitionKey key,int keynum,uint16 strategy,Expr * arg1,Expr * arg2)3699 make_partition_op_expr(PartitionKey key, int keynum,
3700 					   uint16 strategy, Expr *arg1, Expr *arg2)
3701 {
3702 	Oid			operoid;
3703 	bool		need_relabel = false;
3704 	Expr	   *result = NULL;
3705 
3706 	/* Get the correct btree operator for this partitioning column */
3707 	operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
3708 
3709 	/*
3710 	 * Chosen operator may be such that the non-Const operand needs to be
3711 	 * coerced, so apply the same; see the comment in
3712 	 * get_partition_operator().
3713 	 */
3714 	if (!IsA(arg1, Const) &&
3715 		(need_relabel ||
3716 		 key->partcollation[keynum] != key->parttypcoll[keynum]))
3717 		arg1 = (Expr *) makeRelabelType(arg1,
3718 										key->partopcintype[keynum],
3719 										-1,
3720 										key->partcollation[keynum],
3721 										COERCE_EXPLICIT_CAST);
3722 
3723 	/* Generate the actual expression */
3724 	switch (key->strategy)
3725 	{
3726 		case PARTITION_STRATEGY_LIST:
3727 			{
3728 				List	   *elems = (List *) arg2;
3729 				int			nelems = list_length(elems);
3730 
3731 				Assert(nelems >= 1);
3732 				Assert(keynum == 0);
3733 
3734 				if (nelems > 1 &&
3735 					!type_is_array(key->parttypid[keynum]))
3736 				{
3737 					ArrayExpr  *arrexpr;
3738 					ScalarArrayOpExpr *saopexpr;
3739 
3740 					/* Construct an ArrayExpr for the right-hand inputs */
3741 					arrexpr = makeNode(ArrayExpr);
3742 					arrexpr->array_typeid =
3743 						get_array_type(key->parttypid[keynum]);
3744 					arrexpr->array_collid = key->parttypcoll[keynum];
3745 					arrexpr->element_typeid = key->parttypid[keynum];
3746 					arrexpr->elements = elems;
3747 					arrexpr->multidims = false;
3748 					arrexpr->location = -1;
3749 
3750 					/* Build leftop = ANY (rightop) */
3751 					saopexpr = makeNode(ScalarArrayOpExpr);
3752 					saopexpr->opno = operoid;
3753 					saopexpr->opfuncid = get_opcode(operoid);
3754 					saopexpr->useOr = true;
3755 					saopexpr->inputcollid = key->partcollation[keynum];
3756 					saopexpr->args = list_make2(arg1, arrexpr);
3757 					saopexpr->location = -1;
3758 
3759 					result = (Expr *) saopexpr;
3760 				}
3761 				else
3762 				{
3763 					List	   *elemops = NIL;
3764 					ListCell   *lc;
3765 
3766 					foreach(lc, elems)
3767 					{
3768 						Expr	   *elem = lfirst(lc),
3769 								   *elemop;
3770 
3771 						elemop = make_opclause(operoid,
3772 											   BOOLOID,
3773 											   false,
3774 											   arg1, elem,
3775 											   InvalidOid,
3776 											   key->partcollation[keynum]);
3777 						elemops = lappend(elemops, elemop);
3778 					}
3779 
3780 					result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
3781 				}
3782 				break;
3783 			}
3784 
3785 		case PARTITION_STRATEGY_RANGE:
3786 			result = make_opclause(operoid,
3787 								   BOOLOID,
3788 								   false,
3789 								   arg1, arg2,
3790 								   InvalidOid,
3791 								   key->partcollation[keynum]);
3792 			break;
3793 
3794 		default:
3795 			elog(ERROR, "invalid partitioning strategy");
3796 			break;
3797 	}
3798 
3799 	return result;
3800 }
3801 
3802 /*
3803  * get_qual_for_hash
3804  *
3805  * Returns a CHECK constraint expression to use as a hash partition's
3806  * constraint, given the parent relation and partition bound structure.
3807  *
3808  * The partition constraint for a hash partition is always a call to the
3809  * built-in function satisfies_hash_partition().
3810  */
3811 static List *
get_qual_for_hash(Relation parent,PartitionBoundSpec * spec)3812 get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
3813 {
3814 	PartitionKey key = RelationGetPartitionKey(parent);
3815 	FuncExpr   *fexpr;
3816 	Node	   *relidConst;
3817 	Node	   *modulusConst;
3818 	Node	   *remainderConst;
3819 	List	   *args;
3820 	ListCell   *partexprs_item;
3821 	int			i;
3822 
3823 	/* Fixed arguments. */
3824 	relidConst = (Node *) makeConst(OIDOID,
3825 									-1,
3826 									InvalidOid,
3827 									sizeof(Oid),
3828 									ObjectIdGetDatum(RelationGetRelid(parent)),
3829 									false,
3830 									true);
3831 
3832 	modulusConst = (Node *) makeConst(INT4OID,
3833 									  -1,
3834 									  InvalidOid,
3835 									  sizeof(int32),
3836 									  Int32GetDatum(spec->modulus),
3837 									  false,
3838 									  true);
3839 
3840 	remainderConst = (Node *) makeConst(INT4OID,
3841 										-1,
3842 										InvalidOid,
3843 										sizeof(int32),
3844 										Int32GetDatum(spec->remainder),
3845 										false,
3846 										true);
3847 
3848 	args = list_make3(relidConst, modulusConst, remainderConst);
3849 	partexprs_item = list_head(key->partexprs);
3850 
3851 	/* Add an argument for each key column. */
3852 	for (i = 0; i < key->partnatts; i++)
3853 	{
3854 		Node	   *keyCol;
3855 
3856 		/* Left operand */
3857 		if (key->partattrs[i] != 0)
3858 		{
3859 			keyCol = (Node *) makeVar(1,
3860 									  key->partattrs[i],
3861 									  key->parttypid[i],
3862 									  key->parttypmod[i],
3863 									  key->parttypcoll[i],
3864 									  0);
3865 		}
3866 		else
3867 		{
3868 			keyCol = (Node *) copyObject(lfirst(partexprs_item));
3869 			partexprs_item = lnext(key->partexprs, partexprs_item);
3870 		}
3871 
3872 		args = lappend(args, keyCol);
3873 	}
3874 
3875 	fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
3876 						 BOOLOID,
3877 						 args,
3878 						 InvalidOid,
3879 						 InvalidOid,
3880 						 COERCE_EXPLICIT_CALL);
3881 
3882 	return list_make1(fexpr);
3883 }
3884 
3885 /*
3886  * get_qual_for_list
3887  *
3888  * Returns an implicit-AND list of expressions to use as a list partition's
3889  * constraint, given the parent relation and partition bound structure.
3890  *
3891  * The function returns NIL for a default partition when it's the only
3892  * partition since in that case there is no constraint.
3893  */
3894 static List *
get_qual_for_list(Relation parent,PartitionBoundSpec * spec)3895 get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
3896 {
3897 	PartitionKey key = RelationGetPartitionKey(parent);
3898 	List	   *result;
3899 	Expr	   *keyCol;
3900 	Expr	   *opexpr;
3901 	NullTest   *nulltest;
3902 	ListCell   *cell;
3903 	List	   *elems = NIL;
3904 	bool		list_has_null = false;
3905 
3906 	/*
3907 	 * Only single-column list partitioning is supported, so we are worried
3908 	 * only about the partition key with index 0.
3909 	 */
3910 	Assert(key->partnatts == 1);
3911 
3912 	/* Construct Var or expression representing the partition column */
3913 	if (key->partattrs[0] != 0)
3914 		keyCol = (Expr *) makeVar(1,
3915 								  key->partattrs[0],
3916 								  key->parttypid[0],
3917 								  key->parttypmod[0],
3918 								  key->parttypcoll[0],
3919 								  0);
3920 	else
3921 		keyCol = (Expr *) copyObject(linitial(key->partexprs));
3922 
3923 	/*
3924 	 * For default list partition, collect datums for all the partitions. The
3925 	 * default partition constraint should check that the partition key is
3926 	 * equal to none of those.
3927 	 */
3928 	if (spec->is_default)
3929 	{
3930 		int			i;
3931 		int			ndatums = 0;
3932 		PartitionDesc pdesc = RelationGetPartitionDesc(parent);
3933 		PartitionBoundInfo boundinfo = pdesc->boundinfo;
3934 
3935 		if (boundinfo)
3936 		{
3937 			ndatums = boundinfo->ndatums;
3938 
3939 			if (partition_bound_accepts_nulls(boundinfo))
3940 				list_has_null = true;
3941 		}
3942 
3943 		/*
3944 		 * If default is the only partition, there need not be any partition
3945 		 * constraint on it.
3946 		 */
3947 		if (ndatums == 0 && !list_has_null)
3948 			return NIL;
3949 
3950 		for (i = 0; i < ndatums; i++)
3951 		{
3952 			Const	   *val;
3953 
3954 			/*
3955 			 * Construct Const from known-not-null datum.  We must be careful
3956 			 * to copy the value, because our result has to be able to outlive
3957 			 * the relcache entry we're copying from.
3958 			 */
3959 			val = makeConst(key->parttypid[0],
3960 							key->parttypmod[0],
3961 							key->parttypcoll[0],
3962 							key->parttyplen[0],
3963 							datumCopy(*boundinfo->datums[i],
3964 									  key->parttypbyval[0],
3965 									  key->parttyplen[0]),
3966 							false,	/* isnull */
3967 							key->parttypbyval[0]);
3968 
3969 			elems = lappend(elems, val);
3970 		}
3971 	}
3972 	else
3973 	{
3974 		/*
3975 		 * Create list of Consts for the allowed values, excluding any nulls.
3976 		 */
3977 		foreach(cell, spec->listdatums)
3978 		{
3979 			Const	   *val = castNode(Const, lfirst(cell));
3980 
3981 			if (val->constisnull)
3982 				list_has_null = true;
3983 			else
3984 				elems = lappend(elems, copyObject(val));
3985 		}
3986 	}
3987 
3988 	if (elems)
3989 	{
3990 		/*
3991 		 * Generate the operator expression from the non-null partition
3992 		 * values.
3993 		 */
3994 		opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
3995 										keyCol, (Expr *) elems);
3996 	}
3997 	else
3998 	{
3999 		/*
4000 		 * If there are no partition values, we don't need an operator
4001 		 * expression.
4002 		 */
4003 		opexpr = NULL;
4004 	}
4005 
4006 	if (!list_has_null)
4007 	{
4008 		/*
4009 		 * Gin up a "col IS NOT NULL" test that will be AND'd with the main
4010 		 * expression.  This might seem redundant, but the partition routing
4011 		 * machinery needs it.
4012 		 */
4013 		nulltest = makeNode(NullTest);
4014 		nulltest->arg = keyCol;
4015 		nulltest->nulltesttype = IS_NOT_NULL;
4016 		nulltest->argisrow = false;
4017 		nulltest->location = -1;
4018 
4019 		result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
4020 	}
4021 	else
4022 	{
4023 		/*
4024 		 * Gin up a "col IS NULL" test that will be OR'd with the main
4025 		 * expression.
4026 		 */
4027 		nulltest = makeNode(NullTest);
4028 		nulltest->arg = keyCol;
4029 		nulltest->nulltesttype = IS_NULL;
4030 		nulltest->argisrow = false;
4031 		nulltest->location = -1;
4032 
4033 		if (opexpr)
4034 		{
4035 			Expr	   *or;
4036 
4037 			or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
4038 			result = list_make1(or);
4039 		}
4040 		else
4041 			result = list_make1(nulltest);
4042 	}
4043 
4044 	/*
4045 	 * Note that, in general, applying NOT to a constraint expression doesn't
4046 	 * necessarily invert the set of rows it accepts, because NOT (NULL) is
4047 	 * NULL.  However, the partition constraints we construct here never
4048 	 * evaluate to NULL, so applying NOT works as intended.
4049 	 */
4050 	if (spec->is_default)
4051 	{
4052 		result = list_make1(make_ands_explicit(result));
4053 		result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
4054 	}
4055 
4056 	return result;
4057 }
4058 
4059 /*
4060  * get_qual_for_range
4061  *
4062  * Returns an implicit-AND list of expressions to use as a range partition's
4063  * constraint, given the parent relation and partition bound structure.
4064  *
4065  * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
4066  * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
4067  * generate an expression tree of the following form:
4068  *
4069  *	(a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4070  *		AND
4071  *	(a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
4072  *		AND
4073  *	(a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
4074  *
4075  * It is often the case that a prefix of lower and upper bound tuples contains
4076  * the same values, for example, (al = au), in which case, we will emit an
4077  * expression tree of the following form:
4078  *
4079  *	(a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
4080  *		AND
4081  *	(a = al)
4082  *		AND
4083  *	(b > bl OR (b = bl AND c >= cl))
4084  *		AND
4085  *	(b < bu OR (b = bu AND c < cu))
4086  *
4087  * If a bound datum is either MINVALUE or MAXVALUE, these expressions are
4088  * simplified using the fact that any value is greater than MINVALUE and less
4089  * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
4090  * true, and we need not emit any expression for it, and the last line becomes
4091  *
4092  *	(b < bu) OR (b = bu), which is simplified to (b <= bu)
4093  *
4094  * In most common cases with only one partition column, say a, the following
4095  * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
4096  *
4097  * For default partition, it returns the negation of the constraints of all
4098  * the other partitions.
4099  *
4100  * External callers should pass for_default as false; we set it to true only
4101  * when recursing.
4102  */
4103 static List *
get_qual_for_range(Relation parent,PartitionBoundSpec * spec,bool for_default)4104 get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
4105 				   bool for_default)
4106 {
4107 	List	   *result = NIL;
4108 	ListCell   *cell1,
4109 			   *cell2,
4110 			   *partexprs_item,
4111 			   *partexprs_item_saved;
4112 	int			i,
4113 				j;
4114 	PartitionRangeDatum *ldatum,
4115 			   *udatum;
4116 	PartitionKey key = RelationGetPartitionKey(parent);
4117 	Expr	   *keyCol;
4118 	Const	   *lower_val,
4119 			   *upper_val;
4120 	List	   *lower_or_arms,
4121 			   *upper_or_arms;
4122 	int			num_or_arms,
4123 				current_or_arm;
4124 	ListCell   *lower_or_start_datum,
4125 			   *upper_or_start_datum;
4126 	bool		need_next_lower_arm,
4127 				need_next_upper_arm;
4128 
4129 	if (spec->is_default)
4130 	{
4131 		List	   *or_expr_args = NIL;
4132 		PartitionDesc pdesc = RelationGetPartitionDesc(parent);
4133 		Oid		   *inhoids = pdesc->oids;
4134 		int			nparts = pdesc->nparts,
4135 					i;
4136 
4137 		for (i = 0; i < nparts; i++)
4138 		{
4139 			Oid			inhrelid = inhoids[i];
4140 			HeapTuple	tuple;
4141 			Datum		datum;
4142 			bool		isnull;
4143 			PartitionBoundSpec *bspec;
4144 
4145 			tuple = SearchSysCache1(RELOID, inhrelid);
4146 			if (!HeapTupleIsValid(tuple))
4147 				elog(ERROR, "cache lookup failed for relation %u", inhrelid);
4148 
4149 			datum = SysCacheGetAttr(RELOID, tuple,
4150 									Anum_pg_class_relpartbound,
4151 									&isnull);
4152 			if (isnull)
4153 				elog(ERROR, "null relpartbound for relation %u", inhrelid);
4154 
4155 			bspec = (PartitionBoundSpec *)
4156 				stringToNode(TextDatumGetCString(datum));
4157 			if (!IsA(bspec, PartitionBoundSpec))
4158 				elog(ERROR, "expected PartitionBoundSpec");
4159 
4160 			if (!bspec->is_default)
4161 			{
4162 				List	   *part_qual;
4163 
4164 				part_qual = get_qual_for_range(parent, bspec, true);
4165 
4166 				/*
4167 				 * AND the constraints of the partition and add to
4168 				 * or_expr_args
4169 				 */
4170 				or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
4171 									   ? makeBoolExpr(AND_EXPR, part_qual, -1)
4172 									   : linitial(part_qual));
4173 			}
4174 			ReleaseSysCache(tuple);
4175 		}
4176 
4177 		if (or_expr_args != NIL)
4178 		{
4179 			Expr	   *other_parts_constr;
4180 
4181 			/*
4182 			 * Combine the constraints obtained for non-default partitions
4183 			 * using OR.  As requested, each of the OR's args doesn't include
4184 			 * the NOT NULL test for partition keys (which is to avoid its
4185 			 * useless repetition).  Add the same now.
4186 			 */
4187 			other_parts_constr =
4188 				makeBoolExpr(AND_EXPR,
4189 							 lappend(get_range_nulltest(key),
4190 									 list_length(or_expr_args) > 1
4191 									 ? makeBoolExpr(OR_EXPR, or_expr_args,
4192 													-1)
4193 									 : linitial(or_expr_args)),
4194 							 -1);
4195 
4196 			/*
4197 			 * Finally, the default partition contains everything *NOT*
4198 			 * contained in the non-default partitions.
4199 			 */
4200 			result = list_make1(makeBoolExpr(NOT_EXPR,
4201 											 list_make1(other_parts_constr), -1));
4202 		}
4203 
4204 		return result;
4205 	}
4206 
4207 	lower_or_start_datum = list_head(spec->lowerdatums);
4208 	upper_or_start_datum = list_head(spec->upperdatums);
4209 	num_or_arms = key->partnatts;
4210 
4211 	/*
4212 	 * If it is the recursive call for default, we skip the get_range_nulltest
4213 	 * to avoid accumulating the NullTest on the same keys for each partition.
4214 	 */
4215 	if (!for_default)
4216 		result = get_range_nulltest(key);
4217 
4218 	/*
4219 	 * Iterate over the key columns and check if the corresponding lower and
4220 	 * upper datums are equal using the btree equality operator for the
4221 	 * column's type.  If equal, we emit single keyCol = common_value
4222 	 * expression.  Starting from the first column for which the corresponding
4223 	 * lower and upper bound datums are not equal, we generate OR expressions
4224 	 * as shown in the function's header comment.
4225 	 */
4226 	i = 0;
4227 	partexprs_item = list_head(key->partexprs);
4228 	partexprs_item_saved = partexprs_item;	/* placate compiler */
4229 	forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
4230 	{
4231 		EState	   *estate;
4232 		MemoryContext oldcxt;
4233 		Expr	   *test_expr;
4234 		ExprState  *test_exprstate;
4235 		Datum		test_result;
4236 		bool		isNull;
4237 
4238 		ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
4239 		udatum = castNode(PartitionRangeDatum, lfirst(cell2));
4240 
4241 		/*
4242 		 * Since get_range_key_properties() modifies partexprs_item, and we
4243 		 * might need to start over from the previous expression in the later
4244 		 * part of this function, save away the current value.
4245 		 */
4246 		partexprs_item_saved = partexprs_item;
4247 
4248 		get_range_key_properties(key, i, ldatum, udatum,
4249 								 &partexprs_item,
4250 								 &keyCol,
4251 								 &lower_val, &upper_val);
4252 
4253 		/*
4254 		 * If either value is NULL, the corresponding partition bound is
4255 		 * either MINVALUE or MAXVALUE, and we treat them as unequal, because
4256 		 * even if they're the same, there is no common value to equate the
4257 		 * key column with.
4258 		 */
4259 		if (!lower_val || !upper_val)
4260 			break;
4261 
4262 		/* Create the test expression */
4263 		estate = CreateExecutorState();
4264 		oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
4265 		test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
4266 										   (Expr *) lower_val,
4267 										   (Expr *) upper_val);
4268 		fix_opfuncids((Node *) test_expr);
4269 		test_exprstate = ExecInitExpr(test_expr, NULL);
4270 		test_result = ExecEvalExprSwitchContext(test_exprstate,
4271 												GetPerTupleExprContext(estate),
4272 												&isNull);
4273 		MemoryContextSwitchTo(oldcxt);
4274 		FreeExecutorState(estate);
4275 
4276 		/* If not equal, go generate the OR expressions */
4277 		if (!DatumGetBool(test_result))
4278 			break;
4279 
4280 		/*
4281 		 * The bounds for the last key column can't be equal, because such a
4282 		 * range partition would never be allowed to be defined (it would have
4283 		 * an empty range otherwise).
4284 		 */
4285 		if (i == key->partnatts - 1)
4286 			elog(ERROR, "invalid range bound specification");
4287 
4288 		/* Equal, so generate keyCol = lower_val expression */
4289 		result = lappend(result,
4290 						 make_partition_op_expr(key, i, BTEqualStrategyNumber,
4291 												keyCol, (Expr *) lower_val));
4292 
4293 		i++;
4294 	}
4295 
4296 	/* First pair of lower_val and upper_val that are not equal. */
4297 	lower_or_start_datum = cell1;
4298 	upper_or_start_datum = cell2;
4299 
4300 	/* OR will have as many arms as there are key columns left. */
4301 	num_or_arms = key->partnatts - i;
4302 	current_or_arm = 0;
4303 	lower_or_arms = upper_or_arms = NIL;
4304 	need_next_lower_arm = need_next_upper_arm = true;
4305 	while (current_or_arm < num_or_arms)
4306 	{
4307 		List	   *lower_or_arm_args = NIL,
4308 				   *upper_or_arm_args = NIL;
4309 
4310 		/* Restart scan of columns from the i'th one */
4311 		j = i;
4312 		partexprs_item = partexprs_item_saved;
4313 
4314 		for_both_cell(cell1, spec->lowerdatums, lower_or_start_datum,
4315 					  cell2, spec->upperdatums, upper_or_start_datum)
4316 		{
4317 			PartitionRangeDatum *ldatum_next = NULL,
4318 					   *udatum_next = NULL;
4319 
4320 			ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
4321 			if (lnext(spec->lowerdatums, cell1))
4322 				ldatum_next = castNode(PartitionRangeDatum,
4323 									   lfirst(lnext(spec->lowerdatums, cell1)));
4324 			udatum = castNode(PartitionRangeDatum, lfirst(cell2));
4325 			if (lnext(spec->upperdatums, cell2))
4326 				udatum_next = castNode(PartitionRangeDatum,
4327 									   lfirst(lnext(spec->upperdatums, cell2)));
4328 			get_range_key_properties(key, j, ldatum, udatum,
4329 									 &partexprs_item,
4330 									 &keyCol,
4331 									 &lower_val, &upper_val);
4332 
4333 			if (need_next_lower_arm && lower_val)
4334 			{
4335 				uint16		strategy;
4336 
4337 				/*
4338 				 * For the non-last columns of this arm, use the EQ operator.
4339 				 * For the last column of this arm, use GT, unless this is the
4340 				 * last column of the whole bound check, or the next bound
4341 				 * datum is MINVALUE, in which case use GE.
4342 				 */
4343 				if (j - i < current_or_arm)
4344 					strategy = BTEqualStrategyNumber;
4345 				else if (j == key->partnatts - 1 ||
4346 						 (ldatum_next &&
4347 						  ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
4348 					strategy = BTGreaterEqualStrategyNumber;
4349 				else
4350 					strategy = BTGreaterStrategyNumber;
4351 
4352 				lower_or_arm_args = lappend(lower_or_arm_args,
4353 											make_partition_op_expr(key, j,
4354 																   strategy,
4355 																   keyCol,
4356 																   (Expr *) lower_val));
4357 			}
4358 
4359 			if (need_next_upper_arm && upper_val)
4360 			{
4361 				uint16		strategy;
4362 
4363 				/*
4364 				 * For the non-last columns of this arm, use the EQ operator.
4365 				 * For the last column of this arm, use LT, unless the next
4366 				 * bound datum is MAXVALUE, in which case use LE.
4367 				 */
4368 				if (j - i < current_or_arm)
4369 					strategy = BTEqualStrategyNumber;
4370 				else if (udatum_next &&
4371 						 udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
4372 					strategy = BTLessEqualStrategyNumber;
4373 				else
4374 					strategy = BTLessStrategyNumber;
4375 
4376 				upper_or_arm_args = lappend(upper_or_arm_args,
4377 											make_partition_op_expr(key, j,
4378 																   strategy,
4379 																   keyCol,
4380 																   (Expr *) upper_val));
4381 			}
4382 
4383 			/*
4384 			 * Did we generate enough of OR's arguments?  First arm considers
4385 			 * the first of the remaining columns, second arm considers first
4386 			 * two of the remaining columns, and so on.
4387 			 */
4388 			++j;
4389 			if (j - i > current_or_arm)
4390 			{
4391 				/*
4392 				 * We must not emit any more arms if the new column that will
4393 				 * be considered is unbounded, or this one was.
4394 				 */
4395 				if (!lower_val || !ldatum_next ||
4396 					ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4397 					need_next_lower_arm = false;
4398 				if (!upper_val || !udatum_next ||
4399 					udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
4400 					need_next_upper_arm = false;
4401 				break;
4402 			}
4403 		}
4404 
4405 		if (lower_or_arm_args != NIL)
4406 			lower_or_arms = lappend(lower_or_arms,
4407 									list_length(lower_or_arm_args) > 1
4408 									? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
4409 									: linitial(lower_or_arm_args));
4410 
4411 		if (upper_or_arm_args != NIL)
4412 			upper_or_arms = lappend(upper_or_arms,
4413 									list_length(upper_or_arm_args) > 1
4414 									? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
4415 									: linitial(upper_or_arm_args));
4416 
4417 		/* If no work to do in the next iteration, break away. */
4418 		if (!need_next_lower_arm && !need_next_upper_arm)
4419 			break;
4420 
4421 		++current_or_arm;
4422 	}
4423 
4424 	/*
4425 	 * Generate the OR expressions for each of lower and upper bounds (if
4426 	 * required), and append to the list of implicitly ANDed list of
4427 	 * expressions.
4428 	 */
4429 	if (lower_or_arms != NIL)
4430 		result = lappend(result,
4431 						 list_length(lower_or_arms) > 1
4432 						 ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
4433 						 : linitial(lower_or_arms));
4434 	if (upper_or_arms != NIL)
4435 		result = lappend(result,
4436 						 list_length(upper_or_arms) > 1
4437 						 ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
4438 						 : linitial(upper_or_arms));
4439 
4440 	/*
4441 	 * As noted above, for non-default, we return list with constant TRUE. If
4442 	 * the result is NIL during the recursive call for default, it implies
4443 	 * this is the only other partition which can hold every value of the key
4444 	 * except NULL. Hence we return the NullTest result skipped earlier.
4445 	 */
4446 	if (result == NIL)
4447 		result = for_default
4448 			? get_range_nulltest(key)
4449 			: list_make1(makeBoolConst(true, false));
4450 
4451 	return result;
4452 }
4453 
4454 /*
4455  * get_range_key_properties
4456  *		Returns range partition key information for a given column
4457  *
4458  * This is a subroutine for get_qual_for_range, and its API is pretty
4459  * specialized to that caller.
4460  *
4461  * Constructs an Expr for the key column (returned in *keyCol) and Consts
4462  * for the lower and upper range limits (returned in *lower_val and
4463  * *upper_val).  For MINVALUE/MAXVALUE limits, NULL is returned instead of
4464  * a Const.  All of these structures are freshly palloc'd.
4465  *
4466  * *partexprs_item points to the cell containing the next expression in
4467  * the key->partexprs list, or NULL.  It may be advanced upon return.
4468  */
4469 static void
get_range_key_properties(PartitionKey key,int keynum,PartitionRangeDatum * ldatum,PartitionRangeDatum * udatum,ListCell ** partexprs_item,Expr ** keyCol,Const ** lower_val,Const ** upper_val)4470 get_range_key_properties(PartitionKey key, int keynum,
4471 						 PartitionRangeDatum *ldatum,
4472 						 PartitionRangeDatum *udatum,
4473 						 ListCell **partexprs_item,
4474 						 Expr **keyCol,
4475 						 Const **lower_val, Const **upper_val)
4476 {
4477 	/* Get partition key expression for this column */
4478 	if (key->partattrs[keynum] != 0)
4479 	{
4480 		*keyCol = (Expr *) makeVar(1,
4481 								   key->partattrs[keynum],
4482 								   key->parttypid[keynum],
4483 								   key->parttypmod[keynum],
4484 								   key->parttypcoll[keynum],
4485 								   0);
4486 	}
4487 	else
4488 	{
4489 		if (*partexprs_item == NULL)
4490 			elog(ERROR, "wrong number of partition key expressions");
4491 		*keyCol = copyObject(lfirst(*partexprs_item));
4492 		*partexprs_item = lnext(key->partexprs, *partexprs_item);
4493 	}
4494 
4495 	/* Get appropriate Const nodes for the bounds */
4496 	if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
4497 		*lower_val = castNode(Const, copyObject(ldatum->value));
4498 	else
4499 		*lower_val = NULL;
4500 
4501 	if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
4502 		*upper_val = castNode(Const, copyObject(udatum->value));
4503 	else
4504 		*upper_val = NULL;
4505 }
4506 
4507 /*
4508  * get_range_nulltest
4509  *
4510  * A non-default range partition table does not currently allow partition
4511  * keys to be null, so emit an IS NOT NULL expression for each key column.
4512  */
4513 static List *
get_range_nulltest(PartitionKey key)4514 get_range_nulltest(PartitionKey key)
4515 {
4516 	List	   *result = NIL;
4517 	NullTest   *nulltest;
4518 	ListCell   *partexprs_item;
4519 	int			i;
4520 
4521 	partexprs_item = list_head(key->partexprs);
4522 	for (i = 0; i < key->partnatts; i++)
4523 	{
4524 		Expr	   *keyCol;
4525 
4526 		if (key->partattrs[i] != 0)
4527 		{
4528 			keyCol = (Expr *) makeVar(1,
4529 									  key->partattrs[i],
4530 									  key->parttypid[i],
4531 									  key->parttypmod[i],
4532 									  key->parttypcoll[i],
4533 									  0);
4534 		}
4535 		else
4536 		{
4537 			if (partexprs_item == NULL)
4538 				elog(ERROR, "wrong number of partition key expressions");
4539 			keyCol = copyObject(lfirst(partexprs_item));
4540 			partexprs_item = lnext(key->partexprs, partexprs_item);
4541 		}
4542 
4543 		nulltest = makeNode(NullTest);
4544 		nulltest->arg = keyCol;
4545 		nulltest->nulltesttype = IS_NOT_NULL;
4546 		nulltest->argisrow = false;
4547 		nulltest->location = -1;
4548 		result = lappend(result, nulltest);
4549 	}
4550 
4551 	return result;
4552 }
4553 
4554 /*
4555  * compute_partition_hash_value
4556  *
4557  * Compute the hash value for given partition key values.
4558  */
4559 uint64
compute_partition_hash_value(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * values,bool * isnull)4560 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
4561 							 Datum *values, bool *isnull)
4562 {
4563 	int			i;
4564 	uint64		rowHash = 0;
4565 	Datum		seed = UInt64GetDatum(HASH_PARTITION_SEED);
4566 
4567 	for (i = 0; i < partnatts; i++)
4568 	{
4569 		/* Nulls are just ignored */
4570 		if (!isnull[i])
4571 		{
4572 			Datum		hash;
4573 
4574 			Assert(OidIsValid(partsupfunc[i].fn_oid));
4575 
4576 			/*
4577 			 * Compute hash for each datum value by calling respective
4578 			 * datatype-specific hash functions of each partition key
4579 			 * attribute.
4580 			 */
4581 			hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i],
4582 									 values[i], seed);
4583 
4584 			/* Form a single 64-bit hash value */
4585 			rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4586 		}
4587 	}
4588 
4589 	return rowHash;
4590 }
4591 
4592 /*
4593  * satisfies_hash_partition
4594  *
4595  * This is an SQL-callable function for use in hash partition constraints.
4596  * The first three arguments are the parent table OID, modulus, and remainder.
4597  * The remaining arguments are the value of the partitioning columns (or
4598  * expressions); these are hashed and the results are combined into a single
4599  * hash value by calling hash_combine64.
4600  *
4601  * Returns true if remainder produced when this computed single hash value is
4602  * divided by the given modulus is equal to given remainder, otherwise false.
4603  * NB: it's important that this never return null, as the constraint machinery
4604  * would consider that to be a "pass".
4605  *
4606  * See get_qual_for_hash() for usage.
4607  */
4608 Datum
satisfies_hash_partition(PG_FUNCTION_ARGS)4609 satisfies_hash_partition(PG_FUNCTION_ARGS)
4610 {
4611 	typedef struct ColumnsHashData
4612 	{
4613 		Oid			relid;
4614 		int			nkeys;
4615 		Oid			variadic_type;
4616 		int16		variadic_typlen;
4617 		bool		variadic_typbyval;
4618 		char		variadic_typalign;
4619 		Oid			partcollid[PARTITION_MAX_KEYS];
4620 		FmgrInfo	partsupfunc[FLEXIBLE_ARRAY_MEMBER];
4621 	} ColumnsHashData;
4622 	Oid			parentId;
4623 	int			modulus;
4624 	int			remainder;
4625 	Datum		seed = UInt64GetDatum(HASH_PARTITION_SEED);
4626 	ColumnsHashData *my_extra;
4627 	uint64		rowHash = 0;
4628 
4629 	/* Return false if the parent OID, modulus, or remainder is NULL. */
4630 	if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
4631 		PG_RETURN_BOOL(false);
4632 	parentId = PG_GETARG_OID(0);
4633 	modulus = PG_GETARG_INT32(1);
4634 	remainder = PG_GETARG_INT32(2);
4635 
4636 	/* Sanity check modulus and remainder. */
4637 	if (modulus <= 0)
4638 		ereport(ERROR,
4639 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4640 				 errmsg("modulus for hash partition must be an integer value greater than zero")));
4641 	if (remainder < 0)
4642 		ereport(ERROR,
4643 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4644 				 errmsg("remainder for hash partition must be an integer value greater than or equal to zero")));
4645 	if (remainder >= modulus)
4646 		ereport(ERROR,
4647 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4648 				 errmsg("remainder for hash partition must be less than modulus")));
4649 
4650 	/*
4651 	 * Cache hash function information.
4652 	 */
4653 	my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4654 	if (my_extra == NULL || my_extra->relid != parentId)
4655 	{
4656 		Relation	parent;
4657 		PartitionKey key;
4658 		int			j;
4659 
4660 		/* Open parent relation and fetch partition key info */
4661 		parent = relation_open(parentId, AccessShareLock);
4662 		key = RelationGetPartitionKey(parent);
4663 
4664 		/* Reject parent table that is not hash-partitioned. */
4665 		if (key == NULL || key->strategy != PARTITION_STRATEGY_HASH)
4666 			ereport(ERROR,
4667 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4668 					 errmsg("\"%s\" is not a hash partitioned table",
4669 							get_rel_name(parentId))));
4670 
4671 		if (!get_fn_expr_variadic(fcinfo->flinfo))
4672 		{
4673 			int			nargs = PG_NARGS() - 3;
4674 
4675 			/* complain if wrong number of column values */
4676 			if (key->partnatts != nargs)
4677 				ereport(ERROR,
4678 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4679 						 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4680 								key->partnatts, nargs)));
4681 
4682 			/* allocate space for our cache */
4683 			fcinfo->flinfo->fn_extra =
4684 				MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4685 									   offsetof(ColumnsHashData, partsupfunc) +
4686 									   sizeof(FmgrInfo) * nargs);
4687 			my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4688 			my_extra->relid = parentId;
4689 			my_extra->nkeys = key->partnatts;
4690 			memcpy(my_extra->partcollid, key->partcollation,
4691 				   key->partnatts * sizeof(Oid));
4692 
4693 			/* check argument types and save fmgr_infos */
4694 			for (j = 0; j < key->partnatts; ++j)
4695 			{
4696 				Oid			argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
4697 
4698 				if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
4699 					ereport(ERROR,
4700 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4701 							 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
4702 									j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
4703 
4704 				fmgr_info_copy(&my_extra->partsupfunc[j],
4705 							   &key->partsupfunc[j],
4706 							   fcinfo->flinfo->fn_mcxt);
4707 			}
4708 		}
4709 		else
4710 		{
4711 			ArrayType  *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4712 
4713 			/* allocate space for our cache -- just one FmgrInfo in this case */
4714 			fcinfo->flinfo->fn_extra =
4715 				MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
4716 									   offsetof(ColumnsHashData, partsupfunc) +
4717 									   sizeof(FmgrInfo));
4718 			my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
4719 			my_extra->relid = parentId;
4720 			my_extra->nkeys = key->partnatts;
4721 			my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
4722 			get_typlenbyvalalign(my_extra->variadic_type,
4723 								 &my_extra->variadic_typlen,
4724 								 &my_extra->variadic_typbyval,
4725 								 &my_extra->variadic_typalign);
4726 			my_extra->partcollid[0] = key->partcollation[0];
4727 
4728 			/* check argument types */
4729 			for (j = 0; j < key->partnatts; ++j)
4730 				if (key->parttypid[j] != my_extra->variadic_type)
4731 					ereport(ERROR,
4732 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4733 							 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
4734 									j + 1,
4735 									format_type_be(key->parttypid[j]),
4736 									format_type_be(my_extra->variadic_type))));
4737 
4738 			fmgr_info_copy(&my_extra->partsupfunc[0],
4739 						   &key->partsupfunc[0],
4740 						   fcinfo->flinfo->fn_mcxt);
4741 		}
4742 
4743 		/* Hold lock until commit */
4744 		relation_close(parent, NoLock);
4745 	}
4746 
4747 	if (!OidIsValid(my_extra->variadic_type))
4748 	{
4749 		int			nkeys = my_extra->nkeys;
4750 		int			i;
4751 
4752 		/*
4753 		 * For a non-variadic call, neither the number of arguments nor their
4754 		 * types can change across calls, so avoid the expense of rechecking
4755 		 * here.
4756 		 */
4757 
4758 		for (i = 0; i < nkeys; i++)
4759 		{
4760 			Datum		hash;
4761 
4762 			/* keys start from fourth argument of function. */
4763 			int			argno = i + 3;
4764 
4765 			if (PG_ARGISNULL(argno))
4766 				continue;
4767 
4768 			hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
4769 									 my_extra->partcollid[i],
4770 									 PG_GETARG_DATUM(argno),
4771 									 seed);
4772 
4773 			/* Form a single 64-bit hash value */
4774 			rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4775 		}
4776 	}
4777 	else
4778 	{
4779 		ArrayType  *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
4780 		int			i;
4781 		int			nelems;
4782 		Datum	   *datum;
4783 		bool	   *isnull;
4784 
4785 		deconstruct_array(variadic_array,
4786 						  my_extra->variadic_type,
4787 						  my_extra->variadic_typlen,
4788 						  my_extra->variadic_typbyval,
4789 						  my_extra->variadic_typalign,
4790 						  &datum, &isnull, &nelems);
4791 
4792 		/* complain if wrong number of column values */
4793 		if (nelems != my_extra->nkeys)
4794 			ereport(ERROR,
4795 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4796 					 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
4797 							my_extra->nkeys, nelems)));
4798 
4799 		for (i = 0; i < nelems; i++)
4800 		{
4801 			Datum		hash;
4802 
4803 			if (isnull[i])
4804 				continue;
4805 
4806 			hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
4807 									 my_extra->partcollid[0],
4808 									 datum[i],
4809 									 seed);
4810 
4811 			/* Form a single 64-bit hash value */
4812 			rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
4813 		}
4814 	}
4815 
4816 	PG_RETURN_BOOL(rowHash % modulus == remainder);
4817 }
4818