1 /*-------------------------------------------------------------------------
2  *
3  * partbounds.c
4  *		Support routines for manipulating partition bounds
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *		  src/backend/partitioning/partbounds.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/relation.h"
18 #include "access/table.h"
19 #include "access/tableam.h"
20 #include "catalog/partition.h"
21 #include "catalog/pg_inherits.h"
22 #include "catalog/pg_type.h"
23 #include "commands/tablecmds.h"
24 #include "executor/executor.h"
25 #include "miscadmin.h"
26 #include "nodes/makefuncs.h"
27 #include "nodes/nodeFuncs.h"
28 #include "parser/parse_coerce.h"
29 #include "partitioning/partbounds.h"
30 #include "partitioning/partdesc.h"
31 #include "partitioning/partprune.h"
32 #include "utils/builtins.h"
33 #include "utils/datum.h"
34 #include "utils/fmgroids.h"
35 #include "utils/hashutils.h"
36 #include "utils/lsyscache.h"
37 #include "utils/partcache.h"
38 #include "utils/rel.h"
39 #include "utils/snapmgr.h"
40 #include "utils/ruleutils.h"
41 #include "utils/syscache.h"
42 
43 /*
44  * When qsort'ing partition bounds after reading from the catalog, each bound
45  * is represented with one of the following structs.
46  */
47 
48 /* One bound of a hash partition */
49 typedef struct PartitionHashBound
50 {
51 	int			modulus;
52 	int			remainder;
53 	int			index;
54 } PartitionHashBound;
55 
56 /* One value coming from some (index'th) list partition */
57 typedef struct PartitionListValue
58 {
59 	int			index;
60 	Datum		value;
61 } PartitionListValue;
62 
63 /* One bound of a range partition */
64 typedef struct PartitionRangeBound
65 {
66 	int			index;
67 	Datum	   *datums;			/* range bound datums */
68 	PartitionRangeDatumKind *kind;	/* the kind of each datum */
69 	bool		lower;			/* this is the lower (vs upper) bound */
70 } PartitionRangeBound;
71 
72 static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
73 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
74 											void *arg);
75 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
76 										void *arg);
77 static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs,
78 											 int nparts, PartitionKey key, int **mapping);
79 static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs,
80 											 int nparts, PartitionKey key, int **mapping);
81 static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs,
82 											  int nparts, PartitionKey key, int **mapping);
83 static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index,
84 													  List *datums, bool lower);
85 static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
86 								  int remainder2);
87 static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
88 								  Oid *partcollation, Datum *datums1,
89 								  PartitionRangeDatumKind *kind1, bool lower1,
90 								  PartitionRangeBound *b2);
91 static int	partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
92 									Oid *partcollation,
93 									PartitionBoundInfo boundinfo,
94 									PartitionRangeBound *probe, bool *is_equal);
95 static Expr *make_partition_op_expr(PartitionKey key, int keynum,
96 									uint16 strategy, Expr *arg1, Expr *arg2);
97 static Oid	get_partition_operator(PartitionKey key, int col,
98 								   StrategyNumber strategy, bool *need_relabel);
99 static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
100 static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
101 static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
102 								bool for_default);
103 static void get_range_key_properties(PartitionKey key, int keynum,
104 									 PartitionRangeDatum *ldatum,
105 									 PartitionRangeDatum *udatum,
106 									 ListCell **partexprs_item,
107 									 Expr **keyCol,
108 									 Const **lower_val, Const **upper_val);
109 static List *get_range_nulltest(PartitionKey key);
110 
111 /*
112  * get_qual_from_partbound
113  *		Given a parser node for partition bound, return the list of executable
114  *		expressions as partition constraint
115  */
116 List *
get_qual_from_partbound(Relation rel,Relation parent,PartitionBoundSpec * spec)117 get_qual_from_partbound(Relation rel, Relation parent,
118 						PartitionBoundSpec *spec)
119 {
120 	PartitionKey key = RelationGetPartitionKey(parent);
121 	List	   *my_qual = NIL;
122 
123 	Assert(key != NULL);
124 
125 	switch (key->strategy)
126 	{
127 		case PARTITION_STRATEGY_HASH:
128 			Assert(spec->strategy == PARTITION_STRATEGY_HASH);
129 			my_qual = get_qual_for_hash(parent, spec);
130 			break;
131 
132 		case PARTITION_STRATEGY_LIST:
133 			Assert(spec->strategy == PARTITION_STRATEGY_LIST);
134 			my_qual = get_qual_for_list(parent, spec);
135 			break;
136 
137 		case PARTITION_STRATEGY_RANGE:
138 			Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
139 			my_qual = get_qual_for_range(parent, spec, false);
140 			break;
141 
142 		default:
143 			elog(ERROR, "unexpected partition strategy: %d",
144 				 (int) key->strategy);
145 	}
146 
147 	return my_qual;
148 }
149 
150 /*
151  *	partition_bounds_create
152  *		Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
153  *		nodes
154  *
155  * This function creates a PartitionBoundInfo and fills the values of its
156  * various members based on the input list.  Importantly, 'datums' array will
157  * contain Datum representation of individual bounds (possibly after
158  * de-duplication as in case of range bounds), sorted in a canonical order
159  * defined by qsort_partition_* functions of respective partitioning methods.
160  * 'indexes' array will contain as many elements as there are bounds (specific
161  * exceptions to this rule are listed in the function body), which represent
162  * the 0-based canonical positions of partitions.
163  *
164  * Upon return from this function, *mapping is set to an array of
165  * list_length(boundspecs) elements, each of which maps the original index of
166  * a partition to its canonical index.
167  *
168  * Note: The objects returned by this function are wholly allocated in the
169  * current memory context.
170  */
171 PartitionBoundInfo
partition_bounds_create(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)172 partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts,
173 						PartitionKey key, int **mapping)
174 {
175 	int			i;
176 
177 	Assert(nparts > 0);
178 
179 	/*
180 	 * For each partitioning method, we first convert the partition bounds
181 	 * from their parser node representation to the internal representation,
182 	 * along with any additional preprocessing (such as de-duplicating range
183 	 * bounds).  Resulting bound datums are then added to the 'datums' array
184 	 * in PartitionBoundInfo.  For each datum added, an integer indicating the
185 	 * canonical partition index is added to the 'indexes' array.
186 	 *
187 	 * For each bound, we remember its partition's position (0-based) in the
188 	 * original list to later map it to the canonical index.
189 	 */
190 
191 	/*
192 	 * Initialize mapping array with invalid values, this is filled within
193 	 * each sub-routine below depending on the bound type.
194 	 */
195 	*mapping = (int *) palloc(sizeof(int) * nparts);
196 	for (i = 0; i < nparts; i++)
197 		(*mapping)[i] = -1;
198 
199 	switch (key->strategy)
200 	{
201 		case PARTITION_STRATEGY_HASH:
202 			return create_hash_bounds(boundspecs, nparts, key, mapping);
203 
204 		case PARTITION_STRATEGY_LIST:
205 			return create_list_bounds(boundspecs, nparts, key, mapping);
206 
207 		case PARTITION_STRATEGY_RANGE:
208 			return create_range_bounds(boundspecs, nparts, key, mapping);
209 
210 		default:
211 			elog(ERROR, "unexpected partition strategy: %d",
212 				 (int) key->strategy);
213 			break;
214 	}
215 
216 	Assert(false);
217 	return NULL;				/* keep compiler quiet */
218 }
219 
220 /*
221  * create_hash_bounds
222  *		Create a PartitionBoundInfo for a hash partitioned table
223  */
224 static PartitionBoundInfo
create_hash_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)225 create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
226 				   PartitionKey key, int **mapping)
227 {
228 	PartitionBoundInfo boundinfo;
229 	PartitionHashBound **hbounds = NULL;
230 	int			i;
231 	int			ndatums = 0;
232 	int			greatest_modulus;
233 
234 	boundinfo = (PartitionBoundInfoData *)
235 		palloc0(sizeof(PartitionBoundInfoData));
236 	boundinfo->strategy = key->strategy;
237 	/* No special hash partitions. */
238 	boundinfo->null_index = -1;
239 	boundinfo->default_index = -1;
240 
241 	ndatums = nparts;
242 	hbounds = (PartitionHashBound **)
243 		palloc(nparts * sizeof(PartitionHashBound *));
244 
245 	/* Convert from node to the internal representation */
246 	for (i = 0; i < nparts; i++)
247 	{
248 		PartitionBoundSpec *spec = boundspecs[i];
249 
250 		if (spec->strategy != PARTITION_STRATEGY_HASH)
251 			elog(ERROR, "invalid strategy in partition bound spec");
252 
253 		hbounds[i] = (PartitionHashBound *) palloc(sizeof(PartitionHashBound));
254 		hbounds[i]->modulus = spec->modulus;
255 		hbounds[i]->remainder = spec->remainder;
256 		hbounds[i]->index = i;
257 	}
258 
259 	/* Sort all the bounds in ascending order */
260 	qsort(hbounds, nparts, sizeof(PartitionHashBound *),
261 		  qsort_partition_hbound_cmp);
262 
263 	/* After sorting, moduli are now stored in ascending order. */
264 	greatest_modulus = hbounds[ndatums - 1]->modulus;
265 
266 	boundinfo->ndatums = ndatums;
267 	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
268 	boundinfo->nindexes = greatest_modulus;
269 	boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
270 	for (i = 0; i < greatest_modulus; i++)
271 		boundinfo->indexes[i] = -1;
272 
273 	/*
274 	 * For hash partitioning, there are as many datums (modulus and remainder
275 	 * pairs) as there are partitions.  Indexes are simply values ranging from
276 	 * 0 to (nparts - 1).
277 	 */
278 	for (i = 0; i < nparts; i++)
279 	{
280 		int			modulus = hbounds[i]->modulus;
281 		int			remainder = hbounds[i]->remainder;
282 
283 		boundinfo->datums[i] = (Datum *) palloc(2 * sizeof(Datum));
284 		boundinfo->datums[i][0] = Int32GetDatum(modulus);
285 		boundinfo->datums[i][1] = Int32GetDatum(remainder);
286 
287 		while (remainder < greatest_modulus)
288 		{
289 			/* overlap? */
290 			Assert(boundinfo->indexes[remainder] == -1);
291 			boundinfo->indexes[remainder] = i;
292 			remainder += modulus;
293 		}
294 
295 		(*mapping)[hbounds[i]->index] = i;
296 		pfree(hbounds[i]);
297 	}
298 	pfree(hbounds);
299 
300 	return boundinfo;
301 }
302 
303 /*
304  * create_list_bounds
305  *		Create a PartitionBoundInfo for a list partitioned table
306  */
307 static PartitionBoundInfo
create_list_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)308 create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
309 				   PartitionKey key, int **mapping)
310 {
311 	PartitionBoundInfo boundinfo;
312 	PartitionListValue **all_values = NULL;
313 	ListCell   *cell;
314 	int			i = 0;
315 	int			ndatums = 0;
316 	int			next_index = 0;
317 	int			default_index = -1;
318 	int			null_index = -1;
319 	List	   *non_null_values = NIL;
320 
321 	boundinfo = (PartitionBoundInfoData *)
322 		palloc0(sizeof(PartitionBoundInfoData));
323 	boundinfo->strategy = key->strategy;
324 	/* Will be set correctly below. */
325 	boundinfo->null_index = -1;
326 	boundinfo->default_index = -1;
327 
328 	/* Create a unified list of non-null values across all partitions. */
329 	for (i = 0; i < nparts; i++)
330 	{
331 		PartitionBoundSpec *spec = boundspecs[i];
332 		ListCell   *c;
333 
334 		if (spec->strategy != PARTITION_STRATEGY_LIST)
335 			elog(ERROR, "invalid strategy in partition bound spec");
336 
337 		/*
338 		 * Note the index of the partition bound spec for the default
339 		 * partition.  There's no datum to add to the list on non-null datums
340 		 * for this partition.
341 		 */
342 		if (spec->is_default)
343 		{
344 			default_index = i;
345 			continue;
346 		}
347 
348 		foreach(c, spec->listdatums)
349 		{
350 			Const	   *val = castNode(Const, lfirst(c));
351 			PartitionListValue *list_value = NULL;
352 
353 			if (!val->constisnull)
354 			{
355 				list_value = (PartitionListValue *)
356 					palloc0(sizeof(PartitionListValue));
357 				list_value->index = i;
358 				list_value->value = val->constvalue;
359 			}
360 			else
361 			{
362 				/*
363 				 * Never put a null into the values array; save the index of
364 				 * the partition that stores nulls, instead.
365 				 */
366 				if (null_index != -1)
367 					elog(ERROR, "found null more than once");
368 				null_index = i;
369 			}
370 
371 			if (list_value)
372 				non_null_values = lappend(non_null_values, list_value);
373 		}
374 	}
375 
376 	ndatums = list_length(non_null_values);
377 
378 	/*
379 	 * Collect all list values in one array. Alongside the value, we also save
380 	 * the index of partition the value comes from.
381 	 */
382 	all_values = (PartitionListValue **)
383 		palloc(ndatums * sizeof(PartitionListValue *));
384 	i = 0;
385 	foreach(cell, non_null_values)
386 	{
387 		PartitionListValue *src = lfirst(cell);
388 
389 		all_values[i] = (PartitionListValue *)
390 			palloc(sizeof(PartitionListValue));
391 		all_values[i]->value = src->value;
392 		all_values[i]->index = src->index;
393 		i++;
394 	}
395 
396 	qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
397 			  qsort_partition_list_value_cmp, (void *) key);
398 
399 	boundinfo->ndatums = ndatums;
400 	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
401 	boundinfo->nindexes = ndatums;
402 	boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
403 
404 	/*
405 	 * Copy values.  Canonical indexes are values ranging from 0 to (nparts -
406 	 * 1) assigned to each partition such that all datums of a given partition
407 	 * receive the same value. The value for a given partition is the index of
408 	 * that partition's smallest datum in the all_values[] array.
409 	 */
410 	for (i = 0; i < ndatums; i++)
411 	{
412 		int			orig_index = all_values[i]->index;
413 
414 		boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
415 		boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
416 											key->parttypbyval[0],
417 											key->parttyplen[0]);
418 
419 		/* If the old index has no mapping, assign one */
420 		if ((*mapping)[orig_index] == -1)
421 			(*mapping)[orig_index] = next_index++;
422 
423 		boundinfo->indexes[i] = (*mapping)[orig_index];
424 	}
425 
426 	/*
427 	 * Set the canonical value for null_index, if any.
428 	 *
429 	 * It is possible that the null-accepting partition has not been assigned
430 	 * an index yet, which could happen if such partition accepts only null
431 	 * and hence not handled in the above loop which only looked at non-null
432 	 * values.
433 	 */
434 	if (null_index != -1)
435 	{
436 		Assert(null_index >= 0);
437 		if ((*mapping)[null_index] == -1)
438 			(*mapping)[null_index] = next_index++;
439 		boundinfo->null_index = (*mapping)[null_index];
440 	}
441 
442 	/* Set the canonical value for default_index, if any. */
443 	if (default_index != -1)
444 	{
445 		/*
446 		 * The default partition accepts any value not specified in the lists
447 		 * of other partitions, hence it should not get mapped index while
448 		 * assigning those for non-null datums.
449 		 */
450 		Assert(default_index >= 0);
451 		Assert((*mapping)[default_index] == -1);
452 		(*mapping)[default_index] = next_index++;
453 		boundinfo->default_index = (*mapping)[default_index];
454 	}
455 
456 	/* All partitions must now have been assigned canonical indexes. */
457 	Assert(next_index == nparts);
458 	return boundinfo;
459 }
460 
461 /*
462  * create_range_bounds
463  *		Create a PartitionBoundInfo for a range partitioned table
464  */
465 static PartitionBoundInfo
create_range_bounds(PartitionBoundSpec ** boundspecs,int nparts,PartitionKey key,int ** mapping)466 create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
467 					PartitionKey key, int **mapping)
468 {
469 	PartitionBoundInfo boundinfo;
470 	PartitionRangeBound **rbounds = NULL;
471 	PartitionRangeBound **all_bounds,
472 			   *prev;
473 	int			i,
474 				k;
475 	int			ndatums = 0;
476 	int			default_index = -1;
477 	int			next_index = 0;
478 
479 	boundinfo = (PartitionBoundInfoData *)
480 		palloc0(sizeof(PartitionBoundInfoData));
481 	boundinfo->strategy = key->strategy;
482 	/* There is no special null-accepting range partition. */
483 	boundinfo->null_index = -1;
484 	/* Will be set correctly below. */
485 	boundinfo->default_index = -1;
486 
487 	all_bounds = (PartitionRangeBound **)
488 		palloc0(2 * nparts * sizeof(PartitionRangeBound *));
489 
490 	/* Create a unified list of range bounds across all the partitions. */
491 	ndatums = 0;
492 	for (i = 0; i < nparts; i++)
493 	{
494 		PartitionBoundSpec *spec = boundspecs[i];
495 		PartitionRangeBound *lower,
496 				   *upper;
497 
498 		if (spec->strategy != PARTITION_STRATEGY_RANGE)
499 			elog(ERROR, "invalid strategy in partition bound spec");
500 
501 		/*
502 		 * Note the index of the partition bound spec for the default
503 		 * partition.  There's no datum to add to the all_bounds array for
504 		 * this partition.
505 		 */
506 		if (spec->is_default)
507 		{
508 			default_index = i;
509 			continue;
510 		}
511 
512 		lower = make_one_partition_rbound(key, i, spec->lowerdatums, true);
513 		upper = make_one_partition_rbound(key, i, spec->upperdatums, false);
514 		all_bounds[ndatums++] = lower;
515 		all_bounds[ndatums++] = upper;
516 	}
517 
518 	Assert(ndatums == nparts * 2 ||
519 		   (default_index != -1 && ndatums == (nparts - 1) * 2));
520 
521 	/* Sort all the bounds in ascending order */
522 	qsort_arg(all_bounds, ndatums,
523 			  sizeof(PartitionRangeBound *),
524 			  qsort_partition_rbound_cmp,
525 			  (void *) key);
526 
527 	/* Save distinct bounds from all_bounds into rbounds. */
528 	rbounds = (PartitionRangeBound **)
529 		palloc(ndatums * sizeof(PartitionRangeBound *));
530 	k = 0;
531 	prev = NULL;
532 	for (i = 0; i < ndatums; i++)
533 	{
534 		PartitionRangeBound *cur = all_bounds[i];
535 		bool		is_distinct = false;
536 		int			j;
537 
538 		/* Is the current bound distinct from the previous one? */
539 		for (j = 0; j < key->partnatts; j++)
540 		{
541 			Datum		cmpval;
542 
543 			if (prev == NULL || cur->kind[j] != prev->kind[j])
544 			{
545 				is_distinct = true;
546 				break;
547 			}
548 
549 			/*
550 			 * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
551 			 * them as equal, since any values after this point must be
552 			 * ignored.
553 			 */
554 			if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
555 				break;
556 
557 			cmpval = FunctionCall2Coll(&key->partsupfunc[j],
558 									   key->partcollation[j],
559 									   cur->datums[j],
560 									   prev->datums[j]);
561 			if (DatumGetInt32(cmpval) != 0)
562 			{
563 				is_distinct = true;
564 				break;
565 			}
566 		}
567 
568 		/*
569 		 * Only if the bound is distinct save it into a temporary array, i.e,
570 		 * rbounds which is later copied into boundinfo datums array.
571 		 */
572 		if (is_distinct)
573 			rbounds[k++] = all_bounds[i];
574 
575 		prev = cur;
576 	}
577 
578 	/* Update ndatums to hold the count of distinct datums. */
579 	ndatums = k;
580 
581 	/*
582 	 * Add datums to boundinfo.  Canonical indexes are values ranging from 0
583 	 * to nparts - 1, assigned in that order to each partition's upper bound.
584 	 * For 'datums' elements that are lower bounds, there is -1 in the
585 	 * 'indexes' array to signify that no partition exists for the values less
586 	 * than such a bound and greater than or equal to the previous upper
587 	 * bound.
588 	 */
589 	boundinfo->ndatums = ndatums;
590 	boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
591 	boundinfo->kind = (PartitionRangeDatumKind **)
592 		palloc(ndatums *
593 			   sizeof(PartitionRangeDatumKind *));
594 
595 	/*
596 	 * For range partitioning, an additional value of -1 is stored as the last
597 	 * element of the indexes[] array.
598 	 */
599 	boundinfo->nindexes = ndatums + 1;
600 	boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
601 
602 	for (i = 0; i < ndatums; i++)
603 	{
604 		int			j;
605 
606 		boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
607 												sizeof(Datum));
608 		boundinfo->kind[i] = (PartitionRangeDatumKind *)
609 			palloc(key->partnatts *
610 				   sizeof(PartitionRangeDatumKind));
611 		for (j = 0; j < key->partnatts; j++)
612 		{
613 			if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
614 				boundinfo->datums[i][j] =
615 					datumCopy(rbounds[i]->datums[j],
616 							  key->parttypbyval[j],
617 							  key->parttyplen[j]);
618 			boundinfo->kind[i][j] = rbounds[i]->kind[j];
619 		}
620 
621 		/*
622 		 * There is no mapping for invalid indexes.
623 		 *
624 		 * Any lower bounds in the rbounds array have invalid indexes
625 		 * assigned, because the values between the previous bound (if there
626 		 * is one) and this (lower) bound are not part of the range of any
627 		 * existing partition.
628 		 */
629 		if (rbounds[i]->lower)
630 			boundinfo->indexes[i] = -1;
631 		else
632 		{
633 			int			orig_index = rbounds[i]->index;
634 
635 			/* If the old index has no mapping, assign one */
636 			if ((*mapping)[orig_index] == -1)
637 				(*mapping)[orig_index] = next_index++;
638 
639 			boundinfo->indexes[i] = (*mapping)[orig_index];
640 		}
641 	}
642 
643 	/* Set the canonical value for default_index, if any. */
644 	if (default_index != -1)
645 	{
646 		Assert(default_index >= 0 && (*mapping)[default_index] == -1);
647 		(*mapping)[default_index] = next_index++;
648 		boundinfo->default_index = (*mapping)[default_index];
649 	}
650 
651 	/* The extra -1 element. */
652 	Assert(i == ndatums);
653 	boundinfo->indexes[i] = -1;
654 
655 	/* All partitions must now have been assigned canonical indexes. */
656 	Assert(next_index == nparts);
657 	return boundinfo;
658 }
659 
660 /*
661  * Are two partition bound collections logically equal?
662  *
663  * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
664  * This is also useful when b1 and b2 are bound collections of two separate
665  * relations, respectively, because PartitionBoundInfo is a canonical
666  * representation of partition bounds.
667  */
668 bool
partition_bounds_equal(int partnatts,int16 * parttyplen,bool * parttypbyval,PartitionBoundInfo b1,PartitionBoundInfo b2)669 partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
670 					   PartitionBoundInfo b1, PartitionBoundInfo b2)
671 {
672 	int			i;
673 
674 	if (b1->strategy != b2->strategy)
675 		return false;
676 
677 	if (b1->ndatums != b2->ndatums)
678 		return false;
679 
680 	if (b1->nindexes != b2->nindexes)
681 		return false;
682 
683 	if (b1->null_index != b2->null_index)
684 		return false;
685 
686 	if (b1->default_index != b2->default_index)
687 		return false;
688 
689 	/* For all partition strategies, the indexes[] arrays have to match */
690 	for (i = 0; i < b1->nindexes; i++)
691 	{
692 		if (b1->indexes[i] != b2->indexes[i])
693 			return false;
694 	}
695 
696 	/* Finally, compare the datums[] arrays */
697 	if (b1->strategy == PARTITION_STRATEGY_HASH)
698 	{
699 		/*
700 		 * We arrange the partitions in the ascending order of their moduli
701 		 * and remainders.  Also every modulus is factor of next larger
702 		 * modulus.  Therefore we can safely store index of a given partition
703 		 * in indexes array at remainder of that partition.  Also entries at
704 		 * (remainder + N * modulus) positions in indexes array are all same
705 		 * for (modulus, remainder) specification for any partition.  Thus the
706 		 * datums arrays from the given bounds are the same, if and only if
707 		 * their indexes arrays are the same.  So, it suffices to compare the
708 		 * indexes arrays.
709 		 *
710 		 * Nonetheless make sure that the bounds are indeed the same when the
711 		 * indexes match.  Hash partition bound stores modulus and remainder
712 		 * at b1->datums[i][0] and b1->datums[i][1] position respectively.
713 		 */
714 #ifdef USE_ASSERT_CHECKING
715 		for (i = 0; i < b1->ndatums; i++)
716 			Assert((b1->datums[i][0] == b2->datums[i][0] &&
717 					b1->datums[i][1] == b2->datums[i][1]));
718 #endif
719 	}
720 	else
721 	{
722 		for (i = 0; i < b1->ndatums; i++)
723 		{
724 			int			j;
725 
726 			for (j = 0; j < partnatts; j++)
727 			{
728 				/* For range partitions, the bounds might not be finite. */
729 				if (b1->kind != NULL)
730 				{
731 					/* The different kinds of bound all differ from each other */
732 					if (b1->kind[i][j] != b2->kind[i][j])
733 						return false;
734 
735 					/*
736 					 * Non-finite bounds are equal without further
737 					 * examination.
738 					 */
739 					if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
740 						continue;
741 				}
742 
743 				/*
744 				 * Compare the actual values. Note that it would be both
745 				 * incorrect and unsafe to invoke the comparison operator
746 				 * derived from the partitioning specification here.  It would
747 				 * be incorrect because we want the relcache entry to be
748 				 * updated for ANY change to the partition bounds, not just
749 				 * those that the partitioning operator thinks are
750 				 * significant.  It would be unsafe because we might reach
751 				 * this code in the context of an aborted transaction, and an
752 				 * arbitrary partitioning operator might not be safe in that
753 				 * context.  datumIsEqual() should be simple enough to be
754 				 * safe.
755 				 */
756 				if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
757 								  parttypbyval[j], parttyplen[j]))
758 					return false;
759 			}
760 		}
761 	}
762 	return true;
763 }
764 
765 /*
766  * Return a copy of given PartitionBoundInfo structure. The data types of bounds
767  * are described by given partition key specification.
768  */
769 PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,PartitionKey key)770 partition_bounds_copy(PartitionBoundInfo src,
771 					  PartitionKey key)
772 {
773 	PartitionBoundInfo dest;
774 	int			i;
775 	int			ndatums;
776 	int			nindexes;
777 	int			partnatts;
778 
779 	dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
780 
781 	dest->strategy = src->strategy;
782 	ndatums = dest->ndatums = src->ndatums;
783 	nindexes = dest->nindexes = src->nindexes;
784 	partnatts = key->partnatts;
785 
786 	/* List partitioned tables have only a single partition key. */
787 	Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
788 
789 	dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
790 
791 	if (src->kind != NULL)
792 	{
793 		dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
794 														 sizeof(PartitionRangeDatumKind *));
795 		for (i = 0; i < ndatums; i++)
796 		{
797 			dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts *
798 															   sizeof(PartitionRangeDatumKind));
799 
800 			memcpy(dest->kind[i], src->kind[i],
801 				   sizeof(PartitionRangeDatumKind) * key->partnatts);
802 		}
803 	}
804 	else
805 		dest->kind = NULL;
806 
807 	for (i = 0; i < ndatums; i++)
808 	{
809 		int			j;
810 
811 		/*
812 		 * For a corresponding hash partition, datums array will have two
813 		 * elements - modulus and remainder.
814 		 */
815 		bool		hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
816 		int			natts = hash_part ? 2 : partnatts;
817 
818 		dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts);
819 
820 		for (j = 0; j < natts; j++)
821 		{
822 			bool		byval;
823 			int			typlen;
824 
825 			if (hash_part)
826 			{
827 				typlen = sizeof(int32); /* Always int4 */
828 				byval = true;	/* int4 is pass-by-value */
829 			}
830 			else
831 			{
832 				byval = key->parttypbyval[j];
833 				typlen = key->parttyplen[j];
834 			}
835 
836 			if (dest->kind == NULL ||
837 				dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
838 				dest->datums[i][j] = datumCopy(src->datums[i][j],
839 											   byval, typlen);
840 		}
841 	}
842 
843 	dest->indexes = (int *) palloc(sizeof(int) * nindexes);
844 	memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);
845 
846 	dest->null_index = src->null_index;
847 	dest->default_index = src->default_index;
848 
849 	return dest;
850 }
851 
852 /*
853  * partitions_are_ordered
854  *		Determine whether the partitions described by 'boundinfo' are ordered,
855  *		that is partitions appearing earlier in the PartitionDesc sequence
856  *		contain partition keys strictly less than those appearing later.
857  *		Also, if NULL values are possible, they must come in the last
858  *		partition defined in the PartitionDesc.
859  *
860  * If out of order, or there is insufficient info to know the order,
861  * then we return false.
862  */
863 bool
partitions_are_ordered(PartitionBoundInfo boundinfo,int nparts)864 partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts)
865 {
866 	Assert(boundinfo != NULL);
867 
868 	switch (boundinfo->strategy)
869 	{
870 		case PARTITION_STRATEGY_RANGE:
871 
872 			/*
873 			 * RANGE-type partitioning guarantees that the partitions can be
874 			 * scanned in the order that they're defined in the PartitionDesc
875 			 * to provide sequential, non-overlapping ranges of tuples.
876 			 * However, if a DEFAULT partition exists then it doesn't work, as
877 			 * that could contain tuples from either below or above the
878 			 * defined range, or tuples belonging to gaps between partitions.
879 			 */
880 			if (!partition_bound_has_default(boundinfo))
881 				return true;
882 			break;
883 
884 		case PARTITION_STRATEGY_LIST:
885 
886 			/*
887 			 * LIST partitioning can also guarantee ordering, but only if the
888 			 * partitions don't accept interleaved values.  We could likely
889 			 * check for this by looping over the PartitionBound's indexes
890 			 * array to check that the indexes are in order.  For now, let's
891 			 * just keep it simple and just accept LIST partitioning when
892 			 * there's no DEFAULT partition, exactly one value per partition,
893 			 * and optionally a NULL partition that does not accept any other
894 			 * values.  Such a NULL partition will come last in the
895 			 * PartitionDesc, and the other partitions will be properly
896 			 * ordered.  This is a cheap test to make as it does not require
897 			 * any per-partition processing.  Maybe we'd like to handle more
898 			 * complex cases in the future.
899 			 */
900 			if (partition_bound_has_default(boundinfo))
901 				return false;
902 
903 			if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo)
904 				== nparts)
905 				return true;
906 			break;
907 
908 		default:
909 			/* HASH, or some other strategy */
910 			break;
911 	}
912 
913 	return false;
914 }
915 
916 /*
917  * check_new_partition_bound
918  *
919  * Checks if the new partition's bound overlaps any of the existing partitions
920  * of parent.  Also performs additional checks as necessary per strategy.
921  */
922 void
check_new_partition_bound(char * relname,Relation parent,PartitionBoundSpec * spec)923 check_new_partition_bound(char *relname, Relation parent,
924 						  PartitionBoundSpec *spec)
925 {
926 	PartitionKey key = RelationGetPartitionKey(parent);
927 	PartitionDesc partdesc = RelationGetPartitionDesc(parent);
928 	PartitionBoundInfo boundinfo = partdesc->boundinfo;
929 	ParseState *pstate = make_parsestate(NULL);
930 	int			with = -1;
931 	bool		overlap = false;
932 
933 	if (spec->is_default)
934 	{
935 		/*
936 		 * The default partition bound never conflicts with any other
937 		 * partition's; if that's what we're attaching, the only possible
938 		 * problem is that one already exists, so check for that and we're
939 		 * done.
940 		 */
941 		if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
942 			return;
943 
944 		/* Default partition already exists, error out. */
945 		ereport(ERROR,
946 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
947 				 errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
948 						relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
949 				 parser_errposition(pstate, spec->location)));
950 	}
951 
952 	switch (key->strategy)
953 	{
954 		case PARTITION_STRATEGY_HASH:
955 			{
956 				Assert(spec->strategy == PARTITION_STRATEGY_HASH);
957 				Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
958 
959 				if (partdesc->nparts > 0)
960 				{
961 					Datum	  **datums = boundinfo->datums;
962 					int			ndatums = boundinfo->ndatums;
963 					int			greatest_modulus;
964 					int			remainder;
965 					int			offset;
966 					bool		valid_modulus = true;
967 					int			prev_modulus,	/* Previous largest modulus */
968 								next_modulus;	/* Next largest modulus */
969 
970 					/*
971 					 * Check rule that every modulus must be a factor of the
972 					 * next larger modulus.  For example, if you have a bunch
973 					 * of partitions that all have modulus 5, you can add a
974 					 * new partition with modulus 10 or a new partition with
975 					 * modulus 15, but you cannot add both a partition with
976 					 * modulus 10 and a partition with modulus 15, because 10
977 					 * is not a factor of 15.
978 					 *
979 					 * Get the greatest (modulus, remainder) pair contained in
980 					 * boundinfo->datums that is less than or equal to the
981 					 * (spec->modulus, spec->remainder) pair.
982 					 */
983 					offset = partition_hash_bsearch(boundinfo,
984 													spec->modulus,
985 													spec->remainder);
986 					if (offset < 0)
987 					{
988 						next_modulus = DatumGetInt32(datums[0][0]);
989 						valid_modulus = (next_modulus % spec->modulus) == 0;
990 					}
991 					else
992 					{
993 						prev_modulus = DatumGetInt32(datums[offset][0]);
994 						valid_modulus = (spec->modulus % prev_modulus) == 0;
995 
996 						if (valid_modulus && (offset + 1) < ndatums)
997 						{
998 							next_modulus = DatumGetInt32(datums[offset + 1][0]);
999 							valid_modulus = (next_modulus % spec->modulus) == 0;
1000 						}
1001 					}
1002 
1003 					if (!valid_modulus)
1004 						ereport(ERROR,
1005 								(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1006 								 errmsg("every hash partition modulus must be a factor of the next larger modulus")));
1007 
1008 					greatest_modulus = boundinfo->nindexes;
1009 					remainder = spec->remainder;
1010 
1011 					/*
1012 					 * Normally, the lowest remainder that could conflict with
1013 					 * the new partition is equal to the remainder specified
1014 					 * for the new partition, but when the new partition has a
1015 					 * modulus higher than any used so far, we need to adjust.
1016 					 */
1017 					if (remainder >= greatest_modulus)
1018 						remainder = remainder % greatest_modulus;
1019 
1020 					/* Check every potentially-conflicting remainder. */
1021 					do
1022 					{
1023 						if (boundinfo->indexes[remainder] != -1)
1024 						{
1025 							overlap = true;
1026 							with = boundinfo->indexes[remainder];
1027 							break;
1028 						}
1029 						remainder += spec->modulus;
1030 					} while (remainder < greatest_modulus);
1031 				}
1032 
1033 				break;
1034 			}
1035 
1036 		case PARTITION_STRATEGY_LIST:
1037 			{
1038 				Assert(spec->strategy == PARTITION_STRATEGY_LIST);
1039 
1040 				if (partdesc->nparts > 0)
1041 				{
1042 					ListCell   *cell;
1043 
1044 					Assert(boundinfo &&
1045 						   boundinfo->strategy == PARTITION_STRATEGY_LIST &&
1046 						   (boundinfo->ndatums > 0 ||
1047 							partition_bound_accepts_nulls(boundinfo) ||
1048 							partition_bound_has_default(boundinfo)));
1049 
1050 					foreach(cell, spec->listdatums)
1051 					{
1052 						Const	   *val = castNode(Const, lfirst(cell));
1053 
1054 						if (!val->constisnull)
1055 						{
1056 							int			offset;
1057 							bool		equal;
1058 
1059 							offset = partition_list_bsearch(&key->partsupfunc[0],
1060 															key->partcollation,
1061 															boundinfo,
1062 															val->constvalue,
1063 															&equal);
1064 							if (offset >= 0 && equal)
1065 							{
1066 								overlap = true;
1067 								with = boundinfo->indexes[offset];
1068 								break;
1069 							}
1070 						}
1071 						else if (partition_bound_accepts_nulls(boundinfo))
1072 						{
1073 							overlap = true;
1074 							with = boundinfo->null_index;
1075 							break;
1076 						}
1077 					}
1078 				}
1079 
1080 				break;
1081 			}
1082 
1083 		case PARTITION_STRATEGY_RANGE:
1084 			{
1085 				PartitionRangeBound *lower,
1086 						   *upper;
1087 
1088 				Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
1089 				lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true);
1090 				upper = make_one_partition_rbound(key, -1, spec->upperdatums, false);
1091 
1092 				/*
1093 				 * First check if the resulting range would be empty with
1094 				 * specified lower and upper bounds
1095 				 */
1096 				if (partition_rbound_cmp(key->partnatts, key->partsupfunc,
1097 										 key->partcollation, lower->datums,
1098 										 lower->kind, true, upper) >= 0)
1099 				{
1100 					ereport(ERROR,
1101 							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1102 							 errmsg("empty range bound specified for partition \"%s\"",
1103 									relname),
1104 							 errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
1105 									   get_range_partbound_string(spec->lowerdatums),
1106 									   get_range_partbound_string(spec->upperdatums)),
1107 							 parser_errposition(pstate, spec->location)));
1108 				}
1109 
1110 				if (partdesc->nparts > 0)
1111 				{
1112 					int			offset;
1113 					bool		equal;
1114 
1115 					Assert(boundinfo &&
1116 						   boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
1117 						   (boundinfo->ndatums > 0 ||
1118 							partition_bound_has_default(boundinfo)));
1119 
1120 					/*
1121 					 * Test whether the new lower bound (which is treated
1122 					 * inclusively as part of the new partition) lies inside
1123 					 * an existing partition, or in a gap.
1124 					 *
1125 					 * If it's inside an existing partition, the bound at
1126 					 * offset + 1 will be the upper bound of that partition,
1127 					 * and its index will be >= 0.
1128 					 *
1129 					 * If it's in a gap, the bound at offset + 1 will be the
1130 					 * lower bound of the next partition, and its index will
1131 					 * be -1. This is also true if there is no next partition,
1132 					 * since the index array is initialised with an extra -1
1133 					 * at the end.
1134 					 */
1135 					offset = partition_range_bsearch(key->partnatts,
1136 													 key->partsupfunc,
1137 													 key->partcollation,
1138 													 boundinfo, lower,
1139 													 &equal);
1140 
1141 					if (boundinfo->indexes[offset + 1] < 0)
1142 					{
1143 						/*
1144 						 * Check that the new partition will fit in the gap.
1145 						 * For it to fit, the new upper bound must be less
1146 						 * than or equal to the lower bound of the next
1147 						 * partition, if there is one.
1148 						 */
1149 						if (offset + 1 < boundinfo->ndatums)
1150 						{
1151 							int32		cmpval;
1152 							Datum	   *datums;
1153 							PartitionRangeDatumKind *kind;
1154 							bool		is_lower;
1155 
1156 							datums = boundinfo->datums[offset + 1];
1157 							kind = boundinfo->kind[offset + 1];
1158 							is_lower = (boundinfo->indexes[offset + 1] == -1);
1159 
1160 							cmpval = partition_rbound_cmp(key->partnatts,
1161 														  key->partsupfunc,
1162 														  key->partcollation,
1163 														  datums, kind,
1164 														  is_lower, upper);
1165 							if (cmpval < 0)
1166 							{
1167 								/*
1168 								 * The new partition overlaps with the
1169 								 * existing partition between offset + 1 and
1170 								 * offset + 2.
1171 								 */
1172 								overlap = true;
1173 								with = boundinfo->indexes[offset + 2];
1174 							}
1175 						}
1176 					}
1177 					else
1178 					{
1179 						/*
1180 						 * The new partition overlaps with the existing
1181 						 * partition between offset and offset + 1.
1182 						 */
1183 						overlap = true;
1184 						with = boundinfo->indexes[offset + 1];
1185 					}
1186 				}
1187 
1188 				break;
1189 			}
1190 
1191 		default:
1192 			elog(ERROR, "unexpected partition strategy: %d",
1193 				 (int) key->strategy);
1194 	}
1195 
1196 	if (overlap)
1197 	{
1198 		Assert(with >= 0);
1199 		ereport(ERROR,
1200 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1201 				 errmsg("partition \"%s\" would overlap partition \"%s\"",
1202 						relname, get_rel_name(partdesc->oids[with])),
1203 				 parser_errposition(pstate, spec->location)));
1204 	}
1205 }
1206 
1207 /*
1208  * check_default_partition_contents
1209  *
1210  * This function checks if there exists a row in the default partition that
1211  * would properly belong to the new partition being added.  If it finds one,
1212  * it throws an error.
1213  */
1214 void
check_default_partition_contents(Relation parent,Relation default_rel,PartitionBoundSpec * new_spec)1215 check_default_partition_contents(Relation parent, Relation default_rel,
1216 								 PartitionBoundSpec *new_spec)
1217 {
1218 	List	   *new_part_constraints;
1219 	List	   *def_part_constraints;
1220 	List	   *all_parts;
1221 	ListCell   *lc;
1222 
1223 	new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
1224 		? get_qual_for_list(parent, new_spec)
1225 		: get_qual_for_range(parent, new_spec, false);
1226 	def_part_constraints =
1227 		get_proposed_default_constraint(new_part_constraints);
1228 
1229 	/*
1230 	 * Map the Vars in the constraint expression from parent's attnos to
1231 	 * default_rel's.
1232 	 */
1233 	def_part_constraints =
1234 		map_partition_varattnos(def_part_constraints, 1, default_rel,
1235 								parent, NULL);
1236 
1237 	/*
1238 	 * If the existing constraints on the default partition imply that it will
1239 	 * not contain any row that would belong to the new partition, we can
1240 	 * avoid scanning the default partition.
1241 	 */
1242 	if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
1243 	{
1244 		ereport(DEBUG1,
1245 				(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
1246 						RelationGetRelationName(default_rel))));
1247 		return;
1248 	}
1249 
1250 	/*
1251 	 * Scan the default partition and its subpartitions, and check for rows
1252 	 * that do not satisfy the revised partition constraints.
1253 	 */
1254 	if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1255 		all_parts = find_all_inheritors(RelationGetRelid(default_rel),
1256 										AccessExclusiveLock, NULL);
1257 	else
1258 		all_parts = list_make1_oid(RelationGetRelid(default_rel));
1259 
1260 	foreach(lc, all_parts)
1261 	{
1262 		Oid			part_relid = lfirst_oid(lc);
1263 		Relation	part_rel;
1264 		Expr	   *partition_constraint;
1265 		EState	   *estate;
1266 		ExprState  *partqualstate = NULL;
1267 		Snapshot	snapshot;
1268 		ExprContext *econtext;
1269 		TableScanDesc scan;
1270 		MemoryContext oldCxt;
1271 		TupleTableSlot *tupslot;
1272 
1273 		/* Lock already taken above. */
1274 		if (part_relid != RelationGetRelid(default_rel))
1275 		{
1276 			part_rel = table_open(part_relid, NoLock);
1277 
1278 			/*
1279 			 * Map the Vars in the constraint expression from default_rel's
1280 			 * the sub-partition's.
1281 			 */
1282 			partition_constraint = make_ands_explicit(def_part_constraints);
1283 			partition_constraint = (Expr *)
1284 				map_partition_varattnos((List *) partition_constraint, 1,
1285 										part_rel, default_rel, NULL);
1286 
1287 			/*
1288 			 * If the partition constraints on default partition child imply
1289 			 * that it will not contain any row that would belong to the new
1290 			 * partition, we can avoid scanning the child table.
1291 			 */
1292 			if (PartConstraintImpliedByRelConstraint(part_rel,
1293 													 def_part_constraints))
1294 			{
1295 				ereport(DEBUG1,
1296 						(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
1297 								RelationGetRelationName(part_rel))));
1298 
1299 				table_close(part_rel, NoLock);
1300 				continue;
1301 			}
1302 		}
1303 		else
1304 		{
1305 			part_rel = default_rel;
1306 			partition_constraint = make_ands_explicit(def_part_constraints);
1307 		}
1308 
1309 		/*
1310 		 * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
1311 		 * scanned.
1312 		 */
1313 		if (part_rel->rd_rel->relkind != RELKIND_RELATION)
1314 		{
1315 			if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1316 				ereport(WARNING,
1317 						(errcode(ERRCODE_CHECK_VIOLATION),
1318 						 errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
1319 								RelationGetRelationName(part_rel),
1320 								RelationGetRelationName(default_rel))));
1321 
1322 			if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
1323 				table_close(part_rel, NoLock);
1324 
1325 			continue;
1326 		}
1327 
1328 		estate = CreateExecutorState();
1329 
1330 		/* Build expression execution states for partition check quals */
1331 		partqualstate = ExecPrepareExpr(partition_constraint, estate);
1332 
1333 		econtext = GetPerTupleExprContext(estate);
1334 		snapshot = RegisterSnapshot(GetLatestSnapshot());
1335 		tupslot = table_slot_create(part_rel, &estate->es_tupleTable);
1336 		scan = table_beginscan(part_rel, snapshot, 0, NULL);
1337 
1338 		/*
1339 		 * Switch to per-tuple memory context and reset it for each tuple
1340 		 * produced, so we don't leak memory.
1341 		 */
1342 		oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
1343 
1344 		while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot))
1345 		{
1346 			econtext->ecxt_scantuple = tupslot;
1347 
1348 			if (!ExecCheck(partqualstate, econtext))
1349 				ereport(ERROR,
1350 						(errcode(ERRCODE_CHECK_VIOLATION),
1351 						 errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
1352 								RelationGetRelationName(default_rel))));
1353 
1354 			ResetExprContext(econtext);
1355 			CHECK_FOR_INTERRUPTS();
1356 		}
1357 
1358 		MemoryContextSwitchTo(oldCxt);
1359 		table_endscan(scan);
1360 		UnregisterSnapshot(snapshot);
1361 		ExecDropSingleTupleTableSlot(tupslot);
1362 		FreeExecutorState(estate);
1363 
1364 		if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
1365 			table_close(part_rel, NoLock);	/* keep the lock until commit */
1366 	}
1367 }
1368 
1369 /*
1370  * get_hash_partition_greatest_modulus
1371  *
1372  * Returns the greatest modulus of the hash partition bound.
1373  * This is no longer used in the core code, but we keep it around
1374  * in case external modules are using it.
1375  */
1376 int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)1377 get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
1378 {
1379 	Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
1380 	return bound->nindexes;
1381 }
1382 
1383 /*
1384  * make_one_partition_rbound
1385  *
1386  * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
1387  * and a flag telling whether the bound is lower or not.  Made into a function
1388  * because there are multiple sites that want to use this facility.
1389  */
1390 static PartitionRangeBound *
make_one_partition_rbound(PartitionKey key,int index,List * datums,bool lower)1391 make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
1392 {
1393 	PartitionRangeBound *bound;
1394 	ListCell   *lc;
1395 	int			i;
1396 
1397 	Assert(datums != NIL);
1398 
1399 	bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
1400 	bound->index = index;
1401 	bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
1402 	bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
1403 													  sizeof(PartitionRangeDatumKind));
1404 	bound->lower = lower;
1405 
1406 	i = 0;
1407 	foreach(lc, datums)
1408 	{
1409 		PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
1410 
1411 		/* What's contained in this range datum? */
1412 		bound->kind[i] = datum->kind;
1413 
1414 		if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
1415 		{
1416 			Const	   *val = castNode(Const, datum->value);
1417 
1418 			if (val->constisnull)
1419 				elog(ERROR, "invalid range bound datum");
1420 			bound->datums[i] = val->constvalue;
1421 		}
1422 
1423 		i++;
1424 	}
1425 
1426 	return bound;
1427 }
1428 
1429 /*
1430  * partition_rbound_cmp
1431  *
1432  * Return for two range bounds whether the 1st one (specified in datums1,
1433  * kind1, and lower1) is <, =, or > the bound specified in *b2.
1434  *
1435  * partnatts, partsupfunc and partcollation give the number of attributes in the
1436  * bounds to be compared, comparison function to be used and the collations of
1437  * attributes, respectively.
1438  *
1439  * Note that if the values of the two range bounds compare equal, then we take
1440  * into account whether they are upper or lower bounds, and an upper bound is
1441  * considered to be smaller than a lower bound. This is important to the way
1442  * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
1443  * structure, which only stores the upper bound of a common boundary between
1444  * two contiguous partitions.
1445  */
1446 static int32
partition_rbound_cmp(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * datums1,PartitionRangeDatumKind * kind1,bool lower1,PartitionRangeBound * b2)1447 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
1448 					 Oid *partcollation,
1449 					 Datum *datums1, PartitionRangeDatumKind *kind1,
1450 					 bool lower1, PartitionRangeBound *b2)
1451 {
1452 	int32		cmpval = 0;		/* placate compiler */
1453 	int			i;
1454 	Datum	   *datums2 = b2->datums;
1455 	PartitionRangeDatumKind *kind2 = b2->kind;
1456 	bool		lower2 = b2->lower;
1457 
1458 	for (i = 0; i < partnatts; i++)
1459 	{
1460 		/*
1461 		 * First, handle cases where the column is unbounded, which should not
1462 		 * invoke the comparison procedure, and should not consider any later
1463 		 * columns. Note that the PartitionRangeDatumKind enum elements
1464 		 * compare the same way as the values they represent.
1465 		 */
1466 		if (kind1[i] < kind2[i])
1467 			return -1;
1468 		else if (kind1[i] > kind2[i])
1469 			return 1;
1470 		else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
1471 
1472 			/*
1473 			 * The column bounds are both MINVALUE or both MAXVALUE. No later
1474 			 * columns should be considered, but we still need to compare
1475 			 * whether they are upper or lower bounds.
1476 			 */
1477 			break;
1478 
1479 		cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
1480 												 partcollation[i],
1481 												 datums1[i],
1482 												 datums2[i]));
1483 		if (cmpval != 0)
1484 			break;
1485 	}
1486 
1487 	/*
1488 	 * If the comparison is anything other than equal, we're done. If they
1489 	 * compare equal though, we still have to consider whether the boundaries
1490 	 * are inclusive or exclusive.  Exclusive one is considered smaller of the
1491 	 * two.
1492 	 */
1493 	if (cmpval == 0 && lower1 != lower2)
1494 		cmpval = lower1 ? 1 : -1;
1495 
1496 	return cmpval;
1497 }
1498 
1499 /*
1500  * partition_rbound_datum_cmp
1501  *
1502  * Return whether range bound (specified in rb_datums, rb_kind, and rb_lower)
1503  * is <, =, or > partition key of tuple (tuple_datums)
1504  *
1505  * n_tuple_datums, partsupfunc and partcollation give number of attributes in
1506  * the bounds to be compared, comparison function to be used and the collations
1507  * of attributes resp.
1508  *
1509  */
1510 int32
partition_rbound_datum_cmp(FmgrInfo * partsupfunc,Oid * partcollation,Datum * rb_datums,PartitionRangeDatumKind * rb_kind,Datum * tuple_datums,int n_tuple_datums)1511 partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
1512 						   Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
1513 						   Datum *tuple_datums, int n_tuple_datums)
1514 {
1515 	int			i;
1516 	int32		cmpval = -1;
1517 
1518 	for (i = 0; i < n_tuple_datums; i++)
1519 	{
1520 		if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
1521 			return -1;
1522 		else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
1523 			return 1;
1524 
1525 		cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
1526 												 partcollation[i],
1527 												 rb_datums[i],
1528 												 tuple_datums[i]));
1529 		if (cmpval != 0)
1530 			break;
1531 	}
1532 
1533 	return cmpval;
1534 }
1535 
1536 /*
1537  * partition_hbound_cmp
1538  *
1539  * Compares modulus first, then remainder if modulus is equal.
1540  */
1541 static int32
partition_hbound_cmp(int modulus1,int remainder1,int modulus2,int remainder2)1542 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
1543 {
1544 	if (modulus1 < modulus2)
1545 		return -1;
1546 	if (modulus1 > modulus2)
1547 		return 1;
1548 	if (modulus1 == modulus2 && remainder1 != remainder2)
1549 		return (remainder1 > remainder2) ? 1 : -1;
1550 	return 0;
1551 }
1552 
1553 /*
1554  * partition_list_bsearch
1555  *		Returns the index of the greatest bound datum that is less than equal
1556  * 		to the given value or -1 if all of the bound datums are greater
1557  *
1558  * *is_equal is set to true if the bound datum at the returned index is equal
1559  * to the input value.
1560  */
1561 int
partition_list_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,Datum value,bool * is_equal)1562 partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
1563 					   PartitionBoundInfo boundinfo,
1564 					   Datum value, bool *is_equal)
1565 {
1566 	int			lo,
1567 				hi,
1568 				mid;
1569 
1570 	lo = -1;
1571 	hi = boundinfo->ndatums - 1;
1572 	while (lo < hi)
1573 	{
1574 		int32		cmpval;
1575 
1576 		mid = (lo + hi + 1) / 2;
1577 		cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
1578 												 partcollation[0],
1579 												 boundinfo->datums[mid][0],
1580 												 value));
1581 		if (cmpval <= 0)
1582 		{
1583 			lo = mid;
1584 			*is_equal = (cmpval == 0);
1585 			if (*is_equal)
1586 				break;
1587 		}
1588 		else
1589 			hi = mid - 1;
1590 	}
1591 
1592 	return lo;
1593 }
1594 
1595 /*
1596  * partition_range_bsearch
1597  *		Returns the index of the greatest range bound that is less than or
1598  *		equal to the given range bound or -1 if all of the range bounds are
1599  *		greater
1600  *
1601  * *is_equal is set to true if the range bound at the returned index is equal
1602  * to the input range bound
1603  */
1604 static int
partition_range_bsearch(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,PartitionRangeBound * probe,bool * is_equal)1605 partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
1606 						Oid *partcollation,
1607 						PartitionBoundInfo boundinfo,
1608 						PartitionRangeBound *probe, bool *is_equal)
1609 {
1610 	int			lo,
1611 				hi,
1612 				mid;
1613 
1614 	lo = -1;
1615 	hi = boundinfo->ndatums - 1;
1616 	while (lo < hi)
1617 	{
1618 		int32		cmpval;
1619 
1620 		mid = (lo + hi + 1) / 2;
1621 		cmpval = partition_rbound_cmp(partnatts, partsupfunc,
1622 									  partcollation,
1623 									  boundinfo->datums[mid],
1624 									  boundinfo->kind[mid],
1625 									  (boundinfo->indexes[mid] == -1),
1626 									  probe);
1627 		if (cmpval <= 0)
1628 		{
1629 			lo = mid;
1630 			*is_equal = (cmpval == 0);
1631 
1632 			if (*is_equal)
1633 				break;
1634 		}
1635 		else
1636 			hi = mid - 1;
1637 	}
1638 
1639 	return lo;
1640 }
1641 
1642 /*
1643  * partition_range_bsearch
1644  *		Returns the index of the greatest range bound that is less than or
1645  *		equal to the given tuple or -1 if all of the range bounds are greater
1646  *
1647  * *is_equal is set to true if the range bound at the returned index is equal
1648  * to the input tuple.
1649  */
1650 int
partition_range_datum_bsearch(FmgrInfo * partsupfunc,Oid * partcollation,PartitionBoundInfo boundinfo,int nvalues,Datum * values,bool * is_equal)1651 partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
1652 							  PartitionBoundInfo boundinfo,
1653 							  int nvalues, Datum *values, bool *is_equal)
1654 {
1655 	int			lo,
1656 				hi,
1657 				mid;
1658 
1659 	lo = -1;
1660 	hi = boundinfo->ndatums - 1;
1661 	while (lo < hi)
1662 	{
1663 		int32		cmpval;
1664 
1665 		mid = (lo + hi + 1) / 2;
1666 		cmpval = partition_rbound_datum_cmp(partsupfunc,
1667 											partcollation,
1668 											boundinfo->datums[mid],
1669 											boundinfo->kind[mid],
1670 											values,
1671 											nvalues);
1672 		if (cmpval <= 0)
1673 		{
1674 			lo = mid;
1675 			*is_equal = (cmpval == 0);
1676 
1677 			if (*is_equal)
1678 				break;
1679 		}
1680 		else
1681 			hi = mid - 1;
1682 	}
1683 
1684 	return lo;
1685 }
1686 
1687 /*
1688  * partition_hash_bsearch
1689  *		Returns the index of the greatest (modulus, remainder) pair that is
1690  *		less than or equal to the given (modulus, remainder) pair or -1 if
1691  *		all of them are greater
1692  */
1693 int
partition_hash_bsearch(PartitionBoundInfo boundinfo,int modulus,int remainder)1694 partition_hash_bsearch(PartitionBoundInfo boundinfo,
1695 					   int modulus, int remainder)
1696 {
1697 	int			lo,
1698 				hi,
1699 				mid;
1700 
1701 	lo = -1;
1702 	hi = boundinfo->ndatums - 1;
1703 	while (lo < hi)
1704 	{
1705 		int32		cmpval,
1706 					bound_modulus,
1707 					bound_remainder;
1708 
1709 		mid = (lo + hi + 1) / 2;
1710 		bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
1711 		bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
1712 		cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
1713 									  modulus, remainder);
1714 		if (cmpval <= 0)
1715 		{
1716 			lo = mid;
1717 
1718 			if (cmpval == 0)
1719 				break;
1720 		}
1721 		else
1722 			hi = mid - 1;
1723 	}
1724 
1725 	return lo;
1726 }
1727 
1728 /*
1729  * qsort_partition_hbound_cmp
1730  *
1731  * Hash bounds are sorted by modulus, then by remainder.
1732  */
1733 static int32
qsort_partition_hbound_cmp(const void * a,const void * b)1734 qsort_partition_hbound_cmp(const void *a, const void *b)
1735 {
1736 	PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
1737 	PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
1738 
1739 	return partition_hbound_cmp(h1->modulus, h1->remainder,
1740 								h2->modulus, h2->remainder);
1741 }
1742 
1743 /*
1744  * qsort_partition_list_value_cmp
1745  *
1746  * Compare two list partition bound datums.
1747  */
1748 static int32
qsort_partition_list_value_cmp(const void * a,const void * b,void * arg)1749 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
1750 {
1751 	Datum		val1 = (*(PartitionListValue *const *) a)->value,
1752 				val2 = (*(PartitionListValue *const *) b)->value;
1753 	PartitionKey key = (PartitionKey) arg;
1754 
1755 	return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1756 										   key->partcollation[0],
1757 										   val1, val2));
1758 }
1759 
1760 /*
1761  * qsort_partition_rbound_cmp
1762  *
1763  * Used when sorting range bounds across all range partitions.
1764  */
1765 static int32
qsort_partition_rbound_cmp(const void * a,const void * b,void * arg)1766 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
1767 {
1768 	PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
1769 	PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
1770 	PartitionKey key = (PartitionKey) arg;
1771 
1772 	return partition_rbound_cmp(key->partnatts, key->partsupfunc,
1773 								key->partcollation, b1->datums, b1->kind,
1774 								b1->lower, b2);
1775 }
1776 
1777 /*
1778  * get_partition_operator
1779  *
1780  * Return oid of the operator of the given strategy for the given partition
1781  * key column.  It is assumed that the partitioning key is of the same type as
1782  * the chosen partitioning opclass, or at least binary-compatible.  In the
1783  * latter case, *need_relabel is set to true if the opclass is not of a
1784  * polymorphic type (indicating a RelabelType node needed on top), otherwise
1785  * false.
1786  */
1787 static Oid
get_partition_operator(PartitionKey key,int col,StrategyNumber strategy,bool * need_relabel)1788 get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
1789 					   bool *need_relabel)
1790 {
1791 	Oid			operoid;
1792 
1793 	/*
1794 	 * Get the operator in the partitioning opfamily using the opclass'
1795 	 * declared input type as both left- and righttype.
1796 	 */
1797 	operoid = get_opfamily_member(key->partopfamily[col],
1798 								  key->partopcintype[col],
1799 								  key->partopcintype[col],
1800 								  strategy);
1801 	if (!OidIsValid(operoid))
1802 		elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
1803 			 strategy, key->partopcintype[col], key->partopcintype[col],
1804 			 key->partopfamily[col]);
1805 
1806 	/*
1807 	 * If the partition key column is not of the same type as the operator
1808 	 * class and not polymorphic, tell caller to wrap the non-Const expression
1809 	 * in a RelabelType.  This matches what parse_coerce.c does.
1810 	 */
1811 	*need_relabel = (key->parttypid[col] != key->partopcintype[col] &&
1812 					 key->partopcintype[col] != RECORDOID &&
1813 					 !IsPolymorphicType(key->partopcintype[col]));
1814 
1815 	return operoid;
1816 }
1817 
1818 /*
1819  * make_partition_op_expr
1820  *		Returns an Expr for the given partition key column with arg1 and
1821  *		arg2 as its leftop and rightop, respectively
1822  */
1823 static Expr *
make_partition_op_expr(PartitionKey key,int keynum,uint16 strategy,Expr * arg1,Expr * arg2)1824 make_partition_op_expr(PartitionKey key, int keynum,
1825 					   uint16 strategy, Expr *arg1, Expr *arg2)
1826 {
1827 	Oid			operoid;
1828 	bool		need_relabel = false;
1829 	Expr	   *result = NULL;
1830 
1831 	/* Get the correct btree operator for this partitioning column */
1832 	operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
1833 
1834 	/*
1835 	 * Chosen operator may be such that the non-Const operand needs to be
1836 	 * coerced, so apply the same; see the comment in
1837 	 * get_partition_operator().
1838 	 */
1839 	if (!IsA(arg1, Const) &&
1840 		(need_relabel ||
1841 		 key->partcollation[keynum] != key->parttypcoll[keynum]))
1842 		arg1 = (Expr *) makeRelabelType(arg1,
1843 										key->partopcintype[keynum],
1844 										-1,
1845 										key->partcollation[keynum],
1846 										COERCE_EXPLICIT_CAST);
1847 
1848 	/* Generate the actual expression */
1849 	switch (key->strategy)
1850 	{
1851 		case PARTITION_STRATEGY_LIST:
1852 			{
1853 				List	   *elems = (List *) arg2;
1854 				int			nelems = list_length(elems);
1855 
1856 				Assert(nelems >= 1);
1857 				Assert(keynum == 0);
1858 
1859 				if (nelems > 1 &&
1860 					!type_is_array(key->parttypid[keynum]))
1861 				{
1862 					ArrayExpr  *arrexpr;
1863 					ScalarArrayOpExpr *saopexpr;
1864 
1865 					/* Construct an ArrayExpr for the right-hand inputs */
1866 					arrexpr = makeNode(ArrayExpr);
1867 					arrexpr->array_typeid =
1868 						get_array_type(key->parttypid[keynum]);
1869 					arrexpr->array_collid = key->parttypcoll[keynum];
1870 					arrexpr->element_typeid = key->parttypid[keynum];
1871 					arrexpr->elements = elems;
1872 					arrexpr->multidims = false;
1873 					arrexpr->location = -1;
1874 
1875 					/* Build leftop = ANY (rightop) */
1876 					saopexpr = makeNode(ScalarArrayOpExpr);
1877 					saopexpr->opno = operoid;
1878 					saopexpr->opfuncid = get_opcode(operoid);
1879 					saopexpr->useOr = true;
1880 					saopexpr->inputcollid = key->partcollation[keynum];
1881 					saopexpr->args = list_make2(arg1, arrexpr);
1882 					saopexpr->location = -1;
1883 
1884 					result = (Expr *) saopexpr;
1885 				}
1886 				else
1887 				{
1888 					List	   *elemops = NIL;
1889 					ListCell   *lc;
1890 
1891 					foreach(lc, elems)
1892 					{
1893 						Expr	   *elem = lfirst(lc),
1894 								   *elemop;
1895 
1896 						elemop = make_opclause(operoid,
1897 											   BOOLOID,
1898 											   false,
1899 											   arg1, elem,
1900 											   InvalidOid,
1901 											   key->partcollation[keynum]);
1902 						elemops = lappend(elemops, elemop);
1903 					}
1904 
1905 					result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
1906 				}
1907 				break;
1908 			}
1909 
1910 		case PARTITION_STRATEGY_RANGE:
1911 			result = make_opclause(operoid,
1912 								   BOOLOID,
1913 								   false,
1914 								   arg1, arg2,
1915 								   InvalidOid,
1916 								   key->partcollation[keynum]);
1917 			break;
1918 
1919 		default:
1920 			elog(ERROR, "invalid partitioning strategy");
1921 			break;
1922 	}
1923 
1924 	return result;
1925 }
1926 
1927 /*
1928  * get_qual_for_hash
1929  *
1930  * Returns a CHECK constraint expression to use as a hash partition's
1931  * constraint, given the parent relation and partition bound structure.
1932  *
1933  * The partition constraint for a hash partition is always a call to the
1934  * built-in function satisfies_hash_partition().
1935  */
1936 static List *
get_qual_for_hash(Relation parent,PartitionBoundSpec * spec)1937 get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
1938 {
1939 	PartitionKey key = RelationGetPartitionKey(parent);
1940 	FuncExpr   *fexpr;
1941 	Node	   *relidConst;
1942 	Node	   *modulusConst;
1943 	Node	   *remainderConst;
1944 	List	   *args;
1945 	ListCell   *partexprs_item;
1946 	int			i;
1947 
1948 	/* Fixed arguments. */
1949 	relidConst = (Node *) makeConst(OIDOID,
1950 									-1,
1951 									InvalidOid,
1952 									sizeof(Oid),
1953 									ObjectIdGetDatum(RelationGetRelid(parent)),
1954 									false,
1955 									true);
1956 
1957 	modulusConst = (Node *) makeConst(INT4OID,
1958 									  -1,
1959 									  InvalidOid,
1960 									  sizeof(int32),
1961 									  Int32GetDatum(spec->modulus),
1962 									  false,
1963 									  true);
1964 
1965 	remainderConst = (Node *) makeConst(INT4OID,
1966 										-1,
1967 										InvalidOid,
1968 										sizeof(int32),
1969 										Int32GetDatum(spec->remainder),
1970 										false,
1971 										true);
1972 
1973 	args = list_make3(relidConst, modulusConst, remainderConst);
1974 	partexprs_item = list_head(key->partexprs);
1975 
1976 	/* Add an argument for each key column. */
1977 	for (i = 0; i < key->partnatts; i++)
1978 	{
1979 		Node	   *keyCol;
1980 
1981 		/* Left operand */
1982 		if (key->partattrs[i] != 0)
1983 		{
1984 			keyCol = (Node *) makeVar(1,
1985 									  key->partattrs[i],
1986 									  key->parttypid[i],
1987 									  key->parttypmod[i],
1988 									  key->parttypcoll[i],
1989 									  0);
1990 		}
1991 		else
1992 		{
1993 			keyCol = (Node *) copyObject(lfirst(partexprs_item));
1994 			partexprs_item = lnext(partexprs_item);
1995 		}
1996 
1997 		args = lappend(args, keyCol);
1998 	}
1999 
2000 	fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
2001 						 BOOLOID,
2002 						 args,
2003 						 InvalidOid,
2004 						 InvalidOid,
2005 						 COERCE_EXPLICIT_CALL);
2006 
2007 	return list_make1(fexpr);
2008 }
2009 
2010 /*
2011  * get_qual_for_list
2012  *
2013  * Returns an implicit-AND list of expressions to use as a list partition's
2014  * constraint, given the parent relation and partition bound structure.
2015  *
2016  * The function returns NIL for a default partition when it's the only
2017  * partition since in that case there is no constraint.
2018  */
2019 static List *
get_qual_for_list(Relation parent,PartitionBoundSpec * spec)2020 get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
2021 {
2022 	PartitionKey key = RelationGetPartitionKey(parent);
2023 	List	   *result;
2024 	Expr	   *keyCol;
2025 	Expr	   *opexpr;
2026 	NullTest   *nulltest;
2027 	ListCell   *cell;
2028 	List	   *elems = NIL;
2029 	bool		list_has_null = false;
2030 
2031 	/*
2032 	 * Only single-column list partitioning is supported, so we are worried
2033 	 * only about the partition key with index 0.
2034 	 */
2035 	Assert(key->partnatts == 1);
2036 
2037 	/* Construct Var or expression representing the partition column */
2038 	if (key->partattrs[0] != 0)
2039 		keyCol = (Expr *) makeVar(1,
2040 								  key->partattrs[0],
2041 								  key->parttypid[0],
2042 								  key->parttypmod[0],
2043 								  key->parttypcoll[0],
2044 								  0);
2045 	else
2046 		keyCol = (Expr *) copyObject(linitial(key->partexprs));
2047 
2048 	/*
2049 	 * For default list partition, collect datums for all the partitions. The
2050 	 * default partition constraint should check that the partition key is
2051 	 * equal to none of those.
2052 	 */
2053 	if (spec->is_default)
2054 	{
2055 		int			i;
2056 		int			ndatums = 0;
2057 		PartitionDesc pdesc = RelationGetPartitionDesc(parent);
2058 		PartitionBoundInfo boundinfo = pdesc->boundinfo;
2059 
2060 		if (boundinfo)
2061 		{
2062 			ndatums = boundinfo->ndatums;
2063 
2064 			if (partition_bound_accepts_nulls(boundinfo))
2065 				list_has_null = true;
2066 		}
2067 
2068 		/*
2069 		 * If default is the only partition, there need not be any partition
2070 		 * constraint on it.
2071 		 */
2072 		if (ndatums == 0 && !list_has_null)
2073 			return NIL;
2074 
2075 		for (i = 0; i < ndatums; i++)
2076 		{
2077 			Const	   *val;
2078 
2079 			/*
2080 			 * Construct Const from known-not-null datum.  We must be careful
2081 			 * to copy the value, because our result has to be able to outlive
2082 			 * the relcache entry we're copying from.
2083 			 */
2084 			val = makeConst(key->parttypid[0],
2085 							key->parttypmod[0],
2086 							key->parttypcoll[0],
2087 							key->parttyplen[0],
2088 							datumCopy(*boundinfo->datums[i],
2089 									  key->parttypbyval[0],
2090 									  key->parttyplen[0]),
2091 							false,	/* isnull */
2092 							key->parttypbyval[0]);
2093 
2094 			elems = lappend(elems, val);
2095 		}
2096 	}
2097 	else
2098 	{
2099 		/*
2100 		 * Create list of Consts for the allowed values, excluding any nulls.
2101 		 */
2102 		foreach(cell, spec->listdatums)
2103 		{
2104 			Const	   *val = castNode(Const, lfirst(cell));
2105 
2106 			if (val->constisnull)
2107 				list_has_null = true;
2108 			else
2109 				elems = lappend(elems, copyObject(val));
2110 		}
2111 	}
2112 
2113 	if (elems)
2114 	{
2115 		/*
2116 		 * Generate the operator expression from the non-null partition
2117 		 * values.
2118 		 */
2119 		opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
2120 										keyCol, (Expr *) elems);
2121 	}
2122 	else
2123 	{
2124 		/*
2125 		 * If there are no partition values, we don't need an operator
2126 		 * expression.
2127 		 */
2128 		opexpr = NULL;
2129 	}
2130 
2131 	if (!list_has_null)
2132 	{
2133 		/*
2134 		 * Gin up a "col IS NOT NULL" test that will be AND'd with the main
2135 		 * expression.  This might seem redundant, but the partition routing
2136 		 * machinery needs it.
2137 		 */
2138 		nulltest = makeNode(NullTest);
2139 		nulltest->arg = keyCol;
2140 		nulltest->nulltesttype = IS_NOT_NULL;
2141 		nulltest->argisrow = false;
2142 		nulltest->location = -1;
2143 
2144 		result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
2145 	}
2146 	else
2147 	{
2148 		/*
2149 		 * Gin up a "col IS NULL" test that will be OR'd with the main
2150 		 * expression.
2151 		 */
2152 		nulltest = makeNode(NullTest);
2153 		nulltest->arg = keyCol;
2154 		nulltest->nulltesttype = IS_NULL;
2155 		nulltest->argisrow = false;
2156 		nulltest->location = -1;
2157 
2158 		if (opexpr)
2159 		{
2160 			Expr	   *or;
2161 
2162 			or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
2163 			result = list_make1(or);
2164 		}
2165 		else
2166 			result = list_make1(nulltest);
2167 	}
2168 
2169 	/*
2170 	 * Note that, in general, applying NOT to a constraint expression doesn't
2171 	 * necessarily invert the set of rows it accepts, because NOT (NULL) is
2172 	 * NULL.  However, the partition constraints we construct here never
2173 	 * evaluate to NULL, so applying NOT works as intended.
2174 	 */
2175 	if (spec->is_default)
2176 	{
2177 		result = list_make1(make_ands_explicit(result));
2178 		result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
2179 	}
2180 
2181 	return result;
2182 }
2183 
2184 /*
2185  * get_qual_for_range
2186  *
2187  * Returns an implicit-AND list of expressions to use as a range partition's
2188  * constraint, given the parent relation and partition bound structure.
2189  *
2190  * For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
2191  * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
2192  * generate an expression tree of the following form:
2193  *
2194  *	(a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
2195  *		AND
2196  *	(a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
2197  *		AND
2198  *	(a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
2199  *
2200  * It is often the case that a prefix of lower and upper bound tuples contains
2201  * the same values, for example, (al = au), in which case, we will emit an
2202  * expression tree of the following form:
2203  *
2204  *	(a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
2205  *		AND
2206  *	(a = al)
2207  *		AND
2208  *	(b > bl OR (b = bl AND c >= cl))
2209  *		AND
2210  *	(b < bu OR (b = bu AND c < cu))
2211  *
2212  * If a bound datum is either MINVALUE or MAXVALUE, these expressions are
2213  * simplified using the fact that any value is greater than MINVALUE and less
2214  * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
2215  * true, and we need not emit any expression for it, and the last line becomes
2216  *
2217  *	(b < bu) OR (b = bu), which is simplified to (b <= bu)
2218  *
2219  * In most common cases with only one partition column, say a, the following
2220  * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
2221  *
2222  * For default partition, it returns the negation of the constraints of all
2223  * the other partitions.
2224  *
2225  * External callers should pass for_default as false; we set it to true only
2226  * when recursing.
2227  */
2228 static List *
get_qual_for_range(Relation parent,PartitionBoundSpec * spec,bool for_default)2229 get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
2230 				   bool for_default)
2231 {
2232 	List	   *result = NIL;
2233 	ListCell   *cell1,
2234 			   *cell2,
2235 			   *partexprs_item,
2236 			   *partexprs_item_saved;
2237 	int			i,
2238 				j;
2239 	PartitionRangeDatum *ldatum,
2240 			   *udatum;
2241 	PartitionKey key = RelationGetPartitionKey(parent);
2242 	Expr	   *keyCol;
2243 	Const	   *lower_val,
2244 			   *upper_val;
2245 	List	   *lower_or_arms,
2246 			   *upper_or_arms;
2247 	int			num_or_arms,
2248 				current_or_arm;
2249 	ListCell   *lower_or_start_datum,
2250 			   *upper_or_start_datum;
2251 	bool		need_next_lower_arm,
2252 				need_next_upper_arm;
2253 
2254 	if (spec->is_default)
2255 	{
2256 		List	   *or_expr_args = NIL;
2257 		PartitionDesc pdesc = RelationGetPartitionDesc(parent);
2258 		Oid		   *inhoids = pdesc->oids;
2259 		int			nparts = pdesc->nparts,
2260 					i;
2261 
2262 		for (i = 0; i < nparts; i++)
2263 		{
2264 			Oid			inhrelid = inhoids[i];
2265 			HeapTuple	tuple;
2266 			Datum		datum;
2267 			bool		isnull;
2268 			PartitionBoundSpec *bspec;
2269 
2270 			tuple = SearchSysCache1(RELOID, inhrelid);
2271 			if (!HeapTupleIsValid(tuple))
2272 				elog(ERROR, "cache lookup failed for relation %u", inhrelid);
2273 
2274 			datum = SysCacheGetAttr(RELOID, tuple,
2275 									Anum_pg_class_relpartbound,
2276 									&isnull);
2277 			if (isnull)
2278 				elog(ERROR, "null relpartbound for relation %u", inhrelid);
2279 
2280 			bspec = (PartitionBoundSpec *)
2281 				stringToNode(TextDatumGetCString(datum));
2282 			if (!IsA(bspec, PartitionBoundSpec))
2283 				elog(ERROR, "expected PartitionBoundSpec");
2284 
2285 			if (!bspec->is_default)
2286 			{
2287 				List	   *part_qual;
2288 
2289 				part_qual = get_qual_for_range(parent, bspec, true);
2290 
2291 				/*
2292 				 * AND the constraints of the partition and add to
2293 				 * or_expr_args
2294 				 */
2295 				or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
2296 									   ? makeBoolExpr(AND_EXPR, part_qual, -1)
2297 									   : linitial(part_qual));
2298 			}
2299 			ReleaseSysCache(tuple);
2300 		}
2301 
2302 		if (or_expr_args != NIL)
2303 		{
2304 			Expr	   *other_parts_constr;
2305 
2306 			/*
2307 			 * Combine the constraints obtained for non-default partitions
2308 			 * using OR.  As requested, each of the OR's args doesn't include
2309 			 * the NOT NULL test for partition keys (which is to avoid its
2310 			 * useless repetition).  Add the same now.
2311 			 */
2312 			other_parts_constr =
2313 				makeBoolExpr(AND_EXPR,
2314 							 lappend(get_range_nulltest(key),
2315 									 list_length(or_expr_args) > 1
2316 									 ? makeBoolExpr(OR_EXPR, or_expr_args,
2317 													-1)
2318 									 : linitial(or_expr_args)),
2319 							 -1);
2320 
2321 			/*
2322 			 * Finally, the default partition contains everything *NOT*
2323 			 * contained in the non-default partitions.
2324 			 */
2325 			result = list_make1(makeBoolExpr(NOT_EXPR,
2326 											 list_make1(other_parts_constr), -1));
2327 		}
2328 
2329 		return result;
2330 	}
2331 
2332 	lower_or_start_datum = list_head(spec->lowerdatums);
2333 	upper_or_start_datum = list_head(spec->upperdatums);
2334 	num_or_arms = key->partnatts;
2335 
2336 	/*
2337 	 * If it is the recursive call for default, we skip the get_range_nulltest
2338 	 * to avoid accumulating the NullTest on the same keys for each partition.
2339 	 */
2340 	if (!for_default)
2341 		result = get_range_nulltest(key);
2342 
2343 	/*
2344 	 * Iterate over the key columns and check if the corresponding lower and
2345 	 * upper datums are equal using the btree equality operator for the
2346 	 * column's type.  If equal, we emit single keyCol = common_value
2347 	 * expression.  Starting from the first column for which the corresponding
2348 	 * lower and upper bound datums are not equal, we generate OR expressions
2349 	 * as shown in the function's header comment.
2350 	 */
2351 	i = 0;
2352 	partexprs_item = list_head(key->partexprs);
2353 	partexprs_item_saved = partexprs_item;	/* placate compiler */
2354 	forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
2355 	{
2356 		EState	   *estate;
2357 		MemoryContext oldcxt;
2358 		Expr	   *test_expr;
2359 		ExprState  *test_exprstate;
2360 		Datum		test_result;
2361 		bool		isNull;
2362 
2363 		ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
2364 		udatum = castNode(PartitionRangeDatum, lfirst(cell2));
2365 
2366 		/*
2367 		 * Since get_range_key_properties() modifies partexprs_item, and we
2368 		 * might need to start over from the previous expression in the later
2369 		 * part of this function, save away the current value.
2370 		 */
2371 		partexprs_item_saved = partexprs_item;
2372 
2373 		get_range_key_properties(key, i, ldatum, udatum,
2374 								 &partexprs_item,
2375 								 &keyCol,
2376 								 &lower_val, &upper_val);
2377 
2378 		/*
2379 		 * If either value is NULL, the corresponding partition bound is
2380 		 * either MINVALUE or MAXVALUE, and we treat them as unequal, because
2381 		 * even if they're the same, there is no common value to equate the
2382 		 * key column with.
2383 		 */
2384 		if (!lower_val || !upper_val)
2385 			break;
2386 
2387 		/* Create the test expression */
2388 		estate = CreateExecutorState();
2389 		oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
2390 		test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
2391 										   (Expr *) lower_val,
2392 										   (Expr *) upper_val);
2393 		fix_opfuncids((Node *) test_expr);
2394 		test_exprstate = ExecInitExpr(test_expr, NULL);
2395 		test_result = ExecEvalExprSwitchContext(test_exprstate,
2396 												GetPerTupleExprContext(estate),
2397 												&isNull);
2398 		MemoryContextSwitchTo(oldcxt);
2399 		FreeExecutorState(estate);
2400 
2401 		/* If not equal, go generate the OR expressions */
2402 		if (!DatumGetBool(test_result))
2403 			break;
2404 
2405 		/*
2406 		 * The bounds for the last key column can't be equal, because such a
2407 		 * range partition would never be allowed to be defined (it would have
2408 		 * an empty range otherwise).
2409 		 */
2410 		if (i == key->partnatts - 1)
2411 			elog(ERROR, "invalid range bound specification");
2412 
2413 		/* Equal, so generate keyCol = lower_val expression */
2414 		result = lappend(result,
2415 						 make_partition_op_expr(key, i, BTEqualStrategyNumber,
2416 												keyCol, (Expr *) lower_val));
2417 
2418 		i++;
2419 	}
2420 
2421 	/* First pair of lower_val and upper_val that are not equal. */
2422 	lower_or_start_datum = cell1;
2423 	upper_or_start_datum = cell2;
2424 
2425 	/* OR will have as many arms as there are key columns left. */
2426 	num_or_arms = key->partnatts - i;
2427 	current_or_arm = 0;
2428 	lower_or_arms = upper_or_arms = NIL;
2429 	need_next_lower_arm = need_next_upper_arm = true;
2430 	while (current_or_arm < num_or_arms)
2431 	{
2432 		List	   *lower_or_arm_args = NIL,
2433 				   *upper_or_arm_args = NIL;
2434 
2435 		/* Restart scan of columns from the i'th one */
2436 		j = i;
2437 		partexprs_item = partexprs_item_saved;
2438 
2439 		for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
2440 		{
2441 			PartitionRangeDatum *ldatum_next = NULL,
2442 					   *udatum_next = NULL;
2443 
2444 			ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
2445 			if (lnext(cell1))
2446 				ldatum_next = castNode(PartitionRangeDatum,
2447 									   lfirst(lnext(cell1)));
2448 			udatum = castNode(PartitionRangeDatum, lfirst(cell2));
2449 			if (lnext(cell2))
2450 				udatum_next = castNode(PartitionRangeDatum,
2451 									   lfirst(lnext(cell2)));
2452 			get_range_key_properties(key, j, ldatum, udatum,
2453 									 &partexprs_item,
2454 									 &keyCol,
2455 									 &lower_val, &upper_val);
2456 
2457 			if (need_next_lower_arm && lower_val)
2458 			{
2459 				uint16		strategy;
2460 
2461 				/*
2462 				 * For the non-last columns of this arm, use the EQ operator.
2463 				 * For the last column of this arm, use GT, unless this is the
2464 				 * last column of the whole bound check, or the next bound
2465 				 * datum is MINVALUE, in which case use GE.
2466 				 */
2467 				if (j - i < current_or_arm)
2468 					strategy = BTEqualStrategyNumber;
2469 				else if (j == key->partnatts - 1 ||
2470 						 (ldatum_next &&
2471 						  ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
2472 					strategy = BTGreaterEqualStrategyNumber;
2473 				else
2474 					strategy = BTGreaterStrategyNumber;
2475 
2476 				lower_or_arm_args = lappend(lower_or_arm_args,
2477 											make_partition_op_expr(key, j,
2478 																   strategy,
2479 																   keyCol,
2480 																   (Expr *) lower_val));
2481 			}
2482 
2483 			if (need_next_upper_arm && upper_val)
2484 			{
2485 				uint16		strategy;
2486 
2487 				/*
2488 				 * For the non-last columns of this arm, use the EQ operator.
2489 				 * For the last column of this arm, use LT, unless the next
2490 				 * bound datum is MAXVALUE, in which case use LE.
2491 				 */
2492 				if (j - i < current_or_arm)
2493 					strategy = BTEqualStrategyNumber;
2494 				else if (udatum_next &&
2495 						 udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
2496 					strategy = BTLessEqualStrategyNumber;
2497 				else
2498 					strategy = BTLessStrategyNumber;
2499 
2500 				upper_or_arm_args = lappend(upper_or_arm_args,
2501 											make_partition_op_expr(key, j,
2502 																   strategy,
2503 																   keyCol,
2504 																   (Expr *) upper_val));
2505 			}
2506 
2507 			/*
2508 			 * Did we generate enough of OR's arguments?  First arm considers
2509 			 * the first of the remaining columns, second arm considers first
2510 			 * two of the remaining columns, and so on.
2511 			 */
2512 			++j;
2513 			if (j - i > current_or_arm)
2514 			{
2515 				/*
2516 				 * We must not emit any more arms if the new column that will
2517 				 * be considered is unbounded, or this one was.
2518 				 */
2519 				if (!lower_val || !ldatum_next ||
2520 					ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
2521 					need_next_lower_arm = false;
2522 				if (!upper_val || !udatum_next ||
2523 					udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
2524 					need_next_upper_arm = false;
2525 				break;
2526 			}
2527 		}
2528 
2529 		if (lower_or_arm_args != NIL)
2530 			lower_or_arms = lappend(lower_or_arms,
2531 									list_length(lower_or_arm_args) > 1
2532 									? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
2533 									: linitial(lower_or_arm_args));
2534 
2535 		if (upper_or_arm_args != NIL)
2536 			upper_or_arms = lappend(upper_or_arms,
2537 									list_length(upper_or_arm_args) > 1
2538 									? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
2539 									: linitial(upper_or_arm_args));
2540 
2541 		/* If no work to do in the next iteration, break away. */
2542 		if (!need_next_lower_arm && !need_next_upper_arm)
2543 			break;
2544 
2545 		++current_or_arm;
2546 	}
2547 
2548 	/*
2549 	 * Generate the OR expressions for each of lower and upper bounds (if
2550 	 * required), and append to the list of implicitly ANDed list of
2551 	 * expressions.
2552 	 */
2553 	if (lower_or_arms != NIL)
2554 		result = lappend(result,
2555 						 list_length(lower_or_arms) > 1
2556 						 ? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
2557 						 : linitial(lower_or_arms));
2558 	if (upper_or_arms != NIL)
2559 		result = lappend(result,
2560 						 list_length(upper_or_arms) > 1
2561 						 ? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
2562 						 : linitial(upper_or_arms));
2563 
2564 	/*
2565 	 * As noted above, for non-default, we return list with constant TRUE. If
2566 	 * the result is NIL during the recursive call for default, it implies
2567 	 * this is the only other partition which can hold every value of the key
2568 	 * except NULL. Hence we return the NullTest result skipped earlier.
2569 	 */
2570 	if (result == NIL)
2571 		result = for_default
2572 			? get_range_nulltest(key)
2573 			: list_make1(makeBoolConst(true, false));
2574 
2575 	return result;
2576 }
2577 
2578 /*
2579  * get_range_key_properties
2580  *		Returns range partition key information for a given column
2581  *
2582  * This is a subroutine for get_qual_for_range, and its API is pretty
2583  * specialized to that caller.
2584  *
2585  * Constructs an Expr for the key column (returned in *keyCol) and Consts
2586  * for the lower and upper range limits (returned in *lower_val and
2587  * *upper_val).  For MINVALUE/MAXVALUE limits, NULL is returned instead of
2588  * a Const.  All of these structures are freshly palloc'd.
2589  *
2590  * *partexprs_item points to the cell containing the next expression in
2591  * the key->partexprs list, or NULL.  It may be advanced upon return.
2592  */
2593 static void
get_range_key_properties(PartitionKey key,int keynum,PartitionRangeDatum * ldatum,PartitionRangeDatum * udatum,ListCell ** partexprs_item,Expr ** keyCol,Const ** lower_val,Const ** upper_val)2594 get_range_key_properties(PartitionKey key, int keynum,
2595 						 PartitionRangeDatum *ldatum,
2596 						 PartitionRangeDatum *udatum,
2597 						 ListCell **partexprs_item,
2598 						 Expr **keyCol,
2599 						 Const **lower_val, Const **upper_val)
2600 {
2601 	/* Get partition key expression for this column */
2602 	if (key->partattrs[keynum] != 0)
2603 	{
2604 		*keyCol = (Expr *) makeVar(1,
2605 								   key->partattrs[keynum],
2606 								   key->parttypid[keynum],
2607 								   key->parttypmod[keynum],
2608 								   key->parttypcoll[keynum],
2609 								   0);
2610 	}
2611 	else
2612 	{
2613 		if (*partexprs_item == NULL)
2614 			elog(ERROR, "wrong number of partition key expressions");
2615 		*keyCol = copyObject(lfirst(*partexprs_item));
2616 		*partexprs_item = lnext(*partexprs_item);
2617 	}
2618 
2619 	/* Get appropriate Const nodes for the bounds */
2620 	if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
2621 		*lower_val = castNode(Const, copyObject(ldatum->value));
2622 	else
2623 		*lower_val = NULL;
2624 
2625 	if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
2626 		*upper_val = castNode(Const, copyObject(udatum->value));
2627 	else
2628 		*upper_val = NULL;
2629 }
2630 
2631 /*
2632  * get_range_nulltest
2633  *
2634  * A non-default range partition table does not currently allow partition
2635  * keys to be null, so emit an IS NOT NULL expression for each key column.
2636  */
2637 static List *
get_range_nulltest(PartitionKey key)2638 get_range_nulltest(PartitionKey key)
2639 {
2640 	List	   *result = NIL;
2641 	NullTest   *nulltest;
2642 	ListCell   *partexprs_item;
2643 	int			i;
2644 
2645 	partexprs_item = list_head(key->partexprs);
2646 	for (i = 0; i < key->partnatts; i++)
2647 	{
2648 		Expr	   *keyCol;
2649 
2650 		if (key->partattrs[i] != 0)
2651 		{
2652 			keyCol = (Expr *) makeVar(1,
2653 									  key->partattrs[i],
2654 									  key->parttypid[i],
2655 									  key->parttypmod[i],
2656 									  key->parttypcoll[i],
2657 									  0);
2658 		}
2659 		else
2660 		{
2661 			if (partexprs_item == NULL)
2662 				elog(ERROR, "wrong number of partition key expressions");
2663 			keyCol = copyObject(lfirst(partexprs_item));
2664 			partexprs_item = lnext(partexprs_item);
2665 		}
2666 
2667 		nulltest = makeNode(NullTest);
2668 		nulltest->arg = keyCol;
2669 		nulltest->nulltesttype = IS_NOT_NULL;
2670 		nulltest->argisrow = false;
2671 		nulltest->location = -1;
2672 		result = lappend(result, nulltest);
2673 	}
2674 
2675 	return result;
2676 }
2677 
2678 /*
2679  * compute_partition_hash_value
2680  *
2681  * Compute the hash value for given partition key values.
2682  */
2683 uint64
compute_partition_hash_value(int partnatts,FmgrInfo * partsupfunc,Oid * partcollation,Datum * values,bool * isnull)2684 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
2685 							 Datum *values, bool *isnull)
2686 {
2687 	int			i;
2688 	uint64		rowHash = 0;
2689 	Datum		seed = UInt64GetDatum(HASH_PARTITION_SEED);
2690 
2691 	for (i = 0; i < partnatts; i++)
2692 	{
2693 		/* Nulls are just ignored */
2694 		if (!isnull[i])
2695 		{
2696 			Datum		hash;
2697 
2698 			Assert(OidIsValid(partsupfunc[i].fn_oid));
2699 
2700 			/*
2701 			 * Compute hash for each datum value by calling respective
2702 			 * datatype-specific hash functions of each partition key
2703 			 * attribute.
2704 			 */
2705 			hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i],
2706 									 values[i], seed);
2707 
2708 			/* Form a single 64-bit hash value */
2709 			rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
2710 		}
2711 	}
2712 
2713 	return rowHash;
2714 }
2715 
2716 /*
2717  * satisfies_hash_partition
2718  *
2719  * This is an SQL-callable function for use in hash partition constraints.
2720  * The first three arguments are the parent table OID, modulus, and remainder.
2721  * The remaining arguments are the value of the partitioning columns (or
2722  * expressions); these are hashed and the results are combined into a single
2723  * hash value by calling hash_combine64.
2724  *
2725  * Returns true if remainder produced when this computed single hash value is
2726  * divided by the given modulus is equal to given remainder, otherwise false.
2727  * NB: it's important that this never return null, as the constraint machinery
2728  * would consider that to be a "pass".
2729  *
2730  * See get_qual_for_hash() for usage.
2731  */
2732 Datum
satisfies_hash_partition(PG_FUNCTION_ARGS)2733 satisfies_hash_partition(PG_FUNCTION_ARGS)
2734 {
2735 	typedef struct ColumnsHashData
2736 	{
2737 		Oid			relid;
2738 		int			nkeys;
2739 		Oid			variadic_type;
2740 		int16		variadic_typlen;
2741 		bool		variadic_typbyval;
2742 		char		variadic_typalign;
2743 		Oid			partcollid[PARTITION_MAX_KEYS];
2744 		FmgrInfo	partsupfunc[FLEXIBLE_ARRAY_MEMBER];
2745 	} ColumnsHashData;
2746 	Oid			parentId;
2747 	int			modulus;
2748 	int			remainder;
2749 	Datum		seed = UInt64GetDatum(HASH_PARTITION_SEED);
2750 	ColumnsHashData *my_extra;
2751 	uint64		rowHash = 0;
2752 
2753 	/* Return false if the parent OID, modulus, or remainder is NULL. */
2754 	if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
2755 		PG_RETURN_BOOL(false);
2756 	parentId = PG_GETARG_OID(0);
2757 	modulus = PG_GETARG_INT32(1);
2758 	remainder = PG_GETARG_INT32(2);
2759 
2760 	/* Sanity check modulus and remainder. */
2761 	if (modulus <= 0)
2762 		ereport(ERROR,
2763 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2764 				 errmsg("modulus for hash partition must be an integer value greater than zero")));
2765 	if (remainder < 0)
2766 		ereport(ERROR,
2767 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2768 				 errmsg("remainder for hash partition must be an integer value greater than or equal to zero")));
2769 	if (remainder >= modulus)
2770 		ereport(ERROR,
2771 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2772 				 errmsg("remainder for hash partition must be less than modulus")));
2773 
2774 	/*
2775 	 * Cache hash function information.
2776 	 */
2777 	my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
2778 	if (my_extra == NULL || my_extra->relid != parentId)
2779 	{
2780 		Relation	parent;
2781 		PartitionKey key;
2782 		int			j;
2783 
2784 		/* Open parent relation and fetch partition key info */
2785 		parent = relation_open(parentId, AccessShareLock);
2786 		key = RelationGetPartitionKey(parent);
2787 
2788 		/* Reject parent table that is not hash-partitioned. */
2789 		if (key == NULL || key->strategy != PARTITION_STRATEGY_HASH)
2790 			ereport(ERROR,
2791 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2792 					 errmsg("\"%s\" is not a hash partitioned table",
2793 							get_rel_name(parentId))));
2794 
2795 		if (!get_fn_expr_variadic(fcinfo->flinfo))
2796 		{
2797 			int			nargs = PG_NARGS() - 3;
2798 
2799 			/* complain if wrong number of column values */
2800 			if (key->partnatts != nargs)
2801 				ereport(ERROR,
2802 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2803 						 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
2804 								key->partnatts, nargs)));
2805 
2806 			/* allocate space for our cache */
2807 			fcinfo->flinfo->fn_extra =
2808 				MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
2809 									   offsetof(ColumnsHashData, partsupfunc) +
2810 									   sizeof(FmgrInfo) * nargs);
2811 			my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
2812 			my_extra->relid = parentId;
2813 			my_extra->nkeys = key->partnatts;
2814 			memcpy(my_extra->partcollid, key->partcollation,
2815 				   key->partnatts * sizeof(Oid));
2816 
2817 			/* check argument types and save fmgr_infos */
2818 			for (j = 0; j < key->partnatts; ++j)
2819 			{
2820 				Oid			argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
2821 
2822 				if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
2823 					ereport(ERROR,
2824 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2825 							 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
2826 									j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
2827 
2828 				fmgr_info_copy(&my_extra->partsupfunc[j],
2829 							   &key->partsupfunc[j],
2830 							   fcinfo->flinfo->fn_mcxt);
2831 			}
2832 		}
2833 		else
2834 		{
2835 			ArrayType  *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
2836 
2837 			/* allocate space for our cache -- just one FmgrInfo in this case */
2838 			fcinfo->flinfo->fn_extra =
2839 				MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
2840 									   offsetof(ColumnsHashData, partsupfunc) +
2841 									   sizeof(FmgrInfo));
2842 			my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
2843 			my_extra->relid = parentId;
2844 			my_extra->nkeys = key->partnatts;
2845 			my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
2846 			get_typlenbyvalalign(my_extra->variadic_type,
2847 								 &my_extra->variadic_typlen,
2848 								 &my_extra->variadic_typbyval,
2849 								 &my_extra->variadic_typalign);
2850 			my_extra->partcollid[0] = key->partcollation[0];
2851 
2852 			/* check argument types */
2853 			for (j = 0; j < key->partnatts; ++j)
2854 				if (key->parttypid[j] != my_extra->variadic_type)
2855 					ereport(ERROR,
2856 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2857 							 errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
2858 									j + 1,
2859 									format_type_be(key->parttypid[j]),
2860 									format_type_be(my_extra->variadic_type))));
2861 
2862 			fmgr_info_copy(&my_extra->partsupfunc[0],
2863 						   &key->partsupfunc[0],
2864 						   fcinfo->flinfo->fn_mcxt);
2865 		}
2866 
2867 		/* Hold lock until commit */
2868 		relation_close(parent, NoLock);
2869 	}
2870 
2871 	if (!OidIsValid(my_extra->variadic_type))
2872 	{
2873 		int			nkeys = my_extra->nkeys;
2874 		int			i;
2875 
2876 		/*
2877 		 * For a non-variadic call, neither the number of arguments nor their
2878 		 * types can change across calls, so avoid the expense of rechecking
2879 		 * here.
2880 		 */
2881 
2882 		for (i = 0; i < nkeys; i++)
2883 		{
2884 			Datum		hash;
2885 
2886 			/* keys start from fourth argument of function. */
2887 			int			argno = i + 3;
2888 
2889 			if (PG_ARGISNULL(argno))
2890 				continue;
2891 
2892 			hash = FunctionCall2Coll(&my_extra->partsupfunc[i],
2893 									 my_extra->partcollid[i],
2894 									 PG_GETARG_DATUM(argno),
2895 									 seed);
2896 
2897 			/* Form a single 64-bit hash value */
2898 			rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
2899 		}
2900 	}
2901 	else
2902 	{
2903 		ArrayType  *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
2904 		int			i;
2905 		int			nelems;
2906 		Datum	   *datum;
2907 		bool	   *isnull;
2908 
2909 		deconstruct_array(variadic_array,
2910 						  my_extra->variadic_type,
2911 						  my_extra->variadic_typlen,
2912 						  my_extra->variadic_typbyval,
2913 						  my_extra->variadic_typalign,
2914 						  &datum, &isnull, &nelems);
2915 
2916 		/* complain if wrong number of column values */
2917 		if (nelems != my_extra->nkeys)
2918 			ereport(ERROR,
2919 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2920 					 errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
2921 							my_extra->nkeys, nelems)));
2922 
2923 		for (i = 0; i < nelems; i++)
2924 		{
2925 			Datum		hash;
2926 
2927 			if (isnull[i])
2928 				continue;
2929 
2930 			hash = FunctionCall2Coll(&my_extra->partsupfunc[0],
2931 									 my_extra->partcollid[0],
2932 									 datum[i],
2933 									 seed);
2934 
2935 			/* Form a single 64-bit hash value */
2936 			rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
2937 		}
2938 	}
2939 
2940 	PG_RETURN_BOOL(rowHash % modulus == remainder);
2941 }
2942