1 /*-------------------------------------------------------------------------
2  *
3  * partcache.c
4  *		Support routines for manipulating partition information cached in
5  *		relcache
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *		  src/backend/utils/cache/partcache.c
12  *
13  *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16 
17 #include "access/hash.h"
18 #include "access/heapam.h"
19 #include "access/htup_details.h"
20 #include "access/nbtree.h"
21 #include "catalog/partition.h"
22 #include "catalog/pg_inherits.h"
23 #include "catalog/pg_opclass.h"
24 #include "catalog/pg_partitioned_table.h"
25 #include "miscadmin.h"
26 #include "nodes/makefuncs.h"
27 #include "nodes/nodeFuncs.h"
28 #include "optimizer/clauses.h"
29 #include "optimizer/planner.h"
30 #include "partitioning/partbounds.h"
31 #include "utils/builtins.h"
32 #include "utils/datum.h"
33 #include "utils/lsyscache.h"
34 #include "utils/memutils.h"
35 #include "utils/partcache.h"
36 #include "utils/rel.h"
37 #include "utils/syscache.h"
38 
39 
40 static List *generate_partition_qual(Relation rel);
41 static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
42 static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
43 							   void *arg);
44 static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
45 						   void *arg);
46 
47 
48 /*
49  * RelationBuildPartitionKey
50  *		Build partition key data of relation, and attach to relcache
51  *
52  * Partitioning key data is a complex structure; to avoid complicated logic to
53  * free individual elements whenever the relcache entry is flushed, we give it
54  * its own memory context, a child of CacheMemoryContext, which can easily be
55  * deleted on its own.  To avoid leaking memory in that context in case of an
56  * error partway through this function, the context is initially created as a
57  * child of CurTransactionContext and only re-parented to CacheMemoryContext
58  * at the end, when no further errors are possible.  Also, we don't make this
59  * context the current context except in very brief code sections, out of fear
60  * that some of our callees allocate memory on their own which would be leaked
61  * permanently.
62  */
63 void
64 RelationBuildPartitionKey(Relation relation)
65 {
66 	Form_pg_partitioned_table form;
67 	HeapTuple	tuple;
68 	bool		isnull;
69 	int			i;
70 	PartitionKey key;
71 	AttrNumber *attrs;
72 	oidvector  *opclass;
73 	oidvector  *collation;
74 	ListCell   *partexprs_item;
75 	Datum		datum;
76 	MemoryContext partkeycxt,
77 				oldcxt;
78 	int16		procnum;
79 
80 	tuple = SearchSysCache1(PARTRELID,
81 							ObjectIdGetDatum(RelationGetRelid(relation)));
82 
83 	/*
84 	 * The following happens when we have created our pg_class entry but not
85 	 * the pg_partitioned_table entry yet.
86 	 */
87 	if (!HeapTupleIsValid(tuple))
88 		return;
89 
90 	partkeycxt = AllocSetContextCreate(CurTransactionContext,
91 									   "partition key",
92 									   ALLOCSET_SMALL_SIZES);
93 	MemoryContextCopyAndSetIdentifier(partkeycxt,
94 									  RelationGetRelationName(relation));
95 
96 	key = (PartitionKey) MemoryContextAllocZero(partkeycxt,
97 												sizeof(PartitionKeyData));
98 
99 	/* Fixed-length attributes */
100 	form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
101 	key->strategy = form->partstrat;
102 	key->partnatts = form->partnatts;
103 
104 	/*
105 	 * We can rely on the first variable-length attribute being mapped to the
106 	 * relevant field of the catalog's C struct, because all previous
107 	 * attributes are non-nullable and fixed-length.
108 	 */
109 	attrs = form->partattrs.values;
110 
111 	/* But use the hard way to retrieve further variable-length attributes */
112 	/* Operator class */
113 	datum = SysCacheGetAttr(PARTRELID, tuple,
114 							Anum_pg_partitioned_table_partclass, &isnull);
115 	Assert(!isnull);
116 	opclass = (oidvector *) DatumGetPointer(datum);
117 
118 	/* Collation */
119 	datum = SysCacheGetAttr(PARTRELID, tuple,
120 							Anum_pg_partitioned_table_partcollation, &isnull);
121 	Assert(!isnull);
122 	collation = (oidvector *) DatumGetPointer(datum);
123 
124 	/* Expressions */
125 	datum = SysCacheGetAttr(PARTRELID, tuple,
126 							Anum_pg_partitioned_table_partexprs, &isnull);
127 	if (!isnull)
128 	{
129 		char	   *exprString;
130 		Node	   *expr;
131 
132 		exprString = TextDatumGetCString(datum);
133 		expr = stringToNode(exprString);
134 		pfree(exprString);
135 
136 		/*
137 		 * Run the expressions through const-simplification since the planner
138 		 * will be comparing them to similarly-processed qual clause operands,
139 		 * and may fail to detect valid matches without this step; fix
140 		 * opfuncids while at it.  We don't need to bother with
141 		 * canonicalize_qual() though, because partition expressions should be
142 		 * in canonical form already (ie, no need for OR-merging or constant
143 		 * elimination).
144 		 */
145 		expr = eval_const_expressions(NULL, expr);
146 		fix_opfuncids(expr);
147 
148 		oldcxt = MemoryContextSwitchTo(partkeycxt);
149 		key->partexprs = (List *) copyObject(expr);
150 		MemoryContextSwitchTo(oldcxt);
151 	}
152 
153 	/* Allocate assorted arrays in the partkeycxt, which we'll fill below */
154 	oldcxt = MemoryContextSwitchTo(partkeycxt);
155 	key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
156 	key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
157 	key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
158 	key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
159 
160 	key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
161 	key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
162 	key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
163 	key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
164 	key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
165 	key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
166 	key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
167 	MemoryContextSwitchTo(oldcxt);
168 
169 	/* determine support function number to search for */
170 	procnum = (key->strategy == PARTITION_STRATEGY_HASH) ?
171 		HASHEXTENDED_PROC : BTORDER_PROC;
172 
173 	/* Copy partattrs and fill other per-attribute info */
174 	memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
175 	partexprs_item = list_head(key->partexprs);
176 	for (i = 0; i < key->partnatts; i++)
177 	{
178 		AttrNumber	attno = key->partattrs[i];
179 		HeapTuple	opclasstup;
180 		Form_pg_opclass opclassform;
181 		Oid			funcid;
182 
183 		/* Collect opfamily information */
184 		opclasstup = SearchSysCache1(CLAOID,
185 									 ObjectIdGetDatum(opclass->values[i]));
186 		if (!HeapTupleIsValid(opclasstup))
187 			elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
188 
189 		opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
190 		key->partopfamily[i] = opclassform->opcfamily;
191 		key->partopcintype[i] = opclassform->opcintype;
192 
193 		/* Get a support function for the specified opfamily and datatypes */
194 		funcid = get_opfamily_proc(opclassform->opcfamily,
195 								   opclassform->opcintype,
196 								   opclassform->opcintype,
197 								   procnum);
198 		if (!OidIsValid(funcid))
199 			ereport(ERROR,
200 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
201 					 errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s",
202 							NameStr(opclassform->opcname),
203 							(key->strategy == PARTITION_STRATEGY_HASH) ?
204 							"hash" : "btree",
205 							procnum,
206 							format_type_be(opclassform->opcintype))));
207 
208 		fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt);
209 
210 		/* Collation */
211 		key->partcollation[i] = collation->values[i];
212 
213 		/* Collect type information */
214 		if (attno != 0)
215 		{
216 			Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
217 
218 			key->parttypid[i] = att->atttypid;
219 			key->parttypmod[i] = att->atttypmod;
220 			key->parttypcoll[i] = att->attcollation;
221 		}
222 		else
223 		{
224 			if (partexprs_item == NULL)
225 				elog(ERROR, "wrong number of partition key expressions");
226 
227 			key->parttypid[i] = exprType(lfirst(partexprs_item));
228 			key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
229 			key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
230 
231 			partexprs_item = lnext(partexprs_item);
232 		}
233 		get_typlenbyvalalign(key->parttypid[i],
234 							 &key->parttyplen[i],
235 							 &key->parttypbyval[i],
236 							 &key->parttypalign[i]);
237 
238 		ReleaseSysCache(opclasstup);
239 	}
240 
241 	ReleaseSysCache(tuple);
242 
243 	/* Assert that we're not leaking any old data during assignments below */
244 	Assert(relation->rd_partkeycxt == NULL);
245 	Assert(relation->rd_partkey == NULL);
246 
247 	/*
248 	 * Success --- reparent our context and make the relcache point to the
249 	 * newly constructed key
250 	 */
251 	MemoryContextSetParent(partkeycxt, CacheMemoryContext);
252 	relation->rd_partkeycxt = partkeycxt;
253 	relation->rd_partkey = key;
254 }
255 
256 /*
257  * RelationBuildPartitionDesc
258  *		Form rel's partition descriptor, and store in relcache entry
259  *
260  * Note: the descriptor won't be flushed from the cache by
261  * RelationClearRelation() unless it's changed because of
262  * addition or removal of a partition.  Hence, code holding a lock
263  * that's sufficient to prevent that can assume that rd_partdesc
264  * won't change underneath it.
265  */
266 void
267 RelationBuildPartitionDesc(Relation rel)
268 {
269 	List	   *inhoids,
270 			   *partoids;
271 	Oid		   *oids = NULL;
272 	List	   *boundspecs = NIL;
273 	ListCell   *cell;
274 	int			i,
275 				nparts;
276 	PartitionKey key = RelationGetPartitionKey(rel);
277 	PartitionDesc result;
278 	MemoryContext oldcxt;
279 
280 	int			ndatums = 0;
281 	int			default_index = -1;
282 
283 	/* Hash partitioning specific */
284 	PartitionHashBound **hbounds = NULL;
285 
286 	/* List partitioning specific */
287 	PartitionListValue **all_values = NULL;
288 	int			null_index = -1;
289 
290 	/* Range partitioning specific */
291 	PartitionRangeBound **rbounds = NULL;
292 
293 	/* Get partition oids from pg_inherits */
294 	inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
295 
296 	/* Collect bound spec nodes in a list */
297 	i = 0;
298 	partoids = NIL;
299 	foreach(cell, inhoids)
300 	{
301 		Oid			inhrelid = lfirst_oid(cell);
302 		HeapTuple	tuple;
303 		Datum		datum;
304 		bool		isnull;
305 		Node	   *boundspec;
306 
307 		tuple = SearchSysCache1(RELOID, inhrelid);
308 		if (!HeapTupleIsValid(tuple))
309 			elog(ERROR, "cache lookup failed for relation %u", inhrelid);
310 
311 		datum = SysCacheGetAttr(RELOID, tuple,
312 								Anum_pg_class_relpartbound,
313 								&isnull);
314 		if (isnull)
315 			elog(ERROR, "null relpartbound for relation %u", inhrelid);
316 		boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
317 
318 		/*
319 		 * Sanity check: If the PartitionBoundSpec says this is the default
320 		 * partition, its OID should correspond to whatever's stored in
321 		 * pg_partitioned_table.partdefid; if not, the catalog is corrupt.
322 		 */
323 		if (castNode(PartitionBoundSpec, boundspec)->is_default)
324 		{
325 			Oid			partdefid;
326 
327 			partdefid = get_default_partition_oid(RelationGetRelid(rel));
328 			if (partdefid != inhrelid)
329 				elog(ERROR, "expected partdefid %u, but got %u",
330 					 inhrelid, partdefid);
331 		}
332 
333 		boundspecs = lappend(boundspecs, boundspec);
334 		partoids = lappend_oid(partoids, inhrelid);
335 		ReleaseSysCache(tuple);
336 	}
337 
338 	nparts = list_length(partoids);
339 
340 	if (nparts > 0)
341 	{
342 		oids = (Oid *) palloc(nparts * sizeof(Oid));
343 		i = 0;
344 		foreach(cell, partoids)
345 			oids[i++] = lfirst_oid(cell);
346 
347 		/* Convert from node to the internal representation */
348 		if (key->strategy == PARTITION_STRATEGY_HASH)
349 		{
350 			ndatums = nparts;
351 			hbounds = (PartitionHashBound **)
352 				palloc(nparts * sizeof(PartitionHashBound *));
353 
354 			i = 0;
355 			foreach(cell, boundspecs)
356 			{
357 				PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
358 													lfirst(cell));
359 
360 				if (spec->strategy != PARTITION_STRATEGY_HASH)
361 					elog(ERROR, "invalid strategy in partition bound spec");
362 
363 				hbounds[i] = (PartitionHashBound *)
364 					palloc(sizeof(PartitionHashBound));
365 
366 				hbounds[i]->modulus = spec->modulus;
367 				hbounds[i]->remainder = spec->remainder;
368 				hbounds[i]->index = i;
369 				i++;
370 			}
371 
372 			/* Sort all the bounds in ascending order */
373 			qsort(hbounds, nparts, sizeof(PartitionHashBound *),
374 				  qsort_partition_hbound_cmp);
375 		}
376 		else if (key->strategy == PARTITION_STRATEGY_LIST)
377 		{
378 			List	   *non_null_values = NIL;
379 
380 			/*
381 			 * Create a unified list of non-null values across all partitions.
382 			 */
383 			i = 0;
384 			null_index = -1;
385 			foreach(cell, boundspecs)
386 			{
387 				PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
388 													lfirst(cell));
389 				ListCell   *c;
390 
391 				if (spec->strategy != PARTITION_STRATEGY_LIST)
392 					elog(ERROR, "invalid strategy in partition bound spec");
393 
394 				/*
395 				 * Note the index of the partition bound spec for the default
396 				 * partition. There's no datum to add to the list of non-null
397 				 * datums for this partition.
398 				 */
399 				if (spec->is_default)
400 				{
401 					default_index = i;
402 					i++;
403 					continue;
404 				}
405 
406 				foreach(c, spec->listdatums)
407 				{
408 					Const	   *val = castNode(Const, lfirst(c));
409 					PartitionListValue *list_value = NULL;
410 
411 					if (!val->constisnull)
412 					{
413 						list_value = (PartitionListValue *)
414 							palloc0(sizeof(PartitionListValue));
415 						list_value->index = i;
416 						list_value->value = val->constvalue;
417 					}
418 					else
419 					{
420 						/*
421 						 * Never put a null into the values array, flag
422 						 * instead for the code further down below where we
423 						 * construct the actual relcache struct.
424 						 */
425 						if (null_index != -1)
426 							elog(ERROR, "found null more than once");
427 						null_index = i;
428 					}
429 
430 					if (list_value)
431 						non_null_values = lappend(non_null_values,
432 												  list_value);
433 				}
434 
435 				i++;
436 			}
437 
438 			ndatums = list_length(non_null_values);
439 
440 			/*
441 			 * Collect all list values in one array. Alongside the value, we
442 			 * also save the index of partition the value comes from.
443 			 */
444 			all_values = (PartitionListValue **) palloc(ndatums *
445 														sizeof(PartitionListValue *));
446 			i = 0;
447 			foreach(cell, non_null_values)
448 			{
449 				PartitionListValue *src = lfirst(cell);
450 
451 				all_values[i] = (PartitionListValue *)
452 					palloc(sizeof(PartitionListValue));
453 				all_values[i]->value = src->value;
454 				all_values[i]->index = src->index;
455 				i++;
456 			}
457 
458 			qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
459 					  qsort_partition_list_value_cmp, (void *) key);
460 		}
461 		else if (key->strategy == PARTITION_STRATEGY_RANGE)
462 		{
463 			int			k;
464 			PartitionRangeBound **all_bounds,
465 					   *prev;
466 
467 			all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
468 														  sizeof(PartitionRangeBound *));
469 
470 			/*
471 			 * Create a unified list of range bounds across all the
472 			 * partitions.
473 			 */
474 			i = ndatums = 0;
475 			foreach(cell, boundspecs)
476 			{
477 				PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
478 													lfirst(cell));
479 				PartitionRangeBound *lower,
480 						   *upper;
481 
482 				if (spec->strategy != PARTITION_STRATEGY_RANGE)
483 					elog(ERROR, "invalid strategy in partition bound spec");
484 
485 				/*
486 				 * Note the index of the partition bound spec for the default
487 				 * partition. There's no datum to add to the allbounds array
488 				 * for this partition.
489 				 */
490 				if (spec->is_default)
491 				{
492 					default_index = i++;
493 					continue;
494 				}
495 
496 				lower = make_one_partition_rbound(key, i, spec->lowerdatums,
497 												  true);
498 				upper = make_one_partition_rbound(key, i, spec->upperdatums,
499 												  false);
500 				all_bounds[ndatums++] = lower;
501 				all_bounds[ndatums++] = upper;
502 				i++;
503 			}
504 
505 			Assert(ndatums == nparts * 2 ||
506 				   (default_index != -1 && ndatums == (nparts - 1) * 2));
507 
508 			/* Sort all the bounds in ascending order */
509 			qsort_arg(all_bounds, ndatums,
510 					  sizeof(PartitionRangeBound *),
511 					  qsort_partition_rbound_cmp,
512 					  (void *) key);
513 
514 			/* Save distinct bounds from all_bounds into rbounds. */
515 			rbounds = (PartitionRangeBound **)
516 				palloc(ndatums * sizeof(PartitionRangeBound *));
517 			k = 0;
518 			prev = NULL;
519 			for (i = 0; i < ndatums; i++)
520 			{
521 				PartitionRangeBound *cur = all_bounds[i];
522 				bool		is_distinct = false;
523 				int			j;
524 
525 				/* Is the current bound distinct from the previous one? */
526 				for (j = 0; j < key->partnatts; j++)
527 				{
528 					Datum		cmpval;
529 
530 					if (prev == NULL || cur->kind[j] != prev->kind[j])
531 					{
532 						is_distinct = true;
533 						break;
534 					}
535 
536 					/*
537 					 * If the bounds are both MINVALUE or MAXVALUE, stop now
538 					 * and treat them as equal, since any values after this
539 					 * point must be ignored.
540 					 */
541 					if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
542 						break;
543 
544 					cmpval = FunctionCall2Coll(&key->partsupfunc[j],
545 											   key->partcollation[j],
546 											   cur->datums[j],
547 											   prev->datums[j]);
548 					if (DatumGetInt32(cmpval) != 0)
549 					{
550 						is_distinct = true;
551 						break;
552 					}
553 				}
554 
555 				/*
556 				 * Only if the bound is distinct save it into a temporary
557 				 * array i.e. rbounds which is later copied into boundinfo
558 				 * datums array.
559 				 */
560 				if (is_distinct)
561 					rbounds[k++] = all_bounds[i];
562 
563 				prev = cur;
564 			}
565 
566 			/* Update ndatums to hold the count of distinct datums. */
567 			ndatums = k;
568 		}
569 		else
570 			elog(ERROR, "unexpected partition strategy: %d",
571 				 (int) key->strategy);
572 	}
573 
574 	/* Assert we aren't about to leak any old data structure */
575 	Assert(rel->rd_pdcxt == NULL);
576 	Assert(rel->rd_partdesc == NULL);
577 
578 	/*
579 	 * Now build the actual relcache partition descriptor.  Note that the
580 	 * order of operations here is fairly critical.  If we fail partway
581 	 * through this code, we won't have leaked memory because the rd_pdcxt is
582 	 * attached to the relcache entry immediately, so it'll be freed whenever
583 	 * the entry is rebuilt or destroyed.  However, we don't assign to
584 	 * rd_partdesc until the cached data structure is fully complete and
585 	 * valid, so that no other code might try to use it.
586 	 */
587 	rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
588 										  "partition descriptor",
589 										  ALLOCSET_SMALL_SIZES);
590 	MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt,
591 									  RelationGetRelationName(rel));
592 
593 	oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
594 
595 	result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
596 	result->nparts = nparts;
597 	if (nparts > 0)
598 	{
599 		PartitionBoundInfo boundinfo;
600 		int		   *mapping;
601 		int			next_index = 0;
602 
603 		result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
604 
605 		boundinfo = (PartitionBoundInfoData *)
606 			palloc0(sizeof(PartitionBoundInfoData));
607 		boundinfo->strategy = key->strategy;
608 		boundinfo->default_index = -1;
609 		boundinfo->ndatums = ndatums;
610 		boundinfo->null_index = -1;
611 		boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
612 
613 		/* Initialize mapping array with invalid values */
614 		mapping = (int *) palloc(sizeof(int) * nparts);
615 		for (i = 0; i < nparts; i++)
616 			mapping[i] = -1;
617 
618 		switch (key->strategy)
619 		{
620 			case PARTITION_STRATEGY_HASH:
621 				{
622 					/* Moduli are stored in ascending order */
623 					int			greatest_modulus = hbounds[ndatums - 1]->modulus;
624 
625 					boundinfo->nindexes = greatest_modulus;
626 					boundinfo->indexes = (int *) palloc(greatest_modulus *
627 														sizeof(int));
628 
629 					for (i = 0; i < greatest_modulus; i++)
630 						boundinfo->indexes[i] = -1;
631 
632 					for (i = 0; i < nparts; i++)
633 					{
634 						int			modulus = hbounds[i]->modulus;
635 						int			remainder = hbounds[i]->remainder;
636 
637 						boundinfo->datums[i] = (Datum *) palloc(2 *
638 																sizeof(Datum));
639 						boundinfo->datums[i][0] = Int32GetDatum(modulus);
640 						boundinfo->datums[i][1] = Int32GetDatum(remainder);
641 
642 						while (remainder < greatest_modulus)
643 						{
644 							/* overlap? */
645 							Assert(boundinfo->indexes[remainder] == -1);
646 							boundinfo->indexes[remainder] = i;
647 							remainder += modulus;
648 						}
649 
650 						mapping[hbounds[i]->index] = i;
651 						pfree(hbounds[i]);
652 					}
653 					pfree(hbounds);
654 					break;
655 				}
656 
657 			case PARTITION_STRATEGY_LIST:
658 				{
659 					boundinfo->nindexes = ndatums;
660 					boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
661 
662 					/*
663 					 * Copy values.  Indexes of individual values are mapped
664 					 * to canonical values so that they match for any two list
665 					 * partitioned tables with same number of partitions and
666 					 * same lists per partition.  One way to canonicalize is
667 					 * to assign the index in all_values[] of the smallest
668 					 * value of each partition, as the index of all of the
669 					 * partition's values.
670 					 */
671 					for (i = 0; i < ndatums; i++)
672 					{
673 						boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
674 						boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
675 															key->parttypbyval[0],
676 															key->parttyplen[0]);
677 
678 						/* If the old index has no mapping, assign one */
679 						if (mapping[all_values[i]->index] == -1)
680 							mapping[all_values[i]->index] = next_index++;
681 
682 						boundinfo->indexes[i] = mapping[all_values[i]->index];
683 					}
684 
685 					/*
686 					 * If null-accepting partition has no mapped index yet,
687 					 * assign one.  This could happen if such partition
688 					 * accepts only null and hence not covered in the above
689 					 * loop which only handled non-null values.
690 					 */
691 					if (null_index != -1)
692 					{
693 						Assert(null_index >= 0);
694 						if (mapping[null_index] == -1)
695 							mapping[null_index] = next_index++;
696 						boundinfo->null_index = mapping[null_index];
697 					}
698 
699 					/* Assign mapped index for the default partition. */
700 					if (default_index != -1)
701 					{
702 						/*
703 						 * The default partition accepts any value not
704 						 * specified in the lists of other partitions, hence
705 						 * it should not get mapped index while assigning
706 						 * those for non-null datums.
707 						 */
708 						Assert(default_index >= 0 &&
709 							   mapping[default_index] == -1);
710 						mapping[default_index] = next_index++;
711 						boundinfo->default_index = mapping[default_index];
712 					}
713 
714 					/* All partitions must now have a valid mapping */
715 					Assert(next_index == nparts);
716 					break;
717 				}
718 
719 			case PARTITION_STRATEGY_RANGE:
720 				{
721 					boundinfo->kind = (PartitionRangeDatumKind **)
722 						palloc(ndatums *
723 							   sizeof(PartitionRangeDatumKind *));
724 					boundinfo->nindexes = ndatums + 1;
725 					boundinfo->indexes = (int *) palloc((ndatums + 1) *
726 														sizeof(int));
727 
728 					for (i = 0; i < ndatums; i++)
729 					{
730 						int			j;
731 
732 						boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
733 																sizeof(Datum));
734 						boundinfo->kind[i] = (PartitionRangeDatumKind *)
735 							palloc(key->partnatts *
736 								   sizeof(PartitionRangeDatumKind));
737 						for (j = 0; j < key->partnatts; j++)
738 						{
739 							if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
740 								boundinfo->datums[i][j] =
741 									datumCopy(rbounds[i]->datums[j],
742 											  key->parttypbyval[j],
743 											  key->parttyplen[j]);
744 							boundinfo->kind[i][j] = rbounds[i]->kind[j];
745 						}
746 
747 						/*
748 						 * There is no mapping for invalid indexes.
749 						 *
750 						 * Any lower bounds in the rbounds array have invalid
751 						 * indexes assigned, because the values between the
752 						 * previous bound (if there is one) and this (lower)
753 						 * bound are not part of the range of any existing
754 						 * partition.
755 						 */
756 						if (rbounds[i]->lower)
757 							boundinfo->indexes[i] = -1;
758 						else
759 						{
760 							int			orig_index = rbounds[i]->index;
761 
762 							/* If the old index has no mapping, assign one */
763 							if (mapping[orig_index] == -1)
764 								mapping[orig_index] = next_index++;
765 
766 							boundinfo->indexes[i] = mapping[orig_index];
767 						}
768 					}
769 
770 					/* Assign mapped index for the default partition. */
771 					if (default_index != -1)
772 					{
773 						Assert(default_index >= 0 && mapping[default_index] == -1);
774 						mapping[default_index] = next_index++;
775 						boundinfo->default_index = mapping[default_index];
776 					}
777 					boundinfo->indexes[i] = -1;
778 					break;
779 				}
780 
781 			default:
782 				elog(ERROR, "unexpected partition strategy: %d",
783 					 (int) key->strategy);
784 		}
785 
786 		result->boundinfo = boundinfo;
787 
788 		/*
789 		 * Now assign OIDs from the original array into mapped indexes of the
790 		 * result array.  Order of OIDs in the former is defined by the
791 		 * catalog scan that retrieved them, whereas that in the latter is
792 		 * defined by canonicalized representation of the partition bounds.
793 		 */
794 		for (i = 0; i < nparts; i++)
795 			result->oids[mapping[i]] = oids[i];
796 		pfree(mapping);
797 	}
798 
799 	MemoryContextSwitchTo(oldcxt);
800 	rel->rd_partdesc = result;
801 }
802 
803 /*
804  * RelationGetPartitionQual
805  *
806  * Returns a list of partition quals
807  */
808 List *
809 RelationGetPartitionQual(Relation rel)
810 {
811 	/* Quick exit */
812 	if (!rel->rd_rel->relispartition)
813 		return NIL;
814 
815 	return generate_partition_qual(rel);
816 }
817 
818 /*
819  * get_partition_qual_relid
820  *
821  * Returns an expression tree describing the passed-in relation's partition
822  * constraint.
823  *
824  * If the relation is not found, or is not a partition, or there is no
825  * partition constraint, return NULL.  We must guard against the first two
826  * cases because this supports a SQL function that could be passed any OID.
827  * The last case can happen even if relispartition is true, when a default
828  * partition is the only partition.
829  */
830 Expr *
831 get_partition_qual_relid(Oid relid)
832 {
833 	Expr	   *result = NULL;
834 
835 	/* Do the work only if this relation exists and is a partition. */
836 	if (get_rel_relispartition(relid))
837 	{
838 		Relation	rel = relation_open(relid, AccessShareLock);
839 		List	   *and_args;
840 
841 		and_args = generate_partition_qual(rel);
842 
843 		/* Convert implicit-AND list format to boolean expression */
844 		if (and_args == NIL)
845 			result = NULL;
846 		else if (list_length(and_args) > 1)
847 			result = makeBoolExpr(AND_EXPR, and_args, -1);
848 		else
849 			result = linitial(and_args);
850 
851 		/* Keep the lock, to allow safe deparsing against the rel by caller. */
852 		relation_close(rel, NoLock);
853 	}
854 
855 	return result;
856 }
857 
858 /*
859  * generate_partition_qual
860  *
861  * Generate partition predicate from rel's partition bound expression. The
862  * function returns a NIL list if there is no predicate.
863  *
864  * We cache a copy of the result in the relcache entry, after constructing
865  * it using the caller's context.  This approach avoids leaking any data
866  * into long-lived cache contexts, especially if we fail partway through.
867  */
868 static List *
869 generate_partition_qual(Relation rel)
870 {
871 	HeapTuple	tuple;
872 	MemoryContext oldcxt;
873 	Datum		boundDatum;
874 	bool		isnull;
875 	List	   *my_qual = NIL,
876 			   *result = NIL;
877 	Relation	parent;
878 	bool		found_whole_row;
879 
880 	/* Guard against stack overflow due to overly deep partition tree */
881 	check_stack_depth();
882 
883 	/* If we already cached the result, just return a copy */
884 	if (rel->rd_partcheckvalid)
885 		return copyObject(rel->rd_partcheck);
886 
887 	/* Grab at least an AccessShareLock on the parent table */
888 	parent = relation_open(get_partition_parent(RelationGetRelid(rel)),
889 						   AccessShareLock);
890 
891 	/* Get pg_class.relpartbound */
892 	tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
893 	if (!HeapTupleIsValid(tuple))
894 		elog(ERROR, "cache lookup failed for relation %u",
895 			 RelationGetRelid(rel));
896 
897 	boundDatum = SysCacheGetAttr(RELOID, tuple,
898 								 Anum_pg_class_relpartbound,
899 								 &isnull);
900 	if (!isnull)
901 	{
902 		PartitionBoundSpec *bound;
903 
904 		bound = castNode(PartitionBoundSpec,
905 						 stringToNode(TextDatumGetCString(boundDatum)));
906 
907 		my_qual = get_qual_from_partbound(rel, parent, bound);
908 	}
909 
910 	ReleaseSysCache(tuple);
911 
912 	/* Add the parent's quals to the list (if any) */
913 	if (parent->rd_rel->relispartition)
914 		result = list_concat(generate_partition_qual(parent), my_qual);
915 	else
916 		result = my_qual;
917 
918 	/*
919 	 * Change Vars to have partition's attnos instead of the parent's. We do
920 	 * this after we concatenate the parent's quals, because we want every Var
921 	 * in it to bear this relation's attnos. It's safe to assume varno = 1
922 	 * here.
923 	 */
924 	result = map_partition_varattnos(result, 1, rel, parent,
925 									 &found_whole_row);
926 	/* There can never be a whole-row reference here */
927 	if (found_whole_row)
928 		elog(ERROR, "unexpected whole-row reference found in partition key");
929 
930 	/* Assert that we're not leaking any old data during assignments below */
931 	Assert(rel->rd_partcheckcxt == NULL);
932 	Assert(rel->rd_partcheck == NIL);
933 
934 	/*
935 	 * Save a copy in the relcache.  The order of these operations is fairly
936 	 * critical to avoid memory leaks and ensure that we don't leave a corrupt
937 	 * relcache entry if we fail partway through copyObject.
938 	 *
939 	 * If, as is definitely possible, the partcheck list is NIL, then we do
940 	 * not need to make a context to hold it.
941 	 */
942 	if (result != NIL)
943 	{
944 		rel->rd_partcheckcxt = AllocSetContextCreate(CacheMemoryContext,
945 													 "partition constraint",
946 													 ALLOCSET_SMALL_SIZES);
947 		MemoryContextCopyAndSetIdentifier(rel->rd_partcheckcxt,
948 										  RelationGetRelationName(rel));
949 		oldcxt = MemoryContextSwitchTo(rel->rd_partcheckcxt);
950 		rel->rd_partcheck = copyObject(result);
951 		MemoryContextSwitchTo(oldcxt);
952 	}
953 	else
954 		rel->rd_partcheck = NIL;
955 	rel->rd_partcheckvalid = true;
956 
957 	/* Keep the parent locked until commit */
958 	relation_close(parent, NoLock);
959 
960 	/* Return the working copy to the caller */
961 	return result;
962 }
963 
964 /*
965  * qsort_partition_hbound_cmp
966  *
967  * We sort hash bounds by modulus, then by remainder.
968  */
969 static int32
970 qsort_partition_hbound_cmp(const void *a, const void *b)
971 {
972 	PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
973 	PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
974 
975 	return partition_hbound_cmp(h1->modulus, h1->remainder,
976 								h2->modulus, h2->remainder);
977 }
978 
979 /*
980  * qsort_partition_list_value_cmp
981  *
982  * Compare two list partition bound datums
983  */
984 static int32
985 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
986 {
987 	Datum		val1 = (*(const PartitionListValue **) a)->value,
988 				val2 = (*(const PartitionListValue **) b)->value;
989 	PartitionKey key = (PartitionKey) arg;
990 
991 	return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
992 										   key->partcollation[0],
993 										   val1, val2));
994 }
995 
996 /* Used when sorting range bounds across all range partitions */
997 static int32
998 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
999 {
1000 	PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
1001 	PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
1002 	PartitionKey key = (PartitionKey) arg;
1003 
1004 	return partition_rbound_cmp(key->partnatts, key->partsupfunc,
1005 								key->partcollation, b1->datums, b1->kind,
1006 								b1->lower, b2);
1007 }
1008