/*-------------------------------------------------------------------------
 *
 * execPartition.c
 *	  Support routines for partitioning.
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/execPartition.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "partitioning/partbounds.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"


/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to one of its partitions
 *
 *	reldesc		Relation descriptor of the table
 *	key			Partition key information of the table
 *	keystate	Execution state required for expressions in the partition key
 *	partdesc	Partition descriptor of the table
 *	tupslot		A standalone TupleTableSlot initialized with this table's tuple
 *				descriptor
 *	tupmap		TupleConversionMap to convert from the parent's rowtype to
 *				this table's rowtype (when extracting the partition key of a
 *				tuple just before routing it through this table)
 *	indexes		Array with partdesc->nparts members (for details on what
 *				individual members represent, see how they are set in
 *				get_partition_dispatch_recurse())
 *-----------------------
 */
typedef struct PartitionDispatchData
{
	Relation	reldesc;
	PartitionKey key;
	List	   *keystate;		/* list of ExprState */
	PartitionDesc partdesc;
	TupleTableSlot *tupslot;
	TupleConversionMap *tupmap;
	int		   *indexes;
} PartitionDispatchData;


static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
								 int *num_parted, List **leaf_part_oids);
static void get_partition_dispatch_recurse(Relation rel, Relation parent,
							   List **pds, List **leaf_part_oids);
static void FormPartitionKeyDatum(PartitionDispatch pd,
					  TupleTableSlot *slot,
					  EState *estate,
					  Datum *values,
					  bool *isnull);
static int get_partition_for_tuple(Relation relation, Datum *values,
						bool *isnull);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
									 Datum *values,
									 bool *isnull,
									 int maxfieldlen);
static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
static void ExecInitPruningContext(PartitionPruneContext *context,
					   Oid reloid,
					   List *pruning_steps,
					   PlanState *planstate);
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
							   PartitionedRelPruningData *pprune,
							   bool initial_prune,
							   Bitmapset **validsubplans);

/*
 * ExecSetupPartitionTupleRouting - sets up information needed during
 * tuple routing for partitioned tables, encapsulates it in
 * PartitionTupleRouting, and returns it.
 *
 * Note that all the relations in the partition tree are locked using the
 * RowExclusiveLock mode upon return from this function.
 *
 * While we allocate the arrays of pointers to ResultRelInfo and
 * TupleConversionMap for all partitions here, the objects themselves are
 * lazily allocated for a given partition only if a tuple is actually routed
 * to it; see ExecInitPartitionInfo.  However, if the function is invoked for
 * update tuple routing, the caller will already have initialized
 * ResultRelInfos for some of the partitions; those are reused and assigned
 * to their respective slots in the aforementioned array.  For such
 * partitions, we delay setting up objects such as TupleConversionMap until
 * they are actually chosen as the partitions to route tuples to.  See
 * ExecPrepareTupleRouting.
 */
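/*
 * For example (hypothetical scenario): a single-row INSERT into a table
 * with 1000 partitions allocates all three arrays here, but builds a
 * ResultRelInfo for just the one partition the row is routed to; the other
 * 999 entries of proute->partitions stay NULL.
 */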
PartitionTupleRouting *
ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, ResultRelInfo *rootResultRelInfo)
{
	List	   *leaf_parts;
	ListCell   *cell;
	int			i;
	ResultRelInfo *update_rri = NULL;
	int			num_update_rri = 0,
				update_rri_index = 0;
	PartitionTupleRouting *proute;
	int			nparts;
	ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;

	/*
	 * Get the information about the partition tree after locking all the
	 * partitions.
	 */
	(void) find_all_inheritors(RelationGetRelid(rootResultRelInfo->ri_RelationDesc),
							   RowExclusiveLock, NULL);
	proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
	proute->partition_dispatch_info =
		RelationGetPartitionDispatchInfo(rootResultRelInfo->ri_RelationDesc,
										 &proute->num_dispatch,
										 &leaf_parts);
	proute->num_partitions = nparts = list_length(leaf_parts);
	proute->partitions =
		(ResultRelInfo **) palloc(nparts * sizeof(ResultRelInfo *));
	proute->parent_child_tupconv_maps =
		(TupleConversionMap **) palloc0(nparts * sizeof(TupleConversionMap *));
	proute->partition_oids = (Oid *) palloc(nparts * sizeof(Oid));

	/* Set up details specific to the type of tuple routing we are doing. */
	if (node && node->operation == CMD_UPDATE)
	{
		update_rri = mtstate->resultRelInfo;
		num_update_rri = list_length(node->plans);
		proute->subplan_partition_offsets =
			palloc(num_update_rri * sizeof(int));
		proute->num_subplan_partition_offsets = num_update_rri;

		/*
		 * We need an additional tuple slot for storing transient tuples that
		 * are converted to the root table descriptor.
		 */
		proute->root_tuple_slot = MakeTupleTableSlot(NULL);
	}

	/*
	 * Initialize an empty slot that will be used to manipulate tuples of any
	 * given partition's rowtype.  It is attached to the caller-specified node
	 * (such as ModifyTableState) and released when the node finishes
	 * processing.
	 */
	proute->partition_tuple_slot = MakeTupleTableSlot(NULL);

	i = 0;
	foreach(cell, leaf_parts)
	{
		ResultRelInfo *leaf_part_rri = NULL;
		Oid			leaf_oid = lfirst_oid(cell);

		proute->partition_oids[i] = leaf_oid;

		/*
		 * If the leaf partition is already present in the per-subplan result
		 * rels, we reuse it rather than initialize a new result rel.  The
		 * per-subplan resultrels and the resultrels of the leaf partitions
		 * are both in the same canonical order, so as we walk the list of
		 * leaf partition OIDs we just keep track of the next per-subplan
		 * result rel to look for among the leaf partition resultrels.
		 */
		if (update_rri_index < num_update_rri &&
			RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid)
		{
			leaf_part_rri = &update_rri[update_rri_index];

			/*
			 * This is required in order to convert the partition's tuple to
			 * be compatible with the root partitioned table's tuple
			 * descriptor.  When generating the per-subplan result rels, this
			 * was not set.
			 */
			leaf_part_rri->ri_RootResultRelInfo = rootResultRelInfo;

			/* Remember the subplan offset for this ResultRelInfo */
			proute->subplan_partition_offsets[update_rri_index] = i;

			update_rri_index++;
		}

		proute->partitions[i] = leaf_part_rri;
		i++;
	}

	/*
	 * For UPDATE, we should have found all the per-subplan resultrels in the
	 * leaf partitions.  (If this is an INSERT, both values will be zero.)
	 */
	Assert(update_rri_index == num_update_rri);

	return proute;
}

/*
 * ExecFindPartition -- Find a leaf partition in the partition tree rooted
 * at parent, for the heap tuple contained in *slot
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition key(s)
 *
 * If no leaf partition is found, this routine errors out with the appropriate
 * error message, else it returns the leaf partition sequence number
 * as an index into the array of (ResultRelInfos of) all leaf partitions in
 * the partition tree.
 */
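/*
 * For example (hypothetical layout): a return value of 2 means the tuple
 * belongs to the third leaf partition, i.e. the one whose ResultRelInfo is
 * (or will lazily be) stored at index 2 of that array.
 */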
int
ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
				  TupleTableSlot *slot, EState *estate)
{
	int			result;
	Datum		values[PARTITION_MAX_KEYS];
	bool		isnull[PARTITION_MAX_KEYS];
	Relation	rel;
	PartitionDispatch dispatch;
	ExprContext *ecxt = GetPerTupleExprContext(estate);
	TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
	TupleTableSlot *myslot = NULL;
	MemoryContext	oldcxt;
	HeapTuple		tuple;

	/* use per-tuple context here to avoid leaking memory */
	oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

	/*
	 * First check the root table's partition constraint, if any.  No point in
	 * routing the tuple if it doesn't belong in the root table itself.
	 */
	if (resultRelInfo->ri_PartitionCheck)
		ExecPartitionCheck(resultRelInfo, slot, estate, true);

	/* start with the root partitioned table */
	tuple = ExecFetchSlotTuple(slot);
	dispatch = pd[0];
	while (true)
	{
		PartitionDesc partdesc;
		TupleConversionMap *map = dispatch->tupmap;
		int			cur_index = -1;

		rel = dispatch->reldesc;
		partdesc = RelationGetPartitionDesc(rel);

		/*
		 * Use the slot dedicated to this level's parent.  All parents except
		 * the root have a dedicated slot.  For the root parent, we just use
		 * the original input slot.
		 */
		myslot = dispatch->tupslot == NULL ? slot : dispatch->tupslot;

		/*
		 * If the tuple layout of this level's parent is different from the
		 * previous level's parent, convert the tuple and store it into its
		 * dedicated slot.
		 */
		if (myslot != slot)
		{
			if (map != NULL)
				tuple = do_convert_tuple(tuple, map);
			ExecStoreTuple(tuple, myslot, InvalidBuffer, true);
		}
		else
			Assert(map == NULL);

		/*
		 * Extract the partition key from the tuple.  The expression
		 * evaluation machinery that FormPartitionKeyDatum() invokes expects
		 * ecxt_scantuple to point to the correct tuple slot.  The slot might
		 * have changed from what was used for the parent table if the table
		 * at the current partitioning level has a different tuple descriptor
		 * from the parent's.  So update ecxt_scantuple accordingly.
		 */
		ecxt->ecxt_scantuple = myslot;
		FormPartitionKeyDatum(dispatch, myslot, estate, values, isnull);

		/*
		 * Nothing for get_partition_for_tuple() to do if there are no
		 * partitions to begin with.
		 */
		if (partdesc->nparts == 0)
		{
			result = -1;
			break;
		}

		cur_index = get_partition_for_tuple(rel, values, isnull);

		/*
		 * cur_index < 0 means we failed to find a partition of this parent.
		 * cur_index >= 0 means we either found the leaf partition, or the
		 * next parent to find a partition of.
		 */
		if (cur_index < 0)
		{
			result = -1;
			break;
		}
		else if (dispatch->indexes[cur_index] >= 0)
		{
			result = dispatch->indexes[cur_index];
			/* success! */
			break;
		}
		else
		{
			/* move down one level */
			dispatch = pd[-dispatch->indexes[cur_index]];

			/*
			 * Make a copy of the tuple for the next level of routing.  If
			 * this level's parent had a dedicated slot, we must clear its
			 * tuple too, which would be the copy we made in the last
			 * iteration.
			 */
			tuple = ExecCopySlotTuple(myslot);
			if (myslot != slot)
				ExecClearTuple(myslot);
		}
	}

	/* A partition was not found. */
	if (result < 0)
	{
		char	   *val_desc;

		val_desc = ExecBuildSlotPartitionKeyDescription(rel,
														values, isnull, 64);
		Assert(OidIsValid(RelationGetRelid(rel)));
		ereport(ERROR,
				(errcode(ERRCODE_CHECK_VIOLATION),
				 errmsg("no partition of relation \"%s\" found for row",
						RelationGetRelationName(rel)),
				 val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
	}

	/* Release the tuple in the lowest parent's dedicated slot. */
	if (myslot != slot)
		ExecClearTuple(myslot);

	MemoryContextSwitchTo(oldcxt);
	ecxt->ecxt_scantuple = ecxt_scantuple_old;

	return result;
}

/*
 * ExecInitPartitionInfo
 *		Initialize ResultRelInfo and other information for a partition
 *
 * Returns the ResultRelInfo
 */
ResultRelInfo *
ExecInitPartitionInfo(ModifyTableState *mtstate,
					  ResultRelInfo *rootResultRelInfo,
					  PartitionTupleRouting *proute,
					  EState *estate, int partidx)
{
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	Relation	partrel;
	int			firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
	Relation	firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
	ResultRelInfo *leaf_part_rri;
	MemoryContext oldContext;
	AttrNumber *part_attnos = NULL;
	bool		found_whole_row;

	/*
	 * We locked all the partitions in ExecSetupPartitionTupleRouting
	 * including the leaf partitions.
	 */
	partrel = heap_open(proute->partition_oids[partidx], NoLock);

	/*
	 * Keep ResultRelInfo and other information for this partition in the
	 * per-query memory context so they'll survive throughout the query.
	 */
	oldContext = MemoryContextSwitchTo(estate->es_query_cxt);

	leaf_part_rri = makeNode(ResultRelInfo);
	InitResultRelInfo(leaf_part_rri,
					  partrel,
					  0,
					  rootResultRelInfo,
					  estate->es_instrument);

	/*
	 * Verify result relation is a valid target for an INSERT.  An UPDATE of a
	 * partition-key becomes a DELETE+INSERT operation, so this check is still
	 * required when the operation is CMD_UPDATE.
	 */
	CheckValidResultRel(leaf_part_rri, CMD_INSERT);

	/*
	 * Since we've just initialized this ResultRelInfo, it's not in any list
	 * attached to the estate as yet.  Add it, so that it can be found later.
	 *
	 * Note that the entries in this list appear in no predetermined order,
	 * because partition result rels are initialized as and when they're
	 * needed.
	 */
	estate->es_tuple_routing_result_relations =
		lappend(estate->es_tuple_routing_result_relations,
				leaf_part_rri);

	/*
	 * Open partition indices.  The user may have asked to check for conflicts
	 * within this leaf partition and do "nothing" instead of throwing an
	 * error.  Be prepared in that case by initializing the index information
	 * needed by ExecInsert() to perform speculative insertions.
	 */
	if (partrel->rd_rel->relhasindex &&
		leaf_part_rri->ri_IndexRelationDescs == NULL)
		ExecOpenIndices(leaf_part_rri,
						(node != NULL &&
						 node->onConflictAction != ONCONFLICT_NONE));

	/*
	 * Build WITH CHECK OPTION constraints for the partition.  Note that we
	 * didn't build the withCheckOptionList for partitions within the planner,
	 * but simple translation of varattnos will suffice.  This only occurs for
	 * the INSERT case or in the case of UPDATE tuple routing where we didn't
	 * find a result rel to reuse in ExecSetupPartitionTupleRouting().
	 */
	if (node && node->withCheckOptionLists != NIL)
	{
		List	   *wcoList;
		List	   *wcoExprs = NIL;
		ListCell   *ll;

		/*
		 * In the case of INSERT on a partitioned table, there is only one
		 * plan.  Likewise, there is only one WCO list, not one per partition.
		 * For UPDATE, there are as many WCO lists as there are plans.
		 */
		Assert((node->operation == CMD_INSERT &&
				list_length(node->withCheckOptionLists) == 1 &&
				list_length(node->plans) == 1) ||
			   (node->operation == CMD_UPDATE &&
				list_length(node->withCheckOptionLists) ==
				list_length(node->plans)));

		/*
		 * Use the WCO list of the first plan as a reference to calculate
		 * attno's for the WCO list of this partition.  In the INSERT case,
		 * that refers to the root partitioned table, whereas in the UPDATE
		 * tuple routing case, that refers to the first partition in the
		 * mtstate->resultRelInfo array.  In any case, both that relation and
		 * this partition should have the same columns, so we should be able
		 * to map attributes successfully.
		 */
		wcoList = linitial(node->withCheckOptionLists);

		/*
		 * Convert Vars in it to contain this partition's attribute numbers.
		 */
		part_attnos =
			convert_tuples_by_name_map(RelationGetDescr(partrel),
									   RelationGetDescr(firstResultRel),
									   gettext_noop("could not convert row type"));
		wcoList = (List *)
			map_variable_attnos((Node *) wcoList,
								firstVarno, 0,
								part_attnos,
								RelationGetDescr(firstResultRel)->natts,
								RelationGetForm(partrel)->reltype,
								&found_whole_row);
		/* We ignore the value of found_whole_row. */

		foreach(ll, wcoList)
		{
			WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
			ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
											   &mtstate->ps);

			wcoExprs = lappend(wcoExprs, wcoExpr);
		}

		leaf_part_rri->ri_WithCheckOptions = wcoList;
		leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
	}

	/*
	 * Build the RETURNING projection for the partition.  Note that we didn't
	 * build the returningList for partitions within the planner, but simple
	 * translation of varattnos will suffice.  This only occurs for the INSERT
	 * case or in the case of UPDATE tuple routing where we didn't find a
	 * result rel to reuse in ExecSetupPartitionTupleRouting().
	 */
	if (node && node->returningLists != NIL)
	{
		TupleTableSlot *slot;
		ExprContext *econtext;
		List	   *returningList;

		/* See the comment above for WCO lists. */
		Assert((node->operation == CMD_INSERT &&
				list_length(node->returningLists) == 1 &&
				list_length(node->plans) == 1) ||
			   (node->operation == CMD_UPDATE &&
				list_length(node->returningLists) ==
				list_length(node->plans)));

		/*
		 * Use the RETURNING list of the first plan as a reference to
		 * calculate attno's for the RETURNING list of this partition.  See
		 * the comment above for WCO lists for more details on why this is
		 * okay.
		 */
		returningList = linitial(node->returningLists);

		/*
		 * Convert Vars in it to contain this partition's attribute numbers.
		 */
		if (part_attnos == NULL)
			part_attnos =
				convert_tuples_by_name_map(RelationGetDescr(partrel),
										   RelationGetDescr(firstResultRel),
										   gettext_noop("could not convert row type"));
		returningList = (List *)
			map_variable_attnos((Node *) returningList,
								firstVarno, 0,
								part_attnos,
								RelationGetDescr(firstResultRel)->natts,
								RelationGetForm(partrel)->reltype,
								&found_whole_row);
		/* We ignore the value of found_whole_row. */

		leaf_part_rri->ri_returningList = returningList;

		/*
		 * Initialize the projection itself.
		 *
		 * Use the slot and the expression context that would have been set up
		 * in ExecInitModifyTable() for projection's output.
		 */
		Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
		slot = mtstate->ps.ps_ResultTupleSlot;
		Assert(mtstate->ps.ps_ExprContext != NULL);
		econtext = mtstate->ps.ps_ExprContext;
		leaf_part_rri->ri_projectReturning =
			ExecBuildProjectionInfo(returningList, econtext, slot,
									&mtstate->ps, RelationGetDescr(partrel));
	}

	/* Set up information needed for routing tuples to the partition. */
	ExecInitRoutingInfo(mtstate, estate, proute, leaf_part_rri, partidx);

	/*
	 * If there is an ON CONFLICT clause, initialize state for it.
	 */
	if (node && node->onConflictAction != ONCONFLICT_NONE)
	{
		TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx];
		TupleDesc	partrelDesc = RelationGetDescr(partrel);
		ExprContext *econtext = mtstate->ps.ps_ExprContext;
		ListCell   *lc;
		List	   *arbiterIndexes = NIL;

		/*
		 * If there is a list of arbiter indexes, map it to a list of indexes
		 * in the partition.  We do that by scanning the partition's index
		 * list and searching for ancestry relationships to each index in the
		 * ancestor table.
		 */
		if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) > 0)
		{
			List	   *childIdxs;

			childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);

			foreach(lc, childIdxs)
			{
				Oid			childIdx = lfirst_oid(lc);
				List	   *ancestors;
				ListCell   *lc2;

				ancestors = get_partition_ancestors(childIdx);
				foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
				{
					if (list_member_oid(ancestors, lfirst_oid(lc2)))
						arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
				}
				list_free(ancestors);
			}
		}

		/*
		 * If the resulting lists are of unequal length, something is wrong.
		 * (This shouldn't happen, since arbiter index selection should not
		 * pick up an invalid index.)
		 */
		if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
			list_length(arbiterIndexes))
			elog(ERROR, "invalid arbiter index list");
		leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;

		/*
		 * In the DO UPDATE case, we have some more state to initialize.
		 */
		if (node->onConflictAction == ONCONFLICT_UPDATE)
		{
			Assert(node->onConflictSet != NIL);
			Assert(rootResultRelInfo->ri_onConflict != NULL);

			/*
			 * If the partition's tuple descriptor matches exactly the root
			 * parent (the common case), we can simply re-use the parent's ON
			 * CONFLICT SET state, skipping a bunch of work.  Otherwise, we
			 * need to create state specific to this partition.
			 */
			if (map == NULL)
				leaf_part_rri->ri_onConflict = rootResultRelInfo->ri_onConflict;
			else
			{
				OnConflictSetState *onconfl = makeNode(OnConflictSetState);
				List	   *onconflset;
				bool		found_whole_row;

				leaf_part_rri->ri_onConflict = onconfl;

				/*
				 * Translate expressions in onConflictSet to account for
				 * different attribute numbers.  For that, map partition
				 * varattnos twice: first to catch the EXCLUDED
				 * pseudo-relation (INNER_VAR), and second to handle the main
				 * target relation (firstVarno).
				 */
				onconflset = copyObject(node->onConflictSet);
				if (part_attnos == NULL)
					part_attnos =
						convert_tuples_by_name_map(RelationGetDescr(partrel),
												   RelationGetDescr(firstResultRel),
												   gettext_noop("could not convert row type"));
				onconflset = (List *)
					map_variable_attnos((Node *) onconflset,
										INNER_VAR, 0,
										part_attnos,
										RelationGetDescr(firstResultRel)->natts,
										RelationGetForm(partrel)->reltype,
										&found_whole_row);
				/* We ignore the value of found_whole_row. */
				onconflset = (List *)
					map_variable_attnos((Node *) onconflset,
										firstVarno, 0,
										part_attnos,
										RelationGetDescr(firstResultRel)->natts,
										RelationGetForm(partrel)->reltype,
										&found_whole_row);
				/* We ignore the value of found_whole_row. */

				/* Finally, reorder the tlist to match the partition. */
				onconflset = adjust_partition_tlist(onconflset, map);

				/*
				 * Build UPDATE SET's projection info.  The user of this
				 * projection is responsible for setting the slot's tupdesc!
				 * We set aside a tupdesc that's good for the common case of a
				 * partition that's tupdesc-equal to the partitioned table;
				 * partitions of different tupdescs must generate their own.
				 */
				ExecSetSlotDescriptor(mtstate->mt_conflproj, partrelDesc);
				onconfl->oc_ProjInfo =
					ExecBuildProjectionInfoExt(onconflset, econtext,
											   mtstate->mt_conflproj, false,
											   &mtstate->ps, partrelDesc);
				onconfl->oc_ProjTupdesc = partrelDesc;

				/*
				 * If there is a WHERE clause, initialize state where it will
				 * be evaluated, mapping the attribute numbers appropriately.
				 * As with onConflictSet, we need to map partition varattnos
				 * to the partition's tupdesc.
				 */
				if (node->onConflictWhere)
				{
					List	   *clause;

					clause = copyObject((List *) node->onConflictWhere);
					clause = (List *)
						map_variable_attnos((Node *) clause,
											INNER_VAR, 0,
											part_attnos,
											RelationGetDescr(firstResultRel)->natts,
											RelationGetForm(partrel)->reltype,
											&found_whole_row);
					/* We ignore the value of found_whole_row. */
					clause = (List *)
						map_variable_attnos((Node *) clause,
											firstVarno, 0,
											part_attnos,
											RelationGetDescr(firstResultRel)->natts,
											RelationGetForm(partrel)->reltype,
											&found_whole_row);
					/* We ignore the value of found_whole_row. */
					onconfl->oc_WhereClause =
						ExecInitQual((List *) clause, &mtstate->ps);
				}
			}
		}
	}

	Assert(proute->partitions[partidx] == NULL);
	proute->partitions[partidx] = leaf_part_rri;

	MemoryContextSwitchTo(oldContext);

	return leaf_part_rri;
}

/*
 * ExecInitRoutingInfo
 *		Set up information needed for routing tuples to a leaf partition
 */
void
ExecInitRoutingInfo(ModifyTableState *mtstate,
					EState *estate,
					PartitionTupleRouting *proute,
					ResultRelInfo *partRelInfo,
					int partidx)
{
	ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo;
	MemoryContext oldContext;

	/*
	 * Switch into per-query memory context.
	 */
	oldContext = MemoryContextSwitchTo(estate->es_query_cxt);

	/*
	 * Set up a tuple conversion map to convert a tuple routed to the
	 * partition from the parent's type to the partition's.
	 */
	proute->parent_child_tupconv_maps[partidx] =
		convert_tuples_by_name(RelationGetDescr(rootRelInfo->ri_RelationDesc),
							   RelationGetDescr(partRelInfo->ri_RelationDesc),
							   gettext_noop("could not convert row type"));

	/*
	 * If the partition is a foreign table, let the FDW init itself for
	 * routing tuples to the partition.
	 */
	if (partRelInfo->ri_FdwRoutine != NULL &&
		partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
		partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);

	MemoryContextSwitchTo(oldContext);

	partRelInfo->ri_PartitionReadyForRouting = true;
}

/*
 * ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition
 * child-to-root tuple conversion map array.
 *
 * This map is required for capturing transition tuples when the target table
 * is a partitioned table. For a tuple that is routed by an INSERT or UPDATE,
 * we need to convert it from the leaf partition to the target table
 * descriptor.
 */
void
ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute)
{
	Assert(proute != NULL);

	/*
	 * These array elements get filled up with maps on an on-demand basis.
	 * Initially just set all of them to NULL.
	 */
	proute->child_parent_tupconv_maps =
		(TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) *
										proute->num_partitions);

	/* Likewise for this array; all values are initialized to false. */
	proute->child_parent_map_not_required =
		(bool *) palloc0(sizeof(bool) * proute->num_partitions);
}

/*
 * TupConvMapForLeaf -- Get the tuple conversion map for a given leaf partition
 * index.
 */
TupleConversionMap *
TupConvMapForLeaf(PartitionTupleRouting *proute,
				  ResultRelInfo *rootRelInfo, int leaf_index)
{
	ResultRelInfo **resultRelInfos = proute->partitions;
	TupleConversionMap **map;
	TupleDesc	tupdesc;

	/* Don't call this if we're not supposed to be using this type of map. */
	Assert(proute->child_parent_tupconv_maps != NULL);

	/* If it's already known that we don't need a map, return NULL. */
	if (proute->child_parent_map_not_required[leaf_index])
		return NULL;

	/* If we've already got a map, return it. */
	map = &proute->child_parent_tupconv_maps[leaf_index];
	if (*map != NULL)
		return *map;

	/* No map yet; try to create one. */
	tupdesc = RelationGetDescr(resultRelInfos[leaf_index]->ri_RelationDesc);
	*map =
		convert_tuples_by_name(tupdesc,
							   RelationGetDescr(rootRelInfo->ri_RelationDesc),
							   gettext_noop("could not convert row type"));

	/* If it turns out no map is needed, remember for next time. */
	proute->child_parent_map_not_required[leaf_index] = (*map == NULL);

	return *map;
}

/*
 * ConvertPartitionTupleSlot -- convenience function for tuple conversion.
 * The tuple, if converted, is stored in new_slot, and *p_my_slot is
 * updated to point to it.  new_slot typically should be one of the
 * dedicated partition tuple slots.  If map is NULL, *p_my_slot is not changed.
 *
 * Returns the converted tuple, unless map is NULL, in which case the
 * original tuple is returned unmodified.
 */
HeapTuple
ConvertPartitionTupleSlot(TupleConversionMap *map,
						  HeapTuple tuple,
						  TupleTableSlot *new_slot,
						  TupleTableSlot **p_my_slot)
{
	if (!map)
		return tuple;

	tuple = do_convert_tuple(tuple, map);

	/*
	 * Change the partition tuple slot descriptor, as per converted tuple.
	 */
	*p_my_slot = new_slot;
	Assert(new_slot != NULL);
	ExecSetSlotDescriptor(new_slot, map->outdesc);
	ExecStoreTuple(tuple, new_slot, InvalidBuffer, true);

	return tuple;
}

/*
 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
 * routing.
 *
 * Close all the partitioned tables, leaf partitions, and their indices.
 */
void
ExecCleanupTupleRouting(ModifyTableState *mtstate,
						PartitionTupleRouting *proute)
{
	int			i;
	int			subplan_index = 0;

	/*
	 * Remember, proute->partition_dispatch_info[0] corresponds to the root
	 * partitioned table, which we must not try to close, because it is the
	 * main target table of the query that will be closed by callers such as
	 * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
	 * partitioned table.
	 */
	for (i = 1; i < proute->num_dispatch; i++)
	{
		PartitionDispatch pd = proute->partition_dispatch_info[i];

		heap_close(pd->reldesc, NoLock);
		ExecDropSingleTupleTableSlot(pd->tupslot);
	}

	for (i = 0; i < proute->num_partitions; i++)
	{
		ResultRelInfo *resultRelInfo = proute->partitions[i];

		/* skip further processing for uninitialized partitions */
		if (resultRelInfo == NULL)
			continue;

		/* Allow any FDWs to shut down if they've been exercised */
		if (resultRelInfo->ri_PartitionReadyForRouting &&
			resultRelInfo->ri_FdwRoutine != NULL &&
			resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
			resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
														   resultRelInfo);

		/*
		 * If this result rel is one of the UPDATE subplan result rels, let
		 * ExecEndPlan() close it. For INSERT or COPY,
		 * proute->subplan_partition_offsets will always be NULL. Note that
		 * the subplan_partition_offsets array and the partitions array have
		 * the partitions in the same order. So, while we iterate over
		 * partitions array, we also iterate over the
		 * subplan_partition_offsets array in order to figure out which of the
		 * result rels are present in the UPDATE subplans.
		 */
		if (proute->subplan_partition_offsets &&
			subplan_index < proute->num_subplan_partition_offsets &&
			proute->subplan_partition_offsets[subplan_index] == i)
		{
			subplan_index++;
			continue;
		}

		ExecCloseIndices(resultRelInfo);
		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
	}

	/* Release the standalone partition tuple descriptors, if any */
	if (proute->root_tuple_slot)
		ExecDropSingleTupleTableSlot(proute->root_tuple_slot);
	if (proute->partition_tuple_slot)
		ExecDropSingleTupleTableSlot(proute->partition_tuple_slot);
}

/*
 * RelationGetPartitionDispatchInfo
 *		Returns information necessary to route tuples down a partition tree
 *
 * The number of elements in the returned array (that is, the number of
 * PartitionDispatch objects for the partitioned tables in the partition tree)
 * is returned in *num_parted and a list of the OIDs of all the leaf
 * partitions of rel is returned in *leaf_part_oids.
 *
 * All the relations in the partition tree (including 'rel') must have been
 * locked (using at least the AccessShareLock) by the caller.
 */
static PartitionDispatch *
RelationGetPartitionDispatchInfo(Relation rel,
								 int *num_parted, List **leaf_part_oids)
{
	List	   *pdlist = NIL;
	PartitionDispatchData **pd;
	ListCell   *lc;
	int			i;

	Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);

	*num_parted = 0;
	*leaf_part_oids = NIL;

	get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
	*num_parted = list_length(pdlist);
	pd = (PartitionDispatchData **) palloc(*num_parted *
										   sizeof(PartitionDispatchData *));
	i = 0;
	foreach(lc, pdlist)
	{
		pd[i++] = lfirst(lc);
	}

	return pd;
}

/*
 * get_partition_dispatch_recurse
 *		Recursively expand partition tree rooted at rel
 *
 * As the partition tree is expanded in a depth-first manner, we maintain two
 * global lists: of PartitionDispatch objects corresponding to partitioned
 * tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
 *
 * Note that the order of OIDs of leaf partitions in leaf_part_oids matches
 * the order in which the planner's expand_partitioned_rtentry() processes
 * them.  It's not necessarily the case that the offsets match up exactly,
 * because constraint exclusion might prune away some partitions on the
 * planner side, whereas we'll always have the complete list; but unpruned
 * partitions will appear in the same order in the plan as they are returned
 * here.
 */
static void
get_partition_dispatch_recurse(Relation rel, Relation parent,
							   List **pds, List **leaf_part_oids)
{
	TupleDesc	tupdesc = RelationGetDescr(rel);
	PartitionDesc partdesc = RelationGetPartitionDesc(rel);
	PartitionKey partkey = RelationGetPartitionKey(rel);
	PartitionDispatch pd;
	int			i;

	check_stack_depth();

	/* Build a PartitionDispatch for this table and add it to *pds. */
	pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
	*pds = lappend(*pds, pd);
	pd->reldesc = rel;
	pd->key = partkey;
	pd->keystate = NIL;
	pd->partdesc = partdesc;
	if (parent != NULL)
	{
		/*
		 * For every partitioned table other than the root, we must store a
		 * tuple table slot initialized with its tuple descriptor and a tuple
		 * conversion map to convert a tuple from its parent's rowtype to its
		 * own. That is to make sure that we are looking at the correct row
		 * using the correct tuple descriptor when computing its partition key
		 * for tuple routing.
		 */
		pd->tupslot = MakeSingleTupleTableSlot(tupdesc);
		pd->tupmap = convert_tuples_by_name(RelationGetDescr(parent),
											tupdesc,
											gettext_noop("could not convert row type"));
	}
	else
	{
		/* Not required for the root partitioned table */
		pd->tupslot = NULL;
		pd->tupmap = NULL;
	}

	/*
	 * Go look at each partition of this table.  If it's a leaf partition,
	 * simply add its OID to *leaf_part_oids.  If it's a partitioned table,
	 * recursively call get_partition_dispatch_recurse(), so that its
	 * partitions are processed as well and a corresponding PartitionDispatch
	 * object gets added to *pds.
	 *
	 * The 'indexes' array is used when searching for a partition matching a
	 * given tuple.  The actual value we store here depends on whether the
	 * array element belongs to a leaf partition or a subpartitioned table.
	 * For leaf partitions we store the index into *leaf_part_oids, and for
	 * sub-partitioned tables we store a negative version of the index into
	 * the *pds list.  Both indexes are 0-based, but the first element of the
	 * *pds list is the root partition, so 0 always means the first leaf. When
	 * searching, if we see a negative value, the search must continue in the
	 * corresponding sub-partition; otherwise, we've identified the correct
	 * partition.
	 */
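	/*
	 * For illustration (a hypothetical tree, not from this file): suppose
	 * root "p" has leaf partition "p1" and subpartitioned table "p2", which
	 * in turn has leaves "p2a" and "p2b".  Then *pds ends up as [p, p2],
	 * *leaf_part_oids as [p1, p2a, p2b], p's indexes as {0, -1} (leaf 0,
	 * then descend to pds[1]), and p2's indexes as {1, 2}.
	 */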
	pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
	for (i = 0; i < partdesc->nparts; i++)
	{
		Oid			partrelid = partdesc->oids[i];

		if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
		{
			*leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
			pd->indexes[i] = list_length(*leaf_part_oids) - 1;
		}
		else
		{
			/*
			 * We assume all tables in the partition tree were already locked
			 * by the caller.
			 */
			Relation	partrel = heap_open(partrelid, NoLock);

			pd->indexes[i] = -list_length(*pds);
			get_partition_dispatch_recurse(partrel, rel, pds, leaf_part_oids);
		}
	}
}

/* ----------------
 *		FormPartitionKeyDatum
 *			Construct values[] and isnull[] arrays for the partition key
 *			of a tuple.
 *
 *	pd				Partition dispatch object of the partitioned table
 *	slot			Heap tuple from which to extract partition key
 *	estate			executor state for evaluating any partition key
 *					expressions (must be non-NULL)
 *	values			Array of partition key Datums (output area)
 *	isnull			Array of is-null indicators (output area)
 *
 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
 * the heap tuple passed in.
 * ----------------
 */
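/*
 * For example (hypothetical table): with PARTITION BY RANGE (a, (b + 1)),
 * values[0]/isnull[0] are taken directly from column "a" of the tuple,
 * while values[1]/isnull[1] are computed by evaluating the expression
 * b + 1 in the per-tuple expression context.
 */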
static void
FormPartitionKeyDatum(PartitionDispatch pd,
					  TupleTableSlot *slot,
					  EState *estate,
					  Datum *values,
					  bool *isnull)
{
	ListCell   *partexpr_item;
	int			i;

	if (pd->key->partexprs != NIL && pd->keystate == NIL)
	{
		/* Check caller has set up context correctly */
		Assert(estate != NULL &&
			   GetPerTupleExprContext(estate)->ecxt_scantuple == slot);

		/* First time through, set up expression evaluation state */
		pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
	}

	partexpr_item = list_head(pd->keystate);
	for (i = 0; i < pd->key->partnatts; i++)
	{
		AttrNumber	keycol = pd->key->partattrs[i];
		Datum		datum;
		bool		isNull;

		if (keycol != 0)
		{
			/* Plain column; get the value directly from the heap tuple */
			datum = slot_getattr(slot, keycol, &isNull);
		}
		else
		{
			/* Expression; need to evaluate it */
			if (partexpr_item == NULL)
				elog(ERROR, "wrong number of partition key expressions");
			datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
											  GetPerTupleExprContext(estate),
											  &isNull);
			partexpr_item = lnext(partexpr_item);
		}
		values[i] = datum;
		isnull[i] = isNull;
	}

	if (partexpr_item != NULL)
		elog(ERROR, "wrong number of partition key expressions");
}

/*
 * get_partition_for_tuple
 *		Finds the partition of relation which accepts the partition key
 *		specified in values and isnull
 *
 * Return value is the index of the partition (>= 0 and < partdesc->nparts)
 * if one is found, or -1 if none is found.
 */
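/*
 * For example (hypothetical bounds): under hash partitioning with greatest
 * modulus 4, a row whose computed hash is 10 maps to
 * boundinfo->indexes[10 % 4], i.e. the partition for remainder 2; under
 * RANGE (a) with bounds (10) and (20), a row with a = 15 lands in the
 * partition covering [10, 20).
 */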
static int
get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
{
	int			bound_offset;
	int			part_index = -1;
	PartitionKey key = RelationGetPartitionKey(relation);
	PartitionDesc partdesc = RelationGetPartitionDesc(relation);
	PartitionBoundInfo boundinfo = partdesc->boundinfo;

	/* Route as appropriate based on partitioning strategy. */
	switch (key->strategy)
	{
		case PARTITION_STRATEGY_HASH:
			{
				uint64		rowHash;

				rowHash = compute_partition_hash_value(key->partnatts,
													   key->partsupfunc,
													   values, isnull);

				part_index = boundinfo->indexes[rowHash % boundinfo->nindexes];
			}
			break;

		case PARTITION_STRATEGY_LIST:
			if (isnull[0])
			{
				if (partition_bound_accepts_nulls(boundinfo))
					part_index = boundinfo->null_index;
			}
			else
			{
				bool		equal = false;

				bound_offset = partition_list_bsearch(key->partsupfunc,
													  key->partcollation,
													  boundinfo,
													  values[0], &equal);
				if (bound_offset >= 0 && equal)
					part_index = boundinfo->indexes[bound_offset];
			}
			break;

		case PARTITION_STRATEGY_RANGE:
			{
				bool		equal = false,
							range_partkey_has_null = false;
				int			i;

				/*
				 * No range includes NULL, so this will be accepted by the
				 * default partition if there is one, and otherwise rejected.
				 */
				for (i = 0; i < key->partnatts; i++)
				{
					if (isnull[i])
					{
						range_partkey_has_null = true;
						break;
					}
				}

				if (!range_partkey_has_null)
				{
					bound_offset = partition_range_datum_bsearch(key->partsupfunc,
																 key->partcollation,
																 boundinfo,
																 key->partnatts,
																 values,
																 &equal);

					/*
					 * The bound at bound_offset is less than or equal to the
					 * tuple value, so the bound at offset+1 is the upper
					 * bound of the partition we're looking for, if there
					 * actually exists one.
					 */
					part_index = boundinfo->indexes[bound_offset + 1];
				}
			}
			break;

		default:
			elog(ERROR, "unexpected partition strategy: %d",
				 (int) key->strategy);
	}

	/*
	 * part_index < 0 means we failed to find a partition of this parent. Use
	 * the default partition, if there is one.
	 */
	if (part_index < 0)
		part_index = boundinfo->default_index;

	return part_index;
}

/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * This works very much like BuildIndexValueDescription() and is currently
 * used for building error messages when ExecFindPartition() fails to find a
 * partition for a row.
 */
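/*
 * For example, for a table partitioned by (a, b), the description built
 * below might read: (a, b) = (42, null).
 */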
static char *
ExecBuildSlotPartitionKeyDescription(Relation rel,
									 Datum *values,
									 bool *isnull,
									 int maxfieldlen)
{
	StringInfoData buf;
	PartitionKey key = RelationGetPartitionKey(rel);
	int			partnatts = get_partition_natts(key);
	int			i;
	Oid			relid = RelationGetRelid(rel);
	AclResult	aclresult;

	if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
		return NULL;

	/* If the user has table-level access, just go build the description. */
	aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
	if (aclresult != ACLCHECK_OK)
	{
		/*
		 * Step through the columns of the partition key and make sure the
		 * user has SELECT rights on all of them.
		 */
		for (i = 0; i < partnatts; i++)
		{
			AttrNumber	attnum = get_partition_col_attnum(key, i);

			/*
			 * If this partition key column is an expression, we return no
			 * detail rather than try to figure out what column(s) the
			 * expression includes and whether the user has SELECT rights on
			 * them.
			 */
			if (attnum == InvalidAttrNumber ||
				pg_attribute_aclcheck(relid, attnum, GetUserId(),
									  ACL_SELECT) != ACLCHECK_OK)
				return NULL;
		}
	}

	initStringInfo(&buf);
	appendStringInfo(&buf, "(%s) = (",
					 pg_get_partkeydef_columns(relid, true));

	for (i = 0; i < partnatts; i++)
	{
		char	   *val;
		int			vallen;

		if (isnull[i])
			val = "null";
		else
		{
			Oid			foutoid;
			bool		typisvarlena;

			getTypeOutputInfo(get_partition_col_typid(key, i),
							  &foutoid, &typisvarlena);
			val = OidOutputFunctionCall(foutoid, values[i]);
		}

		if (i > 0)
			appendStringInfoString(&buf, ", ");

		/* truncate if needed */
		vallen = strlen(val);
		if (vallen <= maxfieldlen)
			appendStringInfoString(&buf, val);
		else
		{
			vallen = pg_mbcliplen(val, vallen, maxfieldlen);
			appendBinaryStringInfo(&buf, val, vallen);
			appendStringInfoString(&buf, "...");
		}
	}

	appendStringInfoChar(&buf, ')');

	return buf.data;
}

/*
 * adjust_partition_tlist
 *		Re-order the targetlist entries for a given partition to account for
 *		column position differences between the parent and the partition.
 *
 * The expressions have already been fixed, but we must now re-order the
 * entries in case the partition has different column order, and possibly
 * add or remove dummy entries for dropped columns.
 *
 * Although a new List is returned, this feels free to scribble on resno
 * fields of the given tlist, so that should be a working copy.
 */
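/*
 * For example (hypothetical tables): if the parent has columns (a, b) and
 * the partition was created with columns in the order (b, a), the parent's
 * tlist entries {a: resno 1, b: resno 2} are emitted here as {b: resno 1,
 * a: resno 2}, matching the partition's attribute order.
 */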
static List *
adjust_partition_tlist(List *tlist, TupleConversionMap *map)
{
	List	   *new_tlist = NIL;
	TupleDesc	tupdesc = map->outdesc;
	AttrNumber *attrMap = map->attrMap;
	AttrNumber	attrno;
	ListCell   *lc;

	for (attrno = 1; attrno <= tupdesc->natts; attrno++)
	{
		Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1);
		AttrNumber	parentattrno = attrMap[attrno - 1];
		TargetEntry *tle;

		if (parentattrno != InvalidAttrNumber)
		{
			/*
			 * Use the corresponding entry from the parent's tlist, adjusting
			 * the resno to match the partition's attno.
			 */
			Assert(!att_tup->attisdropped);
			tle = (TargetEntry *) list_nth(tlist, parentattrno - 1);
			Assert(!tle->resjunk);
			Assert(tle->resno == parentattrno);
			tle->resno = attrno;
		}
		else
		{
			/*
			 * For a dropped attribute in the partition, generate a dummy
			 * entry with resno matching the partition's attno.  This should
			 * match what expand_targetlist() does.
			 */
			Const	   *expr;

			Assert(att_tup->attisdropped);
			expr = makeConst(INT4OID,
							 -1,
							 InvalidOid,
							 sizeof(int32),
							 (Datum) 0,
							 true,	/* isnull */
							 true /* byval */ );
			tle = makeTargetEntry((Expr *) expr,
								  attrno,
								  pstrdup(NameStr(att_tup->attname)),
								  false);
		}

		new_tlist = lappend(new_tlist, tle);
	}

	/* Finally, attach any resjunk entries to the end of the new tlist */
	foreach(lc, tlist)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);

		if (tle->resjunk)
		{
			tle->resno = list_length(new_tlist) + 1;
			new_tlist = lappend(new_tlist, tle);
		}
	}

	return new_tlist;
}

/*-------------------------------------------------------------------------
 * Run-Time Partition Pruning Support.
 *
 * The following series of functions exist to support the removal of unneeded
 * subplans for queries against partitioned tables.  The supporting functions
 * here are designed to work with any plan type which supports an arbitrary
 * number of subplans, e.g. Append, MergeAppend.
 *
 * When pruning involves comparison of a partition key to a constant, it's
 * done by the planner.  However, if we have a comparison to a non-constant
 * but not volatile expression, that presents an opportunity for run-time
 * pruning by the executor, allowing irrelevant partitions to be skipped
 * dynamically.
 *
 * We must distinguish expressions containing PARAM_EXEC Params from
 * expressions that don't contain those.  Even though a PARAM_EXEC Param is
 * considered to be a stable expression, it can change value from one plan
 * node scan to the next during query execution.  Stable comparison
 * expressions that don't involve such Params allow partition pruning to be
 * done once during executor startup.  Expressions that do involve such Params
 * require us to prune separately for each scan of the parent plan node.
 *
 * Note that pruning away unneeded subplans during executor startup has the
 * added benefit of not having to initialize the unneeded subplans at all.
 *
 *
 * Functions:
 *
 * ExecCreatePartitionPruneState:
 *		Creates the PartitionPruneState required by each of the two pruning
 *		functions.  Details stored include how to map the partition index
 *		returned by the partition pruning code into subplan indexes.
 *
 * ExecDestroyPartitionPruneState:
 *		Deletes a PartitionPruneState. Must be called during executor shutdown.
 *
 * ExecFindInitialMatchingSubPlans:
 *		Returns indexes of matching subplans.  Partition pruning is attempted
 *		without any evaluation of expressions containing PARAM_EXEC Params.
 *		This function must be called during executor startup for the parent
 *		plan before the subplans themselves are initialized.  Subplans which
 *		are found not to match by this function must be removed from the
 *		plan's list of subplans during execution, as this function performs a
 *		remap of the partition index to subplan index map and the newly
 *		created map provides indexes only for subplans which remain after
 *		calling this function.
 *
 * ExecFindMatchingSubPlans:
 *		Returns indexes of matching subplans after evaluating all available
 *		expressions.  This function can only be called during execution and
 *		must be called again each time the value of a Param listed in
 *		PartitionPruneState's 'execparamids' changes.
 *-------------------------------------------------------------------------
 */
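/*
 * Hypothetical examples: in a prepared query such as
 *		SELECT * FROM parted WHERE key = $1;
 * the comparison value is known before the subplans are initialized, so
 * pruning can happen once at executor startup; but if the value arrives as
 * a PARAM_EXEC Param (say, from an initplan or the inner side of a nested
 * loop), pruning must be repeated each time the Param changes.
 */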

/*
 * ExecCreatePartitionPruneState
 *		Build the data structure required for calling
 *		ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans.
 *
 * 'planstate' is the parent plan node's execution state.
 *
 * 'partitionpruneinfo' is a PartitionPruneInfo as generated by
 * make_partition_pruneinfo.  Here we build a PartitionPruneState containing a
 * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
 * partitionpruneinfo->prune_infos), each of which contains a
 * PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in
 * that sublist.  This two-level system is needed to keep from confusing the
 * different hierarchies when a UNION ALL contains multiple partitioned tables
 * as children.  The data stored in each PartitionedRelPruningData can be
 * re-used each time we re-evaluate which partitions match the pruning steps
 * provided in each PartitionedRelPruneInfo.
 */
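/*
 * For example (hypothetical query):
 *		SELECT * FROM parted_a UNION ALL SELECT * FROM parted_b
 * planned as a single Append yields two hierarchies, hence two
 * PartitionPruningData entries, each holding one PartitionedRelPruningData
 * per partitioned table in its own tree.
 */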
1485 PartitionPruneState *
1486 ExecCreatePartitionPruneState(PlanState *planstate,
1487 							  PartitionPruneInfo *partitionpruneinfo)
1488 {
1489 	PartitionPruneState *prunestate;
1490 	int			n_part_hierarchies;
1491 	ListCell   *lc;
1492 	int			i;
1493 
1494 	n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
1495 	Assert(n_part_hierarchies > 0);
1496 
1497 	/*
1498 	 * Allocate the data structure
1499 	 */
	prunestate = (PartitionPruneState *)
		palloc(offsetof(PartitionPruneState, partprunedata) +
			   sizeof(PartitionPruningData *) * n_part_hierarchies);

	prunestate->execparamids = NULL;
	/* other_subplans can change at runtime, so we need our own copy */
	prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans);
	prunestate->do_initial_prune = false;	/* may be set below */
	prunestate->do_exec_prune = false;	/* may be set below */
	prunestate->num_partprunedata = n_part_hierarchies;

	/*
	 * Create a short-term memory context which we'll use when making calls to
	 * the partition pruning functions.  This avoids possible memory leaks,
	 * since the pruning functions call comparison functions that aren't under
	 * our control.
	 */
	prunestate->prune_context =
		AllocSetContextCreate(CurrentMemoryContext,
							  "Partition Prune",
							  ALLOCSET_DEFAULT_SIZES);

	i = 0;
	foreach(lc, partitionpruneinfo->prune_infos)
	{
		List	   *partrelpruneinfos = lfirst_node(List, lc);
		int			npartrelpruneinfos = list_length(partrelpruneinfos);
		PartitionPruningData *prunedata;
		ListCell   *lc2;
		int			j;

		prunedata = (PartitionPruningData *)
			palloc(offsetof(PartitionPruningData, partrelprunedata) +
				   npartrelpruneinfos * sizeof(PartitionedRelPruningData));
		prunestate->partprunedata[i] = prunedata;
		prunedata->num_partrelprunedata = npartrelpruneinfos;

		j = 0;
		foreach(lc2, partrelpruneinfos)
		{
			PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
			PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];

			/*
			 * We must copy the subplan_map rather than pointing directly to
			 * the plan's version, as we may end up making modifications to it
			 * later.
			 */
			pprune->nparts = pinfo->nparts;
			pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts);
			memcpy(pprune->subplan_map, pinfo->subplan_map,
				   sizeof(int) * pinfo->nparts);

			/* We can use the subpart_map verbatim, since we never modify it */
			pprune->subpart_map = pinfo->subpart_map;
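
			/*
			 * In these maps, for each partition index k: subplan_map[k] is
			 * the index of the scan subplan if partition k is a leaf, else
			 * -1; in the latter case, subpart_map[k] gives the index of the
			 * sub-partitioned table's own PartitionedRelPruningData within
			 * this hierarchy, or -1 if the planner pruned it entirely.
			 */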

			/* present_parts is also subject to later modification */
			pprune->present_parts = bms_copy(pinfo->present_parts);

			/*
			 * Initialize pruning contexts as needed.
			 */
			pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
			if (pinfo->initial_pruning_steps)
			{
				ExecInitPruningContext(&pprune->initial_context,
									   pinfo->reloid,
									   pinfo->initial_pruning_steps,
									   planstate);
				/* Record whether initial pruning is needed at any level */
				prunestate->do_initial_prune = true;
			}
			pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
			if (pinfo->exec_pruning_steps)
			{
				ExecInitPruningContext(&pprune->exec_context,
									   pinfo->reloid,
									   pinfo->exec_pruning_steps,
									   planstate);
				/* Record whether exec pruning is needed at any level */
				prunestate->do_exec_prune = true;
			}

			/*
			 * Accumulate the IDs of all PARAM_EXEC Params affecting the
			 * partitioning decisions at this plan node.
			 */
			prunestate->execparamids = bms_add_members(prunestate->execparamids,
													   pinfo->execparamids);

			j++;
		}
		i++;
	}

	return prunestate;
}
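
/*
 * A simplified sketch of how these routines are expected to be used
 * together, modeled loosely on what a caller such as nodeAppend.c might do.
 * "node", "pruneinfo", "validsubplans", and "nsubplans" are invented names
 * for this illustration only:
 *
 *		prunestate = ExecCreatePartitionPruneState(&node->ps, pruneinfo);
 *		if (prunestate->do_initial_prune)
 *			validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
 *															nsubplans);
 *		...
 *		if (prunestate->do_exec_prune)
 *			validsubplans = ExecFindMatchingSubPlans(prunestate);
 *		...
 *		ExecDestroyPartitionPruneState(prunestate);		(at plan shutdown)
 */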

/*
 * ExecDestroyPartitionPruneState
 *		Release resources at plan shutdown.
 *
 * We don't bother to free any memory here, since the whole executor context
 * will be going away shortly.  We do need to release our relcache pins.
 */
void
ExecDestroyPartitionPruneState(PartitionPruneState *prunestate)
{
	PartitionPruningData **partprunedata = prunestate->partprunedata;
	int			i;

	for (i = 0; i < prunestate->num_partprunedata; i++)
	{
		PartitionPruningData *prunedata = partprunedata[i];
		PartitionedRelPruningData *pprune = prunedata->partrelprunedata;
		int			j;

		for (j = 0; j < prunedata->num_partrelprunedata; j++)
		{
			if (pprune[j].initial_pruning_steps)
				relation_close(pprune[j].initial_context.partrel, NoLock);
			if (pprune[j].exec_pruning_steps)
				relation_close(pprune[j].exec_context.partrel, NoLock);
		}
	}
}

/*
 * Initialize a PartitionPruneContext for the given list of pruning steps.
 */
static void
ExecInitPruningContext(PartitionPruneContext *context,
					   Oid reloid,
					   List *pruning_steps,
					   PlanState *planstate)
{
	PartitionKey partkey;
	PartitionDesc partdesc;
	int			n_steps;
	int			partnatts;
	ListCell   *lc;

	/*
	 * We need to hold a pin on the partitioned table's relcache entry
	 * so that we can rely on its copies of the table's partition key
	 * and partition descriptor.  We need not get a lock though; one
	 * should have been acquired already by InitPlan or
	 * ExecLockNonLeafAppendTables.
	 */
	context->partrel = relation_open(reloid, NoLock);

	partkey = RelationGetPartitionKey(context->partrel);
	partdesc = RelationGetPartitionDesc(context->partrel);

	n_steps = list_length(pruning_steps);

	context->strategy = partkey->strategy;
	context->partnatts = partnatts = partkey->partnatts;
	context->nparts = partdesc->nparts;
	context->boundinfo = partdesc->boundinfo;
	context->partcollation = partkey->partcollation;
	context->partsupfunc = partkey->partsupfunc;

	/* We'll look up type-specific support functions as needed */
	context->stepcmpfuncs = (FmgrInfo *)
		palloc0(sizeof(FmgrInfo) * n_steps * partnatts);

	context->ppccontext = CurrentMemoryContext;
	context->planstate = planstate;

	/* Initialize expression state for each expression we need */
	context->exprstates = (ExprState **)
		palloc0(sizeof(ExprState *) * n_steps * partnatts);
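
	/*
	 * Both stepcmpfuncs and exprstates are logically two-dimensional arrays
	 * indexed by (step_id, partition key column), flattened into n_steps *
	 * partnatts entries; PruneCxtStateIdx() (see execPartition.h) computes
	 * the flat index from those two coordinates.
	 */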
	foreach(lc, pruning_steps)
	{
		PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
		ListCell   *lc2;
		int			keyno;

		/* not needed for other step kinds */
		if (!IsA(step, PartitionPruneStepOp))
			continue;

		Assert(list_length(step->exprs) <= partnatts);

		keyno = 0;
		foreach(lc2, step->exprs)
		{
			Expr	   *expr = (Expr *) lfirst(lc2);

			/* not needed for Consts */
			if (!IsA(expr, Const))
			{
				int			stateidx = PruneCxtStateIdx(partnatts,
														step->step.step_id,
														keyno);

				context->exprstates[stateidx] =
					ExecInitExpr(expr, context->planstate);
			}
			keyno++;
		}
	}
}

/*
 * ExecFindInitialMatchingSubPlans
 *		Identify the set of subplans that cannot be eliminated by initial
 *		pruning (disregarding any pruning constraints involving PARAM_EXEC
 *		Params).  Also re-map the translation arrays that allow conversion
 *		of partition indexes into subplan indexes, to account for the
 *		unneeded subplans having been removed.
 *
 * Must only be called once per 'prunestate', and only if initial pruning
 * is required.
 *
 * 'nsubplans' must be passed as the total number of unpruned subplans.
 */
Bitmapset *
ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans)
{
	Bitmapset  *result = NULL;
	MemoryContext oldcontext;
	int			i;

	Assert(prunestate->do_initial_prune);

	/*
	 * Switch to a temp context to avoid leaking memory in the executor's
	 * memory context.
	 */
	oldcontext = MemoryContextSwitchTo(prunestate->prune_context);

	/*
	 * For each hierarchy, do the pruning tests, and add surviving subplans'
	 * indexes to "result".
	 */
	for (i = 0; i < prunestate->num_partprunedata; i++)
	{
		PartitionPruningData *prunedata;
		PartitionedRelPruningData *pprune;

		prunedata = prunestate->partprunedata[i];
		pprune = &prunedata->partrelprunedata[0];

		/* Perform pruning without using PARAM_EXEC Params */
		find_matching_subplans_recurse(prunedata, pprune, true, &result);

		/* Expression eval may have used space in node's ps_ExprContext too */
		if (pprune->initial_pruning_steps)
			ResetExprContext(pprune->initial_context.planstate->ps_ExprContext);
	}

	MemoryContextSwitchTo(oldcontext);

	/* Copy result out of the temp context before we reset it */
	result = bms_copy(result);

	/* Add in any subplans that partition pruning didn't account for */
	result = bms_add_members(result, prunestate->other_subplans);

	MemoryContextReset(prunestate->prune_context);

	/*
	 * If any subplans were pruned, we must re-sequence the subplan indexes so
	 * that ExecFindMatchingSubPlans properly returns indexes into the set of
	 * subplans that remain after this function has run.
	 */
	if (bms_num_members(result) < nsubplans)
	{
		int		   *new_subplan_indexes;
		Bitmapset  *new_other_subplans;
		int			i;
		int			newidx;

		/*
		 * First we must build a temporary array which maps old subplan
		 * indexes to new ones.  While we're at it, also recompute the
		 * other_subplans set, since indexes in it may change.
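		 *
		 * For example (invented numbers): with nsubplans = 4 and result =
		 * {0, 2, 3}, the array becomes {0, -1, 1, 2}; subplan 1 was pruned,
		 * and subplans 2 and 3 shift down to indexes 1 and 2.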
		 */
		new_subplan_indexes = (int *) palloc(sizeof(int) * nsubplans);
		new_other_subplans = NULL;
		newidx = 0;
		for (i = 0; i < nsubplans; i++)
		{
			if (bms_is_member(i, result))
				new_subplan_indexes[i] = newidx++;
			else
				new_subplan_indexes[i] = -1;	/* Newly pruned */

			if (bms_is_member(i, prunestate->other_subplans))
				new_other_subplans = bms_add_member(new_other_subplans,
													new_subplan_indexes[i]);
		}
		bms_free(prunestate->other_subplans);
		prunestate->other_subplans = new_other_subplans;

		/*
		 * Now we can update each PartitionedRelPruningData's subplan_map with
		 * new subplan indexes.  We must also recompute its present_parts
		 * bitmap.
		 */
		for (i = 0; i < prunestate->num_partprunedata; i++)
		{
			PartitionPruningData *prunedata = prunestate->partprunedata[i];
			int			j;

			/*
			 * Within each hierarchy, we perform this loop in back-to-front
			 * order so that we determine present_parts for the lowest-level
			 * partitioned tables first.  This way we can tell whether a
			 * sub-partitioned table's partitions were entirely pruned so we
			 * can exclude that from 'present_parts'.
			 */
			for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
			{
				PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
				int			nparts = pprune->nparts;
				int			k;

				/* We just rebuild present_parts from scratch */
				bms_free(pprune->present_parts);
				pprune->present_parts = NULL;

				for (k = 0; k < nparts; k++)
				{
					int			oldidx = pprune->subplan_map[k];
					int			subidx;

					/*
					 * If this partition existed as a subplan then change the
					 * old subplan index to the new subplan index.  The new
					 * index may become -1 if the partition was pruned above,
					 * or it may just be smaller than the old one because
					 * subplans that came earlier in the list were removed.
					 * If instead the partition is itself sub-partitioned,
					 * add it to present_parts unless its sub-hierarchy was
					 * entirely pruned.
					 */
					if (oldidx >= 0)
					{
						Assert(oldidx < nsubplans);
						pprune->subplan_map[k] = new_subplan_indexes[oldidx];

						if (new_subplan_indexes[oldidx] >= 0)
							pprune->present_parts =
								bms_add_member(pprune->present_parts, k);
					}
					else if ((subidx = pprune->subpart_map[k]) >= 0)
					{
						PartitionedRelPruningData *subprune;

						subprune = &prunedata->partrelprunedata[subidx];

						if (!bms_is_empty(subprune->present_parts))
							pprune->present_parts =
								bms_add_member(pprune->present_parts, k);
					}
				}
			}
		}

		pfree(new_subplan_indexes);
	}

	return result;
}

/*
 * ExecFindMatchingSubPlans
 *		Determine which subplans match the pruning steps detailed in
 *		'prunestate' for the current comparison expression values.
 *
 * Here we assume we may evaluate PARAM_EXEC Params.
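 *
 * This is expected to be called repeatedly during execution (e.g., from
 * nodeAppend.c), whenever the value of a Param listed in the prunestate's
 * 'execparamids' may have changed.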
 */
Bitmapset *
ExecFindMatchingSubPlans(PartitionPruneState *prunestate)
{
	Bitmapset  *result = NULL;
	MemoryContext oldcontext;
	int			i;

	/*
	 * Switch to a temp context to avoid leaking memory in the executor's
	 * memory context.
	 */
	oldcontext = MemoryContextSwitchTo(prunestate->prune_context);

	/*
	 * For each hierarchy, do the pruning tests, and add surviving subplans'
	 * indexes to "result".
	 */
	for (i = 0; i < prunestate->num_partprunedata; i++)
	{
		PartitionPruningData *prunedata;
		PartitionedRelPruningData *pprune;

		prunedata = prunestate->partprunedata[i];
		pprune = &prunedata->partrelprunedata[0];

		find_matching_subplans_recurse(prunedata, pprune, false, &result);

		/* Expression eval may have used space in node's ps_ExprContext too */
		if (pprune->exec_pruning_steps)
			ResetExprContext(pprune->exec_context.planstate->ps_ExprContext);
	}

	MemoryContextSwitchTo(oldcontext);

	/* Copy result out of the temp context before we reset it */
	result = bms_copy(result);

	/* Add in any subplans that partition pruning didn't account for */
	result = bms_add_members(result, prunestate->other_subplans);

	MemoryContextReset(prunestate->prune_context);

	return result;
}

/*
 * find_matching_subplans_recurse
 *		Recursive worker function for ExecFindMatchingSubPlans and
 *		ExecFindInitialMatchingSubPlans
 *
 * Adds valid (non-prunable) subplan IDs to *validsubplans
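 *
 * Partition indexes whose subplan_map entry is -1 but whose subpart_map
 * entry is >= 0 denote sub-partitioned tables; we recurse into those, so a
 * single call on a hierarchy's root level visits every unpruned level below.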
 */
static void
find_matching_subplans_recurse(PartitionPruningData *prunedata,
							   PartitionedRelPruningData *pprune,
							   bool initial_prune,
							   Bitmapset **validsubplans)
{
	Bitmapset  *partset;
	int			i;

	/* Guard against stack overflow due to overly deep partition hierarchy. */
	check_stack_depth();

	/* Only prune if pruning would be useful at this level. */
	if (initial_prune && pprune->initial_pruning_steps)
	{
		partset = get_matching_partitions(&pprune->initial_context,
										  pprune->initial_pruning_steps);
	}
	else if (!initial_prune && pprune->exec_pruning_steps)
	{
		partset = get_matching_partitions(&pprune->exec_context,
										  pprune->exec_pruning_steps);
	}
	else
	{
		/*
		 * If no pruning is to be done, just include all partitions at this
		 * level.
		 */
		partset = pprune->present_parts;
	}

	/* Translate partset into subplan indexes */
	i = -1;
	while ((i = bms_next_member(partset, i)) >= 0)
	{
		if (pprune->subplan_map[i] >= 0)
			*validsubplans = bms_add_member(*validsubplans,
											pprune->subplan_map[i]);
		else
		{
			int			partidx = pprune->subpart_map[i];

			if (partidx >= 0)
				find_matching_subplans_recurse(prunedata,
											   &prunedata->partrelprunedata[partidx],
											   initial_prune, validsubplans);
			else
			{
				/*
				 * We get here if the planner already pruned all the
				 * sub-partitions for this partition.  Silently ignore it in
				 * that case: the end result is the same, since we would have
				 * pruned all those partitions anyway; we just don't have any
				 * pruning steps to execute to verify it.
				 */
			}
		}
	}
}
