1 /*-------------------------------------------------------------------------
2  *
3  * execProcnode.c
4  *	 contains dispatch functions which call the appropriate "initialize",
5  *	 "get a tuple", and "cleanup" routines for the given node type.
6  *	 If the node has children, then it will presumably call ExecInitNode,
7  *	 ExecProcNode, or ExecEndNode on its subnodes and do the appropriate
8  *	 processing.
9  *
10  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  *
14  * IDENTIFICATION
15  *	  src/backend/executor/execProcnode.c
16  *
17  *-------------------------------------------------------------------------
18  */
19 /*
20  *	 NOTES
21  *		This used to be three files.  It is now all combined into
22  *		one file so that it is easier to keep the dispatch routines
23  *		in sync when new nodes are added.
24  *
25  *	 EXAMPLE
26  *		Suppose we want the age of the manager of the shoe department and
27  *		the number of employees in that department.  So we have the query:
28  *
29  *				select DEPT.no_emps, EMP.age
30  *				from DEPT, EMP
31  *				where EMP.name = DEPT.mgr and
32  *					  DEPT.name = "shoe"
33  *
34  *		Suppose the planner gives us the following plan:
35  *
36  *						Nest Loop (DEPT.mgr = EMP.name)
37  *						/		\
38  *					   /		 \
39  *				   Seq Scan		Seq Scan
40  *					DEPT		  EMP
41  *				(name = "shoe")
42  *
43  *		ExecutorStart() is called first.
44  *		It calls InitPlan() which calls ExecInitNode() on
45  *		the root of the plan -- the nest loop node.
46  *
47  *	  * ExecInitNode() notices that it is looking at a nest loop and
48  *		as the code below demonstrates, it calls ExecInitNestLoop().
49  *		Eventually this calls ExecInitNode() on the right and left subplans
50  *		and so forth until the entire plan is initialized.  The result
51  *		of ExecInitNode() is a plan state tree built with the same structure
52  *		as the underlying plan tree.
53  *
54  *	  * Then when ExecutorRun() is called, it calls ExecutePlan() which calls
55  *		ExecProcNode() repeatedly on the top node of the plan state tree.
56  *		Each time this happens, ExecProcNode() will end up calling
57  *		ExecNestLoop(), which calls ExecProcNode() on its subplans.
58  *		Each of these subplans is a sequential scan so ExecSeqScan() is
59  *		called.  The slots returned by ExecSeqScan() may contain
60  *		tuples which contain the attributes ExecNestLoop() uses to
61  *		form the tuples it returns.
62  *
63  *	  * Eventually ExecSeqScan() stops returning tuples and the nest
64  *		loop join ends.  Lastly, ExecutorEnd() calls ExecEndNode() which
65  *		calls ExecEndNestLoop() which in turn calls ExecEndNode() on
66  *		its subplans which result in ExecEndSeqScan().
67  *
68  *		This should show how the executor works by having
69  *		ExecInitNode(), ExecProcNode() and ExecEndNode() dispatch
70  *		their work to the appropriate node support routines which may
71  *		in turn call these routines themselves on their subplans.
72  */
73 #include "postgres.h"
74 
75 #include "executor/executor.h"
76 #include "executor/nodeAgg.h"
77 #include "executor/nodeAppend.h"
78 #include "executor/nodeBitmapAnd.h"
79 #include "executor/nodeBitmapHeapscan.h"
80 #include "executor/nodeBitmapIndexscan.h"
81 #include "executor/nodeBitmapOr.h"
82 #include "executor/nodeCtescan.h"
83 #include "executor/nodeCustom.h"
84 #include "executor/nodeForeignscan.h"
85 #include "executor/nodeFunctionscan.h"
86 #include "executor/nodeGather.h"
87 #include "executor/nodeGatherMerge.h"
88 #include "executor/nodeGroup.h"
89 #include "executor/nodeHash.h"
90 #include "executor/nodeHashjoin.h"
91 #include "executor/nodeIncrementalSort.h"
92 #include "executor/nodeIndexonlyscan.h"
93 #include "executor/nodeIndexscan.h"
94 #include "executor/nodeLimit.h"
95 #include "executor/nodeLockRows.h"
96 #include "executor/nodeMaterial.h"
97 #include "executor/nodeMergeAppend.h"
98 #include "executor/nodeMergejoin.h"
99 #include "executor/nodeModifyTable.h"
100 #include "executor/nodeNamedtuplestorescan.h"
101 #include "executor/nodeNestloop.h"
102 #include "executor/nodeProjectSet.h"
103 #include "executor/nodeRecursiveunion.h"
104 #include "executor/nodeResult.h"
105 #include "executor/nodeSamplescan.h"
106 #include "executor/nodeSeqscan.h"
107 #include "executor/nodeSetOp.h"
108 #include "executor/nodeSort.h"
109 #include "executor/nodeSubplan.h"
110 #include "executor/nodeSubqueryscan.h"
111 #include "executor/nodeTableFuncscan.h"
112 #include "executor/nodeTidscan.h"
113 #include "executor/nodeUnique.h"
114 #include "executor/nodeValuesscan.h"
115 #include "executor/nodeWindowAgg.h"
116 #include "executor/nodeWorktablescan.h"
117 #include "miscadmin.h"
118 #include "nodes/nodeFuncs.h"
119 
120 static TupleTableSlot *ExecProcNodeFirst(PlanState *node);
121 static TupleTableSlot *ExecProcNodeInstr(PlanState *node);
122 
123 
124 /* ------------------------------------------------------------------------
125  *		ExecInitNode
126  *
127  *		Recursively initializes all the nodes in the plan tree rooted
128  *		at 'node'.
129  *
130  *		Inputs:
131  *		  'node' is the current node of the plan produced by the query planner
132  *		  'estate' is the shared execution state for the plan tree
133  *		  'eflags' is a bitwise OR of flag bits described in executor.h
134  *
135  *		Returns a PlanState node corresponding to the given Plan node.
136  * ------------------------------------------------------------------------
137  */
138 PlanState *
ExecInitNode(Plan * node,EState * estate,int eflags)139 ExecInitNode(Plan *node, EState *estate, int eflags)
140 {
141 	PlanState  *result;
142 	List	   *subps;
143 	ListCell   *l;
144 
145 	/*
146 	 * do nothing when we get to the end of a leaf on tree.
147 	 */
148 	if (node == NULL)
149 		return NULL;
150 
151 	/*
152 	 * Make sure there's enough stack available. Need to check here, in
153 	 * addition to ExecProcNode() (via ExecProcNodeFirst()), to ensure the
154 	 * stack isn't overrun while initializing the node tree.
155 	 */
156 	check_stack_depth();
157 
158 	switch (nodeTag(node))
159 	{
160 			/*
161 			 * control nodes
162 			 */
163 		case T_Result:
164 			result = (PlanState *) ExecInitResult((Result *) node,
165 												  estate, eflags);
166 			break;
167 
168 		case T_ProjectSet:
169 			result = (PlanState *) ExecInitProjectSet((ProjectSet *) node,
170 													  estate, eflags);
171 			break;
172 
173 		case T_ModifyTable:
174 			result = (PlanState *) ExecInitModifyTable((ModifyTable *) node,
175 													   estate, eflags);
176 			break;
177 
178 		case T_Append:
179 			result = (PlanState *) ExecInitAppend((Append *) node,
180 												  estate, eflags);
181 			break;
182 
183 		case T_MergeAppend:
184 			result = (PlanState *) ExecInitMergeAppend((MergeAppend *) node,
185 													   estate, eflags);
186 			break;
187 
188 		case T_RecursiveUnion:
189 			result = (PlanState *) ExecInitRecursiveUnion((RecursiveUnion *) node,
190 														  estate, eflags);
191 			break;
192 
193 		case T_BitmapAnd:
194 			result = (PlanState *) ExecInitBitmapAnd((BitmapAnd *) node,
195 													 estate, eflags);
196 			break;
197 
198 		case T_BitmapOr:
199 			result = (PlanState *) ExecInitBitmapOr((BitmapOr *) node,
200 													estate, eflags);
201 			break;
202 
203 			/*
204 			 * scan nodes
205 			 */
206 		case T_SeqScan:
207 			result = (PlanState *) ExecInitSeqScan((SeqScan *) node,
208 												   estate, eflags);
209 			break;
210 
211 		case T_SampleScan:
212 			result = (PlanState *) ExecInitSampleScan((SampleScan *) node,
213 													  estate, eflags);
214 			break;
215 
216 		case T_IndexScan:
217 			result = (PlanState *) ExecInitIndexScan((IndexScan *) node,
218 													 estate, eflags);
219 			break;
220 
221 		case T_IndexOnlyScan:
222 			result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node,
223 														 estate, eflags);
224 			break;
225 
226 		case T_BitmapIndexScan:
227 			result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node,
228 														   estate, eflags);
229 			break;
230 
231 		case T_BitmapHeapScan:
232 			result = (PlanState *) ExecInitBitmapHeapScan((BitmapHeapScan *) node,
233 														  estate, eflags);
234 			break;
235 
236 		case T_TidScan:
237 			result = (PlanState *) ExecInitTidScan((TidScan *) node,
238 												   estate, eflags);
239 			break;
240 
241 		case T_SubqueryScan:
242 			result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node,
243 														estate, eflags);
244 			break;
245 
246 		case T_FunctionScan:
247 			result = (PlanState *) ExecInitFunctionScan((FunctionScan *) node,
248 														estate, eflags);
249 			break;
250 
251 		case T_TableFuncScan:
252 			result = (PlanState *) ExecInitTableFuncScan((TableFuncScan *) node,
253 														 estate, eflags);
254 			break;
255 
256 		case T_ValuesScan:
257 			result = (PlanState *) ExecInitValuesScan((ValuesScan *) node,
258 													  estate, eflags);
259 			break;
260 
261 		case T_CteScan:
262 			result = (PlanState *) ExecInitCteScan((CteScan *) node,
263 												   estate, eflags);
264 			break;
265 
266 		case T_NamedTuplestoreScan:
267 			result = (PlanState *) ExecInitNamedTuplestoreScan((NamedTuplestoreScan *) node,
268 															   estate, eflags);
269 			break;
270 
271 		case T_WorkTableScan:
272 			result = (PlanState *) ExecInitWorkTableScan((WorkTableScan *) node,
273 														 estate, eflags);
274 			break;
275 
276 		case T_ForeignScan:
277 			result = (PlanState *) ExecInitForeignScan((ForeignScan *) node,
278 													   estate, eflags);
279 			break;
280 
281 		case T_CustomScan:
282 			result = (PlanState *) ExecInitCustomScan((CustomScan *) node,
283 													  estate, eflags);
284 			break;
285 
286 			/*
287 			 * join nodes
288 			 */
289 		case T_NestLoop:
290 			result = (PlanState *) ExecInitNestLoop((NestLoop *) node,
291 													estate, eflags);
292 			break;
293 
294 		case T_MergeJoin:
295 			result = (PlanState *) ExecInitMergeJoin((MergeJoin *) node,
296 													 estate, eflags);
297 			break;
298 
299 		case T_HashJoin:
300 			result = (PlanState *) ExecInitHashJoin((HashJoin *) node,
301 													estate, eflags);
302 			break;
303 
304 			/*
305 			 * materialization nodes
306 			 */
307 		case T_Material:
308 			result = (PlanState *) ExecInitMaterial((Material *) node,
309 													estate, eflags);
310 			break;
311 
312 		case T_Sort:
313 			result = (PlanState *) ExecInitSort((Sort *) node,
314 												estate, eflags);
315 			break;
316 
317 		case T_IncrementalSort:
318 			result = (PlanState *) ExecInitIncrementalSort((IncrementalSort *) node,
319 														   estate, eflags);
320 			break;
321 
322 		case T_Group:
323 			result = (PlanState *) ExecInitGroup((Group *) node,
324 												 estate, eflags);
325 			break;
326 
327 		case T_Agg:
328 			result = (PlanState *) ExecInitAgg((Agg *) node,
329 											   estate, eflags);
330 			break;
331 
332 		case T_WindowAgg:
333 			result = (PlanState *) ExecInitWindowAgg((WindowAgg *) node,
334 													 estate, eflags);
335 			break;
336 
337 		case T_Unique:
338 			result = (PlanState *) ExecInitUnique((Unique *) node,
339 												  estate, eflags);
340 			break;
341 
342 		case T_Gather:
343 			result = (PlanState *) ExecInitGather((Gather *) node,
344 												  estate, eflags);
345 			break;
346 
347 		case T_GatherMerge:
348 			result = (PlanState *) ExecInitGatherMerge((GatherMerge *) node,
349 													   estate, eflags);
350 			break;
351 
352 		case T_Hash:
353 			result = (PlanState *) ExecInitHash((Hash *) node,
354 												estate, eflags);
355 			break;
356 
357 		case T_SetOp:
358 			result = (PlanState *) ExecInitSetOp((SetOp *) node,
359 												 estate, eflags);
360 			break;
361 
362 		case T_LockRows:
363 			result = (PlanState *) ExecInitLockRows((LockRows *) node,
364 													estate, eflags);
365 			break;
366 
367 		case T_Limit:
368 			result = (PlanState *) ExecInitLimit((Limit *) node,
369 												 estate, eflags);
370 			break;
371 
372 		default:
373 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
374 			result = NULL;		/* keep compiler quiet */
375 			break;
376 	}
377 
378 	ExecSetExecProcNode(result, result->ExecProcNode);
379 
380 	/*
381 	 * Initialize any initPlans present in this node.  The planner put them in
382 	 * a separate list for us.
383 	 */
384 	subps = NIL;
385 	foreach(l, node->initPlan)
386 	{
387 		SubPlan    *subplan = (SubPlan *) lfirst(l);
388 		SubPlanState *sstate;
389 
390 		Assert(IsA(subplan, SubPlan));
391 		sstate = ExecInitSubPlan(subplan, result);
392 		subps = lappend(subps, sstate);
393 	}
394 	result->initPlan = subps;
395 
396 	/* Set up instrumentation for this node if requested */
397 	if (estate->es_instrument)
398 		result->instrument = InstrAlloc(1, estate->es_instrument);
399 
400 	return result;
401 }
402 
403 
404 /*
405  * If a node wants to change its ExecProcNode function after ExecInitNode()
406  * has finished, it should do so with this function.  That way any wrapper
407  * functions can be reinstalled, without the node having to know how that
408  * works.
409  */
410 void
ExecSetExecProcNode(PlanState * node,ExecProcNodeMtd function)411 ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
412 {
413 	/*
414 	 * Add a wrapper around the ExecProcNode callback that checks stack depth
415 	 * during the first execution and maybe adds an instrumentation wrapper.
416 	 * When the callback is changed after execution has already begun that
417 	 * means we'll superfluously execute ExecProcNodeFirst, but that seems ok.
418 	 */
419 	node->ExecProcNodeReal = function;
420 	node->ExecProcNode = ExecProcNodeFirst;
421 }
422 
423 
424 /*
425  * ExecProcNode wrapper that performs some one-time checks, before calling
426  * the relevant node method (possibly via an instrumentation wrapper).
427  */
428 static TupleTableSlot *
ExecProcNodeFirst(PlanState * node)429 ExecProcNodeFirst(PlanState *node)
430 {
431 	/*
432 	 * Perform stack depth check during the first execution of the node.  We
433 	 * only do so the first time round because it turns out to not be cheap on
434 	 * some common architectures (eg. x86).  This relies on the assumption
435 	 * that ExecProcNode calls for a given plan node will always be made at
436 	 * roughly the same stack depth.
437 	 */
438 	check_stack_depth();
439 
440 	/*
441 	 * If instrumentation is required, change the wrapper to one that just
442 	 * does instrumentation.  Otherwise we can dispense with all wrappers and
443 	 * have ExecProcNode() directly call the relevant function from now on.
444 	 */
445 	if (node->instrument)
446 		node->ExecProcNode = ExecProcNodeInstr;
447 	else
448 		node->ExecProcNode = node->ExecProcNodeReal;
449 
450 	return node->ExecProcNode(node);
451 }
452 
453 
454 /*
455  * ExecProcNode wrapper that performs instrumentation calls.  By keeping
456  * this a separate function, we avoid overhead in the normal case where
457  * no instrumentation is wanted.
458  */
459 static TupleTableSlot *
ExecProcNodeInstr(PlanState * node)460 ExecProcNodeInstr(PlanState *node)
461 {
462 	TupleTableSlot *result;
463 
464 	InstrStartNode(node->instrument);
465 
466 	result = node->ExecProcNodeReal(node);
467 
468 	InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0);
469 
470 	return result;
471 }
472 
473 
474 /* ----------------------------------------------------------------
475  *		MultiExecProcNode
476  *
477  *		Execute a node that doesn't return individual tuples
478  *		(it might return a hashtable, bitmap, etc).  Caller should
479  *		check it got back the expected kind of Node.
480  *
481  * This has essentially the same responsibilities as ExecProcNode,
482  * but it does not do InstrStartNode/InstrStopNode (mainly because
483  * it can't tell how many returned tuples to count).  Each per-node
484  * function must provide its own instrumentation support.
485  * ----------------------------------------------------------------
486  */
487 Node *
MultiExecProcNode(PlanState * node)488 MultiExecProcNode(PlanState *node)
489 {
490 	Node	   *result;
491 
492 	check_stack_depth();
493 
494 	CHECK_FOR_INTERRUPTS();
495 
496 	if (node->chgParam != NULL) /* something changed */
497 		ExecReScan(node);		/* let ReScan handle this */
498 
499 	switch (nodeTag(node))
500 	{
501 			/*
502 			 * Only node types that actually support multiexec will be listed
503 			 */
504 
505 		case T_HashState:
506 			result = MultiExecHash((HashState *) node);
507 			break;
508 
509 		case T_BitmapIndexScanState:
510 			result = MultiExecBitmapIndexScan((BitmapIndexScanState *) node);
511 			break;
512 
513 		case T_BitmapAndState:
514 			result = MultiExecBitmapAnd((BitmapAndState *) node);
515 			break;
516 
517 		case T_BitmapOrState:
518 			result = MultiExecBitmapOr((BitmapOrState *) node);
519 			break;
520 
521 		default:
522 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
523 			result = NULL;
524 			break;
525 	}
526 
527 	return result;
528 }
529 
530 
531 /* ----------------------------------------------------------------
532  *		ExecEndNode
533  *
534  *		Recursively cleans up all the nodes in the plan rooted
535  *		at 'node'.
536  *
537  *		After this operation, the query plan will not be able to be
538  *		processed any further.  This should be called only after
539  *		the query plan has been fully executed.
540  * ----------------------------------------------------------------
541  */
542 void
ExecEndNode(PlanState * node)543 ExecEndNode(PlanState *node)
544 {
545 	/*
546 	 * do nothing when we get to the end of a leaf on tree.
547 	 */
548 	if (node == NULL)
549 		return;
550 
551 	/*
552 	 * Make sure there's enough stack available. Need to check here, in
553 	 * addition to ExecProcNode() (via ExecProcNodeFirst()), because it's not
554 	 * guaranteed that ExecProcNode() is reached for all nodes.
555 	 */
556 	check_stack_depth();
557 
558 	if (node->chgParam != NULL)
559 	{
560 		bms_free(node->chgParam);
561 		node->chgParam = NULL;
562 	}
563 
564 	switch (nodeTag(node))
565 	{
566 			/*
567 			 * control nodes
568 			 */
569 		case T_ResultState:
570 			ExecEndResult((ResultState *) node);
571 			break;
572 
573 		case T_ProjectSetState:
574 			ExecEndProjectSet((ProjectSetState *) node);
575 			break;
576 
577 		case T_ModifyTableState:
578 			ExecEndModifyTable((ModifyTableState *) node);
579 			break;
580 
581 		case T_AppendState:
582 			ExecEndAppend((AppendState *) node);
583 			break;
584 
585 		case T_MergeAppendState:
586 			ExecEndMergeAppend((MergeAppendState *) node);
587 			break;
588 
589 		case T_RecursiveUnionState:
590 			ExecEndRecursiveUnion((RecursiveUnionState *) node);
591 			break;
592 
593 		case T_BitmapAndState:
594 			ExecEndBitmapAnd((BitmapAndState *) node);
595 			break;
596 
597 		case T_BitmapOrState:
598 			ExecEndBitmapOr((BitmapOrState *) node);
599 			break;
600 
601 			/*
602 			 * scan nodes
603 			 */
604 		case T_SeqScanState:
605 			ExecEndSeqScan((SeqScanState *) node);
606 			break;
607 
608 		case T_SampleScanState:
609 			ExecEndSampleScan((SampleScanState *) node);
610 			break;
611 
612 		case T_GatherState:
613 			ExecEndGather((GatherState *) node);
614 			break;
615 
616 		case T_GatherMergeState:
617 			ExecEndGatherMerge((GatherMergeState *) node);
618 			break;
619 
620 		case T_IndexScanState:
621 			ExecEndIndexScan((IndexScanState *) node);
622 			break;
623 
624 		case T_IndexOnlyScanState:
625 			ExecEndIndexOnlyScan((IndexOnlyScanState *) node);
626 			break;
627 
628 		case T_BitmapIndexScanState:
629 			ExecEndBitmapIndexScan((BitmapIndexScanState *) node);
630 			break;
631 
632 		case T_BitmapHeapScanState:
633 			ExecEndBitmapHeapScan((BitmapHeapScanState *) node);
634 			break;
635 
636 		case T_TidScanState:
637 			ExecEndTidScan((TidScanState *) node);
638 			break;
639 
640 		case T_SubqueryScanState:
641 			ExecEndSubqueryScan((SubqueryScanState *) node);
642 			break;
643 
644 		case T_FunctionScanState:
645 			ExecEndFunctionScan((FunctionScanState *) node);
646 			break;
647 
648 		case T_TableFuncScanState:
649 			ExecEndTableFuncScan((TableFuncScanState *) node);
650 			break;
651 
652 		case T_ValuesScanState:
653 			ExecEndValuesScan((ValuesScanState *) node);
654 			break;
655 
656 		case T_CteScanState:
657 			ExecEndCteScan((CteScanState *) node);
658 			break;
659 
660 		case T_NamedTuplestoreScanState:
661 			ExecEndNamedTuplestoreScan((NamedTuplestoreScanState *) node);
662 			break;
663 
664 		case T_WorkTableScanState:
665 			ExecEndWorkTableScan((WorkTableScanState *) node);
666 			break;
667 
668 		case T_ForeignScanState:
669 			ExecEndForeignScan((ForeignScanState *) node);
670 			break;
671 
672 		case T_CustomScanState:
673 			ExecEndCustomScan((CustomScanState *) node);
674 			break;
675 
676 			/*
677 			 * join nodes
678 			 */
679 		case T_NestLoopState:
680 			ExecEndNestLoop((NestLoopState *) node);
681 			break;
682 
683 		case T_MergeJoinState:
684 			ExecEndMergeJoin((MergeJoinState *) node);
685 			break;
686 
687 		case T_HashJoinState:
688 			ExecEndHashJoin((HashJoinState *) node);
689 			break;
690 
691 			/*
692 			 * materialization nodes
693 			 */
694 		case T_MaterialState:
695 			ExecEndMaterial((MaterialState *) node);
696 			break;
697 
698 		case T_SortState:
699 			ExecEndSort((SortState *) node);
700 			break;
701 
702 		case T_IncrementalSortState:
703 			ExecEndIncrementalSort((IncrementalSortState *) node);
704 			break;
705 
706 		case T_GroupState:
707 			ExecEndGroup((GroupState *) node);
708 			break;
709 
710 		case T_AggState:
711 			ExecEndAgg((AggState *) node);
712 			break;
713 
714 		case T_WindowAggState:
715 			ExecEndWindowAgg((WindowAggState *) node);
716 			break;
717 
718 		case T_UniqueState:
719 			ExecEndUnique((UniqueState *) node);
720 			break;
721 
722 		case T_HashState:
723 			ExecEndHash((HashState *) node);
724 			break;
725 
726 		case T_SetOpState:
727 			ExecEndSetOp((SetOpState *) node);
728 			break;
729 
730 		case T_LockRowsState:
731 			ExecEndLockRows((LockRowsState *) node);
732 			break;
733 
734 		case T_LimitState:
735 			ExecEndLimit((LimitState *) node);
736 			break;
737 
738 		default:
739 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
740 			break;
741 	}
742 }
743 
744 /*
745  * ExecShutdownNode
746  *
747  * Give execution nodes a chance to stop asynchronous resource consumption
748  * and release any resources still held.
749  */
750 bool
ExecShutdownNode(PlanState * node)751 ExecShutdownNode(PlanState *node)
752 {
753 	if (node == NULL)
754 		return false;
755 
756 	check_stack_depth();
757 
758 	/*
759 	 * Treat the node as running while we shut it down, but only if it's run
760 	 * at least once already.  We don't expect much CPU consumption during
761 	 * node shutdown, but in the case of Gather or Gather Merge, we may shut
762 	 * down workers at this stage.  If so, their buffer usage will get
763 	 * propagated into pgBufferUsage at this point, and we want to make sure
764 	 * that it gets associated with the Gather node.  We skip this if the node
765 	 * has never been executed, so as to avoid incorrectly making it appear
766 	 * that it has.
767 	 */
768 	if (node->instrument && node->instrument->running)
769 		InstrStartNode(node->instrument);
770 
771 	planstate_tree_walker(node, ExecShutdownNode, NULL);
772 
773 	switch (nodeTag(node))
774 	{
775 		case T_GatherState:
776 			ExecShutdownGather((GatherState *) node);
777 			break;
778 		case T_ForeignScanState:
779 			ExecShutdownForeignScan((ForeignScanState *) node);
780 			break;
781 		case T_CustomScanState:
782 			ExecShutdownCustomScan((CustomScanState *) node);
783 			break;
784 		case T_GatherMergeState:
785 			ExecShutdownGatherMerge((GatherMergeState *) node);
786 			break;
787 		case T_HashState:
788 			ExecShutdownHash((HashState *) node);
789 			break;
790 		case T_HashJoinState:
791 			ExecShutdownHashJoin((HashJoinState *) node);
792 			break;
793 		default:
794 			break;
795 	}
796 
797 	/* Stop the node if we started it above, reporting 0 tuples. */
798 	if (node->instrument && node->instrument->running)
799 		InstrStopNode(node->instrument, 0);
800 
801 	return false;
802 }
803 
804 /*
805  * ExecSetTupleBound
806  *
807  * Set a tuple bound for a planstate node.  This lets child plan nodes
808  * optimize based on the knowledge that the maximum number of tuples that
809  * their parent will demand is limited.  The tuple bound for a node may
810  * only be changed between scans (i.e., after node initialization or just
811  * before an ExecReScan call).
812  *
813  * Any negative tuples_needed value means "no limit", which should be the
814  * default assumption when this is not called at all for a particular node.
815  *
816  * Note: if this is called repeatedly on a plan tree, the exact same set
817  * of nodes must be updated with the new limit each time; be careful that
818  * only unchanging conditions are tested here.
819  */
820 void
ExecSetTupleBound(int64 tuples_needed,PlanState * child_node)821 ExecSetTupleBound(int64 tuples_needed, PlanState *child_node)
822 {
823 	/*
824 	 * Since this function recurses, in principle we should check stack depth
825 	 * here.  In practice, it's probably pointless since the earlier node
826 	 * initialization tree traversal would surely have consumed more stack.
827 	 */
828 
829 	if (IsA(child_node, SortState))
830 	{
831 		/*
832 		 * If it is a Sort node, notify it that it can use bounded sort.
833 		 *
834 		 * Note: it is the responsibility of nodeSort.c to react properly to
835 		 * changes of these parameters.  If we ever redesign this, it'd be a
836 		 * good idea to integrate this signaling with the parameter-change
837 		 * mechanism.
838 		 */
839 		SortState  *sortState = (SortState *) child_node;
840 
841 		if (tuples_needed < 0)
842 		{
843 			/* make sure flag gets reset if needed upon rescan */
844 			sortState->bounded = false;
845 		}
846 		else
847 		{
848 			sortState->bounded = true;
849 			sortState->bound = tuples_needed;
850 		}
851 	}
852 	else if (IsA(child_node, IncrementalSortState))
853 	{
854 		/*
855 		 * If it is an IncrementalSort node, notify it that it can use bounded
856 		 * sort.
857 		 *
858 		 * Note: it is the responsibility of nodeIncrementalSort.c to react
859 		 * properly to changes of these parameters.  If we ever redesign this,
860 		 * it'd be a good idea to integrate this signaling with the
861 		 * parameter-change mechanism.
862 		 */
863 		IncrementalSortState *sortState = (IncrementalSortState *) child_node;
864 
865 		if (tuples_needed < 0)
866 		{
867 			/* make sure flag gets reset if needed upon rescan */
868 			sortState->bounded = false;
869 		}
870 		else
871 		{
872 			sortState->bounded = true;
873 			sortState->bound = tuples_needed;
874 		}
875 	}
876 	else if (IsA(child_node, AppendState))
877 	{
878 		/*
879 		 * If it is an Append, we can apply the bound to any nodes that are
880 		 * children of the Append, since the Append surely need read no more
881 		 * than that many tuples from any one input.
882 		 */
883 		AppendState *aState = (AppendState *) child_node;
884 		int			i;
885 
886 		for (i = 0; i < aState->as_nplans; i++)
887 			ExecSetTupleBound(tuples_needed, aState->appendplans[i]);
888 	}
889 	else if (IsA(child_node, MergeAppendState))
890 	{
891 		/*
892 		 * If it is a MergeAppend, we can apply the bound to any nodes that
893 		 * are children of the MergeAppend, since the MergeAppend surely need
894 		 * read no more than that many tuples from any one input.
895 		 */
896 		MergeAppendState *maState = (MergeAppendState *) child_node;
897 		int			i;
898 
899 		for (i = 0; i < maState->ms_nplans; i++)
900 			ExecSetTupleBound(tuples_needed, maState->mergeplans[i]);
901 	}
902 	else if (IsA(child_node, ResultState))
903 	{
904 		/*
905 		 * Similarly, for a projecting Result, we can apply the bound to its
906 		 * child node.
907 		 *
908 		 * If Result supported qual checking, we'd have to punt on seeing a
909 		 * qual.  Note that having a resconstantqual is not a showstopper: if
910 		 * that condition succeeds it affects nothing, while if it fails, no
911 		 * rows will be demanded from the Result child anyway.
912 		 */
913 		if (outerPlanState(child_node))
914 			ExecSetTupleBound(tuples_needed, outerPlanState(child_node));
915 	}
916 	else if (IsA(child_node, SubqueryScanState))
917 	{
918 		/*
919 		 * We can also descend through SubqueryScan, but only if it has no
920 		 * qual (otherwise it might discard rows).
921 		 */
922 		SubqueryScanState *subqueryState = (SubqueryScanState *) child_node;
923 
924 		if (subqueryState->ss.ps.qual == NULL)
925 			ExecSetTupleBound(tuples_needed, subqueryState->subplan);
926 	}
927 	else if (IsA(child_node, GatherState))
928 	{
929 		/*
930 		 * A Gather node can propagate the bound to its workers.  As with
931 		 * MergeAppend, no one worker could possibly need to return more
932 		 * tuples than the Gather itself needs to.
933 		 *
934 		 * Note: As with Sort, the Gather node is responsible for reacting
935 		 * properly to changes to this parameter.
936 		 */
937 		GatherState *gstate = (GatherState *) child_node;
938 
939 		gstate->tuples_needed = tuples_needed;
940 
941 		/* Also pass down the bound to our own copy of the child plan */
942 		ExecSetTupleBound(tuples_needed, outerPlanState(child_node));
943 	}
944 	else if (IsA(child_node, GatherMergeState))
945 	{
946 		/* Same comments as for Gather */
947 		GatherMergeState *gstate = (GatherMergeState *) child_node;
948 
949 		gstate->tuples_needed = tuples_needed;
950 
951 		ExecSetTupleBound(tuples_needed, outerPlanState(child_node));
952 	}
953 
954 	/*
955 	 * In principle we could descend through any plan node type that is
956 	 * certain not to discard or combine input rows; but on seeing a node that
957 	 * can do that, we can't propagate the bound any further.  For the moment
958 	 * it's unclear that any other cases are worth checking here.
959 	 */
960 }
961