1 /*-------------------------------------------------------------------------
2 *
3 * execProcnode.c
4 * contains dispatch functions which call the appropriate "initialize",
5 * "get a tuple", and "cleanup" routines for the given node type.
6 * If the node has children, then it will presumably call ExecInitNode,
7 * ExecProcNode, or ExecEndNode on its subnodes and do the appropriate
8 * processing.
9 *
10 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 *
14 * IDENTIFICATION
15 * src/backend/executor/execProcnode.c
16 *
17 *-------------------------------------------------------------------------
18 */
19 /*
20 * NOTES
21 * This used to be three files. It is now all combined into
22 * one file so that it is easier to keep the dispatch routines
23 * in sync when new nodes are added.
24 *
25 * EXAMPLE
26 * Suppose we want the age of the manager of the shoe department and
27 * the number of employees in that department. So we have the query:
28 *
29 * select DEPT.no_emps, EMP.age
30 * from DEPT, EMP
31 * where EMP.name = DEPT.mgr and
32 * DEPT.name = "shoe"
33 *
34 * Suppose the planner gives us the following plan:
35 *
36 * Nest Loop (DEPT.mgr = EMP.name)
37 * / \
38 * / \
39 * Seq Scan Seq Scan
40 * DEPT EMP
41 * (name = "shoe")
42 *
43 * ExecutorStart() is called first.
44 * It calls InitPlan() which calls ExecInitNode() on
45 * the root of the plan -- the nest loop node.
46 *
47 * * ExecInitNode() notices that it is looking at a nest loop and
48 * as the code below demonstrates, it calls ExecInitNestLoop().
49 * Eventually this calls ExecInitNode() on the right and left subplans
50 * and so forth until the entire plan is initialized. The result
51 * of ExecInitNode() is a plan state tree built with the same structure
52 * as the underlying plan tree.
53 *
54 * * Then when ExecutorRun() is called, it calls ExecutePlan() which calls
55 * ExecProcNode() repeatedly on the top node of the plan state tree.
56 * Each time this happens, ExecProcNode() will end up calling
57 * ExecNestLoop(), which calls ExecProcNode() on its subplans.
58 * Each of these subplans is a sequential scan so ExecSeqScan() is
59 * called. The slots returned by ExecSeqScan() may contain
60 * tuples which contain the attributes ExecNestLoop() uses to
61 * form the tuples it returns.
62 *
63 * * Eventually ExecSeqScan() stops returning tuples and the nest
64 * loop join ends. Lastly, ExecutorEnd() calls ExecEndNode() which
65 * calls ExecEndNestLoop() which in turn calls ExecEndNode() on
66 * its subplans which result in ExecEndSeqScan().
67 *
68 * This should show how the executor works by having
69 * ExecInitNode(), ExecProcNode() and ExecEndNode() dispatch
70 * their work to the appropriate node support routines which may
71 * in turn call these routines themselves on their subplans.
72 */
73 #include "postgres.h"
74
75 #include "executor/executor.h"
76 #include "executor/nodeAgg.h"
77 #include "executor/nodeAppend.h"
78 #include "executor/nodeBitmapAnd.h"
79 #include "executor/nodeBitmapHeapscan.h"
80 #include "executor/nodeBitmapIndexscan.h"
81 #include "executor/nodeBitmapOr.h"
82 #include "executor/nodeCtescan.h"
83 #include "executor/nodeCustom.h"
84 #include "executor/nodeForeignscan.h"
85 #include "executor/nodeFunctionscan.h"
86 #include "executor/nodeGather.h"
87 #include "executor/nodeGatherMerge.h"
88 #include "executor/nodeGroup.h"
89 #include "executor/nodeHash.h"
90 #include "executor/nodeHashjoin.h"
91 #include "executor/nodeIndexonlyscan.h"
92 #include "executor/nodeIndexscan.h"
93 #include "executor/nodeLimit.h"
94 #include "executor/nodeLockRows.h"
95 #include "executor/nodeMaterial.h"
96 #include "executor/nodeMergeAppend.h"
97 #include "executor/nodeMergejoin.h"
98 #include "executor/nodeModifyTable.h"
99 #include "executor/nodeNamedtuplestorescan.h"
100 #include "executor/nodeNestloop.h"
101 #include "executor/nodeProjectSet.h"
102 #include "executor/nodeRecursiveunion.h"
103 #include "executor/nodeResult.h"
104 #include "executor/nodeSamplescan.h"
105 #include "executor/nodeSeqscan.h"
106 #include "executor/nodeSetOp.h"
107 #include "executor/nodeSort.h"
108 #include "executor/nodeSubplan.h"
109 #include "executor/nodeSubqueryscan.h"
110 #include "executor/nodeTableFuncscan.h"
111 #include "executor/nodeTidscan.h"
112 #include "executor/nodeUnique.h"
113 #include "executor/nodeValuesscan.h"
114 #include "executor/nodeWindowAgg.h"
115 #include "executor/nodeWorktablescan.h"
116 #include "nodes/nodeFuncs.h"
117 #include "miscadmin.h"
118
119
120 static TupleTableSlot *ExecProcNodeFirst(PlanState *node);
121 static TupleTableSlot *ExecProcNodeInstr(PlanState *node);
122
123
124 /* ------------------------------------------------------------------------
125 * ExecInitNode
126 *
127 * Recursively initializes all the nodes in the plan tree rooted
128 * at 'node'.
129 *
130 * Inputs:
131 * 'node' is the current node of the plan produced by the query planner
132 * 'estate' is the shared execution state for the plan tree
133 * 'eflags' is a bitwise OR of flag bits described in executor.h
134 *
135 * Returns a PlanState node corresponding to the given Plan node.
136 * ------------------------------------------------------------------------
137 */
138 PlanState *
ExecInitNode(Plan * node,EState * estate,int eflags)139 ExecInitNode(Plan *node, EState *estate, int eflags)
140 {
141 PlanState *result;
142 List *subps;
143 ListCell *l;
144
145 /*
146 * do nothing when we get to the end of a leaf on tree.
147 */
148 if (node == NULL)
149 return NULL;
150
151 /*
152 * Make sure there's enough stack available. Need to check here, in
153 * addition to ExecProcNode() (via ExecProcNodeFirst()), to ensure the
154 * stack isn't overrun while initializing the node tree.
155 */
156 check_stack_depth();
157
158 switch (nodeTag(node))
159 {
160 /*
161 * control nodes
162 */
163 case T_Result:
164 result = (PlanState *) ExecInitResult((Result *) node,
165 estate, eflags);
166 break;
167
168 case T_ProjectSet:
169 result = (PlanState *) ExecInitProjectSet((ProjectSet *) node,
170 estate, eflags);
171 break;
172
173 case T_ModifyTable:
174 result = (PlanState *) ExecInitModifyTable((ModifyTable *) node,
175 estate, eflags);
176 break;
177
178 case T_Append:
179 result = (PlanState *) ExecInitAppend((Append *) node,
180 estate, eflags);
181 break;
182
183 case T_MergeAppend:
184 result = (PlanState *) ExecInitMergeAppend((MergeAppend *) node,
185 estate, eflags);
186 break;
187
188 case T_RecursiveUnion:
189 result = (PlanState *) ExecInitRecursiveUnion((RecursiveUnion *) node,
190 estate, eflags);
191 break;
192
193 case T_BitmapAnd:
194 result = (PlanState *) ExecInitBitmapAnd((BitmapAnd *) node,
195 estate, eflags);
196 break;
197
198 case T_BitmapOr:
199 result = (PlanState *) ExecInitBitmapOr((BitmapOr *) node,
200 estate, eflags);
201 break;
202
203 /*
204 * scan nodes
205 */
206 case T_SeqScan:
207 result = (PlanState *) ExecInitSeqScan((SeqScan *) node,
208 estate, eflags);
209 break;
210
211 case T_SampleScan:
212 result = (PlanState *) ExecInitSampleScan((SampleScan *) node,
213 estate, eflags);
214 break;
215
216 case T_IndexScan:
217 result = (PlanState *) ExecInitIndexScan((IndexScan *) node,
218 estate, eflags);
219 break;
220
221 case T_IndexOnlyScan:
222 result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node,
223 estate, eflags);
224 break;
225
226 case T_BitmapIndexScan:
227 result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node,
228 estate, eflags);
229 break;
230
231 case T_BitmapHeapScan:
232 result = (PlanState *) ExecInitBitmapHeapScan((BitmapHeapScan *) node,
233 estate, eflags);
234 break;
235
236 case T_TidScan:
237 result = (PlanState *) ExecInitTidScan((TidScan *) node,
238 estate, eflags);
239 break;
240
241 case T_SubqueryScan:
242 result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node,
243 estate, eflags);
244 break;
245
246 case T_FunctionScan:
247 result = (PlanState *) ExecInitFunctionScan((FunctionScan *) node,
248 estate, eflags);
249 break;
250
251 case T_TableFuncScan:
252 result = (PlanState *) ExecInitTableFuncScan((TableFuncScan *) node,
253 estate, eflags);
254 break;
255
256 case T_ValuesScan:
257 result = (PlanState *) ExecInitValuesScan((ValuesScan *) node,
258 estate, eflags);
259 break;
260
261 case T_CteScan:
262 result = (PlanState *) ExecInitCteScan((CteScan *) node,
263 estate, eflags);
264 break;
265
266 case T_NamedTuplestoreScan:
267 result = (PlanState *) ExecInitNamedTuplestoreScan((NamedTuplestoreScan *) node,
268 estate, eflags);
269 break;
270
271 case T_WorkTableScan:
272 result = (PlanState *) ExecInitWorkTableScan((WorkTableScan *) node,
273 estate, eflags);
274 break;
275
276 case T_ForeignScan:
277 result = (PlanState *) ExecInitForeignScan((ForeignScan *) node,
278 estate, eflags);
279 break;
280
281 case T_CustomScan:
282 result = (PlanState *) ExecInitCustomScan((CustomScan *) node,
283 estate, eflags);
284 break;
285
286 /*
287 * join nodes
288 */
289 case T_NestLoop:
290 result = (PlanState *) ExecInitNestLoop((NestLoop *) node,
291 estate, eflags);
292 break;
293
294 case T_MergeJoin:
295 result = (PlanState *) ExecInitMergeJoin((MergeJoin *) node,
296 estate, eflags);
297 break;
298
299 case T_HashJoin:
300 result = (PlanState *) ExecInitHashJoin((HashJoin *) node,
301 estate, eflags);
302 break;
303
304 /*
305 * materialization nodes
306 */
307 case T_Material:
308 result = (PlanState *) ExecInitMaterial((Material *) node,
309 estate, eflags);
310 break;
311
312 case T_Sort:
313 result = (PlanState *) ExecInitSort((Sort *) node,
314 estate, eflags);
315 break;
316
317 case T_Group:
318 result = (PlanState *) ExecInitGroup((Group *) node,
319 estate, eflags);
320 break;
321
322 case T_Agg:
323 result = (PlanState *) ExecInitAgg((Agg *) node,
324 estate, eflags);
325 break;
326
327 case T_WindowAgg:
328 result = (PlanState *) ExecInitWindowAgg((WindowAgg *) node,
329 estate, eflags);
330 break;
331
332 case T_Unique:
333 result = (PlanState *) ExecInitUnique((Unique *) node,
334 estate, eflags);
335 break;
336
337 case T_Gather:
338 result = (PlanState *) ExecInitGather((Gather *) node,
339 estate, eflags);
340 break;
341
342 case T_GatherMerge:
343 result = (PlanState *) ExecInitGatherMerge((GatherMerge *) node,
344 estate, eflags);
345 break;
346
347 case T_Hash:
348 result = (PlanState *) ExecInitHash((Hash *) node,
349 estate, eflags);
350 break;
351
352 case T_SetOp:
353 result = (PlanState *) ExecInitSetOp((SetOp *) node,
354 estate, eflags);
355 break;
356
357 case T_LockRows:
358 result = (PlanState *) ExecInitLockRows((LockRows *) node,
359 estate, eflags);
360 break;
361
362 case T_Limit:
363 result = (PlanState *) ExecInitLimit((Limit *) node,
364 estate, eflags);
365 break;
366
367 default:
368 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
369 result = NULL; /* keep compiler quiet */
370 break;
371 }
372
373 ExecSetExecProcNode(result, result->ExecProcNode);
374
375 /*
376 * Initialize any initPlans present in this node. The planner put them in
377 * a separate list for us.
378 */
379 subps = NIL;
380 foreach(l, node->initPlan)
381 {
382 SubPlan *subplan = (SubPlan *) lfirst(l);
383 SubPlanState *sstate;
384
385 Assert(IsA(subplan, SubPlan));
386 sstate = ExecInitSubPlan(subplan, result);
387 subps = lappend(subps, sstate);
388 }
389 result->initPlan = subps;
390
391 /* Set up instrumentation for this node if requested */
392 if (estate->es_instrument)
393 result->instrument = InstrAlloc(1, estate->es_instrument);
394
395 return result;
396 }
397
398
399 /*
400 * If a node wants to change its ExecProcNode function after ExecInitNode()
401 * has finished, it should do so with this function. That way any wrapper
402 * functions can be reinstalled, without the node having to know how that
403 * works.
404 */
405 void
ExecSetExecProcNode(PlanState * node,ExecProcNodeMtd function)406 ExecSetExecProcNode(PlanState *node, ExecProcNodeMtd function)
407 {
408 /*
409 * Add a wrapper around the ExecProcNode callback that checks stack depth
410 * during the first execution and maybe adds an instrumentation wrapper.
411 * When the callback is changed after execution has already begun that
412 * means we'll superfluously execute ExecProcNodeFirst, but that seems ok.
413 */
414 node->ExecProcNodeReal = function;
415 node->ExecProcNode = ExecProcNodeFirst;
416 }
417
418
419 /*
420 * ExecProcNode wrapper that performs some one-time checks, before calling
421 * the relevant node method (possibly via an instrumentation wrapper).
422 */
423 static TupleTableSlot *
ExecProcNodeFirst(PlanState * node)424 ExecProcNodeFirst(PlanState *node)
425 {
426 /*
427 * Perform stack depth check during the first execution of the node. We
428 * only do so the first time round because it turns out to not be cheap on
429 * some common architectures (eg. x86). This relies on the assumption
430 * that ExecProcNode calls for a given plan node will always be made at
431 * roughly the same stack depth.
432 */
433 check_stack_depth();
434
435 /*
436 * If instrumentation is required, change the wrapper to one that just
437 * does instrumentation. Otherwise we can dispense with all wrappers and
438 * have ExecProcNode() directly call the relevant function from now on.
439 */
440 if (node->instrument)
441 node->ExecProcNode = ExecProcNodeInstr;
442 else
443 node->ExecProcNode = node->ExecProcNodeReal;
444
445 return node->ExecProcNode(node);
446 }
447
448
449 /*
450 * ExecProcNode wrapper that performs instrumentation calls. By keeping
451 * this a separate function, we avoid overhead in the normal case where
452 * no instrumentation is wanted.
453 */
454 static TupleTableSlot *
ExecProcNodeInstr(PlanState * node)455 ExecProcNodeInstr(PlanState *node)
456 {
457 TupleTableSlot *result;
458
459 InstrStartNode(node->instrument);
460
461 result = node->ExecProcNodeReal(node);
462
463 InstrStopNode(node->instrument, TupIsNull(result) ? 0.0 : 1.0);
464
465 return result;
466 }
467
468
469 /* ----------------------------------------------------------------
470 * MultiExecProcNode
471 *
472 * Execute a node that doesn't return individual tuples
473 * (it might return a hashtable, bitmap, etc). Caller should
474 * check it got back the expected kind of Node.
475 *
476 * This has essentially the same responsibilities as ExecProcNode,
477 * but it does not do InstrStartNode/InstrStopNode (mainly because
478 * it can't tell how many returned tuples to count). Each per-node
479 * function must provide its own instrumentation support.
480 * ----------------------------------------------------------------
481 */
482 Node *
MultiExecProcNode(PlanState * node)483 MultiExecProcNode(PlanState *node)
484 {
485 Node *result;
486
487 check_stack_depth();
488
489 CHECK_FOR_INTERRUPTS();
490
491 if (node->chgParam != NULL) /* something changed */
492 ExecReScan(node); /* let ReScan handle this */
493
494 switch (nodeTag(node))
495 {
496 /*
497 * Only node types that actually support multiexec will be listed
498 */
499
500 case T_HashState:
501 result = MultiExecHash((HashState *) node);
502 break;
503
504 case T_BitmapIndexScanState:
505 result = MultiExecBitmapIndexScan((BitmapIndexScanState *) node);
506 break;
507
508 case T_BitmapAndState:
509 result = MultiExecBitmapAnd((BitmapAndState *) node);
510 break;
511
512 case T_BitmapOrState:
513 result = MultiExecBitmapOr((BitmapOrState *) node);
514 break;
515
516 default:
517 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
518 result = NULL;
519 break;
520 }
521
522 return result;
523 }
524
525
526 /* ----------------------------------------------------------------
527 * ExecEndNode
528 *
529 * Recursively cleans up all the nodes in the plan rooted
530 * at 'node'.
531 *
532 * After this operation, the query plan will not be able to be
533 * processed any further. This should be called only after
534 * the query plan has been fully executed.
535 * ----------------------------------------------------------------
536 */
537 void
ExecEndNode(PlanState * node)538 ExecEndNode(PlanState *node)
539 {
540 /*
541 * do nothing when we get to the end of a leaf on tree.
542 */
543 if (node == NULL)
544 return;
545
546 /*
547 * Make sure there's enough stack available. Need to check here, in
548 * addition to ExecProcNode() (via ExecProcNodeFirst()), because it's not
549 * guaranteed that ExecProcNode() is reached for all nodes.
550 */
551 check_stack_depth();
552
553 if (node->chgParam != NULL)
554 {
555 bms_free(node->chgParam);
556 node->chgParam = NULL;
557 }
558
559 switch (nodeTag(node))
560 {
561 /*
562 * control nodes
563 */
564 case T_ResultState:
565 ExecEndResult((ResultState *) node);
566 break;
567
568 case T_ProjectSetState:
569 ExecEndProjectSet((ProjectSetState *) node);
570 break;
571
572 case T_ModifyTableState:
573 ExecEndModifyTable((ModifyTableState *) node);
574 break;
575
576 case T_AppendState:
577 ExecEndAppend((AppendState *) node);
578 break;
579
580 case T_MergeAppendState:
581 ExecEndMergeAppend((MergeAppendState *) node);
582 break;
583
584 case T_RecursiveUnionState:
585 ExecEndRecursiveUnion((RecursiveUnionState *) node);
586 break;
587
588 case T_BitmapAndState:
589 ExecEndBitmapAnd((BitmapAndState *) node);
590 break;
591
592 case T_BitmapOrState:
593 ExecEndBitmapOr((BitmapOrState *) node);
594 break;
595
596 /*
597 * scan nodes
598 */
599 case T_SeqScanState:
600 ExecEndSeqScan((SeqScanState *) node);
601 break;
602
603 case T_SampleScanState:
604 ExecEndSampleScan((SampleScanState *) node);
605 break;
606
607 case T_GatherState:
608 ExecEndGather((GatherState *) node);
609 break;
610
611 case T_GatherMergeState:
612 ExecEndGatherMerge((GatherMergeState *) node);
613 break;
614
615 case T_IndexScanState:
616 ExecEndIndexScan((IndexScanState *) node);
617 break;
618
619 case T_IndexOnlyScanState:
620 ExecEndIndexOnlyScan((IndexOnlyScanState *) node);
621 break;
622
623 case T_BitmapIndexScanState:
624 ExecEndBitmapIndexScan((BitmapIndexScanState *) node);
625 break;
626
627 case T_BitmapHeapScanState:
628 ExecEndBitmapHeapScan((BitmapHeapScanState *) node);
629 break;
630
631 case T_TidScanState:
632 ExecEndTidScan((TidScanState *) node);
633 break;
634
635 case T_SubqueryScanState:
636 ExecEndSubqueryScan((SubqueryScanState *) node);
637 break;
638
639 case T_FunctionScanState:
640 ExecEndFunctionScan((FunctionScanState *) node);
641 break;
642
643 case T_TableFuncScanState:
644 ExecEndTableFuncScan((TableFuncScanState *) node);
645 break;
646
647 case T_ValuesScanState:
648 ExecEndValuesScan((ValuesScanState *) node);
649 break;
650
651 case T_CteScanState:
652 ExecEndCteScan((CteScanState *) node);
653 break;
654
655 case T_NamedTuplestoreScanState:
656 ExecEndNamedTuplestoreScan((NamedTuplestoreScanState *) node);
657 break;
658
659 case T_WorkTableScanState:
660 ExecEndWorkTableScan((WorkTableScanState *) node);
661 break;
662
663 case T_ForeignScanState:
664 ExecEndForeignScan((ForeignScanState *) node);
665 break;
666
667 case T_CustomScanState:
668 ExecEndCustomScan((CustomScanState *) node);
669 break;
670
671 /*
672 * join nodes
673 */
674 case T_NestLoopState:
675 ExecEndNestLoop((NestLoopState *) node);
676 break;
677
678 case T_MergeJoinState:
679 ExecEndMergeJoin((MergeJoinState *) node);
680 break;
681
682 case T_HashJoinState:
683 ExecEndHashJoin((HashJoinState *) node);
684 break;
685
686 /*
687 * materialization nodes
688 */
689 case T_MaterialState:
690 ExecEndMaterial((MaterialState *) node);
691 break;
692
693 case T_SortState:
694 ExecEndSort((SortState *) node);
695 break;
696
697 case T_GroupState:
698 ExecEndGroup((GroupState *) node);
699 break;
700
701 case T_AggState:
702 ExecEndAgg((AggState *) node);
703 break;
704
705 case T_WindowAggState:
706 ExecEndWindowAgg((WindowAggState *) node);
707 break;
708
709 case T_UniqueState:
710 ExecEndUnique((UniqueState *) node);
711 break;
712
713 case T_HashState:
714 ExecEndHash((HashState *) node);
715 break;
716
717 case T_SetOpState:
718 ExecEndSetOp((SetOpState *) node);
719 break;
720
721 case T_LockRowsState:
722 ExecEndLockRows((LockRowsState *) node);
723 break;
724
725 case T_LimitState:
726 ExecEndLimit((LimitState *) node);
727 break;
728
729 default:
730 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
731 break;
732 }
733 }
734
735 /*
736 * ExecShutdownNode
737 *
738 * Give execution nodes a chance to stop asynchronous resource consumption
739 * and release any resources still held.
740 */
741 bool
ExecShutdownNode(PlanState * node)742 ExecShutdownNode(PlanState *node)
743 {
744 if (node == NULL)
745 return false;
746
747 check_stack_depth();
748
749 /*
750 * Treat the node as running while we shut it down, but only if it's run
751 * at least once already. We don't expect much CPU consumption during
752 * node shutdown, but in the case of Gather or Gather Merge, we may shut
753 * down workers at this stage. If so, their buffer usage will get
754 * propagated into pgBufferUsage at this point, and we want to make sure
755 * that it gets associated with the Gather node. We skip this if the node
756 * has never been executed, so as to avoid incorrectly making it appear
757 * that it has.
758 */
759 if (node->instrument && node->instrument->running)
760 InstrStartNode(node->instrument);
761
762 planstate_tree_walker(node, ExecShutdownNode, NULL);
763
764 switch (nodeTag(node))
765 {
766 case T_GatherState:
767 ExecShutdownGather((GatherState *) node);
768 break;
769 case T_ForeignScanState:
770 ExecShutdownForeignScan((ForeignScanState *) node);
771 break;
772 case T_CustomScanState:
773 ExecShutdownCustomScan((CustomScanState *) node);
774 break;
775 case T_GatherMergeState:
776 ExecShutdownGatherMerge((GatherMergeState *) node);
777 break;
778 case T_HashState:
779 ExecShutdownHash((HashState *) node);
780 break;
781 case T_HashJoinState:
782 ExecShutdownHashJoin((HashJoinState *) node);
783 break;
784 default:
785 break;
786 }
787
788 /* Stop the node if we started it above, reporting 0 tuples. */
789 if (node->instrument && node->instrument->running)
790 InstrStopNode(node->instrument, 0);
791
792 return false;
793 }
794
795 /*
796 * ExecSetTupleBound
797 *
798 * Set a tuple bound for a planstate node. This lets child plan nodes
799 * optimize based on the knowledge that the maximum number of tuples that
800 * their parent will demand is limited. The tuple bound for a node may
801 * only be changed between scans (i.e., after node initialization or just
802 * before an ExecReScan call).
803 *
804 * Any negative tuples_needed value means "no limit", which should be the
805 * default assumption when this is not called at all for a particular node.
806 *
807 * Note: if this is called repeatedly on a plan tree, the exact same set
808 * of nodes must be updated with the new limit each time; be careful that
809 * only unchanging conditions are tested here.
810 */
811 void
ExecSetTupleBound(int64 tuples_needed,PlanState * child_node)812 ExecSetTupleBound(int64 tuples_needed, PlanState *child_node)
813 {
814 /*
815 * Since this function recurses, in principle we should check stack depth
816 * here. In practice, it's probably pointless since the earlier node
817 * initialization tree traversal would surely have consumed more stack.
818 */
819
820 if (IsA(child_node, SortState))
821 {
822 /*
823 * If it is a Sort node, notify it that it can use bounded sort.
824 *
825 * Note: it is the responsibility of nodeSort.c to react properly to
826 * changes of these parameters. If we ever redesign this, it'd be a
827 * good idea to integrate this signaling with the parameter-change
828 * mechanism.
829 */
830 SortState *sortState = (SortState *) child_node;
831
832 if (tuples_needed < 0)
833 {
834 /* make sure flag gets reset if needed upon rescan */
835 sortState->bounded = false;
836 }
837 else
838 {
839 sortState->bounded = true;
840 sortState->bound = tuples_needed;
841 }
842 }
843 else if (IsA(child_node, AppendState))
844 {
845 /*
846 * If it is an Append, we can apply the bound to any nodes that are
847 * children of the Append, since the Append surely need read no more
848 * than that many tuples from any one input.
849 */
850 AppendState *aState = (AppendState *) child_node;
851 int i;
852
853 for (i = 0; i < aState->as_nplans; i++)
854 ExecSetTupleBound(tuples_needed, aState->appendplans[i]);
855 }
856 else if (IsA(child_node, MergeAppendState))
857 {
858 /*
859 * If it is a MergeAppend, we can apply the bound to any nodes that
860 * are children of the MergeAppend, since the MergeAppend surely need
861 * read no more than that many tuples from any one input.
862 */
863 MergeAppendState *maState = (MergeAppendState *) child_node;
864 int i;
865
866 for (i = 0; i < maState->ms_nplans; i++)
867 ExecSetTupleBound(tuples_needed, maState->mergeplans[i]);
868 }
869 else if (IsA(child_node, ResultState))
870 {
871 /*
872 * Similarly, for a projecting Result, we can apply the bound to its
873 * child node.
874 *
875 * If Result supported qual checking, we'd have to punt on seeing a
876 * qual. Note that having a resconstantqual is not a showstopper: if
877 * that condition succeeds it affects nothing, while if it fails, no
878 * rows will be demanded from the Result child anyway.
879 */
880 if (outerPlanState(child_node))
881 ExecSetTupleBound(tuples_needed, outerPlanState(child_node));
882 }
883 else if (IsA(child_node, SubqueryScanState))
884 {
885 /*
886 * We can also descend through SubqueryScan, but only if it has no
887 * qual (otherwise it might discard rows).
888 */
889 SubqueryScanState *subqueryState = (SubqueryScanState *) child_node;
890
891 if (subqueryState->ss.ps.qual == NULL)
892 ExecSetTupleBound(tuples_needed, subqueryState->subplan);
893 }
894 else if (IsA(child_node, GatherState))
895 {
896 /*
897 * A Gather node can propagate the bound to its workers. As with
898 * MergeAppend, no one worker could possibly need to return more
899 * tuples than the Gather itself needs to.
900 *
901 * Note: As with Sort, the Gather node is responsible for reacting
902 * properly to changes to this parameter.
903 */
904 GatherState *gstate = (GatherState *) child_node;
905
906 gstate->tuples_needed = tuples_needed;
907
908 /* Also pass down the bound to our own copy of the child plan */
909 ExecSetTupleBound(tuples_needed, outerPlanState(child_node));
910 }
911 else if (IsA(child_node, GatherMergeState))
912 {
913 /* Same comments as for Gather */
914 GatherMergeState *gstate = (GatherMergeState *) child_node;
915
916 gstate->tuples_needed = tuples_needed;
917
918 ExecSetTupleBound(tuples_needed, outerPlanState(child_node));
919 }
920
921 /*
922 * In principle we could descend through any plan node type that is
923 * certain not to discard or combine input rows; but on seeing a node that
924 * can do that, we can't propagate the bound any further. For the moment
925 * it's unclear that any other cases are worth checking here.
926 */
927 }
928