1 /*-------------------------------------------------------------------------
2 *
3 * nodeAppend.c
4 * routines to handle append nodes.
5 *
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeAppend.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 /* INTERFACE ROUTINES
16 * ExecInitAppend - initialize the append node
17 * ExecAppend - retrieve the next tuple from the node
18 * ExecEndAppend - shut down the append node
19 * ExecReScanAppend - rescan the append node
20 *
21 * NOTES
22 * Each append node contains a list of one or more subplans which
23 * must be iteratively processed (forwards or backwards).
24 * Tuples are retrieved by executing the 'whichplan'th subplan
25 * until the subplan stops returning tuples, at which point that
26 * plan is shut down and the next started up.
27 *
28 * Append nodes don't make use of their left and right
29 * subtrees, rather they maintain a list of subplans so
30 * a typical append node looks like this in the plan tree:
31 *
32 * ...
33 * /
34 * Append -------+------+------+--- nil
35 * / \ | | |
36 * nil nil ... ... ...
37 * subplans
38 *
39 * Append nodes are currently used for unions, and to support
40 * inheritance queries, where several relations need to be scanned.
41 * For example, in our standard person/student/employee/student-emp
42 * example, where student and employee inherit from person
43 * and student-emp inherits from student and employee, the
44 * query:
45 *
46 * select name from person
47 *
48 * generates the plan:
49 *
50 * |
51 * Append -------+-------+--------+--------+
52 * / \ | | | |
53 * nil nil Scan Scan Scan Scan
54 * | | | |
55 * person employee student student-emp
56 */
57
58 #include "postgres.h"
59
60 #include "executor/execdebug.h"
61 #include "executor/execPartition.h"
62 #include "executor/nodeAppend.h"
63 #include "miscadmin.h"
64
/*
 * Shared state for parallel-aware Append.
 *
 * This struct lives in dynamic shared memory (sized/allocated by
 * ExecAppendEstimate and ExecAppendInitializeDSM); all fields other than
 * pa_lock must only be touched while holding pa_lock.
 */
struct ParallelAppendState
{
	LWLock		pa_lock;		/* mutual exclusion to choose next subplan */
	int			pa_next_plan;	/* next plan to choose by any worker */

	/*
	 * pa_finished[i] should be true if no more workers should select subplan
	 * i.  for a non-partial plan, this should be set to true as soon as a
	 * worker selects the plan; for a partial plan, it remains false until
	 * some worker executes the plan to completion.
	 */
	bool		pa_finished[FLEXIBLE_ARRAY_MEMBER];
};
79
/*
 * Special negative values for AppendState->as_whichplan:
 * INVALID_SUBPLAN_INDEX means no subplan has been chosen yet (the
 * choose_next_subplan function will pick the first one); NO_MATCHING_SUBPLANS
 * means initial run-time pruning eliminated every subplan, so there is
 * nothing to scan at all.
 */
#define INVALID_SUBPLAN_INDEX		-1
#define NO_MATCHING_SUBPLANS		-2

static TupleTableSlot *ExecAppend(PlanState *pstate);
static bool choose_next_subplan_locally(AppendState *node);
static bool choose_next_subplan_for_leader(AppendState *node);
static bool choose_next_subplan_for_worker(AppendState *node);
static void mark_invalid_subplans_as_finished(AppendState *node);
88
89 /* ----------------------------------------------------------------
90 * ExecInitAppend
91 *
92 * Begin all of the subscans of the append node.
93 *
94 * (This is potentially wasteful, since the entire result of the
95 * append node may not be scanned, but this way all of the
96 * structures get allocated in the executor's top level memory
97 * block instead of that of the call to ExecAppend.)
98 * ----------------------------------------------------------------
99 */
AppendState *
ExecInitAppend(Append *node, EState *estate, int eflags)
{
	AppendState *appendstate = makeNode(AppendState);
	PlanState **appendplanstates;
	Bitmapset  *validsubplans;	/* indexes of subplans surviving pruning */
	int			nplans;			/* number of subplans we'll initialize */
	int			firstvalid;		/* index of first valid partial plan */
	int			i,
				j;
	ListCell   *lc;

	/* check for unsupported flags */
	Assert(!(eflags & EXEC_FLAG_MARK));

	/*
	 * Lock the non-leaf tables in the partition tree controlled by this node.
	 * It's a no-op for non-partitioned parent tables.
	 */
	ExecLockNonLeafAppendTables(node->partitioned_rels, estate);

	/*
	 * create new AppendState for our append node
	 */
	appendstate->ps.plan = (Plan *) node;
	appendstate->ps.state = estate;
	appendstate->ps.ExecProcNode = ExecAppend;

	/* Let choose_next_subplan_* function handle setting the first subplan */
	appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;

	/* If run-time partition pruning is enabled, then set that up now */
	if (node->part_prune_info != NULL)
	{
		PartitionPruneState *prunestate;

		/* We may need an expression context to evaluate partition exprs */
		ExecAssignExprContext(estate, &appendstate->ps);

		/* Create the working data structure for pruning. */
		prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
												   node->part_prune_info);
		appendstate->as_prune_state = prunestate;

		/* Perform an initial partition prune, if required. */
		if (prunestate->do_initial_prune)
		{
			/* Determine which subplans survive initial pruning */
			validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
															list_length(node->appendplans));

			/*
			 * The case where no subplans survive pruning must be handled
			 * specially.  The problem here is that code in explain.c requires
			 * an Append to have at least one subplan in order for it to
			 * properly determine the Vars in that subplan's targetlist.  We
			 * sidestep this issue by just initializing the first subplan and
			 * setting as_whichplan to NO_MATCHING_SUBPLANS to indicate that
			 * we don't really need to scan any subnodes.
			 */
			if (bms_is_empty(validsubplans))
			{
				appendstate->as_whichplan = NO_MATCHING_SUBPLANS;

				/* Mark the first as valid so that it's initialized below */
				validsubplans = bms_make_singleton(0);
			}

			nplans = bms_num_members(validsubplans);
		}
		else
		{
			/* We'll need to initialize all subplans */
			nplans = list_length(node->appendplans);
			Assert(nplans > 0);
			validsubplans = bms_add_range(NULL, 0, nplans - 1);
		}

		/*
		 * If no runtime pruning is required, we can fill as_valid_subplans
		 * immediately, preventing later calls to ExecFindMatchingSubPlans.
		 */
		if (!prunestate->do_exec_prune)
		{
			Assert(nplans > 0);
			appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
		}
	}
	else
	{
		nplans = list_length(node->appendplans);

		/*
		 * When run-time partition pruning is not enabled we can just mark all
		 * subplans as valid; they must also all be initialized.
		 */
		Assert(nplans > 0);
		appendstate->as_valid_subplans = validsubplans =
			bms_add_range(NULL, 0, nplans - 1);
		appendstate->as_prune_state = NULL;
	}

	/*
	 * Initialize result tuple type and slot.
	 */
	ExecInitResultTupleSlotTL(estate, &appendstate->ps);

	appendplanstates = (PlanState **) palloc(nplans *
											 sizeof(PlanState *));

	/*
	 * call ExecInitNode on each of the valid plans to be executed and save
	 * the results into the appendplanstates array.
	 *
	 * While at it, find out the first valid partial plan.
	 */
	j = i = 0;
	firstvalid = nplans;		/* i.e. "none found yet" */
	foreach(lc, node->appendplans)
	{
		if (bms_is_member(i, validsubplans))
		{
			Plan	   *initNode = (Plan *) lfirst(lc);

			/*
			 * Record the lowest appendplans index which is a valid partial
			 * plan.  Note "i" indexes the original list, while "j" indexes
			 * the compacted appendplanstates array we're building.
			 */
			if (i >= node->first_partial_plan && j < firstvalid)
				firstvalid = j;

			appendplanstates[j++] = ExecInitNode(initNode, estate, eflags);
		}
		i++;
	}

	appendstate->as_first_partial_plan = firstvalid;
	appendstate->appendplans = appendplanstates;
	appendstate->as_nplans = nplans;

	/*
	 * Miscellaneous initialization
	 */

	appendstate->ps.ps_ProjInfo = NULL;

	/* For parallel query, this will be overridden later. */
	appendstate->choose_next_subplan = choose_next_subplan_locally;

	return appendstate;
}
251
252 /* ----------------------------------------------------------------
253 * ExecAppend
254 *
255 * Handles iteration over multiple subplans.
256 * ----------------------------------------------------------------
257 */
258 static TupleTableSlot *
ExecAppend(PlanState * pstate)259 ExecAppend(PlanState *pstate)
260 {
261 AppendState *node = castNode(AppendState, pstate);
262
263 if (node->as_whichplan < 0)
264 {
265 /*
266 * If no subplan has been chosen, we must choose one before
267 * proceeding.
268 */
269 if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
270 !node->choose_next_subplan(node))
271 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
272
273 /* Nothing to do if there are no matching subplans */
274 else if (node->as_whichplan == NO_MATCHING_SUBPLANS)
275 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
276 }
277
278 for (;;)
279 {
280 PlanState *subnode;
281 TupleTableSlot *result;
282
283 CHECK_FOR_INTERRUPTS();
284
285 /*
286 * figure out which subplan we are currently processing
287 */
288 Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
289 subnode = node->appendplans[node->as_whichplan];
290
291 /*
292 * get a tuple from the subplan
293 */
294 result = ExecProcNode(subnode);
295
296 if (!TupIsNull(result))
297 {
298 /*
299 * If the subplan gave us something then return it as-is. We do
300 * NOT make use of the result slot that was set up in
301 * ExecInitAppend; there's no need for it.
302 */
303 return result;
304 }
305
306 /* choose new subplan; if none, we're done */
307 if (!node->choose_next_subplan(node))
308 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
309 }
310 }
311
312 /* ----------------------------------------------------------------
313 * ExecEndAppend
314 *
315 * Shuts down the subscans of the append node.
316 *
317 * Returns nothing of interest.
318 * ----------------------------------------------------------------
319 */
320 void
ExecEndAppend(AppendState * node)321 ExecEndAppend(AppendState *node)
322 {
323 PlanState **appendplans;
324 int nplans;
325 int i;
326
327 /*
328 * get information from the node
329 */
330 appendplans = node->appendplans;
331 nplans = node->as_nplans;
332
333 /*
334 * shut down each of the subscans
335 */
336 for (i = 0; i < nplans; i++)
337 ExecEndNode(appendplans[i]);
338
339 /*
340 * release any resources associated with run-time pruning
341 */
342 if (node->as_prune_state)
343 ExecDestroyPartitionPruneState(node->as_prune_state);
344 }
345
346 void
ExecReScanAppend(AppendState * node)347 ExecReScanAppend(AppendState *node)
348 {
349 int i;
350
351 /*
352 * If any PARAM_EXEC Params used in pruning expressions have changed, then
353 * we'd better unset the valid subplans so that they are reselected for
354 * the new parameter values.
355 */
356 if (node->as_prune_state &&
357 bms_overlap(node->ps.chgParam,
358 node->as_prune_state->execparamids))
359 {
360 bms_free(node->as_valid_subplans);
361 node->as_valid_subplans = NULL;
362 }
363
364 for (i = 0; i < node->as_nplans; i++)
365 {
366 PlanState *subnode = node->appendplans[i];
367
368 /*
369 * ExecReScan doesn't know about my subplans, so I have to do
370 * changed-parameter signaling myself.
371 */
372 if (node->ps.chgParam != NULL)
373 UpdateChangedParamSet(subnode, node->ps.chgParam);
374
375 /*
376 * If chgParam of subnode is not null then plan will be re-scanned by
377 * first ExecProcNode.
378 */
379 if (subnode->chgParam == NULL)
380 ExecReScan(subnode);
381 }
382
383 /* Let choose_next_subplan_* function handle setting the first subplan */
384 node->as_whichplan = INVALID_SUBPLAN_INDEX;
385 }
386
387 /* ----------------------------------------------------------------
388 * Parallel Append Support
389 * ----------------------------------------------------------------
390 */
391
392 /* ----------------------------------------------------------------
393 * ExecAppendEstimate
394 *
395 * Compute the amount of space we'll need in the parallel
396 * query DSM, and inform pcxt->estimator about our needs.
397 * ----------------------------------------------------------------
398 */
399 void
ExecAppendEstimate(AppendState * node,ParallelContext * pcxt)400 ExecAppendEstimate(AppendState *node,
401 ParallelContext *pcxt)
402 {
403 node->pstate_len =
404 add_size(offsetof(ParallelAppendState, pa_finished),
405 sizeof(bool) * node->as_nplans);
406
407 shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
408 shm_toc_estimate_keys(&pcxt->estimator, 1);
409 }
410
411
412 /* ----------------------------------------------------------------
413 * ExecAppendInitializeDSM
414 *
415 * Set up shared state for Parallel Append.
416 * ----------------------------------------------------------------
417 */
418 void
ExecAppendInitializeDSM(AppendState * node,ParallelContext * pcxt)419 ExecAppendInitializeDSM(AppendState *node,
420 ParallelContext *pcxt)
421 {
422 ParallelAppendState *pstate;
423
424 pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
425 memset(pstate, 0, node->pstate_len);
426 LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
427 shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);
428
429 node->as_pstate = pstate;
430 node->choose_next_subplan = choose_next_subplan_for_leader;
431 }
432
433 /* ----------------------------------------------------------------
434 * ExecAppendReInitializeDSM
435 *
436 * Reset shared state before beginning a fresh scan.
437 * ----------------------------------------------------------------
438 */
439 void
ExecAppendReInitializeDSM(AppendState * node,ParallelContext * pcxt)440 ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
441 {
442 ParallelAppendState *pstate = node->as_pstate;
443
444 pstate->pa_next_plan = 0;
445 memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
446 }
447
448 /* ----------------------------------------------------------------
449 * ExecAppendInitializeWorker
450 *
451 * Copy relevant information from TOC into planstate, and initialize
452 * whatever is required to choose and execute the optimal subplan.
453 * ----------------------------------------------------------------
454 */
455 void
ExecAppendInitializeWorker(AppendState * node,ParallelWorkerContext * pwcxt)456 ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
457 {
458 node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
459 node->choose_next_subplan = choose_next_subplan_for_worker;
460 }
461
462 /* ----------------------------------------------------------------
463 * choose_next_subplan_locally
464 *
465 * Choose next subplan for a non-parallel-aware Append,
466 * returning false if there are no more.
467 * ----------------------------------------------------------------
468 */
469 static bool
choose_next_subplan_locally(AppendState * node)470 choose_next_subplan_locally(AppendState *node)
471 {
472 int whichplan = node->as_whichplan;
473 int nextplan;
474
475 /* We should never be called when there are no subplans */
476 Assert(whichplan != NO_MATCHING_SUBPLANS);
477
478 /*
479 * If first call then have the bms member function choose the first valid
480 * subplan by initializing whichplan to -1. If there happen to be no
481 * valid subplans then the bms member function will handle that by
482 * returning a negative number which will allow us to exit returning a
483 * false value.
484 */
485 if (whichplan == INVALID_SUBPLAN_INDEX)
486 {
487 if (node->as_valid_subplans == NULL)
488 node->as_valid_subplans =
489 ExecFindMatchingSubPlans(node->as_prune_state);
490
491 whichplan = -1;
492 }
493
494 /* Ensure whichplan is within the expected range */
495 Assert(whichplan >= -1 && whichplan <= node->as_nplans);
496
497 if (ScanDirectionIsForward(node->ps.state->es_direction))
498 nextplan = bms_next_member(node->as_valid_subplans, whichplan);
499 else
500 nextplan = bms_prev_member(node->as_valid_subplans, whichplan);
501
502 if (nextplan < 0)
503 return false;
504
505 node->as_whichplan = nextplan;
506
507 return true;
508 }
509
510 /* ----------------------------------------------------------------
511 * choose_next_subplan_for_leader
512 *
513 * Try to pick a plan which doesn't commit us to doing much
514 * work locally, so that as much work as possible is done in
515 * the workers. Cheapest subplans are at the end.
516 * ----------------------------------------------------------------
517 */
static bool
choose_next_subplan_for_leader(AppendState *node)
{
	ParallelAppendState *pstate = node->as_pstate;

	/* Backward scan is not supported by parallel-aware plans */
	Assert(ScanDirectionIsForward(node->ps.state->es_direction));

	/* We should never be called when there are no subplans */
	Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);

	/* All shared-state access below must happen under the lock. */
	LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

	if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
	{
		/* Mark just-completed subplan as finished. */
		node->as_pstate->pa_finished[node->as_whichplan] = true;
	}
	else
	{
		/*
		 * First call: start with the last subplan, since the leader scans
		 * downward from the end (cheapest subplans are at the end, per the
		 * header comment above).
		 */
		node->as_whichplan = node->as_nplans - 1;

		/*
		 * If we've yet to determine the valid subplans then do so now.  If
		 * run-time pruning is disabled then the valid subplans will always be
		 * set to all subplans.
		 */
		if (node->as_valid_subplans == NULL)
		{
			node->as_valid_subplans =
				ExecFindMatchingSubPlans(node->as_prune_state);

			/*
			 * Mark each invalid plan as finished to allow the loop below to
			 * select the first valid subplan.
			 */
			mark_invalid_subplans_as_finished(node);
		}
	}

	/* Loop until we find a subplan to execute. */
	while (pstate->pa_finished[node->as_whichplan])
	{
		if (node->as_whichplan == 0)
		{
			/* Scanned down past the first subplan: everything is finished. */
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
			node->as_whichplan = INVALID_SUBPLAN_INDEX;
			LWLockRelease(&pstate->pa_lock);
			return false;
		}

		/*
		 * We needn't pay attention to as_valid_subplans here as all invalid
		 * plans have been marked as finished.
		 */
		node->as_whichplan--;
	}

	/* If non-partial, immediately mark as finished so no worker takes it. */
	if (node->as_whichplan < node->as_first_partial_plan)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	LWLockRelease(&pstate->pa_lock);

	return true;
}
585
586 /* ----------------------------------------------------------------
587 * choose_next_subplan_for_worker
588 *
589 * Choose next subplan for a parallel-aware Append, returning
590 * false if there are no more.
591 *
592 * We start from the first plan and advance through the list;
593 * when we get back to the end, we loop back to the first
594 * partial plan. This assigns the non-partial plans first in
595 * order of descending cost and then spreads out the workers
596 * as evenly as possible across the remaining partial plans.
597 * ----------------------------------------------------------------
598 */
static bool
choose_next_subplan_for_worker(AppendState *node)
{
	ParallelAppendState *pstate = node->as_pstate;

	/* Backward scan is not supported by parallel-aware plans */
	Assert(ScanDirectionIsForward(node->ps.state->es_direction));

	/* We should never be called when there are no subplans */
	Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);

	/* All shared-state access below must happen under the lock. */
	LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

	/* Mark just-completed subplan as finished. */
	if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	/*
	 * If we've yet to determine the valid subplans then do so now.  If
	 * run-time pruning is disabled then the valid subplans will always be set
	 * to all subplans.  (This is the first-call branch: as_whichplan is still
	 * INVALID_SUBPLAN_INDEX, so nothing was marked finished above.)
	 */
	else if (node->as_valid_subplans == NULL)
	{
		node->as_valid_subplans =
			ExecFindMatchingSubPlans(node->as_prune_state);
		mark_invalid_subplans_as_finished(node);
	}

	/* If all the plans are already done, we have nothing to do */
	if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
	{
		LWLockRelease(&pstate->pa_lock);
		return false;
	}

	/*
	 * Save the plan from which we are starting the search, so we can detect
	 * when the wraparound search below has come full circle.
	 */
	node->as_whichplan = pstate->pa_next_plan;

	/* Loop until we find a valid subplan to execute. */
	while (pstate->pa_finished[pstate->pa_next_plan])
	{
		int			nextplan;

		nextplan = bms_next_member(node->as_valid_subplans,
								   pstate->pa_next_plan);
		if (nextplan >= 0)
		{
			/* Advance to the next valid plan. */
			pstate->pa_next_plan = nextplan;
		}
		else if (node->as_whichplan > node->as_first_partial_plan)
		{
			/*
			 * Try looping back to the first valid partial plan, if there is
			 * one.  If there isn't, arrange to bail out below.
			 */
			nextplan = bms_next_member(node->as_valid_subplans,
									   node->as_first_partial_plan - 1);
			pstate->pa_next_plan =
				nextplan < 0 ? node->as_whichplan : nextplan;
		}
		else
		{
			/*
			 * At last plan, and either there are no partial plans or we've
			 * tried them all.  Arrange to bail out.
			 */
			pstate->pa_next_plan = node->as_whichplan;
		}

		if (pstate->pa_next_plan == node->as_whichplan)
		{
			/* We've tried everything! */
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
			LWLockRelease(&pstate->pa_lock);
			return false;
		}
	}

	/* Pick the plan we found, and advance pa_next_plan one more time. */
	node->as_whichplan = pstate->pa_next_plan;
	pstate->pa_next_plan = bms_next_member(node->as_valid_subplans,
										   pstate->pa_next_plan);

	/*
	 * If there are no more valid plans then try setting the next plan to the
	 * first valid partial plan.
	 */
	if (pstate->pa_next_plan < 0)
	{
		int			nextplan = bms_next_member(node->as_valid_subplans,
											   node->as_first_partial_plan - 1);

		if (nextplan >= 0)
			pstate->pa_next_plan = nextplan;
		else
		{
			/*
			 * There are no valid partial plans, and we already chose the last
			 * non-partial plan; so flag that there's nothing more for our
			 * fellow workers to do.
			 */
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
		}
	}

	/* If non-partial, immediately mark as finished so no one else takes it. */
	if (node->as_whichplan < node->as_first_partial_plan)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	LWLockRelease(&pstate->pa_lock);

	return true;
}
714
715 /*
716 * mark_invalid_subplans_as_finished
717 * Marks the ParallelAppendState's pa_finished as true for each invalid
718 * subplan.
719 *
720 * This function should only be called for parallel Append with run-time
721 * pruning enabled.
722 */
723 static void
mark_invalid_subplans_as_finished(AppendState * node)724 mark_invalid_subplans_as_finished(AppendState *node)
725 {
726 int i;
727
728 /* Only valid to call this while in parallel Append mode */
729 Assert(node->as_pstate);
730
731 /* Shouldn't have been called when run-time pruning is not enabled */
732 Assert(node->as_prune_state);
733
734 /* Nothing to do if all plans are valid */
735 if (bms_num_members(node->as_valid_subplans) == node->as_nplans)
736 return;
737
738 /* Mark all non-valid plans as finished */
739 for (i = 0; i < node->as_nplans; i++)
740 {
741 if (!bms_is_member(i, node->as_valid_subplans))
742 node->as_pstate->pa_finished[i] = true;
743 }
744 }
745