1 /*-------------------------------------------------------------------------
2  *
3  * prepjointree.c
4  *	  Planner preprocessing for subqueries and join tree manipulation.
5  *
6  * NOTE: the intended sequence for invoking these operations is
7  *		replace_empty_jointree
8  *		pull_up_sublinks
9  *		inline_set_returning_functions
10  *		pull_up_subqueries
11  *		flatten_simple_union_all
12  *		do expression preprocessing (including flattening JOIN alias vars)
13  *		reduce_outer_joins
14  *		remove_useless_result_rtes
15  *
16  *
17  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
18  * Portions Copyright (c) 1994, Regents of the University of California
19  *
20  *
21  * IDENTIFICATION
22  *	  src/backend/optimizer/prep/prepjointree.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 #include "postgres.h"
27 
28 #include "catalog/pg_type.h"
29 #include "nodes/makefuncs.h"
30 #include "nodes/nodeFuncs.h"
31 #include "optimizer/clauses.h"
32 #include "optimizer/optimizer.h"
33 #include "optimizer/placeholder.h"
34 #include "optimizer/prep.h"
35 #include "optimizer/subselect.h"
36 #include "optimizer/tlist.h"
37 #include "parser/parse_relation.h"
38 #include "parser/parsetree.h"
39 #include "rewrite/rewriteManip.h"
40 
41 
42 /* source-code-compatibility hacks for pull_varnos() API change */
43 #define pull_varnos(a,b) pull_varnos_new(a,b)
44 #define pull_varnos_of_level(a,b,c) pull_varnos_of_level_new(a,b,c)
45 
46 typedef struct pullup_replace_vars_context
47 {
48 	PlannerInfo *root;
49 	List	   *targetlist;		/* tlist of subquery being pulled up */
50 	RangeTblEntry *target_rte;	/* RTE of subquery */
51 	Relids		relids;			/* relids within subquery, as numbered after
52 								 * pullup (set only if target_rte->lateral) */
53 	bool	   *outer_hasSubLinks;	/* -> outer query's hasSubLinks */
54 	int			varno;			/* varno of subquery */
55 	bool		need_phvs;		/* do we need PlaceHolderVars? */
56 	bool		wrap_non_vars;	/* do we need 'em on *all* non-Vars? */
57 	Node	  **rv_cache;		/* cache for results with PHVs */
58 } pullup_replace_vars_context;
59 
60 typedef struct reduce_outer_joins_state
61 {
62 	Relids		relids;			/* base relids within this subtree */
63 	bool		contains_outer; /* does subtree contain outer join(s)? */
64 	List	   *sub_states;		/* List of states for subtree components */
65 } reduce_outer_joins_state;
66 
67 static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
68 											   Relids *relids);
69 static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
70 										   Node **jtlink1, Relids available_rels1,
71 										   Node **jtlink2, Relids available_rels2);
72 static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
73 										JoinExpr *lowest_outer_join,
74 										JoinExpr *lowest_nulling_outer_join,
75 										AppendRelInfo *containing_appendrel);
76 static Node *pull_up_simple_subquery(PlannerInfo *root, Node *jtnode,
77 									 RangeTblEntry *rte,
78 									 JoinExpr *lowest_outer_join,
79 									 JoinExpr *lowest_nulling_outer_join,
80 									 AppendRelInfo *containing_appendrel);
81 static Node *pull_up_simple_union_all(PlannerInfo *root, Node *jtnode,
82 									  RangeTblEntry *rte);
83 static void pull_up_union_leaf_queries(Node *setOp, PlannerInfo *root,
84 									   int parentRTindex, Query *setOpQuery,
85 									   int childRToffset);
86 static void make_setop_translation_list(Query *query, Index newvarno,
87 										List **translated_vars);
88 static bool is_simple_subquery(PlannerInfo *root, Query *subquery,
89 							   RangeTblEntry *rte,
90 							   JoinExpr *lowest_outer_join);
91 static Node *pull_up_simple_values(PlannerInfo *root, Node *jtnode,
92 								   RangeTblEntry *rte);
93 static bool is_simple_values(PlannerInfo *root, RangeTblEntry *rte);
94 static bool is_simple_union_all(Query *subquery);
95 static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery,
96 										List *colTypes);
97 static bool is_safe_append_member(Query *subquery);
98 static bool jointree_contains_lateral_outer_refs(PlannerInfo *root,
99 												 Node *jtnode, bool restricted,
100 												 Relids safe_upper_varnos);
101 static void replace_vars_in_jointree(Node *jtnode,
102 									 pullup_replace_vars_context *context,
103 									 JoinExpr *lowest_nulling_outer_join);
104 static Node *pullup_replace_vars(Node *expr,
105 								 pullup_replace_vars_context *context);
106 static Node *pullup_replace_vars_callback(Var *var,
107 										  replace_rte_variables_context *context);
108 static Query *pullup_replace_vars_subquery(Query *query,
109 										   pullup_replace_vars_context *context);
110 static reduce_outer_joins_state *reduce_outer_joins_pass1(Node *jtnode);
111 static void reduce_outer_joins_pass2(Node *jtnode,
112 									 reduce_outer_joins_state *state,
113 									 PlannerInfo *root,
114 									 Relids nonnullable_rels,
115 									 List *nonnullable_vars,
116 									 List *forced_null_vars);
117 static Node *remove_useless_results_recurse(PlannerInfo *root, Node *jtnode);
118 static int	get_result_relid(PlannerInfo *root, Node *jtnode);
119 static void remove_result_refs(PlannerInfo *root, int varno, Node *newjtloc);
120 static bool find_dependent_phvs(PlannerInfo *root, int varno);
121 static bool find_dependent_phvs_in_jointree(PlannerInfo *root,
122 											Node *node, int varno);
123 static void substitute_phv_relids(Node *node,
124 								  int varno, Relids subrelids);
125 static void fix_append_rel_relids(List *append_rel_list, int varno,
126 								  Relids subrelids);
127 static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
128 
129 
130 /*
131  * replace_empty_jointree
132  *		If the Query's jointree is empty, replace it with a dummy RTE_RESULT
133  *		relation.
134  *
135  * By doing this, we can avoid a bunch of corner cases that formerly existed
136  * for SELECTs with omitted FROM clauses.  An example is that a subquery
137  * with empty jointree previously could not be pulled up, because that would
138  * have resulted in an empty relid set, making the subquery not uniquely
139  * identifiable for join or PlaceHolderVar processing.
140  *
141  * Unlike most other functions in this file, this function doesn't recurse;
142  * we rely on other processing to invoke it on sub-queries at suitable times.
143  */
144 void
replace_empty_jointree(Query * parse)145 replace_empty_jointree(Query *parse)
146 {
147 	RangeTblEntry *rte;
148 	Index		rti;
149 	RangeTblRef *rtr;
150 
151 	/* Nothing to do if jointree is already nonempty */
152 	if (parse->jointree->fromlist != NIL)
153 		return;
154 
155 	/* We mustn't change it in the top level of a setop tree, either */
156 	if (parse->setOperations)
157 		return;
158 
159 	/* Create suitable RTE */
160 	rte = makeNode(RangeTblEntry);
161 	rte->rtekind = RTE_RESULT;
162 	rte->eref = makeAlias("*RESULT*", NIL);
163 
164 	/* Add it to rangetable */
165 	parse->rtable = lappend(parse->rtable, rte);
166 	rti = list_length(parse->rtable);
167 
168 	/* And jam a reference into the jointree */
169 	rtr = makeNode(RangeTblRef);
170 	rtr->rtindex = rti;
171 	parse->jointree->fromlist = list_make1(rtr);
172 }
173 
174 /*
175  * pull_up_sublinks
176  *		Attempt to pull up ANY and EXISTS SubLinks to be treated as
177  *		semijoins or anti-semijoins.
178  *
179  * A clause "foo op ANY (sub-SELECT)" can be processed by pulling the
180  * sub-SELECT up to become a rangetable entry and treating the implied
181  * comparisons as quals of a semijoin.  However, this optimization *only*
182  * works at the top level of WHERE or a JOIN/ON clause, because we cannot
183  * distinguish whether the ANY ought to return FALSE or NULL in cases
184  * involving NULL inputs.  Also, in an outer join's ON clause we can only
185  * do this if the sublink is degenerate (ie, references only the nullable
186  * side of the join).  In that case it is legal to push the semijoin
187  * down into the nullable side of the join.  If the sublink references any
188  * nonnullable-side variables then it would have to be evaluated as part
189  * of the outer join, which makes things way too complicated.
190  *
191  * Under similar conditions, EXISTS and NOT EXISTS clauses can be handled
192  * by pulling up the sub-SELECT and creating a semijoin or anti-semijoin.
193  *
194  * This routine searches for such clauses and does the necessary parsetree
195  * transformations if any are found.
196  *
197  * This routine has to run before preprocess_expression(), so the quals
198  * clauses are not yet reduced to implicit-AND format, and are not guaranteed
199  * to be AND/OR-flat either.  That means we need to recursively search through
200  * explicit AND clauses.  We stop as soon as we hit a non-AND item.
201  */
202 void
pull_up_sublinks(PlannerInfo * root)203 pull_up_sublinks(PlannerInfo *root)
204 {
205 	Node	   *jtnode;
206 	Relids		relids;
207 
208 	/* Begin recursion through the jointree */
209 	jtnode = pull_up_sublinks_jointree_recurse(root,
210 											   (Node *) root->parse->jointree,
211 											   &relids);
212 
213 	/*
214 	 * root->parse->jointree must always be a FromExpr, so insert a dummy one
215 	 * if we got a bare RangeTblRef or JoinExpr out of the recursion.
216 	 */
217 	if (IsA(jtnode, FromExpr))
218 		root->parse->jointree = (FromExpr *) jtnode;
219 	else
220 		root->parse->jointree = makeFromExpr(list_make1(jtnode), NULL);
221 }
222 
223 /*
224  * Recurse through jointree nodes for pull_up_sublinks()
225  *
226  * In addition to returning the possibly-modified jointree node, we return
227  * a relids set of the contained rels into *relids.
228  */
229 static Node *
pull_up_sublinks_jointree_recurse(PlannerInfo * root,Node * jtnode,Relids * relids)230 pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
231 								  Relids *relids)
232 {
233 	if (jtnode == NULL)
234 	{
235 		*relids = NULL;
236 	}
237 	else if (IsA(jtnode, RangeTblRef))
238 	{
239 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
240 
241 		*relids = bms_make_singleton(varno);
242 		/* jtnode is returned unmodified */
243 	}
244 	else if (IsA(jtnode, FromExpr))
245 	{
246 		FromExpr   *f = (FromExpr *) jtnode;
247 		List	   *newfromlist = NIL;
248 		Relids		frelids = NULL;
249 		FromExpr   *newf;
250 		Node	   *jtlink;
251 		ListCell   *l;
252 
253 		/* First, recurse to process children and collect their relids */
254 		foreach(l, f->fromlist)
255 		{
256 			Node	   *newchild;
257 			Relids		childrelids;
258 
259 			newchild = pull_up_sublinks_jointree_recurse(root,
260 														 lfirst(l),
261 														 &childrelids);
262 			newfromlist = lappend(newfromlist, newchild);
263 			frelids = bms_join(frelids, childrelids);
264 		}
265 		/* Build the replacement FromExpr; no quals yet */
266 		newf = makeFromExpr(newfromlist, NULL);
267 		/* Set up a link representing the rebuilt jointree */
268 		jtlink = (Node *) newf;
269 		/* Now process qual --- all children are available for use */
270 		newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
271 													&jtlink, frelids,
272 													NULL, NULL);
273 
274 		/*
275 		 * Note that the result will be either newf, or a stack of JoinExprs
276 		 * with newf at the base.  We rely on subsequent optimization steps to
277 		 * flatten this and rearrange the joins as needed.
278 		 *
279 		 * Although we could include the pulled-up subqueries in the returned
280 		 * relids, there's no need since upper quals couldn't refer to their
281 		 * outputs anyway.
282 		 */
283 		*relids = frelids;
284 		jtnode = jtlink;
285 	}
286 	else if (IsA(jtnode, JoinExpr))
287 	{
288 		JoinExpr   *j;
289 		Relids		leftrelids;
290 		Relids		rightrelids;
291 		Node	   *jtlink;
292 
293 		/*
294 		 * Make a modifiable copy of join node, but don't bother copying its
295 		 * subnodes (yet).
296 		 */
297 		j = (JoinExpr *) palloc(sizeof(JoinExpr));
298 		memcpy(j, jtnode, sizeof(JoinExpr));
299 		jtlink = (Node *) j;
300 
301 		/* Recurse to process children and collect their relids */
302 		j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
303 													&leftrelids);
304 		j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
305 													&rightrelids);
306 
307 		/*
308 		 * Now process qual, showing appropriate child relids as available,
309 		 * and attach any pulled-up jointree items at the right place. In the
310 		 * inner-join case we put new JoinExprs above the existing one (much
311 		 * as for a FromExpr-style join).  In outer-join cases the new
312 		 * JoinExprs must go into the nullable side of the outer join. The
313 		 * point of the available_rels machinations is to ensure that we only
314 		 * pull up quals for which that's okay.
315 		 *
316 		 * We don't expect to see any pre-existing JOIN_SEMI or JOIN_ANTI
317 		 * nodes here.
318 		 */
319 		switch (j->jointype)
320 		{
321 			case JOIN_INNER:
322 				j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
323 														 &jtlink,
324 														 bms_union(leftrelids,
325 																   rightrelids),
326 														 NULL, NULL);
327 				break;
328 			case JOIN_LEFT:
329 				j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
330 														 &j->rarg,
331 														 rightrelids,
332 														 NULL, NULL);
333 				break;
334 			case JOIN_FULL:
335 				/* can't do anything with full-join quals */
336 				break;
337 			case JOIN_RIGHT:
338 				j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
339 														 &j->larg,
340 														 leftrelids,
341 														 NULL, NULL);
342 				break;
343 			default:
344 				elog(ERROR, "unrecognized join type: %d",
345 					 (int) j->jointype);
346 				break;
347 		}
348 
349 		/*
350 		 * Although we could include the pulled-up subqueries in the returned
351 		 * relids, there's no need since upper quals couldn't refer to their
352 		 * outputs anyway.  But we *do* need to include the join's own rtindex
353 		 * because we haven't yet collapsed join alias variables, so upper
354 		 * levels would mistakenly think they couldn't use references to this
355 		 * join.
356 		 */
357 		*relids = bms_join(leftrelids, rightrelids);
358 		if (j->rtindex)
359 			*relids = bms_add_member(*relids, j->rtindex);
360 		jtnode = jtlink;
361 	}
362 	else
363 		elog(ERROR, "unrecognized node type: %d",
364 			 (int) nodeTag(jtnode));
365 	return jtnode;
366 }
367 
368 /*
369  * Recurse through top-level qual nodes for pull_up_sublinks()
370  *
371  * jtlink1 points to the link in the jointree where any new JoinExprs should
372  * be inserted if they reference available_rels1 (i.e., available_rels1
373  * denotes the relations present underneath jtlink1).  Optionally, jtlink2 can
374  * point to a second link where new JoinExprs should be inserted if they
375  * reference available_rels2 (pass NULL for both those arguments if not used).
376  * Note that SubLinks referencing both sets of variables cannot be optimized.
377  * If we find multiple pull-up-able SubLinks, they'll get stacked onto jtlink1
378  * and/or jtlink2 in the order we encounter them.  We rely on subsequent
379  * optimization to rearrange the stack if appropriate.
380  *
381  * Returns the replacement qual node, or NULL if the qual should be removed.
382  */
383 static Node *
pull_up_sublinks_qual_recurse(PlannerInfo * root,Node * node,Node ** jtlink1,Relids available_rels1,Node ** jtlink2,Relids available_rels2)384 pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
385 							  Node **jtlink1, Relids available_rels1,
386 							  Node **jtlink2, Relids available_rels2)
387 {
388 	if (node == NULL)
389 		return NULL;
390 	if (IsA(node, SubLink))
391 	{
392 		SubLink    *sublink = (SubLink *) node;
393 		JoinExpr   *j;
394 		Relids		child_rels;
395 
396 		/* Is it a convertible ANY or EXISTS clause? */
397 		if (sublink->subLinkType == ANY_SUBLINK)
398 		{
399 			if ((j = convert_ANY_sublink_to_join(root, sublink,
400 												 available_rels1)) != NULL)
401 			{
402 				/* Yes; insert the new join node into the join tree */
403 				j->larg = *jtlink1;
404 				*jtlink1 = (Node *) j;
405 				/* Recursively process pulled-up jointree nodes */
406 				j->rarg = pull_up_sublinks_jointree_recurse(root,
407 															j->rarg,
408 															&child_rels);
409 
410 				/*
411 				 * Now recursively process the pulled-up quals.  Any inserted
412 				 * joins can get stacked onto either j->larg or j->rarg,
413 				 * depending on which rels they reference.
414 				 */
415 				j->quals = pull_up_sublinks_qual_recurse(root,
416 														 j->quals,
417 														 &j->larg,
418 														 available_rels1,
419 														 &j->rarg,
420 														 child_rels);
421 				/* Return NULL representing constant TRUE */
422 				return NULL;
423 			}
424 			if (available_rels2 != NULL &&
425 				(j = convert_ANY_sublink_to_join(root, sublink,
426 												 available_rels2)) != NULL)
427 			{
428 				/* Yes; insert the new join node into the join tree */
429 				j->larg = *jtlink2;
430 				*jtlink2 = (Node *) j;
431 				/* Recursively process pulled-up jointree nodes */
432 				j->rarg = pull_up_sublinks_jointree_recurse(root,
433 															j->rarg,
434 															&child_rels);
435 
436 				/*
437 				 * Now recursively process the pulled-up quals.  Any inserted
438 				 * joins can get stacked onto either j->larg or j->rarg,
439 				 * depending on which rels they reference.
440 				 */
441 				j->quals = pull_up_sublinks_qual_recurse(root,
442 														 j->quals,
443 														 &j->larg,
444 														 available_rels2,
445 														 &j->rarg,
446 														 child_rels);
447 				/* Return NULL representing constant TRUE */
448 				return NULL;
449 			}
450 		}
451 		else if (sublink->subLinkType == EXISTS_SUBLINK)
452 		{
453 			if ((j = convert_EXISTS_sublink_to_join(root, sublink, false,
454 													available_rels1)) != NULL)
455 			{
456 				/* Yes; insert the new join node into the join tree */
457 				j->larg = *jtlink1;
458 				*jtlink1 = (Node *) j;
459 				/* Recursively process pulled-up jointree nodes */
460 				j->rarg = pull_up_sublinks_jointree_recurse(root,
461 															j->rarg,
462 															&child_rels);
463 
464 				/*
465 				 * Now recursively process the pulled-up quals.  Any inserted
466 				 * joins can get stacked onto either j->larg or j->rarg,
467 				 * depending on which rels they reference.
468 				 */
469 				j->quals = pull_up_sublinks_qual_recurse(root,
470 														 j->quals,
471 														 &j->larg,
472 														 available_rels1,
473 														 &j->rarg,
474 														 child_rels);
475 				/* Return NULL representing constant TRUE */
476 				return NULL;
477 			}
478 			if (available_rels2 != NULL &&
479 				(j = convert_EXISTS_sublink_to_join(root, sublink, false,
480 													available_rels2)) != NULL)
481 			{
482 				/* Yes; insert the new join node into the join tree */
483 				j->larg = *jtlink2;
484 				*jtlink2 = (Node *) j;
485 				/* Recursively process pulled-up jointree nodes */
486 				j->rarg = pull_up_sublinks_jointree_recurse(root,
487 															j->rarg,
488 															&child_rels);
489 
490 				/*
491 				 * Now recursively process the pulled-up quals.  Any inserted
492 				 * joins can get stacked onto either j->larg or j->rarg,
493 				 * depending on which rels they reference.
494 				 */
495 				j->quals = pull_up_sublinks_qual_recurse(root,
496 														 j->quals,
497 														 &j->larg,
498 														 available_rels2,
499 														 &j->rarg,
500 														 child_rels);
501 				/* Return NULL representing constant TRUE */
502 				return NULL;
503 			}
504 		}
505 		/* Else return it unmodified */
506 		return node;
507 	}
508 	if (is_notclause(node))
509 	{
510 		/* If the immediate argument of NOT is EXISTS, try to convert */
511 		SubLink    *sublink = (SubLink *) get_notclausearg((Expr *) node);
512 		JoinExpr   *j;
513 		Relids		child_rels;
514 
515 		if (sublink && IsA(sublink, SubLink))
516 		{
517 			if (sublink->subLinkType == EXISTS_SUBLINK)
518 			{
519 				if ((j = convert_EXISTS_sublink_to_join(root, sublink, true,
520 														available_rels1)) != NULL)
521 				{
522 					/* Yes; insert the new join node into the join tree */
523 					j->larg = *jtlink1;
524 					*jtlink1 = (Node *) j;
525 					/* Recursively process pulled-up jointree nodes */
526 					j->rarg = pull_up_sublinks_jointree_recurse(root,
527 																j->rarg,
528 																&child_rels);
529 
530 					/*
531 					 * Now recursively process the pulled-up quals.  Because
532 					 * we are underneath a NOT, we can't pull up sublinks that
533 					 * reference the left-hand stuff, but it's still okay to
534 					 * pull up sublinks referencing j->rarg.
535 					 */
536 					j->quals = pull_up_sublinks_qual_recurse(root,
537 															 j->quals,
538 															 &j->rarg,
539 															 child_rels,
540 															 NULL, NULL);
541 					/* Return NULL representing constant TRUE */
542 					return NULL;
543 				}
544 				if (available_rels2 != NULL &&
545 					(j = convert_EXISTS_sublink_to_join(root, sublink, true,
546 														available_rels2)) != NULL)
547 				{
548 					/* Yes; insert the new join node into the join tree */
549 					j->larg = *jtlink2;
550 					*jtlink2 = (Node *) j;
551 					/* Recursively process pulled-up jointree nodes */
552 					j->rarg = pull_up_sublinks_jointree_recurse(root,
553 																j->rarg,
554 																&child_rels);
555 
556 					/*
557 					 * Now recursively process the pulled-up quals.  Because
558 					 * we are underneath a NOT, we can't pull up sublinks that
559 					 * reference the left-hand stuff, but it's still okay to
560 					 * pull up sublinks referencing j->rarg.
561 					 */
562 					j->quals = pull_up_sublinks_qual_recurse(root,
563 															 j->quals,
564 															 &j->rarg,
565 															 child_rels,
566 															 NULL, NULL);
567 					/* Return NULL representing constant TRUE */
568 					return NULL;
569 				}
570 			}
571 		}
572 		/* Else return it unmodified */
573 		return node;
574 	}
575 	if (is_andclause(node))
576 	{
577 		/* Recurse into AND clause */
578 		List	   *newclauses = NIL;
579 		ListCell   *l;
580 
581 		foreach(l, ((BoolExpr *) node)->args)
582 		{
583 			Node	   *oldclause = (Node *) lfirst(l);
584 			Node	   *newclause;
585 
586 			newclause = pull_up_sublinks_qual_recurse(root,
587 													  oldclause,
588 													  jtlink1,
589 													  available_rels1,
590 													  jtlink2,
591 													  available_rels2);
592 			if (newclause)
593 				newclauses = lappend(newclauses, newclause);
594 		}
595 		/* We might have got back fewer clauses than we started with */
596 		if (newclauses == NIL)
597 			return NULL;
598 		else if (list_length(newclauses) == 1)
599 			return (Node *) linitial(newclauses);
600 		else
601 			return (Node *) make_andclause(newclauses);
602 	}
603 	/* Stop if not an AND */
604 	return node;
605 }
606 
607 /*
608  * inline_set_returning_functions
609  *		Attempt to "inline" set-returning functions in the FROM clause.
610  *
611  * If an RTE_FUNCTION rtable entry invokes a set-returning function that
612  * contains just a simple SELECT, we can convert the rtable entry to an
613  * RTE_SUBQUERY entry exposing the SELECT directly.  This is especially
614  * useful if the subquery can then be "pulled up" for further optimization,
615  * but we do it even if not, to reduce executor overhead.
616  *
617  * This has to be done before we have started to do any optimization of
618  * subqueries, else any such steps wouldn't get applied to subqueries
619  * obtained via inlining.  However, we do it after pull_up_sublinks
620  * so that we can inline any functions used in SubLink subselects.
621  *
622  * Like most of the planner, this feels free to scribble on its input data
623  * structure.
624  */
625 void
inline_set_returning_functions(PlannerInfo * root)626 inline_set_returning_functions(PlannerInfo *root)
627 {
628 	ListCell   *rt;
629 
630 	foreach(rt, root->parse->rtable)
631 	{
632 		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
633 
634 		if (rte->rtekind == RTE_FUNCTION)
635 		{
636 			Query	   *funcquery;
637 
638 			/* Check safety of expansion, and expand if possible */
639 			funcquery = inline_set_returning_function(root, rte);
640 			if (funcquery)
641 			{
642 				/* Successful expansion, convert the RTE to a subquery */
643 				rte->rtekind = RTE_SUBQUERY;
644 				rte->subquery = funcquery;
645 				rte->security_barrier = false;
646 				/* Clear fields that should not be set in a subquery RTE */
647 				rte->functions = NIL;
648 				rte->funcordinality = false;
649 			}
650 		}
651 	}
652 }
653 
654 /*
655  * pull_up_subqueries
656  *		Look for subqueries in the rangetable that can be pulled up into
657  *		the parent query.  If the subquery has no special features like
658  *		grouping/aggregation then we can merge it into the parent's jointree.
659  *		Also, subqueries that are simple UNION ALL structures can be
660  *		converted into "append relations".
661  */
662 void
pull_up_subqueries(PlannerInfo * root)663 pull_up_subqueries(PlannerInfo *root)
664 {
665 	/* Top level of jointree must always be a FromExpr */
666 	Assert(IsA(root->parse->jointree, FromExpr));
667 	/* Recursion starts with no containing join nor appendrel */
668 	root->parse->jointree = (FromExpr *)
669 		pull_up_subqueries_recurse(root, (Node *) root->parse->jointree,
670 								   NULL, NULL, NULL);
671 	/* We should still have a FromExpr */
672 	Assert(IsA(root->parse->jointree, FromExpr));
673 }
674 
675 /*
676  * pull_up_subqueries_recurse
677  *		Recursive guts of pull_up_subqueries.
678  *
679  * This recursively processes the jointree and returns a modified jointree.
680  *
681  * If this jointree node is within either side of an outer join, then
682  * lowest_outer_join references the lowest such JoinExpr node; otherwise
683  * it is NULL.  We use this to constrain the effects of LATERAL subqueries.
684  *
685  * If this jointree node is within the nullable side of an outer join, then
686  * lowest_nulling_outer_join references the lowest such JoinExpr node;
687  * otherwise it is NULL.  This forces use of the PlaceHolderVar mechanism for
688  * references to non-nullable targetlist items, but only for references above
689  * that join.
690  *
691  * If we are looking at a member subquery of an append relation,
692  * containing_appendrel describes that relation; else it is NULL.
693  * This forces use of the PlaceHolderVar mechanism for all non-Var targetlist
694  * items, and puts some additional restrictions on what can be pulled up.
695  *
696  * A tricky aspect of this code is that if we pull up a subquery we have
697  * to replace Vars that reference the subquery's outputs throughout the
698  * parent query, including quals attached to jointree nodes above the one
699  * we are currently processing!  We handle this by being careful to maintain
700  * validity of the jointree structure while recursing, in the following sense:
701  * whenever we recurse, all qual expressions in the tree must be reachable
702  * from the top level, in case the recursive call needs to modify them.
703  *
704  * Notice also that we can't turn pullup_replace_vars loose on the whole
705  * jointree, because it'd return a mutated copy of the tree; we have to
706  * invoke it just on the quals, instead.  This behavior is what makes it
707  * reasonable to pass lowest_outer_join and lowest_nulling_outer_join as
708  * pointers rather than some more-indirect way of identifying the lowest
709  * OJs.  Likewise, we don't replace append_rel_list members but only their
710  * substructure, so the containing_appendrel reference is safe to use.
711  */
712 static Node *
pull_up_subqueries_recurse(PlannerInfo * root,Node * jtnode,JoinExpr * lowest_outer_join,JoinExpr * lowest_nulling_outer_join,AppendRelInfo * containing_appendrel)713 pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
714 						   JoinExpr *lowest_outer_join,
715 						   JoinExpr *lowest_nulling_outer_join,
716 						   AppendRelInfo *containing_appendrel)
717 {
718 	Assert(jtnode != NULL);
719 	if (IsA(jtnode, RangeTblRef))
720 	{
721 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
722 		RangeTblEntry *rte = rt_fetch(varno, root->parse->rtable);
723 
724 		/*
725 		 * Is this a subquery RTE, and if so, is the subquery simple enough to
726 		 * pull up?
727 		 *
728 		 * If we are looking at an append-relation member, we can't pull it up
729 		 * unless is_safe_append_member says so.
730 		 */
731 		if (rte->rtekind == RTE_SUBQUERY &&
732 			is_simple_subquery(root, rte->subquery, rte, lowest_outer_join) &&
733 			(containing_appendrel == NULL ||
734 			 is_safe_append_member(rte->subquery)))
735 			return pull_up_simple_subquery(root, jtnode, rte,
736 										   lowest_outer_join,
737 										   lowest_nulling_outer_join,
738 										   containing_appendrel);
739 
740 		/*
741 		 * Alternatively, is it a simple UNION ALL subquery?  If so, flatten
742 		 * into an "append relation".
743 		 *
744 		 * It's safe to do this regardless of whether this query is itself an
745 		 * appendrel member.  (If you're thinking we should try to flatten the
746 		 * two levels of appendrel together, you're right; but we handle that
747 		 * in set_append_rel_pathlist, not here.)
748 		 */
749 		if (rte->rtekind == RTE_SUBQUERY &&
750 			is_simple_union_all(rte->subquery))
751 			return pull_up_simple_union_all(root, jtnode, rte);
752 
753 		/*
754 		 * Or perhaps it's a simple VALUES RTE?
755 		 *
756 		 * We don't allow VALUES pullup below an outer join nor into an
757 		 * appendrel (such cases are impossible anyway at the moment).
758 		 */
759 		if (rte->rtekind == RTE_VALUES &&
760 			lowest_outer_join == NULL &&
761 			containing_appendrel == NULL &&
762 			is_simple_values(root, rte))
763 			return pull_up_simple_values(root, jtnode, rte);
764 
765 		/* Otherwise, do nothing at this node. */
766 	}
767 	else if (IsA(jtnode, FromExpr))
768 	{
769 		FromExpr   *f = (FromExpr *) jtnode;
770 		ListCell   *l;
771 
772 		Assert(containing_appendrel == NULL);
773 		/* Recursively transform all the child nodes */
774 		foreach(l, f->fromlist)
775 		{
776 			lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l),
777 												   lowest_outer_join,
778 												   lowest_nulling_outer_join,
779 												   NULL);
780 		}
781 	}
782 	else if (IsA(jtnode, JoinExpr))
783 	{
784 		JoinExpr   *j = (JoinExpr *) jtnode;
785 
786 		Assert(containing_appendrel == NULL);
787 		/* Recurse, being careful to tell myself when inside outer join */
788 		switch (j->jointype)
789 		{
790 			case JOIN_INNER:
791 				j->larg = pull_up_subqueries_recurse(root, j->larg,
792 													 lowest_outer_join,
793 													 lowest_nulling_outer_join,
794 													 NULL);
795 				j->rarg = pull_up_subqueries_recurse(root, j->rarg,
796 													 lowest_outer_join,
797 													 lowest_nulling_outer_join,
798 													 NULL);
799 				break;
800 			case JOIN_LEFT:
801 			case JOIN_SEMI:
802 			case JOIN_ANTI:
803 				j->larg = pull_up_subqueries_recurse(root, j->larg,
804 													 j,
805 													 lowest_nulling_outer_join,
806 													 NULL);
807 				j->rarg = pull_up_subqueries_recurse(root, j->rarg,
808 													 j,
809 													 j,
810 													 NULL);
811 				break;
812 			case JOIN_FULL:
813 				j->larg = pull_up_subqueries_recurse(root, j->larg,
814 													 j,
815 													 j,
816 													 NULL);
817 				j->rarg = pull_up_subqueries_recurse(root, j->rarg,
818 													 j,
819 													 j,
820 													 NULL);
821 				break;
822 			case JOIN_RIGHT:
823 				j->larg = pull_up_subqueries_recurse(root, j->larg,
824 													 j,
825 													 j,
826 													 NULL);
827 				j->rarg = pull_up_subqueries_recurse(root, j->rarg,
828 													 j,
829 													 lowest_nulling_outer_join,
830 													 NULL);
831 				break;
832 			default:
833 				elog(ERROR, "unrecognized join type: %d",
834 					 (int) j->jointype);
835 				break;
836 		}
837 	}
838 	else
839 		elog(ERROR, "unrecognized node type: %d",
840 			 (int) nodeTag(jtnode));
841 	return jtnode;
842 }
843 
844 /*
845  * pull_up_simple_subquery
846  *		Attempt to pull up a single simple subquery.
847  *
848  * jtnode is a RangeTblRef that has been tentatively identified as a simple
849  * subquery by pull_up_subqueries.  We return the replacement jointree node,
850  * or jtnode itself if we determine that the subquery can't be pulled up
851  * after all.
852  *
853  * rte is the RangeTblEntry referenced by jtnode.  Remaining parameters are
854  * as for pull_up_subqueries_recurse.
855  */
856 static Node *
pull_up_simple_subquery(PlannerInfo * root,Node * jtnode,RangeTblEntry * rte,JoinExpr * lowest_outer_join,JoinExpr * lowest_nulling_outer_join,AppendRelInfo * containing_appendrel)857 pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
858 						JoinExpr *lowest_outer_join,
859 						JoinExpr *lowest_nulling_outer_join,
860 						AppendRelInfo *containing_appendrel)
861 {
862 	Query	   *parse = root->parse;
863 	int			varno = ((RangeTblRef *) jtnode)->rtindex;
864 	Query	   *subquery;
865 	PlannerInfo *subroot;
866 	int			rtoffset;
867 	pullup_replace_vars_context rvcontext;
868 	ListCell   *lc;
869 
870 	/*
871 	 * Need a modifiable copy of the subquery to hack on.  Even if we didn't
872 	 * sometimes choose not to pull up below, we must do this to avoid
873 	 * problems if the same subquery is referenced from multiple jointree
874 	 * items (which can't happen normally, but might after rule rewriting).
875 	 */
876 	subquery = copyObject(rte->subquery);
877 
878 	/*
879 	 * Create a PlannerInfo data structure for this subquery.
880 	 *
881 	 * NOTE: the next few steps should match the first processing in
882 	 * subquery_planner().  Can we refactor to avoid code duplication, or
883 	 * would that just make things uglier?
884 	 */
885 	subroot = makeNode(PlannerInfo);
886 	subroot->parse = subquery;
887 	subroot->glob = root->glob;
888 	subroot->query_level = root->query_level;
889 	subroot->parent_root = root->parent_root;
890 	subroot->plan_params = NIL;
891 	subroot->outer_params = NULL;
892 	subroot->planner_cxt = CurrentMemoryContext;
893 	subroot->init_plans = NIL;
894 	subroot->cte_plan_ids = NIL;
895 	subroot->multiexpr_params = NIL;
896 	subroot->eq_classes = NIL;
897 	subroot->append_rel_list = NIL;
898 	subroot->rowMarks = NIL;
899 	memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
900 	memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
901 	subroot->processed_tlist = NIL;
902 	subroot->grouping_map = NULL;
903 	subroot->minmax_aggs = NIL;
904 	subroot->qual_security_level = 0;
905 	subroot->inhTargetKind = INHKIND_NONE;
906 	subroot->hasRecursion = false;
907 	subroot->wt_param_id = -1;
908 	subroot->non_recursive_path = NULL;
909 
910 	/* No CTEs to worry about */
911 	Assert(subquery->cteList == NIL);
912 
913 	/*
914 	 * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
915 	 * that we don't need so many special cases to deal with that situation.
916 	 */
917 	replace_empty_jointree(subquery);
918 
919 	/*
920 	 * Pull up any SubLinks within the subquery's quals, so that we don't
921 	 * leave unoptimized SubLinks behind.
922 	 */
923 	if (subquery->hasSubLinks)
924 		pull_up_sublinks(subroot);
925 
926 	/*
927 	 * Similarly, inline any set-returning functions in its rangetable.
928 	 */
929 	inline_set_returning_functions(subroot);
930 
931 	/*
932 	 * Recursively pull up the subquery's subqueries, so that
933 	 * pull_up_subqueries' processing is complete for its jointree and
934 	 * rangetable.
935 	 *
936 	 * Note: it's okay that the subquery's recursion starts with NULL for
937 	 * containing-join info, even if we are within an outer join in the upper
938 	 * query; the lower query starts with a clean slate for outer-join
939 	 * semantics.  Likewise, we needn't pass down appendrel state.
940 	 */
941 	pull_up_subqueries(subroot);
942 
943 	/*
944 	 * Now we must recheck whether the subquery is still simple enough to pull
945 	 * up.  If not, abandon processing it.
946 	 *
947 	 * We don't really need to recheck all the conditions involved, but it's
948 	 * easier just to keep this "if" looking the same as the one in
949 	 * pull_up_subqueries_recurse.
950 	 */
951 	if (is_simple_subquery(root, subquery, rte, lowest_outer_join) &&
952 		(containing_appendrel == NULL || is_safe_append_member(subquery)))
953 	{
954 		/* good to go */
955 	}
956 	else
957 	{
958 		/*
959 		 * Give up, return unmodified RangeTblRef.
960 		 *
961 		 * Note: The work we just did will be redone when the subquery gets
962 		 * planned on its own.  Perhaps we could avoid that by storing the
963 		 * modified subquery back into the rangetable, but I'm not gonna risk
964 		 * it now.
965 		 */
966 		return jtnode;
967 	}
968 
969 	/*
970 	 * We must flatten any join alias Vars in the subquery's targetlist,
971 	 * because pulling up the subquery's subqueries might have changed their
972 	 * expansions into arbitrary expressions, which could affect
973 	 * pullup_replace_vars' decisions about whether PlaceHolderVar wrappers
974 	 * are needed for tlist entries.  (Likely it'd be better to do
975 	 * flatten_join_alias_vars on the whole query tree at some earlier stage,
976 	 * maybe even in the rewriter; but for now let's just fix this case here.)
977 	 */
978 	subquery->targetList = (List *)
979 		flatten_join_alias_vars(subroot->parse, (Node *) subquery->targetList);
980 
981 	/*
982 	 * Adjust level-0 varnos in subquery so that we can append its rangetable
983 	 * to upper query's.  We have to fix the subquery's append_rel_list as
984 	 * well.
985 	 */
986 	rtoffset = list_length(parse->rtable);
987 	OffsetVarNodes((Node *) subquery, rtoffset, 0);
988 	OffsetVarNodes((Node *) subroot->append_rel_list, rtoffset, 0);
989 
990 	/*
991 	 * Upper-level vars in subquery are now one level closer to their parent
992 	 * than before.
993 	 */
994 	IncrementVarSublevelsUp((Node *) subquery, -1, 1);
995 	IncrementVarSublevelsUp((Node *) subroot->append_rel_list, -1, 1);
996 
997 	/*
998 	 * The subquery's targetlist items are now in the appropriate form to
999 	 * insert into the top query, except that we may need to wrap them in
1000 	 * PlaceHolderVars.  Set up required context data for pullup_replace_vars.
1001 	 */
1002 	rvcontext.root = root;
1003 	rvcontext.targetlist = subquery->targetList;
1004 	rvcontext.target_rte = rte;
1005 	if (rte->lateral)
1006 		rvcontext.relids = get_relids_in_jointree((Node *) subquery->jointree,
1007 												  true);
1008 	else						/* won't need relids */
1009 		rvcontext.relids = NULL;
1010 	rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
1011 	rvcontext.varno = varno;
1012 	/* these flags will be set below, if needed */
1013 	rvcontext.need_phvs = false;
1014 	rvcontext.wrap_non_vars = false;
1015 	/* initialize cache array with indexes 0 .. length(tlist) */
1016 	rvcontext.rv_cache = palloc0((list_length(subquery->targetList) + 1) *
1017 								 sizeof(Node *));
1018 
1019 	/*
1020 	 * If we are under an outer join then non-nullable items and lateral
1021 	 * references may have to be turned into PlaceHolderVars.
1022 	 */
1023 	if (lowest_nulling_outer_join != NULL)
1024 		rvcontext.need_phvs = true;
1025 
1026 	/*
1027 	 * If we are dealing with an appendrel member then anything that's not a
1028 	 * simple Var has to be turned into a PlaceHolderVar.  We force this to
1029 	 * ensure that what we pull up doesn't get merged into a surrounding
1030 	 * expression during later processing and then fail to match the
1031 	 * expression actually available from the appendrel.
1032 	 */
1033 	if (containing_appendrel != NULL)
1034 	{
1035 		rvcontext.need_phvs = true;
1036 		rvcontext.wrap_non_vars = true;
1037 	}
1038 
1039 	/*
1040 	 * If the parent query uses grouping sets, we need a PlaceHolderVar for
1041 	 * anything that's not a simple Var.  Again, this ensures that expressions
1042 	 * retain their separate identity so that they will match grouping set
1043 	 * columns when appropriate.  (It'd be sufficient to wrap values used in
1044 	 * grouping set columns, and do so only in non-aggregated portions of the
1045 	 * tlist and havingQual, but that would require a lot of infrastructure
1046 	 * that pullup_replace_vars hasn't currently got.)
1047 	 */
1048 	if (parse->groupingSets)
1049 	{
1050 		rvcontext.need_phvs = true;
1051 		rvcontext.wrap_non_vars = true;
1052 	}
1053 
1054 	/*
1055 	 * Replace all of the top query's references to the subquery's outputs
1056 	 * with copies of the adjusted subtlist items, being careful not to
1057 	 * replace any of the jointree structure. (This'd be a lot cleaner if we
1058 	 * could use query_tree_mutator.)  We have to use PHVs in the targetList,
1059 	 * returningList, and havingQual, since those are certainly above any
1060 	 * outer join.  replace_vars_in_jointree tracks its location in the
1061 	 * jointree and uses PHVs or not appropriately.
1062 	 */
1063 	parse->targetList = (List *)
1064 		pullup_replace_vars((Node *) parse->targetList, &rvcontext);
1065 	parse->returningList = (List *)
1066 		pullup_replace_vars((Node *) parse->returningList, &rvcontext);
1067 	if (parse->onConflict)
1068 	{
1069 		parse->onConflict->onConflictSet = (List *)
1070 			pullup_replace_vars((Node *) parse->onConflict->onConflictSet,
1071 								&rvcontext);
1072 		parse->onConflict->onConflictWhere =
1073 			pullup_replace_vars(parse->onConflict->onConflictWhere,
1074 								&rvcontext);
1075 
1076 		/*
1077 		 * We assume ON CONFLICT's arbiterElems, arbiterWhere, exclRelTlist
1078 		 * can't contain any references to a subquery
1079 		 */
1080 	}
1081 	replace_vars_in_jointree((Node *) parse->jointree, &rvcontext,
1082 							 lowest_nulling_outer_join);
1083 	Assert(parse->setOperations == NULL);
1084 	parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext);
1085 
1086 	/*
1087 	 * Replace references in the translated_vars lists of appendrels. When
1088 	 * pulling up an appendrel member, we do not need PHVs in the list of the
1089 	 * parent appendrel --- there isn't any outer join between. Elsewhere, use
1090 	 * PHVs for safety.  (This analysis could be made tighter but it seems
1091 	 * unlikely to be worth much trouble.)
1092 	 */
1093 	foreach(lc, root->append_rel_list)
1094 	{
1095 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1096 		bool		save_need_phvs = rvcontext.need_phvs;
1097 
1098 		if (appinfo == containing_appendrel)
1099 			rvcontext.need_phvs = false;
1100 		appinfo->translated_vars = (List *)
1101 			pullup_replace_vars((Node *) appinfo->translated_vars, &rvcontext);
1102 		rvcontext.need_phvs = save_need_phvs;
1103 	}
1104 
1105 	/*
1106 	 * Replace references in the joinaliasvars lists of join RTEs.
1107 	 *
1108 	 * You might think that we could avoid using PHVs for alias vars of joins
1109 	 * below lowest_nulling_outer_join, but that doesn't work because the
1110 	 * alias vars could be referenced above that join; we need the PHVs to be
1111 	 * present in such references after the alias vars get flattened.  (It
1112 	 * might be worth trying to be smarter here, someday.)
1113 	 */
1114 	foreach(lc, parse->rtable)
1115 	{
1116 		RangeTblEntry *otherrte = (RangeTblEntry *) lfirst(lc);
1117 
1118 		if (otherrte->rtekind == RTE_JOIN)
1119 			otherrte->joinaliasvars = (List *)
1120 				pullup_replace_vars((Node *) otherrte->joinaliasvars,
1121 									&rvcontext);
1122 	}
1123 
1124 	/*
1125 	 * If the subquery had a LATERAL marker, propagate that to any of its
1126 	 * child RTEs that could possibly now contain lateral cross-references.
1127 	 * The children might or might not contain any actual lateral
1128 	 * cross-references, but we have to mark the pulled-up child RTEs so that
1129 	 * later planner stages will check for such.
1130 	 */
1131 	if (rte->lateral)
1132 	{
1133 		foreach(lc, subquery->rtable)
1134 		{
1135 			RangeTblEntry *child_rte = (RangeTblEntry *) lfirst(lc);
1136 
1137 			switch (child_rte->rtekind)
1138 			{
1139 				case RTE_RELATION:
1140 					if (child_rte->tablesample)
1141 						child_rte->lateral = true;
1142 					break;
1143 				case RTE_SUBQUERY:
1144 				case RTE_FUNCTION:
1145 				case RTE_VALUES:
1146 				case RTE_TABLEFUNC:
1147 					child_rte->lateral = true;
1148 					break;
1149 				case RTE_JOIN:
1150 				case RTE_CTE:
1151 				case RTE_NAMEDTUPLESTORE:
1152 				case RTE_RESULT:
1153 					/* these can't contain any lateral references */
1154 					break;
1155 			}
1156 		}
1157 	}
1158 
1159 	/*
1160 	 * Now append the adjusted rtable entries to upper query. (We hold off
1161 	 * until after fixing the upper rtable entries; no point in running that
1162 	 * code on the subquery ones too.)
1163 	 */
1164 	parse->rtable = list_concat(parse->rtable, subquery->rtable);
1165 
1166 	/*
1167 	 * Pull up any FOR UPDATE/SHARE markers, too.  (OffsetVarNodes already
1168 	 * adjusted the marker rtindexes, so just concat the lists.)
1169 	 */
1170 	parse->rowMarks = list_concat(parse->rowMarks, subquery->rowMarks);
1171 
1172 	/*
1173 	 * We also have to fix the relid sets of any PlaceHolderVar nodes in the
1174 	 * parent query.  (This could perhaps be done by pullup_replace_vars(),
1175 	 * but it seems cleaner to use two passes.)  Note in particular that any
1176 	 * PlaceHolderVar nodes just created by pullup_replace_vars() will be
1177 	 * adjusted, so having created them with the subquery's varno is correct.
1178 	 *
1179 	 * Likewise, relids appearing in AppendRelInfo nodes have to be fixed. We
1180 	 * already checked that this won't require introducing multiple subrelids
1181 	 * into the single-slot AppendRelInfo structs.
1182 	 */
1183 	if (parse->hasSubLinks || root->glob->lastPHId != 0 ||
1184 		root->append_rel_list)
1185 	{
1186 		Relids		subrelids;
1187 
1188 		subrelids = get_relids_in_jointree((Node *) subquery->jointree, false);
1189 		substitute_phv_relids((Node *) parse, varno, subrelids);
1190 		fix_append_rel_relids(root->append_rel_list, varno, subrelids);
1191 	}
1192 
1193 	/*
1194 	 * And now add subquery's AppendRelInfos to our list.
1195 	 */
1196 	root->append_rel_list = list_concat(root->append_rel_list,
1197 										subroot->append_rel_list);
1198 
1199 	/*
1200 	 * We don't have to do the equivalent bookkeeping for outer-join info,
1201 	 * because that hasn't been set up yet.  placeholder_list likewise.
1202 	 */
1203 	Assert(root->join_info_list == NIL);
1204 	Assert(subroot->join_info_list == NIL);
1205 	Assert(root->placeholder_list == NIL);
1206 	Assert(subroot->placeholder_list == NIL);
1207 
1208 	/*
1209 	 * Miscellaneous housekeeping.
1210 	 *
1211 	 * Although replace_rte_variables() faithfully updated parse->hasSubLinks
1212 	 * if it copied any SubLinks out of the subquery's targetlist, we still
1213 	 * could have SubLinks added to the query in the expressions of FUNCTION
1214 	 * and VALUES RTEs copied up from the subquery.  So it's necessary to copy
1215 	 * subquery->hasSubLinks anyway.  Perhaps this can be improved someday.
1216 	 */
1217 	parse->hasSubLinks |= subquery->hasSubLinks;
1218 
1219 	/* If subquery had any RLS conditions, now main query does too */
1220 	parse->hasRowSecurity |= subquery->hasRowSecurity;
1221 
1222 	/*
1223 	 * subquery won't be pulled up if it hasAggs, hasWindowFuncs, or
1224 	 * hasTargetSRFs, so no work needed on those flags
1225 	 */
1226 
1227 	/*
1228 	 * Return the adjusted subquery jointree to replace the RangeTblRef entry
1229 	 * in parent's jointree; or, if the FromExpr is degenerate, just return
1230 	 * its single member.
1231 	 */
1232 	Assert(IsA(subquery->jointree, FromExpr));
1233 	Assert(subquery->jointree->fromlist != NIL);
1234 	if (subquery->jointree->quals == NULL &&
1235 		list_length(subquery->jointree->fromlist) == 1)
1236 		return (Node *) linitial(subquery->jointree->fromlist);
1237 
1238 	return (Node *) subquery->jointree;
1239 }
1240 
1241 /*
1242  * pull_up_simple_union_all
1243  *		Pull up a single simple UNION ALL subquery.
1244  *
1245  * jtnode is a RangeTblRef that has been identified as a simple UNION ALL
1246  * subquery by pull_up_subqueries.  We pull up the leaf subqueries and
1247  * build an "append relation" for the union set.  The result value is just
1248  * jtnode, since we don't actually need to change the query jointree.
1249  */
1250 static Node *
pull_up_simple_union_all(PlannerInfo * root,Node * jtnode,RangeTblEntry * rte)1251 pull_up_simple_union_all(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
1252 {
1253 	int			varno = ((RangeTblRef *) jtnode)->rtindex;
1254 	Query	   *subquery = rte->subquery;
1255 	int			rtoffset = list_length(root->parse->rtable);
1256 	List	   *rtable;
1257 
1258 	/*
1259 	 * Make a modifiable copy of the subquery's rtable, so we can adjust
1260 	 * upper-level Vars in it.  There are no such Vars in the setOperations
1261 	 * tree proper, so fixing the rtable should be sufficient.
1262 	 */
1263 	rtable = copyObject(subquery->rtable);
1264 
1265 	/*
1266 	 * Upper-level vars in subquery are now one level closer to their parent
1267 	 * than before.  We don't have to worry about offsetting varnos, though,
1268 	 * because the UNION leaf queries can't cross-reference each other.
1269 	 */
1270 	IncrementVarSublevelsUp_rtable(rtable, -1, 1);
1271 
1272 	/*
1273 	 * If the UNION ALL subquery had a LATERAL marker, propagate that to all
1274 	 * its children.  The individual children might or might not contain any
1275 	 * actual lateral cross-references, but we have to mark the pulled-up
1276 	 * child RTEs so that later planner stages will check for such.
1277 	 */
1278 	if (rte->lateral)
1279 	{
1280 		ListCell   *rt;
1281 
1282 		foreach(rt, rtable)
1283 		{
1284 			RangeTblEntry *child_rte = (RangeTblEntry *) lfirst(rt);
1285 
1286 			Assert(child_rte->rtekind == RTE_SUBQUERY);
1287 			child_rte->lateral = true;
1288 		}
1289 	}
1290 
1291 	/*
1292 	 * Append child RTEs to parent rtable.
1293 	 */
1294 	root->parse->rtable = list_concat(root->parse->rtable, rtable);
1295 
1296 	/*
1297 	 * Recursively scan the subquery's setOperations tree and add
1298 	 * AppendRelInfo nodes for leaf subqueries to the parent's
1299 	 * append_rel_list.  Also apply pull_up_subqueries to the leaf subqueries.
1300 	 */
1301 	Assert(subquery->setOperations);
1302 	pull_up_union_leaf_queries(subquery->setOperations, root, varno, subquery,
1303 							   rtoffset);
1304 
1305 	/*
1306 	 * Mark the parent as an append relation.
1307 	 */
1308 	rte->inh = true;
1309 
1310 	return jtnode;
1311 }
1312 
1313 /*
1314  * pull_up_union_leaf_queries -- recursive guts of pull_up_simple_union_all
1315  *
1316  * Build an AppendRelInfo for each leaf query in the setop tree, and then
1317  * apply pull_up_subqueries to the leaf query.
1318  *
1319  * Note that setOpQuery is the Query containing the setOp node, whose tlist
1320  * contains references to all the setop output columns.  When called from
1321  * pull_up_simple_union_all, this is *not* the same as root->parse, which is
1322  * the parent Query we are pulling up into.
1323  *
1324  * parentRTindex is the appendrel parent's index in root->parse->rtable.
1325  *
1326  * The child RTEs have already been copied to the parent.  childRToffset
1327  * tells us where in the parent's range table they were copied.  When called
1328  * from flatten_simple_union_all, childRToffset is 0 since the child RTEs
1329  * were already in root->parse->rtable and no RT index adjustment is needed.
1330  */
1331 static void
pull_up_union_leaf_queries(Node * setOp,PlannerInfo * root,int parentRTindex,Query * setOpQuery,int childRToffset)1332 pull_up_union_leaf_queries(Node *setOp, PlannerInfo *root, int parentRTindex,
1333 						   Query *setOpQuery, int childRToffset)
1334 {
1335 	if (IsA(setOp, RangeTblRef))
1336 	{
1337 		RangeTblRef *rtr = (RangeTblRef *) setOp;
1338 		int			childRTindex;
1339 		AppendRelInfo *appinfo;
1340 
1341 		/*
1342 		 * Calculate the index in the parent's range table
1343 		 */
1344 		childRTindex = childRToffset + rtr->rtindex;
1345 
1346 		/*
1347 		 * Build a suitable AppendRelInfo, and attach to parent's list.
1348 		 */
1349 		appinfo = makeNode(AppendRelInfo);
1350 		appinfo->parent_relid = parentRTindex;
1351 		appinfo->child_relid = childRTindex;
1352 		appinfo->parent_reltype = InvalidOid;
1353 		appinfo->child_reltype = InvalidOid;
1354 		make_setop_translation_list(setOpQuery, childRTindex,
1355 									&appinfo->translated_vars);
1356 		appinfo->parent_reloid = InvalidOid;
1357 		root->append_rel_list = lappend(root->append_rel_list, appinfo);
1358 
1359 		/*
1360 		 * Recursively apply pull_up_subqueries to the new child RTE.  (We
1361 		 * must build the AppendRelInfo first, because this will modify it.)
1362 		 * Note that we can pass NULL for containing-join info even if we're
1363 		 * actually under an outer join, because the child's expressions
1364 		 * aren't going to propagate up to the join.  Also, we ignore the
1365 		 * possibility that pull_up_subqueries_recurse() returns a different
1366 		 * jointree node than what we pass it; if it does, the important thing
1367 		 * is that it replaced the child relid in the AppendRelInfo node.
1368 		 */
1369 		rtr = makeNode(RangeTblRef);
1370 		rtr->rtindex = childRTindex;
1371 		(void) pull_up_subqueries_recurse(root, (Node *) rtr,
1372 										  NULL, NULL, appinfo);
1373 	}
1374 	else if (IsA(setOp, SetOperationStmt))
1375 	{
1376 		SetOperationStmt *op = (SetOperationStmt *) setOp;
1377 
1378 		/* Recurse to reach leaf queries */
1379 		pull_up_union_leaf_queries(op->larg, root, parentRTindex, setOpQuery,
1380 								   childRToffset);
1381 		pull_up_union_leaf_queries(op->rarg, root, parentRTindex, setOpQuery,
1382 								   childRToffset);
1383 	}
1384 	else
1385 	{
1386 		elog(ERROR, "unrecognized node type: %d",
1387 			 (int) nodeTag(setOp));
1388 	}
1389 }
1390 
1391 /*
1392  * make_setop_translation_list
1393  *	  Build the list of translations from parent Vars to child Vars for
1394  *	  a UNION ALL member.  (At this point it's just a simple list of
1395  *	  referencing Vars, but if we succeed in pulling up the member
1396  *	  subquery, the Vars will get replaced by pulled-up expressions.)
1397  */
1398 static void
make_setop_translation_list(Query * query,Index newvarno,List ** translated_vars)1399 make_setop_translation_list(Query *query, Index newvarno,
1400 							List **translated_vars)
1401 {
1402 	List	   *vars = NIL;
1403 	ListCell   *l;
1404 
1405 	foreach(l, query->targetList)
1406 	{
1407 		TargetEntry *tle = (TargetEntry *) lfirst(l);
1408 
1409 		if (tle->resjunk)
1410 			continue;
1411 
1412 		vars = lappend(vars, makeVarFromTargetEntry(newvarno, tle));
1413 	}
1414 
1415 	*translated_vars = vars;
1416 }
1417 
1418 /*
1419  * is_simple_subquery
1420  *	  Check a subquery in the range table to see if it's simple enough
1421  *	  to pull up into the parent query.
1422  *
1423  * rte is the RTE_SUBQUERY RangeTblEntry that contained the subquery.
1424  * (Note subquery is not necessarily equal to rte->subquery; it could be a
1425  * processed copy of that.)
1426  * lowest_outer_join is the lowest outer join above the subquery, or NULL.
1427  */
1428 static bool
is_simple_subquery(PlannerInfo * root,Query * subquery,RangeTblEntry * rte,JoinExpr * lowest_outer_join)1429 is_simple_subquery(PlannerInfo *root, Query *subquery, RangeTblEntry *rte,
1430 				   JoinExpr *lowest_outer_join)
1431 {
1432 	/*
1433 	 * Let's just make sure it's a valid subselect ...
1434 	 */
1435 	if (!IsA(subquery, Query) ||
1436 		subquery->commandType != CMD_SELECT)
1437 		elog(ERROR, "subquery is bogus");
1438 
1439 	/*
1440 	 * Can't currently pull up a query with setops (unless it's simple UNION
1441 	 * ALL, which is handled by a different code path). Maybe after querytree
1442 	 * redesign...
1443 	 */
1444 	if (subquery->setOperations)
1445 		return false;
1446 
1447 	/*
1448 	 * Can't pull up a subquery involving grouping, aggregation, SRFs,
1449 	 * sorting, limiting, or WITH.  (XXX WITH could possibly be allowed later)
1450 	 *
1451 	 * We also don't pull up a subquery that has explicit FOR UPDATE/SHARE
1452 	 * clauses, because pullup would cause the locking to occur semantically
1453 	 * higher than it should.  Implicit FOR UPDATE/SHARE is okay because in
1454 	 * that case the locking was originally declared in the upper query
1455 	 * anyway.
1456 	 */
1457 	if (subquery->hasAggs ||
1458 		subquery->hasWindowFuncs ||
1459 		subquery->hasTargetSRFs ||
1460 		subquery->groupClause ||
1461 		subquery->groupingSets ||
1462 		subquery->havingQual ||
1463 		subquery->sortClause ||
1464 		subquery->distinctClause ||
1465 		subquery->limitOffset ||
1466 		subquery->limitCount ||
1467 		subquery->hasForUpdate ||
1468 		subquery->cteList)
1469 		return false;
1470 
1471 	/*
1472 	 * Don't pull up if the RTE represents a security-barrier view; we
1473 	 * couldn't prevent information leakage once the RTE's Vars are scattered
1474 	 * about in the upper query.
1475 	 */
1476 	if (rte->security_barrier)
1477 		return false;
1478 
1479 	/*
1480 	 * If the subquery is LATERAL, check for pullup restrictions from that.
1481 	 */
1482 	if (rte->lateral)
1483 	{
1484 		bool		restricted;
1485 		Relids		safe_upper_varnos;
1486 
1487 		/*
1488 		 * The subquery's WHERE and JOIN/ON quals mustn't contain any lateral
1489 		 * references to rels outside a higher outer join (including the case
1490 		 * where the outer join is within the subquery itself).  In such a
1491 		 * case, pulling up would result in a situation where we need to
1492 		 * postpone quals from below an outer join to above it, which is
1493 		 * probably completely wrong and in any case is a complication that
1494 		 * doesn't seem worth addressing at the moment.
1495 		 */
1496 		if (lowest_outer_join != NULL)
1497 		{
1498 			restricted = true;
1499 			safe_upper_varnos = get_relids_in_jointree((Node *) lowest_outer_join,
1500 													   true);
1501 		}
1502 		else
1503 		{
1504 			restricted = false;
1505 			safe_upper_varnos = NULL;	/* doesn't matter */
1506 		}
1507 
1508 		if (jointree_contains_lateral_outer_refs(root,
1509 												 (Node *) subquery->jointree,
1510 												 restricted, safe_upper_varnos))
1511 			return false;
1512 
1513 		/*
1514 		 * If there's an outer join above the LATERAL subquery, also disallow
1515 		 * pullup if the subquery's targetlist has any references to rels
1516 		 * outside the outer join, since these might get pulled into quals
1517 		 * above the subquery (but in or below the outer join) and then lead
1518 		 * to qual-postponement issues similar to the case checked for above.
1519 		 * (We wouldn't need to prevent pullup if no such references appear in
1520 		 * outer-query quals, but we don't have enough info here to check
1521 		 * that.  Also, maybe this restriction could be removed if we forced
1522 		 * such refs to be wrapped in PlaceHolderVars, even when they're below
1523 		 * the nearest outer join?	But it's a pretty hokey usage, so not
1524 		 * clear this is worth sweating over.)
1525 		 */
1526 		if (lowest_outer_join != NULL)
1527 		{
1528 			Relids		lvarnos = pull_varnos_of_level(root,
1529 													   (Node *) subquery->targetList,
1530 													   1);
1531 
1532 			if (!bms_is_subset(lvarnos, safe_upper_varnos))
1533 				return false;
1534 		}
1535 	}
1536 
1537 	/*
1538 	 * Don't pull up a subquery that has any volatile functions in its
1539 	 * targetlist.  Otherwise we might introduce multiple evaluations of these
1540 	 * functions, if they get copied to multiple places in the upper query,
1541 	 * leading to surprising results.  (Note: the PlaceHolderVar mechanism
1542 	 * doesn't quite guarantee single evaluation; else we could pull up anyway
1543 	 * and just wrap such items in PlaceHolderVars ...)
1544 	 */
1545 	if (contain_volatile_functions((Node *) subquery->targetList))
1546 		return false;
1547 
1548 	return true;
1549 }
1550 
1551 /*
1552  * pull_up_simple_values
1553  *		Pull up a single simple VALUES RTE.
1554  *
1555  * jtnode is a RangeTblRef that has been identified as a simple VALUES RTE
1556  * by pull_up_subqueries.  We always return a RangeTblRef representing a
1557  * RESULT RTE to replace it (all failure cases should have been detected by
1558  * is_simple_values()).  Actually, what we return is just jtnode, because
1559  * we replace the VALUES RTE in the rangetable with the RESULT RTE.
1560  *
1561  * rte is the RangeTblEntry referenced by jtnode.  Because of the limited
1562  * possible usage of VALUES RTEs, we do not need the remaining parameters
1563  * of pull_up_subqueries_recurse.
1564  */
1565 static Node *
pull_up_simple_values(PlannerInfo * root,Node * jtnode,RangeTblEntry * rte)1566 pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
1567 {
1568 	Query	   *parse = root->parse;
1569 	int			varno = ((RangeTblRef *) jtnode)->rtindex;
1570 	List	   *values_list;
1571 	List	   *tlist;
1572 	AttrNumber	attrno;
1573 	pullup_replace_vars_context rvcontext;
1574 	ListCell   *lc;
1575 
1576 	Assert(rte->rtekind == RTE_VALUES);
1577 	Assert(list_length(rte->values_lists) == 1);
1578 
1579 	/*
1580 	 * Need a modifiable copy of the VALUES list to hack on, just in case it's
1581 	 * multiply referenced.
1582 	 */
1583 	values_list = copyObject(linitial(rte->values_lists));
1584 
1585 	/*
1586 	 * The VALUES RTE can't contain any Vars of level zero, let alone any that
1587 	 * are join aliases, so no need to flatten join alias Vars.
1588 	 */
1589 	Assert(!contain_vars_of_level((Node *) values_list, 0));
1590 
1591 	/*
1592 	 * Set up required context data for pullup_replace_vars.  In particular,
1593 	 * we have to make the VALUES list look like a subquery targetlist.
1594 	 */
1595 	tlist = NIL;
1596 	attrno = 1;
1597 	foreach(lc, values_list)
1598 	{
1599 		tlist = lappend(tlist,
1600 						makeTargetEntry((Expr *) lfirst(lc),
1601 										attrno,
1602 										NULL,
1603 										false));
1604 		attrno++;
1605 	}
1606 	rvcontext.root = root;
1607 	rvcontext.targetlist = tlist;
1608 	rvcontext.target_rte = rte;
1609 	rvcontext.relids = NULL;
1610 	rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
1611 	rvcontext.varno = varno;
1612 	rvcontext.need_phvs = false;
1613 	rvcontext.wrap_non_vars = false;
1614 	/* initialize cache array with indexes 0 .. length(tlist) */
1615 	rvcontext.rv_cache = palloc0((list_length(tlist) + 1) *
1616 								 sizeof(Node *));
1617 
1618 	/*
1619 	 * Replace all of the top query's references to the RTE's outputs with
1620 	 * copies of the adjusted VALUES expressions, being careful not to replace
1621 	 * any of the jointree structure. (This'd be a lot cleaner if we could use
1622 	 * query_tree_mutator.)  Much of this should be no-ops in the dummy Query
1623 	 * that surrounds a VALUES RTE, but it's not enough code to be worth
1624 	 * removing.
1625 	 */
1626 	parse->targetList = (List *)
1627 		pullup_replace_vars((Node *) parse->targetList, &rvcontext);
1628 	parse->returningList = (List *)
1629 		pullup_replace_vars((Node *) parse->returningList, &rvcontext);
1630 	if (parse->onConflict)
1631 	{
1632 		parse->onConflict->onConflictSet = (List *)
1633 			pullup_replace_vars((Node *) parse->onConflict->onConflictSet,
1634 								&rvcontext);
1635 		parse->onConflict->onConflictWhere =
1636 			pullup_replace_vars(parse->onConflict->onConflictWhere,
1637 								&rvcontext);
1638 
1639 		/*
1640 		 * We assume ON CONFLICT's arbiterElems, arbiterWhere, exclRelTlist
1641 		 * can't contain any references to a subquery
1642 		 */
1643 	}
1644 	replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, NULL);
1645 	Assert(parse->setOperations == NULL);
1646 	parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext);
1647 
1648 	/*
1649 	 * There should be no appendrels to fix, nor any join alias Vars, nor any
1650 	 * outer joins and hence no PlaceHolderVars.
1651 	 */
1652 	Assert(root->append_rel_list == NIL);
1653 	Assert(list_length(parse->rtable) == 1);
1654 	Assert(root->join_info_list == NIL);
1655 	Assert(root->placeholder_list == NIL);
1656 
1657 	/*
1658 	 * Replace the VALUES RTE with a RESULT RTE.  The VALUES RTE is the only
1659 	 * rtable entry in the current query level, so this is easy.
1660 	 */
1661 	Assert(list_length(parse->rtable) == 1);
1662 
1663 	/* Create suitable RTE */
1664 	rte = makeNode(RangeTblEntry);
1665 	rte->rtekind = RTE_RESULT;
1666 	rte->eref = makeAlias("*RESULT*", NIL);
1667 
1668 	/* Replace rangetable */
1669 	parse->rtable = list_make1(rte);
1670 
1671 	/* We could manufacture a new RangeTblRef, but the one we have is fine */
1672 	Assert(varno == 1);
1673 
1674 	return jtnode;
1675 }
1676 
1677 /*
1678  * is_simple_values
1679  *	  Check a VALUES RTE in the range table to see if it's simple enough
1680  *	  to pull up into the parent query.
1681  *
1682  * rte is the RTE_VALUES RangeTblEntry to check.
1683  */
1684 static bool
is_simple_values(PlannerInfo * root,RangeTblEntry * rte)1685 is_simple_values(PlannerInfo *root, RangeTblEntry *rte)
1686 {
1687 	Assert(rte->rtekind == RTE_VALUES);
1688 
1689 	/*
1690 	 * There must be exactly one VALUES list, else it's not semantically
1691 	 * correct to replace the VALUES RTE with a RESULT RTE, nor would we have
1692 	 * a unique set of expressions to substitute into the parent query.
1693 	 */
1694 	if (list_length(rte->values_lists) != 1)
1695 		return false;
1696 
1697 	/*
1698 	 * Because VALUES can't appear under an outer join (or at least, we won't
1699 	 * try to pull it up if it does), we need not worry about LATERAL, nor
1700 	 * about validity of PHVs for the VALUES' outputs.
1701 	 */
1702 
1703 	/*
1704 	 * Don't pull up a VALUES that contains any set-returning or volatile
1705 	 * functions.  The considerations here are basically identical to the
1706 	 * restrictions on a pull-able subquery's targetlist.
1707 	 */
1708 	if (expression_returns_set((Node *) rte->values_lists) ||
1709 		contain_volatile_functions((Node *) rte->values_lists))
1710 		return false;
1711 
1712 	/*
1713 	 * Do not pull up a VALUES that's not the only RTE in its parent query.
1714 	 * This is actually the only case that the parser will generate at the
1715 	 * moment, and assuming this is true greatly simplifies
1716 	 * pull_up_simple_values().
1717 	 */
1718 	if (list_length(root->parse->rtable) != 1 ||
1719 		rte != (RangeTblEntry *) linitial(root->parse->rtable))
1720 		return false;
1721 
1722 	return true;
1723 }
1724 
1725 /*
1726  * is_simple_union_all
1727  *	  Check a subquery to see if it's a simple UNION ALL.
1728  *
1729  * We require all the setops to be UNION ALL (no mixing) and there can't be
1730  * any datatype coercions involved, ie, all the leaf queries must emit the
1731  * same datatypes.
1732  */
1733 static bool
is_simple_union_all(Query * subquery)1734 is_simple_union_all(Query *subquery)
1735 {
1736 	SetOperationStmt *topop;
1737 
1738 	/* Let's just make sure it's a valid subselect ... */
1739 	if (!IsA(subquery, Query) ||
1740 		subquery->commandType != CMD_SELECT)
1741 		elog(ERROR, "subquery is bogus");
1742 
1743 	/* Is it a set-operation query at all? */
1744 	topop = castNode(SetOperationStmt, subquery->setOperations);
1745 	if (!topop)
1746 		return false;
1747 
1748 	/* Can't handle ORDER BY, LIMIT/OFFSET, locking, or WITH */
1749 	if (subquery->sortClause ||
1750 		subquery->limitOffset ||
1751 		subquery->limitCount ||
1752 		subquery->rowMarks ||
1753 		subquery->cteList)
1754 		return false;
1755 
1756 	/* Recursively check the tree of set operations */
1757 	return is_simple_union_all_recurse((Node *) topop, subquery,
1758 									   topop->colTypes);
1759 }
1760 
1761 static bool
is_simple_union_all_recurse(Node * setOp,Query * setOpQuery,List * colTypes)1762 is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes)
1763 {
1764 	if (IsA(setOp, RangeTblRef))
1765 	{
1766 		RangeTblRef *rtr = (RangeTblRef *) setOp;
1767 		RangeTblEntry *rte = rt_fetch(rtr->rtindex, setOpQuery->rtable);
1768 		Query	   *subquery = rte->subquery;
1769 
1770 		Assert(subquery != NULL);
1771 
1772 		/* Leaf nodes are OK if they match the toplevel column types */
1773 		/* We don't have to compare typmods or collations here */
1774 		return tlist_same_datatypes(subquery->targetList, colTypes, true);
1775 	}
1776 	else if (IsA(setOp, SetOperationStmt))
1777 	{
1778 		SetOperationStmt *op = (SetOperationStmt *) setOp;
1779 
1780 		/* Must be UNION ALL */
1781 		if (op->op != SETOP_UNION || !op->all)
1782 			return false;
1783 
1784 		/* Recurse to check inputs */
1785 		return is_simple_union_all_recurse(op->larg, setOpQuery, colTypes) &&
1786 			is_simple_union_all_recurse(op->rarg, setOpQuery, colTypes);
1787 	}
1788 	else
1789 	{
1790 		elog(ERROR, "unrecognized node type: %d",
1791 			 (int) nodeTag(setOp));
1792 		return false;			/* keep compiler quiet */
1793 	}
1794 }
1795 
1796 /*
1797  * is_safe_append_member
1798  *	  Check a subquery that is a leaf of a UNION ALL appendrel to see if it's
1799  *	  safe to pull up.
1800  */
1801 static bool
is_safe_append_member(Query * subquery)1802 is_safe_append_member(Query *subquery)
1803 {
1804 	FromExpr   *jtnode;
1805 
1806 	/*
1807 	 * It's only safe to pull up the child if its jointree contains exactly
1808 	 * one RTE, else the AppendRelInfo data structure breaks. The one base RTE
1809 	 * could be buried in several levels of FromExpr, however.  Also, if the
1810 	 * child's jointree is completely empty, we can pull up because
1811 	 * pull_up_simple_subquery will insert a single RTE_RESULT RTE instead.
1812 	 *
1813 	 * Also, the child can't have any WHERE quals because there's no place to
1814 	 * put them in an appendrel.  (This is a bit annoying...) If we didn't
1815 	 * need to check this, we'd just test whether get_relids_in_jointree()
1816 	 * yields a singleton set, to be more consistent with the coding of
1817 	 * fix_append_rel_relids().
1818 	 */
1819 	jtnode = subquery->jointree;
1820 	Assert(IsA(jtnode, FromExpr));
1821 	/* Check the completely-empty case */
1822 	if (jtnode->fromlist == NIL && jtnode->quals == NULL)
1823 		return true;
1824 	/* Check the more general case */
1825 	while (IsA(jtnode, FromExpr))
1826 	{
1827 		if (jtnode->quals != NULL)
1828 			return false;
1829 		if (list_length(jtnode->fromlist) != 1)
1830 			return false;
1831 		jtnode = linitial(jtnode->fromlist);
1832 	}
1833 	if (!IsA(jtnode, RangeTblRef))
1834 		return false;
1835 
1836 	return true;
1837 }
1838 
1839 /*
1840  * jointree_contains_lateral_outer_refs
1841  *		Check for disallowed lateral references in a jointree's quals
1842  *
1843  * If restricted is false, all level-1 Vars are allowed (but we still must
1844  * search the jointree, since it might contain outer joins below which there
1845  * will be restrictions).  If restricted is true, return true when any qual
1846  * in the jointree contains level-1 Vars coming from outside the rels listed
1847  * in safe_upper_varnos.
1848  */
1849 static bool
jointree_contains_lateral_outer_refs(PlannerInfo * root,Node * jtnode,bool restricted,Relids safe_upper_varnos)1850 jointree_contains_lateral_outer_refs(PlannerInfo *root, Node *jtnode,
1851 									 bool restricted,
1852 									 Relids safe_upper_varnos)
1853 {
1854 	if (jtnode == NULL)
1855 		return false;
1856 	if (IsA(jtnode, RangeTblRef))
1857 		return false;
1858 	else if (IsA(jtnode, FromExpr))
1859 	{
1860 		FromExpr   *f = (FromExpr *) jtnode;
1861 		ListCell   *l;
1862 
1863 		/* First, recurse to check child joins */
1864 		foreach(l, f->fromlist)
1865 		{
1866 			if (jointree_contains_lateral_outer_refs(root,
1867 													 lfirst(l),
1868 													 restricted,
1869 													 safe_upper_varnos))
1870 				return true;
1871 		}
1872 
1873 		/* Then check the top-level quals */
1874 		if (restricted &&
1875 			!bms_is_subset(pull_varnos_of_level(root, f->quals, 1),
1876 						   safe_upper_varnos))
1877 			return true;
1878 	}
1879 	else if (IsA(jtnode, JoinExpr))
1880 	{
1881 		JoinExpr   *j = (JoinExpr *) jtnode;
1882 
1883 		/*
1884 		 * If this is an outer join, we mustn't allow any upper lateral
1885 		 * references in or below it.
1886 		 */
1887 		if (j->jointype != JOIN_INNER)
1888 		{
1889 			restricted = true;
1890 			safe_upper_varnos = NULL;
1891 		}
1892 
1893 		/* Check the child joins */
1894 		if (jointree_contains_lateral_outer_refs(root,
1895 												 j->larg,
1896 												 restricted,
1897 												 safe_upper_varnos))
1898 			return true;
1899 		if (jointree_contains_lateral_outer_refs(root,
1900 												 j->rarg,
1901 												 restricted,
1902 												 safe_upper_varnos))
1903 			return true;
1904 
1905 		/* Check the JOIN's qual clauses */
1906 		if (restricted &&
1907 			!bms_is_subset(pull_varnos_of_level(root, j->quals, 1),
1908 						   safe_upper_varnos))
1909 			return true;
1910 	}
1911 	else
1912 		elog(ERROR, "unrecognized node type: %d",
1913 			 (int) nodeTag(jtnode));
1914 	return false;
1915 }
1916 
1917 /*
1918  * Helper routine for pull_up_subqueries: do pullup_replace_vars on every
1919  * expression in the jointree, without changing the jointree structure itself.
1920  * Ugly, but there's no other way...
1921  *
1922  * If we are at or below lowest_nulling_outer_join, we can suppress use of
1923  * PlaceHolderVars wrapped around the replacement expressions.
1924  */
1925 static void
replace_vars_in_jointree(Node * jtnode,pullup_replace_vars_context * context,JoinExpr * lowest_nulling_outer_join)1926 replace_vars_in_jointree(Node *jtnode,
1927 						 pullup_replace_vars_context *context,
1928 						 JoinExpr *lowest_nulling_outer_join)
1929 {
1930 	if (jtnode == NULL)
1931 		return;
1932 	if (IsA(jtnode, RangeTblRef))
1933 	{
1934 		/*
1935 		 * If the RangeTblRef refers to a LATERAL subquery (that isn't the
1936 		 * same subquery we're pulling up), it might contain references to the
1937 		 * target subquery, which we must replace.  We drive this from the
1938 		 * jointree scan, rather than a scan of the rtable, for a couple of
1939 		 * reasons: we can avoid processing no-longer-referenced RTEs, and we
1940 		 * can use the appropriate setting of need_phvs depending on whether
1941 		 * the RTE is above possibly-nulling outer joins or not.
1942 		 */
1943 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
1944 
1945 		if (varno != context->varno)	/* ignore target subquery itself */
1946 		{
1947 			RangeTblEntry *rte = rt_fetch(varno, context->root->parse->rtable);
1948 
1949 			Assert(rte != context->target_rte);
1950 			if (rte->lateral)
1951 			{
1952 				switch (rte->rtekind)
1953 				{
1954 					case RTE_RELATION:
1955 						/* shouldn't be marked LATERAL unless tablesample */
1956 						Assert(rte->tablesample);
1957 						rte->tablesample = (TableSampleClause *)
1958 							pullup_replace_vars((Node *) rte->tablesample,
1959 												context);
1960 						break;
1961 					case RTE_SUBQUERY:
1962 						rte->subquery =
1963 							pullup_replace_vars_subquery(rte->subquery,
1964 														 context);
1965 						break;
1966 					case RTE_FUNCTION:
1967 						rte->functions = (List *)
1968 							pullup_replace_vars((Node *) rte->functions,
1969 												context);
1970 						break;
1971 					case RTE_TABLEFUNC:
1972 						rte->tablefunc = (TableFunc *)
1973 							pullup_replace_vars((Node *) rte->tablefunc,
1974 												context);
1975 						break;
1976 					case RTE_VALUES:
1977 						rte->values_lists = (List *)
1978 							pullup_replace_vars((Node *) rte->values_lists,
1979 												context);
1980 						break;
1981 					case RTE_JOIN:
1982 					case RTE_CTE:
1983 					case RTE_NAMEDTUPLESTORE:
1984 					case RTE_RESULT:
1985 						/* these shouldn't be marked LATERAL */
1986 						Assert(false);
1987 						break;
1988 				}
1989 			}
1990 		}
1991 	}
1992 	else if (IsA(jtnode, FromExpr))
1993 	{
1994 		FromExpr   *f = (FromExpr *) jtnode;
1995 		ListCell   *l;
1996 
1997 		foreach(l, f->fromlist)
1998 			replace_vars_in_jointree(lfirst(l), context,
1999 									 lowest_nulling_outer_join);
2000 		f->quals = pullup_replace_vars(f->quals, context);
2001 	}
2002 	else if (IsA(jtnode, JoinExpr))
2003 	{
2004 		JoinExpr   *j = (JoinExpr *) jtnode;
2005 		bool		save_need_phvs = context->need_phvs;
2006 
2007 		if (j == lowest_nulling_outer_join)
2008 		{
2009 			/* no more PHVs in or below this join */
2010 			context->need_phvs = false;
2011 			lowest_nulling_outer_join = NULL;
2012 		}
2013 		replace_vars_in_jointree(j->larg, context, lowest_nulling_outer_join);
2014 		replace_vars_in_jointree(j->rarg, context, lowest_nulling_outer_join);
2015 
2016 		/*
2017 		 * Use PHVs within the join quals of a full join, even when it's the
2018 		 * lowest nulling outer join.  Otherwise, we cannot identify which
2019 		 * side of the join a pulled-up var-free expression came from, which
2020 		 * can lead to failure to make a plan at all because none of the quals
2021 		 * appear to be mergeable or hashable conditions.  For this purpose we
2022 		 * don't care about the state of wrap_non_vars, so leave it alone.
2023 		 */
2024 		if (j->jointype == JOIN_FULL)
2025 			context->need_phvs = true;
2026 
2027 		j->quals = pullup_replace_vars(j->quals, context);
2028 
2029 		/*
2030 		 * We don't bother to update the colvars list, since it won't be used
2031 		 * again ...
2032 		 */
2033 		context->need_phvs = save_need_phvs;
2034 	}
2035 	else
2036 		elog(ERROR, "unrecognized node type: %d",
2037 			 (int) nodeTag(jtnode));
2038 }
2039 
2040 /*
2041  * Apply pullup variable replacement throughout an expression tree
2042  *
2043  * Returns a modified copy of the tree, so this can't be used where we
2044  * need to do in-place replacement.
2045  */
2046 static Node *
pullup_replace_vars(Node * expr,pullup_replace_vars_context * context)2047 pullup_replace_vars(Node *expr, pullup_replace_vars_context *context)
2048 {
2049 	return replace_rte_variables(expr,
2050 								 context->varno, 0,
2051 								 pullup_replace_vars_callback,
2052 								 (void *) context,
2053 								 context->outer_hasSubLinks);
2054 }
2055 
2056 static Node *
pullup_replace_vars_callback(Var * var,replace_rte_variables_context * context)2057 pullup_replace_vars_callback(Var *var,
2058 							 replace_rte_variables_context *context)
2059 {
2060 	pullup_replace_vars_context *rcon = (pullup_replace_vars_context *) context->callback_arg;
2061 	int			varattno = var->varattno;
2062 	Node	   *newnode;
2063 
2064 	/*
2065 	 * If PlaceHolderVars are needed, we cache the modified expressions in
2066 	 * rcon->rv_cache[].  This is not in hopes of any material speed gain
2067 	 * within this function, but to avoid generating identical PHVs with
2068 	 * different IDs.  That would result in duplicate evaluations at runtime,
2069 	 * and possibly prevent optimizations that rely on recognizing different
2070 	 * references to the same subquery output as being equal().  So it's worth
2071 	 * a bit of extra effort to avoid it.
2072 	 */
2073 	if (rcon->need_phvs &&
2074 		varattno >= InvalidAttrNumber &&
2075 		varattno <= list_length(rcon->targetlist) &&
2076 		rcon->rv_cache[varattno] != NULL)
2077 	{
2078 		/* Just copy the entry and fall through to adjust its varlevelsup */
2079 		newnode = copyObject(rcon->rv_cache[varattno]);
2080 	}
2081 	else if (varattno == InvalidAttrNumber)
2082 	{
2083 		/* Must expand whole-tuple reference into RowExpr */
2084 		RowExpr    *rowexpr;
2085 		List	   *colnames;
2086 		List	   *fields;
2087 		bool		save_need_phvs = rcon->need_phvs;
2088 		int			save_sublevelsup = context->sublevels_up;
2089 
2090 		/*
2091 		 * If generating an expansion for a var of a named rowtype (ie, this
2092 		 * is a plain relation RTE), then we must include dummy items for
2093 		 * dropped columns.  If the var is RECORD (ie, this is a JOIN), then
2094 		 * omit dropped columns. Either way, attach column names to the
2095 		 * RowExpr for use of ruleutils.c.
2096 		 *
2097 		 * In order to be able to cache the results, we always generate the
2098 		 * expansion with varlevelsup = 0, and then adjust if needed.
2099 		 */
2100 		expandRTE(rcon->target_rte,
2101 				  var->varno, 0 /* not varlevelsup */ , var->location,
2102 				  (var->vartype != RECORDOID),
2103 				  &colnames, &fields);
2104 		/* Adjust the generated per-field Vars, but don't insert PHVs */
2105 		rcon->need_phvs = false;
2106 		context->sublevels_up = 0;	/* to match the expandRTE output */
2107 		fields = (List *) replace_rte_variables_mutator((Node *) fields,
2108 														context);
2109 		rcon->need_phvs = save_need_phvs;
2110 		context->sublevels_up = save_sublevelsup;
2111 
2112 		rowexpr = makeNode(RowExpr);
2113 		rowexpr->args = fields;
2114 		rowexpr->row_typeid = var->vartype;
2115 		rowexpr->row_format = COERCE_IMPLICIT_CAST;
2116 		rowexpr->colnames = colnames;
2117 		rowexpr->location = var->location;
2118 		newnode = (Node *) rowexpr;
2119 
2120 		/*
2121 		 * Insert PlaceHolderVar if needed.  Notice that we are wrapping one
2122 		 * PlaceHolderVar around the whole RowExpr, rather than putting one
2123 		 * around each element of the row.  This is because we need the
2124 		 * expression to yield NULL, not ROW(NULL,NULL,...) when it is forced
2125 		 * to null by an outer join.
2126 		 */
2127 		if (rcon->need_phvs)
2128 		{
2129 			/* RowExpr is certainly not strict, so always need PHV */
2130 			newnode = (Node *)
2131 				make_placeholder_expr(rcon->root,
2132 									  (Expr *) newnode,
2133 									  bms_make_singleton(rcon->varno));
2134 			/* cache it with the PHV, and with varlevelsup still zero */
2135 			rcon->rv_cache[InvalidAttrNumber] = copyObject(newnode);
2136 		}
2137 	}
2138 	else
2139 	{
2140 		/* Normal case referencing one targetlist element */
2141 		TargetEntry *tle = get_tle_by_resno(rcon->targetlist, varattno);
2142 
2143 		if (tle == NULL)		/* shouldn't happen */
2144 			elog(ERROR, "could not find attribute %d in subquery targetlist",
2145 				 varattno);
2146 
2147 		/* Make a copy of the tlist item to return */
2148 		newnode = (Node *) copyObject(tle->expr);
2149 
2150 		/* Insert PlaceHolderVar if needed */
2151 		if (rcon->need_phvs)
2152 		{
2153 			bool		wrap;
2154 
2155 			if (newnode && IsA(newnode, Var) &&
2156 				((Var *) newnode)->varlevelsup == 0)
2157 			{
2158 				/*
2159 				 * Simple Vars always escape being wrapped, unless they are
2160 				 * lateral references to something outside the subquery being
2161 				 * pulled up.  (Even then, we could omit the PlaceHolderVar if
2162 				 * the referenced rel is under the same lowest outer join, but
2163 				 * it doesn't seem worth the trouble to check that.)
2164 				 */
2165 				if (rcon->target_rte->lateral &&
2166 					!bms_is_member(((Var *) newnode)->varno, rcon->relids))
2167 					wrap = true;
2168 				else
2169 					wrap = false;
2170 			}
2171 			else if (newnode && IsA(newnode, PlaceHolderVar) &&
2172 					 ((PlaceHolderVar *) newnode)->phlevelsup == 0)
2173 			{
2174 				/* No need to wrap a PlaceHolderVar with another one, either */
2175 				wrap = false;
2176 			}
2177 			else if (rcon->wrap_non_vars)
2178 			{
2179 				/* Wrap all non-Vars in a PlaceHolderVar */
2180 				wrap = true;
2181 			}
2182 			else
2183 			{
2184 				/*
2185 				 * If it contains a Var of the subquery being pulled up, and
2186 				 * does not contain any non-strict constructs, then it's
2187 				 * certainly nullable so we don't need to insert a
2188 				 * PlaceHolderVar.
2189 				 *
2190 				 * This analysis could be tighter: in particular, a non-strict
2191 				 * construct hidden within a lower-level PlaceHolderVar is not
2192 				 * reason to add another PHV.  But for now it doesn't seem
2193 				 * worth the code to be more exact.
2194 				 *
2195 				 * Note: in future maybe we should insert a PlaceHolderVar
2196 				 * anyway, if the tlist item is expensive to evaluate?
2197 				 *
2198 				 * For a LATERAL subquery, we have to check the actual var
2199 				 * membership of the node, but if it's non-lateral then any
2200 				 * level-zero var must belong to the subquery.
2201 				 */
2202 				if ((rcon->target_rte->lateral ?
2203 					 bms_overlap(pull_varnos(rcon->root, (Node *) newnode),
2204 								 rcon->relids) :
2205 					 contain_vars_of_level((Node *) newnode, 0)) &&
2206 					!contain_nonstrict_functions((Node *) newnode))
2207 				{
2208 					/* No wrap needed */
2209 					wrap = false;
2210 				}
2211 				else
2212 				{
2213 					/* Else wrap it in a PlaceHolderVar */
2214 					wrap = true;
2215 				}
2216 			}
2217 
2218 			if (wrap)
2219 				newnode = (Node *)
2220 					make_placeholder_expr(rcon->root,
2221 										  (Expr *) newnode,
2222 										  bms_make_singleton(rcon->varno));
2223 
2224 			/*
2225 			 * Cache it if possible (ie, if the attno is in range, which it
2226 			 * probably always should be).  We can cache the value even if we
2227 			 * decided we didn't need a PHV, since this result will be
2228 			 * suitable for any request that has need_phvs.
2229 			 */
2230 			if (varattno > InvalidAttrNumber &&
2231 				varattno <= list_length(rcon->targetlist))
2232 				rcon->rv_cache[varattno] = copyObject(newnode);
2233 		}
2234 	}
2235 
2236 	/* Must adjust varlevelsup if tlist item is from higher query */
2237 	if (var->varlevelsup > 0)
2238 		IncrementVarSublevelsUp(newnode, var->varlevelsup, 0);
2239 
2240 	return newnode;
2241 }
2242 
2243 /*
2244  * Apply pullup variable replacement to a subquery
2245  *
2246  * This needs to be different from pullup_replace_vars() because
2247  * replace_rte_variables will think that it shouldn't increment sublevels_up
2248  * before entering the Query; so we need to call it with sublevels_up == 1.
2249  */
2250 static Query *
pullup_replace_vars_subquery(Query * query,pullup_replace_vars_context * context)2251 pullup_replace_vars_subquery(Query *query,
2252 							 pullup_replace_vars_context *context)
2253 {
2254 	Assert(IsA(query, Query));
2255 	return (Query *) replace_rte_variables((Node *) query,
2256 										   context->varno, 1,
2257 										   pullup_replace_vars_callback,
2258 										   (void *) context,
2259 										   NULL);
2260 }
2261 
2262 
2263 /*
2264  * flatten_simple_union_all
2265  *		Try to optimize top-level UNION ALL structure into an appendrel
2266  *
2267  * If a query's setOperations tree consists entirely of simple UNION ALL
2268  * operations, flatten it into an append relation, which we can process more
2269  * intelligently than the general setops case.  Otherwise, do nothing.
2270  *
2271  * In most cases, this can succeed only for a top-level query, because for a
2272  * subquery in FROM, the parent query's invocation of pull_up_subqueries would
2273  * already have flattened the UNION via pull_up_simple_union_all.  But there
2274  * are a few cases we can support here but not in that code path, for example
2275  * when the subquery also contains ORDER BY.
2276  */
2277 void
flatten_simple_union_all(PlannerInfo * root)2278 flatten_simple_union_all(PlannerInfo *root)
2279 {
2280 	Query	   *parse = root->parse;
2281 	SetOperationStmt *topop;
2282 	Node	   *leftmostjtnode;
2283 	int			leftmostRTI;
2284 	RangeTblEntry *leftmostRTE;
2285 	int			childRTI;
2286 	RangeTblEntry *childRTE;
2287 	RangeTblRef *rtr;
2288 
2289 	/* Shouldn't be called unless query has setops */
2290 	topop = castNode(SetOperationStmt, parse->setOperations);
2291 	Assert(topop);
2292 
2293 	/* Can't optimize away a recursive UNION */
2294 	if (root->hasRecursion)
2295 		return;
2296 
2297 	/*
2298 	 * Recursively check the tree of set operations.  If not all UNION ALL
2299 	 * with identical column types, punt.
2300 	 */
2301 	if (!is_simple_union_all_recurse((Node *) topop, parse, topop->colTypes))
2302 		return;
2303 
2304 	/*
2305 	 * Locate the leftmost leaf query in the setops tree.  The upper query's
2306 	 * Vars all refer to this RTE (see transformSetOperationStmt).
2307 	 */
2308 	leftmostjtnode = topop->larg;
2309 	while (leftmostjtnode && IsA(leftmostjtnode, SetOperationStmt))
2310 		leftmostjtnode = ((SetOperationStmt *) leftmostjtnode)->larg;
2311 	Assert(leftmostjtnode && IsA(leftmostjtnode, RangeTblRef));
2312 	leftmostRTI = ((RangeTblRef *) leftmostjtnode)->rtindex;
2313 	leftmostRTE = rt_fetch(leftmostRTI, parse->rtable);
2314 	Assert(leftmostRTE->rtekind == RTE_SUBQUERY);
2315 
2316 	/*
2317 	 * Make a copy of the leftmost RTE and add it to the rtable.  This copy
2318 	 * will represent the leftmost leaf query in its capacity as a member of
2319 	 * the appendrel.  The original will represent the appendrel as a whole.
2320 	 * (We must do things this way because the upper query's Vars have to be
2321 	 * seen as referring to the whole appendrel.)
2322 	 */
2323 	childRTE = copyObject(leftmostRTE);
2324 	parse->rtable = lappend(parse->rtable, childRTE);
2325 	childRTI = list_length(parse->rtable);
2326 
2327 	/* Modify the setops tree to reference the child copy */
2328 	((RangeTblRef *) leftmostjtnode)->rtindex = childRTI;
2329 
2330 	/* Modify the formerly-leftmost RTE to mark it as an appendrel parent */
2331 	leftmostRTE->inh = true;
2332 
2333 	/*
2334 	 * Form a RangeTblRef for the appendrel, and insert it into FROM.  The top
2335 	 * Query of a setops tree should have had an empty FromClause initially.
2336 	 */
2337 	rtr = makeNode(RangeTblRef);
2338 	rtr->rtindex = leftmostRTI;
2339 	Assert(parse->jointree->fromlist == NIL);
2340 	parse->jointree->fromlist = list_make1(rtr);
2341 
2342 	/*
2343 	 * Now pretend the query has no setops.  We must do this before trying to
2344 	 * do subquery pullup, because of Assert in pull_up_simple_subquery.
2345 	 */
2346 	parse->setOperations = NULL;
2347 
2348 	/*
2349 	 * Build AppendRelInfo information, and apply pull_up_subqueries to the
2350 	 * leaf queries of the UNION ALL.  (We must do that now because they
2351 	 * weren't previously referenced by the jointree, and so were missed by
2352 	 * the main invocation of pull_up_subqueries.)
2353 	 */
2354 	pull_up_union_leaf_queries((Node *) topop, root, leftmostRTI, parse, 0);
2355 }
2356 
2357 
2358 /*
2359  * reduce_outer_joins
2360  *		Attempt to reduce outer joins to plain inner joins.
2361  *
2362  * The idea here is that given a query like
2363  *		SELECT ... FROM a LEFT JOIN b ON (...) WHERE b.y = 42;
2364  * we can reduce the LEFT JOIN to a plain JOIN if the "=" operator in WHERE
2365  * is strict.  The strict operator will always return NULL, causing the outer
2366  * WHERE to fail, on any row where the LEFT JOIN filled in NULLs for b's
2367  * columns.  Therefore, there's no need for the join to produce null-extended
2368  * rows in the first place --- which makes it a plain join not an outer join.
2369  * (This scenario may not be very likely in a query written out by hand, but
2370  * it's reasonably likely when pushing quals down into complex views.)
2371  *
2372  * More generally, an outer join can be reduced in strength if there is a
2373  * strict qual above it in the qual tree that constrains a Var from the
2374  * nullable side of the join to be non-null.  (For FULL joins this applies
2375  * to each side separately.)
2376  *
2377  * Another transformation we apply here is to recognize cases like
2378  *		SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.y IS NULL;
2379  * If the join clause is strict for b.y, then only null-extended rows could
2380  * pass the upper WHERE, and we can conclude that what the query is really
2381  * specifying is an anti-semijoin.  We change the join type from JOIN_LEFT
2382  * to JOIN_ANTI.  The IS NULL clause then becomes redundant, and must be
2383  * removed to prevent bogus selectivity calculations, but we leave it to
2384  * distribute_qual_to_rels to get rid of such clauses.
2385  *
2386  * Also, we get rid of JOIN_RIGHT cases by flipping them around to become
2387  * JOIN_LEFT.  This saves some code here and in some later planner routines,
2388  * but the main reason to do it is to not need to invent a JOIN_REVERSE_ANTI
2389  * join type.
2390  *
2391  * To ease recognition of strict qual clauses, we require this routine to be
2392  * run after expression preprocessing (i.e., qual canonicalization and JOIN
2393  * alias-var expansion).
2394  */
2395 void
reduce_outer_joins(PlannerInfo * root)2396 reduce_outer_joins(PlannerInfo *root)
2397 {
2398 	reduce_outer_joins_state *state;
2399 
2400 	/*
2401 	 * To avoid doing strictness checks on more quals than necessary, we want
2402 	 * to stop descending the jointree as soon as there are no outer joins
2403 	 * below our current point.  This consideration forces a two-pass process.
2404 	 * The first pass gathers information about which base rels appear below
2405 	 * each side of each join clause, and about whether there are outer
2406 	 * join(s) below each side of each join clause. The second pass examines
2407 	 * qual clauses and changes join types as it descends the tree.
2408 	 */
2409 	state = reduce_outer_joins_pass1((Node *) root->parse->jointree);
2410 
2411 	/* planner.c shouldn't have called me if no outer joins */
2412 	if (state == NULL || !state->contains_outer)
2413 		elog(ERROR, "so where are the outer joins?");
2414 
2415 	reduce_outer_joins_pass2((Node *) root->parse->jointree,
2416 							 state, root, NULL, NIL, NIL);
2417 }
2418 
2419 /*
2420  * reduce_outer_joins_pass1 - phase 1 data collection
2421  *
2422  * Returns a state node describing the given jointree node.
2423  */
2424 static reduce_outer_joins_state *
reduce_outer_joins_pass1(Node * jtnode)2425 reduce_outer_joins_pass1(Node *jtnode)
2426 {
2427 	reduce_outer_joins_state *result;
2428 
2429 	result = (reduce_outer_joins_state *)
2430 		palloc(sizeof(reduce_outer_joins_state));
2431 	result->relids = NULL;
2432 	result->contains_outer = false;
2433 	result->sub_states = NIL;
2434 
2435 	if (jtnode == NULL)
2436 		return result;
2437 	if (IsA(jtnode, RangeTblRef))
2438 	{
2439 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
2440 
2441 		result->relids = bms_make_singleton(varno);
2442 	}
2443 	else if (IsA(jtnode, FromExpr))
2444 	{
2445 		FromExpr   *f = (FromExpr *) jtnode;
2446 		ListCell   *l;
2447 
2448 		foreach(l, f->fromlist)
2449 		{
2450 			reduce_outer_joins_state *sub_state;
2451 
2452 			sub_state = reduce_outer_joins_pass1(lfirst(l));
2453 			result->relids = bms_add_members(result->relids,
2454 											 sub_state->relids);
2455 			result->contains_outer |= sub_state->contains_outer;
2456 			result->sub_states = lappend(result->sub_states, sub_state);
2457 		}
2458 	}
2459 	else if (IsA(jtnode, JoinExpr))
2460 	{
2461 		JoinExpr   *j = (JoinExpr *) jtnode;
2462 		reduce_outer_joins_state *sub_state;
2463 
2464 		/* join's own RT index is not wanted in result->relids */
2465 		if (IS_OUTER_JOIN(j->jointype))
2466 			result->contains_outer = true;
2467 
2468 		sub_state = reduce_outer_joins_pass1(j->larg);
2469 		result->relids = bms_add_members(result->relids,
2470 										 sub_state->relids);
2471 		result->contains_outer |= sub_state->contains_outer;
2472 		result->sub_states = lappend(result->sub_states, sub_state);
2473 
2474 		sub_state = reduce_outer_joins_pass1(j->rarg);
2475 		result->relids = bms_add_members(result->relids,
2476 										 sub_state->relids);
2477 		result->contains_outer |= sub_state->contains_outer;
2478 		result->sub_states = lappend(result->sub_states, sub_state);
2479 	}
2480 	else
2481 		elog(ERROR, "unrecognized node type: %d",
2482 			 (int) nodeTag(jtnode));
2483 	return result;
2484 }
2485 
2486 /*
2487  * reduce_outer_joins_pass2 - phase 2 processing
2488  *
2489  *	jtnode: current jointree node
2490  *	state: state data collected by phase 1 for this node
2491  *	root: toplevel planner state
2492  *	nonnullable_rels: set of base relids forced non-null by upper quals
2493  *	nonnullable_vars: list of Vars forced non-null by upper quals
2494  *	forced_null_vars: list of Vars forced null by upper quals
2495  */
2496 static void
reduce_outer_joins_pass2(Node * jtnode,reduce_outer_joins_state * state,PlannerInfo * root,Relids nonnullable_rels,List * nonnullable_vars,List * forced_null_vars)2497 reduce_outer_joins_pass2(Node *jtnode,
2498 						 reduce_outer_joins_state *state,
2499 						 PlannerInfo *root,
2500 						 Relids nonnullable_rels,
2501 						 List *nonnullable_vars,
2502 						 List *forced_null_vars)
2503 {
2504 	/*
2505 	 * pass 2 should never descend as far as an empty subnode or base rel,
2506 	 * because it's only called on subtrees marked as contains_outer.
2507 	 */
2508 	if (jtnode == NULL)
2509 		elog(ERROR, "reached empty jointree");
2510 	if (IsA(jtnode, RangeTblRef))
2511 		elog(ERROR, "reached base rel");
2512 	else if (IsA(jtnode, FromExpr))
2513 	{
2514 		FromExpr   *f = (FromExpr *) jtnode;
2515 		ListCell   *l;
2516 		ListCell   *s;
2517 		Relids		pass_nonnullable_rels;
2518 		List	   *pass_nonnullable_vars;
2519 		List	   *pass_forced_null_vars;
2520 
2521 		/* Scan quals to see if we can add any constraints */
2522 		pass_nonnullable_rels = find_nonnullable_rels(f->quals);
2523 		pass_nonnullable_rels = bms_add_members(pass_nonnullable_rels,
2524 												nonnullable_rels);
2525 		/* NB: we rely on list_concat to not damage its second argument */
2526 		pass_nonnullable_vars = find_nonnullable_vars(f->quals);
2527 		pass_nonnullable_vars = list_concat(pass_nonnullable_vars,
2528 											nonnullable_vars);
2529 		pass_forced_null_vars = find_forced_null_vars(f->quals);
2530 		pass_forced_null_vars = list_concat(pass_forced_null_vars,
2531 											forced_null_vars);
2532 		/* And recurse --- but only into interesting subtrees */
2533 		Assert(list_length(f->fromlist) == list_length(state->sub_states));
2534 		forboth(l, f->fromlist, s, state->sub_states)
2535 		{
2536 			reduce_outer_joins_state *sub_state = lfirst(s);
2537 
2538 			if (sub_state->contains_outer)
2539 				reduce_outer_joins_pass2(lfirst(l), sub_state, root,
2540 										 pass_nonnullable_rels,
2541 										 pass_nonnullable_vars,
2542 										 pass_forced_null_vars);
2543 		}
2544 		bms_free(pass_nonnullable_rels);
2545 		/* can't so easily clean up var lists, unfortunately */
2546 	}
2547 	else if (IsA(jtnode, JoinExpr))
2548 	{
2549 		JoinExpr   *j = (JoinExpr *) jtnode;
2550 		int			rtindex = j->rtindex;
2551 		JoinType	jointype = j->jointype;
2552 		reduce_outer_joins_state *left_state = linitial(state->sub_states);
2553 		reduce_outer_joins_state *right_state = lsecond(state->sub_states);
2554 		List	   *local_nonnullable_vars = NIL;
2555 		bool		computed_local_nonnullable_vars = false;
2556 
2557 		/* Can we simplify this join? */
2558 		switch (jointype)
2559 		{
2560 			case JOIN_INNER:
2561 				break;
2562 			case JOIN_LEFT:
2563 				if (bms_overlap(nonnullable_rels, right_state->relids))
2564 					jointype = JOIN_INNER;
2565 				break;
2566 			case JOIN_RIGHT:
2567 				if (bms_overlap(nonnullable_rels, left_state->relids))
2568 					jointype = JOIN_INNER;
2569 				break;
2570 			case JOIN_FULL:
2571 				if (bms_overlap(nonnullable_rels, left_state->relids))
2572 				{
2573 					if (bms_overlap(nonnullable_rels, right_state->relids))
2574 						jointype = JOIN_INNER;
2575 					else
2576 						jointype = JOIN_LEFT;
2577 				}
2578 				else
2579 				{
2580 					if (bms_overlap(nonnullable_rels, right_state->relids))
2581 						jointype = JOIN_RIGHT;
2582 				}
2583 				break;
2584 			case JOIN_SEMI:
2585 			case JOIN_ANTI:
2586 
2587 				/*
2588 				 * These could only have been introduced by pull_up_sublinks,
2589 				 * so there's no way that upper quals could refer to their
2590 				 * righthand sides, and no point in checking.
2591 				 */
2592 				break;
2593 			default:
2594 				elog(ERROR, "unrecognized join type: %d",
2595 					 (int) jointype);
2596 				break;
2597 		}
2598 
2599 		/*
2600 		 * Convert JOIN_RIGHT to JOIN_LEFT.  Note that in the case where we
2601 		 * reduced JOIN_FULL to JOIN_RIGHT, this will mean the JoinExpr no
2602 		 * longer matches the internal ordering of any CoalesceExpr's built to
2603 		 * represent merged join variables.  We don't care about that at
2604 		 * present, but be wary of it ...
2605 		 */
2606 		if (jointype == JOIN_RIGHT)
2607 		{
2608 			Node	   *tmparg;
2609 
2610 			tmparg = j->larg;
2611 			j->larg = j->rarg;
2612 			j->rarg = tmparg;
2613 			jointype = JOIN_LEFT;
2614 			right_state = linitial(state->sub_states);
2615 			left_state = lsecond(state->sub_states);
2616 		}
2617 
2618 		/*
2619 		 * See if we can reduce JOIN_LEFT to JOIN_ANTI.  This is the case if
2620 		 * the join's own quals are strict for any var that was forced null by
2621 		 * higher qual levels.  NOTE: there are other ways that we could
2622 		 * detect an anti-join, in particular if we were to check whether Vars
2623 		 * coming from the RHS must be non-null because of table constraints.
2624 		 * That seems complicated and expensive though (in particular, one
2625 		 * would have to be wary of lower outer joins). For the moment this
2626 		 * seems sufficient.
2627 		 */
2628 		if (jointype == JOIN_LEFT)
2629 		{
2630 			List	   *overlap;
2631 
2632 			local_nonnullable_vars = find_nonnullable_vars(j->quals);
2633 			computed_local_nonnullable_vars = true;
2634 
2635 			/*
2636 			 * It's not sufficient to check whether local_nonnullable_vars and
2637 			 * forced_null_vars overlap: we need to know if the overlap
2638 			 * includes any RHS variables.
2639 			 */
2640 			overlap = list_intersection(local_nonnullable_vars,
2641 										forced_null_vars);
2642 			if (overlap != NIL &&
2643 				bms_overlap(pull_varnos(root, (Node *) overlap),
2644 							right_state->relids))
2645 				jointype = JOIN_ANTI;
2646 		}
2647 
2648 		/* Apply the jointype change, if any, to both jointree node and RTE */
2649 		if (rtindex && jointype != j->jointype)
2650 		{
2651 			RangeTblEntry *rte = rt_fetch(rtindex, root->parse->rtable);
2652 
2653 			Assert(rte->rtekind == RTE_JOIN);
2654 			Assert(rte->jointype == j->jointype);
2655 			rte->jointype = jointype;
2656 		}
2657 		j->jointype = jointype;
2658 
2659 		/* Only recurse if there's more to do below here */
2660 		if (left_state->contains_outer || right_state->contains_outer)
2661 		{
2662 			Relids		local_nonnullable_rels;
2663 			List	   *local_forced_null_vars;
2664 			Relids		pass_nonnullable_rels;
2665 			List	   *pass_nonnullable_vars;
2666 			List	   *pass_forced_null_vars;
2667 
2668 			/*
2669 			 * If this join is (now) inner, we can add any constraints its
2670 			 * quals provide to those we got from above.  But if it is outer,
2671 			 * we can pass down the local constraints only into the nullable
2672 			 * side, because an outer join never eliminates any rows from its
2673 			 * non-nullable side.  Also, there is no point in passing upper
2674 			 * constraints into the nullable side, since if there were any
2675 			 * we'd have been able to reduce the join.  (In the case of upper
2676 			 * forced-null constraints, we *must not* pass them into the
2677 			 * nullable side --- they either applied here, or not.) The upshot
2678 			 * is that we pass either the local or the upper constraints,
2679 			 * never both, to the children of an outer join.
2680 			 *
2681 			 * Note that a SEMI join works like an inner join here: it's okay
2682 			 * to pass down both local and upper constraints.  (There can't be
2683 			 * any upper constraints affecting its inner side, but it's not
2684 			 * worth having a separate code path to avoid passing them.)
2685 			 *
2686 			 * At a FULL join we just punt and pass nothing down --- is it
2687 			 * possible to be smarter?
2688 			 */
2689 			if (jointype != JOIN_FULL)
2690 			{
2691 				local_nonnullable_rels = find_nonnullable_rels(j->quals);
2692 				if (!computed_local_nonnullable_vars)
2693 					local_nonnullable_vars = find_nonnullable_vars(j->quals);
2694 				local_forced_null_vars = find_forced_null_vars(j->quals);
2695 				if (jointype == JOIN_INNER || jointype == JOIN_SEMI)
2696 				{
2697 					/* OK to merge upper and local constraints */
2698 					local_nonnullable_rels = bms_add_members(local_nonnullable_rels,
2699 															 nonnullable_rels);
2700 					local_nonnullable_vars = list_concat(local_nonnullable_vars,
2701 														 nonnullable_vars);
2702 					local_forced_null_vars = list_concat(local_forced_null_vars,
2703 														 forced_null_vars);
2704 				}
2705 			}
2706 			else
2707 			{
2708 				/* no use in calculating these */
2709 				local_nonnullable_rels = NULL;
2710 				local_forced_null_vars = NIL;
2711 			}
2712 
2713 			if (left_state->contains_outer)
2714 			{
2715 				if (jointype == JOIN_INNER || jointype == JOIN_SEMI)
2716 				{
2717 					/* pass union of local and upper constraints */
2718 					pass_nonnullable_rels = local_nonnullable_rels;
2719 					pass_nonnullable_vars = local_nonnullable_vars;
2720 					pass_forced_null_vars = local_forced_null_vars;
2721 				}
2722 				else if (jointype != JOIN_FULL) /* ie, LEFT or ANTI */
2723 				{
2724 					/* can't pass local constraints to non-nullable side */
2725 					pass_nonnullable_rels = nonnullable_rels;
2726 					pass_nonnullable_vars = nonnullable_vars;
2727 					pass_forced_null_vars = forced_null_vars;
2728 				}
2729 				else
2730 				{
2731 					/* no constraints pass through JOIN_FULL */
2732 					pass_nonnullable_rels = NULL;
2733 					pass_nonnullable_vars = NIL;
2734 					pass_forced_null_vars = NIL;
2735 				}
2736 				reduce_outer_joins_pass2(j->larg, left_state, root,
2737 										 pass_nonnullable_rels,
2738 										 pass_nonnullable_vars,
2739 										 pass_forced_null_vars);
2740 			}
2741 
2742 			if (right_state->contains_outer)
2743 			{
2744 				if (jointype != JOIN_FULL)	/* ie, INNER/LEFT/SEMI/ANTI */
2745 				{
2746 					/* pass appropriate constraints, per comment above */
2747 					pass_nonnullable_rels = local_nonnullable_rels;
2748 					pass_nonnullable_vars = local_nonnullable_vars;
2749 					pass_forced_null_vars = local_forced_null_vars;
2750 				}
2751 				else
2752 				{
2753 					/* no constraints pass through JOIN_FULL */
2754 					pass_nonnullable_rels = NULL;
2755 					pass_nonnullable_vars = NIL;
2756 					pass_forced_null_vars = NIL;
2757 				}
2758 				reduce_outer_joins_pass2(j->rarg, right_state, root,
2759 										 pass_nonnullable_rels,
2760 										 pass_nonnullable_vars,
2761 										 pass_forced_null_vars);
2762 			}
2763 			bms_free(local_nonnullable_rels);
2764 		}
2765 	}
2766 	else
2767 		elog(ERROR, "unrecognized node type: %d",
2768 			 (int) nodeTag(jtnode));
2769 }
2770 
2771 
2772 /*
2773  * remove_useless_result_rtes
2774  *		Attempt to remove RTE_RESULT RTEs from the join tree.
2775  *
2776  * We can remove RTE_RESULT entries from the join tree using the knowledge
2777  * that RTE_RESULT returns exactly one row and has no output columns.  Hence,
2778  * if one is inner-joined to anything else, we can delete it.  Optimizations
2779  * are also possible for some outer-join cases, as detailed below.
2780  *
2781  * Some of these optimizations depend on recognizing empty (constant-true)
2782  * quals for FromExprs and JoinExprs.  That makes it useful to apply this
2783  * optimization pass after expression preprocessing, since that will have
2784  * eliminated constant-true quals, allowing more cases to be recognized as
2785  * optimizable.  What's more, the usual reason for an RTE_RESULT to be present
2786  * is that we pulled up a subquery or VALUES clause, thus very possibly
2787  * replacing Vars with constants, making it more likely that a qual can be
2788  * reduced to constant true.  Also, because some optimizations depend on
2789  * the outer-join type, it's best to have done reduce_outer_joins() first.
2790  *
2791  * A PlaceHolderVar referencing an RTE_RESULT RTE poses an obstacle to this
2792  * process: we must remove the RTE_RESULT's relid from the PHV's phrels, but
2793  * we must not reduce the phrels set to empty.  If that would happen, and
2794  * the RTE_RESULT is an immediate child of an outer join, we have to give up
2795  * and not remove the RTE_RESULT: there is noplace else to evaluate the
2796  * PlaceHolderVar.  (That is, in such cases the RTE_RESULT *does* have output
2797  * columns.)  But if the RTE_RESULT is an immediate child of an inner join,
2798  * we can usually change the PlaceHolderVar's phrels so as to evaluate it at
2799  * the inner join instead.  This is OK because we really only care that PHVs
2800  * are evaluated above or below the correct outer joins.  We can't, however,
2801  * postpone the evaluation of a PHV to above where it is used; so there are
2802  * some checks below on whether output PHVs are laterally referenced in the
2803  * other join input rel(s).
2804  *
2805  * We used to try to do this work as part of pull_up_subqueries() where the
2806  * potentially-optimizable cases get introduced; but it's way simpler, and
2807  * more effective, to do it separately.
2808  */
2809 void
remove_useless_result_rtes(PlannerInfo * root)2810 remove_useless_result_rtes(PlannerInfo *root)
2811 {
2812 	ListCell   *cell;
2813 	ListCell   *prev;
2814 	ListCell   *next;
2815 
2816 	/* Top level of jointree must always be a FromExpr */
2817 	Assert(IsA(root->parse->jointree, FromExpr));
2818 	/* Recurse ... */
2819 	root->parse->jointree = (FromExpr *)
2820 		remove_useless_results_recurse(root, (Node *) root->parse->jointree);
2821 	/* We should still have a FromExpr */
2822 	Assert(IsA(root->parse->jointree, FromExpr));
2823 
2824 	/*
2825 	 * Remove any PlanRowMark referencing an RTE_RESULT RTE.  We obviously
2826 	 * must do that for any RTE_RESULT that we just removed.  But one for a
2827 	 * RTE that we did not remove can be dropped anyway: since the RTE has
2828 	 * only one possible output row, there is no need for EPQ to mark and
2829 	 * restore that row.
2830 	 *
2831 	 * It's necessary, not optional, to remove the PlanRowMark for a surviving
2832 	 * RTE_RESULT RTE; otherwise we'll generate a whole-row Var for the
2833 	 * RTE_RESULT, which the executor has no support for.
2834 	 */
2835 	prev = NULL;
2836 	for (cell = list_head(root->rowMarks); cell; cell = next)
2837 	{
2838 		PlanRowMark *rc = (PlanRowMark *) lfirst(cell);
2839 
2840 		next = lnext(cell);
2841 		if (rt_fetch(rc->rti, root->parse->rtable)->rtekind == RTE_RESULT)
2842 			root->rowMarks = list_delete_cell(root->rowMarks, cell, prev);
2843 		else
2844 			prev = cell;
2845 	}
2846 }
2847 
2848 /*
2849  * remove_useless_results_recurse
2850  *		Recursive guts of remove_useless_result_rtes.
2851  *
2852  * This recursively processes the jointree and returns a modified jointree.
2853  */
2854 static Node *
remove_useless_results_recurse(PlannerInfo * root,Node * jtnode)2855 remove_useless_results_recurse(PlannerInfo *root, Node *jtnode)
2856 {
2857 	Assert(jtnode != NULL);
2858 	if (IsA(jtnode, RangeTblRef))
2859 	{
2860 		/* Can't immediately do anything with a RangeTblRef */
2861 	}
2862 	else if (IsA(jtnode, FromExpr))
2863 	{
2864 		FromExpr   *f = (FromExpr *) jtnode;
2865 		Relids		result_relids = NULL;
2866 		ListCell   *cell;
2867 		ListCell   *prev;
2868 		ListCell   *next;
2869 
2870 		/*
2871 		 * We can drop RTE_RESULT rels from the fromlist so long as at least
2872 		 * one child remains, since joining to a one-row table changes
2873 		 * nothing.  (But we can't drop a RTE_RESULT that computes PHV(s) that
2874 		 * are needed by some sibling.  The cleanup transformation below would
2875 		 * reassign the PHVs to be computed at the join, which is too late for
2876 		 * the sibling's use.)  The easiest way to mechanize this rule is to
2877 		 * modify the list in-place, using list_delete_cell.
2878 		 */
2879 		prev = NULL;
2880 		for (cell = list_head(f->fromlist); cell; cell = next)
2881 		{
2882 			Node	   *child = (Node *) lfirst(cell);
2883 			int			varno;
2884 
2885 			/* Recursively transform child ... */
2886 			child = remove_useless_results_recurse(root, child);
2887 			/* ... and stick it back into the tree */
2888 			lfirst(cell) = child;
2889 			next = lnext(cell);
2890 
2891 			/*
2892 			 * If it's an RTE_RESULT with at least one sibling, and no sibling
2893 			 * references dependent PHVs, we can drop it.  We don't yet know
2894 			 * what the inner join's final relid set will be, so postpone
2895 			 * cleanup of PHVs etc till after this loop.
2896 			 */
2897 			if (list_length(f->fromlist) > 1 &&
2898 				(varno = get_result_relid(root, child)) != 0 &&
2899 				!find_dependent_phvs_in_jointree(root, (Node *) f, varno))
2900 			{
2901 				f->fromlist = list_delete_cell(f->fromlist, cell, prev);
2902 				result_relids = bms_add_member(result_relids, varno);
2903 			}
2904 			else
2905 				prev = cell;
2906 		}
2907 
2908 		/*
2909 		 * Clean up if we dropped any RTE_RESULT RTEs.  This is a bit
2910 		 * inefficient if there's more than one, but it seems better to
2911 		 * optimize the support code for the single-relid case.
2912 		 */
2913 		if (result_relids)
2914 		{
2915 			int			varno = -1;
2916 
2917 			while ((varno = bms_next_member(result_relids, varno)) >= 0)
2918 				remove_result_refs(root, varno, (Node *) f);
2919 		}
2920 
2921 		/*
2922 		 * If we're not at the top of the jointree, it's valid to simplify a
2923 		 * degenerate FromExpr into its single child.  (At the top, we must
2924 		 * keep the FromExpr since Query.jointree is required to point to a
2925 		 * FromExpr.)
2926 		 */
2927 		if (f != root->parse->jointree &&
2928 			f->quals == NULL &&
2929 			list_length(f->fromlist) == 1)
2930 			return (Node *) linitial(f->fromlist);
2931 	}
2932 	else if (IsA(jtnode, JoinExpr))
2933 	{
2934 		JoinExpr   *j = (JoinExpr *) jtnode;
2935 		int			varno;
2936 
2937 		/* First, recurse */
2938 		j->larg = remove_useless_results_recurse(root, j->larg);
2939 		j->rarg = remove_useless_results_recurse(root, j->rarg);
2940 
2941 		/* Apply join-type-specific optimization rules */
2942 		switch (j->jointype)
2943 		{
2944 			case JOIN_INNER:
2945 
2946 				/*
2947 				 * An inner join is equivalent to a FromExpr, so if either
2948 				 * side was simplified to an RTE_RESULT rel, we can replace
2949 				 * the join with a FromExpr with just the other side; and if
2950 				 * the qual is empty (JOIN ON TRUE) then we can omit the
2951 				 * FromExpr as well.
2952 				 *
2953 				 * Just as in the FromExpr case, we can't simplify if the
2954 				 * other input rel references any PHVs that are marked as to
2955 				 * be evaluated at the RTE_RESULT rel, because we can't
2956 				 * postpone their evaluation in that case.  But we only have
2957 				 * to check this in cases where it's syntactically legal for
2958 				 * the other input to have a LATERAL reference to the
2959 				 * RTE_RESULT rel.  Only RHSes of inner and left joins are
2960 				 * allowed to have such refs.
2961 				 */
2962 				if ((varno = get_result_relid(root, j->larg)) != 0 &&
2963 					!find_dependent_phvs_in_jointree(root, j->rarg, varno))
2964 				{
2965 					remove_result_refs(root, varno, j->rarg);
2966 					if (j->quals)
2967 						jtnode = (Node *)
2968 							makeFromExpr(list_make1(j->rarg), j->quals);
2969 					else
2970 						jtnode = j->rarg;
2971 				}
2972 				else if ((varno = get_result_relid(root, j->rarg)) != 0)
2973 				{
2974 					remove_result_refs(root, varno, j->larg);
2975 					if (j->quals)
2976 						jtnode = (Node *)
2977 							makeFromExpr(list_make1(j->larg), j->quals);
2978 					else
2979 						jtnode = j->larg;
2980 				}
2981 				break;
2982 			case JOIN_LEFT:
2983 
2984 				/*
2985 				 * We can simplify this case if the RHS is an RTE_RESULT, with
2986 				 * two different possibilities:
2987 				 *
2988 				 * If the qual is empty (JOIN ON TRUE), then the join can be
2989 				 * strength-reduced to a plain inner join, since each LHS row
2990 				 * necessarily has exactly one join partner.  So we can always
2991 				 * discard the RHS, much as in the JOIN_INNER case above.
2992 				 * (Again, the LHS could not contain a lateral reference to
2993 				 * the RHS.)
2994 				 *
2995 				 * Otherwise, it's still true that each LHS row should be
2996 				 * returned exactly once, and since the RHS returns no columns
2997 				 * (unless there are PHVs that have to be evaluated there), we
2998 				 * don't much care if it's null-extended or not.  So in this
2999 				 * case also, we can just ignore the qual and discard the left
3000 				 * join.
3001 				 */
3002 				if ((varno = get_result_relid(root, j->rarg)) != 0 &&
3003 					(j->quals == NULL ||
3004 					 !find_dependent_phvs(root, varno)))
3005 				{
3006 					remove_result_refs(root, varno, j->larg);
3007 					jtnode = j->larg;
3008 				}
3009 				break;
3010 			case JOIN_RIGHT:
3011 				/* Mirror-image of the JOIN_LEFT case */
3012 				if ((varno = get_result_relid(root, j->larg)) != 0 &&
3013 					(j->quals == NULL ||
3014 					 !find_dependent_phvs(root, varno)))
3015 				{
3016 					remove_result_refs(root, varno, j->rarg);
3017 					jtnode = j->rarg;
3018 				}
3019 				break;
3020 			case JOIN_SEMI:
3021 
3022 				/*
3023 				 * We may simplify this case if the RHS is an RTE_RESULT; the
3024 				 * join qual becomes effectively just a filter qual for the
3025 				 * LHS, since we should either return the LHS row or not.  For
3026 				 * simplicity we inject the filter qual into a new FromExpr.
3027 				 *
3028 				 * Unlike the LEFT/RIGHT cases, we just Assert that there are
3029 				 * no PHVs that need to be evaluated at the semijoin's RHS,
3030 				 * since the rest of the query couldn't reference any outputs
3031 				 * of the semijoin's RHS.
3032 				 */
3033 				if ((varno = get_result_relid(root, j->rarg)) != 0)
3034 				{
3035 					Assert(!find_dependent_phvs(root, varno));
3036 					remove_result_refs(root, varno, j->larg);
3037 					if (j->quals)
3038 						jtnode = (Node *)
3039 							makeFromExpr(list_make1(j->larg), j->quals);
3040 					else
3041 						jtnode = j->larg;
3042 				}
3043 				break;
3044 			case JOIN_FULL:
3045 			case JOIN_ANTI:
3046 				/* We have no special smarts for these cases */
3047 				break;
3048 			default:
3049 				elog(ERROR, "unrecognized join type: %d",
3050 					 (int) j->jointype);
3051 				break;
3052 		}
3053 	}
3054 	else
3055 		elog(ERROR, "unrecognized node type: %d",
3056 			 (int) nodeTag(jtnode));
3057 	return jtnode;
3058 }
3059 
3060 /*
3061  * get_result_relid
3062  *		If jtnode is a RangeTblRef for an RTE_RESULT RTE, return its relid;
3063  *		otherwise return 0.
3064  */
3065 static int
get_result_relid(PlannerInfo * root,Node * jtnode)3066 get_result_relid(PlannerInfo *root, Node *jtnode)
3067 {
3068 	int			varno;
3069 
3070 	if (!IsA(jtnode, RangeTblRef))
3071 		return 0;
3072 	varno = ((RangeTblRef *) jtnode)->rtindex;
3073 	if (rt_fetch(varno, root->parse->rtable)->rtekind != RTE_RESULT)
3074 		return 0;
3075 	return varno;
3076 }
3077 
3078 /*
3079  * remove_result_refs
3080  *		Helper routine for dropping an unneeded RTE_RESULT RTE.
3081  *
3082  * This doesn't physically remove the RTE from the jointree, because that's
3083  * more easily handled in remove_useless_results_recurse.  What it does do
3084  * is the necessary cleanup in the rest of the tree: we must adjust any PHVs
3085  * that may reference the RTE.  Be sure to call this at a point where the
3086  * jointree is valid (no disconnected nodes).
3087  *
3088  * Note that we don't need to process the append_rel_list, since RTEs
3089  * referenced directly in the jointree won't be appendrel members.
3090  *
3091  * varno is the RTE_RESULT's relid.
3092  * newjtloc is the jointree location at which any PHVs referencing the
3093  * RTE_RESULT should be evaluated instead.
3094  */
3095 static void
remove_result_refs(PlannerInfo * root,int varno,Node * newjtloc)3096 remove_result_refs(PlannerInfo *root, int varno, Node *newjtloc)
3097 {
3098 	/* Fix up PlaceHolderVars as needed */
3099 	/* If there are no PHVs anywhere, we can skip this bit */
3100 	if (root->glob->lastPHId != 0)
3101 	{
3102 		Relids		subrelids;
3103 
3104 		subrelids = get_relids_in_jointree(newjtloc, false);
3105 		Assert(!bms_is_empty(subrelids));
3106 		substitute_phv_relids((Node *) root->parse, varno, subrelids);
3107 		fix_append_rel_relids(root->append_rel_list, varno, subrelids);
3108 	}
3109 
3110 	/*
3111 	 * We also need to remove any PlanRowMark referencing the RTE, but we
3112 	 * postpone that work until we return to remove_useless_result_rtes.
3113 	 */
3114 }
3115 
3116 
3117 /*
3118  * find_dependent_phvs - are there any PlaceHolderVars whose relids are
3119  * exactly the given varno?
3120  *
3121  * find_dependent_phvs should be used when we want to see if there are
3122  * any such PHVs anywhere in the Query.  Another use-case is to see if
3123  * a subtree of the join tree contains such PHVs; but for that, we have
3124  * to look not only at the join tree nodes themselves but at the
3125  * referenced RTEs.  For that, use find_dependent_phvs_in_jointree.
3126  */
3127 
3128 typedef struct
3129 {
3130 	Relids		relids;
3131 	int			sublevels_up;
3132 } find_dependent_phvs_context;
3133 
3134 static bool
find_dependent_phvs_walker(Node * node,find_dependent_phvs_context * context)3135 find_dependent_phvs_walker(Node *node,
3136 						   find_dependent_phvs_context *context)
3137 {
3138 	if (node == NULL)
3139 		return false;
3140 	if (IsA(node, PlaceHolderVar))
3141 	{
3142 		PlaceHolderVar *phv = (PlaceHolderVar *) node;
3143 
3144 		if (phv->phlevelsup == context->sublevels_up &&
3145 			bms_equal(context->relids, phv->phrels))
3146 			return true;
3147 		/* fall through to examine children */
3148 	}
3149 	if (IsA(node, Query))
3150 	{
3151 		/* Recurse into subselects */
3152 		bool		result;
3153 
3154 		context->sublevels_up++;
3155 		result = query_tree_walker((Query *) node,
3156 								   find_dependent_phvs_walker,
3157 								   (void *) context, 0);
3158 		context->sublevels_up--;
3159 		return result;
3160 	}
3161 	/* Shouldn't need to handle planner auxiliary nodes here */
3162 	Assert(!IsA(node, SpecialJoinInfo));
3163 	Assert(!IsA(node, AppendRelInfo));
3164 	Assert(!IsA(node, PlaceHolderInfo));
3165 	Assert(!IsA(node, MinMaxAggInfo));
3166 
3167 	return expression_tree_walker(node, find_dependent_phvs_walker,
3168 								  (void *) context);
3169 }
3170 
3171 static bool
find_dependent_phvs(PlannerInfo * root,int varno)3172 find_dependent_phvs(PlannerInfo *root, int varno)
3173 {
3174 	find_dependent_phvs_context context;
3175 
3176 	/* If there are no PHVs anywhere, we needn't work hard */
3177 	if (root->glob->lastPHId == 0)
3178 		return false;
3179 
3180 	context.relids = bms_make_singleton(varno);
3181 	context.sublevels_up = 0;
3182 
3183 	return query_tree_walker(root->parse,
3184 							 find_dependent_phvs_walker,
3185 							 (void *) &context,
3186 							 0);
3187 }
3188 
3189 static bool
find_dependent_phvs_in_jointree(PlannerInfo * root,Node * node,int varno)3190 find_dependent_phvs_in_jointree(PlannerInfo *root, Node *node, int varno)
3191 {
3192 	find_dependent_phvs_context context;
3193 	Relids		subrelids;
3194 	int			relid;
3195 
3196 	/* If there are no PHVs anywhere, we needn't work hard */
3197 	if (root->glob->lastPHId == 0)
3198 		return false;
3199 
3200 	context.relids = bms_make_singleton(varno);
3201 	context.sublevels_up = 0;
3202 
3203 	/*
3204 	 * See if the jointree fragment itself contains references (in join quals)
3205 	 */
3206 	if (find_dependent_phvs_walker(node, &context))
3207 		return true;
3208 
3209 	/*
3210 	 * Otherwise, identify the set of referenced RTEs (we can ignore joins,
3211 	 * since they should be flattened already, so their join alias lists no
3212 	 * longer matter), and tediously check each RTE.  We can ignore RTEs that
3213 	 * are not marked LATERAL, though, since they couldn't possibly contain
3214 	 * any cross-references to other RTEs.
3215 	 */
3216 	subrelids = get_relids_in_jointree(node, false);
3217 	relid = -1;
3218 	while ((relid = bms_next_member(subrelids, relid)) >= 0)
3219 	{
3220 		RangeTblEntry *rte = rt_fetch(relid, root->parse->rtable);
3221 
3222 		if (rte->lateral &&
3223 			range_table_entry_walker(rte,
3224 									 find_dependent_phvs_walker,
3225 									 (void *) &context,
3226 									 0))
3227 			return true;
3228 	}
3229 
3230 	return false;
3231 }
3232 
3233 /*
3234  * substitute_phv_relids - adjust PlaceHolderVar relid sets after pulling up
3235  * a subquery or removing an RTE_RESULT jointree item
3236  *
3237  * Find any PlaceHolderVar nodes in the given tree that reference the
3238  * pulled-up relid, and change them to reference the replacement relid(s).
3239  *
3240  * NOTE: although this has the form of a walker, we cheat and modify the
3241  * nodes in-place.  This should be OK since the tree was copied by
3242  * pullup_replace_vars earlier.  Avoid scribbling on the original values of
3243  * the bitmapsets, though, because expression_tree_mutator doesn't copy those.
3244  */
3245 
3246 typedef struct
3247 {
3248 	int			varno;
3249 	int			sublevels_up;
3250 	Relids		subrelids;
3251 } substitute_phv_relids_context;
3252 
3253 static bool
substitute_phv_relids_walker(Node * node,substitute_phv_relids_context * context)3254 substitute_phv_relids_walker(Node *node,
3255 							 substitute_phv_relids_context *context)
3256 {
3257 	if (node == NULL)
3258 		return false;
3259 	if (IsA(node, PlaceHolderVar))
3260 	{
3261 		PlaceHolderVar *phv = (PlaceHolderVar *) node;
3262 
3263 		if (phv->phlevelsup == context->sublevels_up &&
3264 			bms_is_member(context->varno, phv->phrels))
3265 		{
3266 			phv->phrels = bms_union(phv->phrels,
3267 									context->subrelids);
3268 			phv->phrels = bms_del_member(phv->phrels,
3269 										 context->varno);
3270 			/* Assert we haven't broken the PHV */
3271 			Assert(!bms_is_empty(phv->phrels));
3272 		}
3273 		/* fall through to examine children */
3274 	}
3275 	if (IsA(node, Query))
3276 	{
3277 		/* Recurse into subselects */
3278 		bool		result;
3279 
3280 		context->sublevels_up++;
3281 		result = query_tree_walker((Query *) node,
3282 								   substitute_phv_relids_walker,
3283 								   (void *) context, 0);
3284 		context->sublevels_up--;
3285 		return result;
3286 	}
3287 	/* Shouldn't need to handle planner auxiliary nodes here */
3288 	Assert(!IsA(node, SpecialJoinInfo));
3289 	Assert(!IsA(node, AppendRelInfo));
3290 	Assert(!IsA(node, PlaceHolderInfo));
3291 	Assert(!IsA(node, MinMaxAggInfo));
3292 
3293 	return expression_tree_walker(node, substitute_phv_relids_walker,
3294 								  (void *) context);
3295 }
3296 
3297 static void
substitute_phv_relids(Node * node,int varno,Relids subrelids)3298 substitute_phv_relids(Node *node, int varno, Relids subrelids)
3299 {
3300 	substitute_phv_relids_context context;
3301 
3302 	context.varno = varno;
3303 	context.sublevels_up = 0;
3304 	context.subrelids = subrelids;
3305 
3306 	/*
3307 	 * Must be prepared to start with a Query or a bare expression tree.
3308 	 */
3309 	query_or_expression_tree_walker(node,
3310 									substitute_phv_relids_walker,
3311 									(void *) &context,
3312 									0);
3313 }
3314 
3315 /*
3316  * fix_append_rel_relids: update RT-index fields of AppendRelInfo nodes
3317  *
3318  * When we pull up a subquery, any AppendRelInfo references to the subquery's
3319  * RT index have to be replaced by the substituted relid (and there had better
3320  * be only one).  We also need to apply substitute_phv_relids to their
3321  * translated_vars lists, since those might contain PlaceHolderVars.
3322  *
3323  * We assume we may modify the AppendRelInfo nodes in-place.
3324  */
3325 static void
fix_append_rel_relids(List * append_rel_list,int varno,Relids subrelids)3326 fix_append_rel_relids(List *append_rel_list, int varno, Relids subrelids)
3327 {
3328 	ListCell   *l;
3329 	int			subvarno = -1;
3330 
3331 	/*
3332 	 * We only want to extract the member relid once, but we mustn't fail
3333 	 * immediately if there are multiple members; it could be that none of the
3334 	 * AppendRelInfo nodes refer to it.  So compute it on first use. Note that
3335 	 * bms_singleton_member will complain if set is not singleton.
3336 	 */
3337 	foreach(l, append_rel_list)
3338 	{
3339 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
3340 
3341 		/* The parent_relid shouldn't ever be a pullup target */
3342 		Assert(appinfo->parent_relid != varno);
3343 
3344 		if (appinfo->child_relid == varno)
3345 		{
3346 			if (subvarno < 0)
3347 				subvarno = bms_singleton_member(subrelids);
3348 			appinfo->child_relid = subvarno;
3349 		}
3350 
3351 		/* Also fix up any PHVs in its translated vars */
3352 		substitute_phv_relids((Node *) appinfo->translated_vars,
3353 							  varno, subrelids);
3354 	}
3355 }
3356 
3357 /*
3358  * get_relids_in_jointree: get set of RT indexes present in a jointree
3359  *
3360  * If include_joins is true, join RT indexes are included; if false,
3361  * only base rels are included.
3362  */
3363 Relids
get_relids_in_jointree(Node * jtnode,bool include_joins)3364 get_relids_in_jointree(Node *jtnode, bool include_joins)
3365 {
3366 	Relids		result = NULL;
3367 
3368 	if (jtnode == NULL)
3369 		return result;
3370 	if (IsA(jtnode, RangeTblRef))
3371 	{
3372 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
3373 
3374 		result = bms_make_singleton(varno);
3375 	}
3376 	else if (IsA(jtnode, FromExpr))
3377 	{
3378 		FromExpr   *f = (FromExpr *) jtnode;
3379 		ListCell   *l;
3380 
3381 		foreach(l, f->fromlist)
3382 		{
3383 			result = bms_join(result,
3384 							  get_relids_in_jointree(lfirst(l),
3385 													 include_joins));
3386 		}
3387 	}
3388 	else if (IsA(jtnode, JoinExpr))
3389 	{
3390 		JoinExpr   *j = (JoinExpr *) jtnode;
3391 
3392 		result = get_relids_in_jointree(j->larg, include_joins);
3393 		result = bms_join(result,
3394 						  get_relids_in_jointree(j->rarg, include_joins));
3395 		if (include_joins && j->rtindex)
3396 			result = bms_add_member(result, j->rtindex);
3397 	}
3398 	else
3399 		elog(ERROR, "unrecognized node type: %d",
3400 			 (int) nodeTag(jtnode));
3401 	return result;
3402 }
3403 
3404 /*
3405  * get_relids_for_join: get set of base RT indexes making up a join
3406  */
3407 Relids
get_relids_for_join(Query * query,int joinrelid)3408 get_relids_for_join(Query *query, int joinrelid)
3409 {
3410 	Node	   *jtnode;
3411 
3412 	jtnode = find_jointree_node_for_rel((Node *) query->jointree,
3413 										joinrelid);
3414 	if (!jtnode)
3415 		elog(ERROR, "could not find join node %d", joinrelid);
3416 	return get_relids_in_jointree(jtnode, false);
3417 }
3418 
3419 /*
3420  * find_jointree_node_for_rel: locate jointree node for a base or join RT index
3421  *
3422  * Returns NULL if not found
3423  */
3424 static Node *
find_jointree_node_for_rel(Node * jtnode,int relid)3425 find_jointree_node_for_rel(Node *jtnode, int relid)
3426 {
3427 	if (jtnode == NULL)
3428 		return NULL;
3429 	if (IsA(jtnode, RangeTblRef))
3430 	{
3431 		int			varno = ((RangeTblRef *) jtnode)->rtindex;
3432 
3433 		if (relid == varno)
3434 			return jtnode;
3435 	}
3436 	else if (IsA(jtnode, FromExpr))
3437 	{
3438 		FromExpr   *f = (FromExpr *) jtnode;
3439 		ListCell   *l;
3440 
3441 		foreach(l, f->fromlist)
3442 		{
3443 			jtnode = find_jointree_node_for_rel(lfirst(l), relid);
3444 			if (jtnode)
3445 				return jtnode;
3446 		}
3447 	}
3448 	else if (IsA(jtnode, JoinExpr))
3449 	{
3450 		JoinExpr   *j = (JoinExpr *) jtnode;
3451 
3452 		if (relid == j->rtindex)
3453 			return jtnode;
3454 		jtnode = find_jointree_node_for_rel(j->larg, relid);
3455 		if (jtnode)
3456 			return jtnode;
3457 		jtnode = find_jointree_node_for_rel(j->rarg, relid);
3458 		if (jtnode)
3459 			return jtnode;
3460 	}
3461 	else
3462 		elog(ERROR, "unrecognized node type: %d",
3463 			 (int) nodeTag(jtnode));
3464 	return NULL;
3465 }
3466