1 /*-------------------------------------------------------------------------
2  *
3  * tlist.c
4  *	  Target list manipulation routines
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/optimizer/util/tlist.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "nodes/makefuncs.h"
18 #include "nodes/nodeFuncs.h"
19 #include "optimizer/cost.h"
20 #include "optimizer/optimizer.h"
21 #include "optimizer/tlist.h"
22 
23 
24 /*
25  * Data structures for split_pathtarget_at_srfs().  To preserve the identity
26  * of sortgroupref items even if they are textually equal(), what we track is
27  * not just bare expressions but expressions plus their sortgroupref indexes.
28  */
29 typedef struct
30 {
31 	Node	   *expr;			/* some subexpression of a PathTarget */
32 	Index		sortgroupref;	/* its sortgroupref, or 0 if none */
33 } split_pathtarget_item;
34 
35 typedef struct
36 {
37 	/* This is a List of bare expressions: */
38 	List	   *input_target_exprs; /* exprs available from input */
39 	/* These are Lists of Lists of split_pathtarget_items: */
40 	List	   *level_srfs;		/* SRF exprs to evaluate at each level */
41 	List	   *level_input_vars;	/* input vars needed at each level */
42 	List	   *level_input_srfs;	/* input SRFs needed at each level */
43 	/* These are Lists of split_pathtarget_items: */
44 	List	   *current_input_vars; /* vars needed in current subexpr */
45 	List	   *current_input_srfs; /* SRFs needed in current subexpr */
46 	/* Auxiliary data for current split_pathtarget_walker traversal: */
47 	int			current_depth;	/* max SRF depth in current subexpr */
48 	Index		current_sgref;	/* current subexpr's sortgroupref, or 0 */
49 } split_pathtarget_context;
50 
51 static bool split_pathtarget_walker(Node *node,
52 									split_pathtarget_context *context);
53 static void add_sp_item_to_pathtarget(PathTarget *target,
54 									  split_pathtarget_item *item);
55 static void add_sp_items_to_pathtarget(PathTarget *target, List *items);
56 
57 
58 /*****************************************************************************
59  *		Target list creation and searching utilities
60  *****************************************************************************/
61 
62 /*
63  * tlist_member
64  *	  Finds the (first) member of the given tlist whose expression is
65  *	  equal() to the given expression.  Result is NULL if no such member.
66  */
67 TargetEntry *
tlist_member(Expr * node,List * targetlist)68 tlist_member(Expr *node, List *targetlist)
69 {
70 	ListCell   *temp;
71 
72 	foreach(temp, targetlist)
73 	{
74 		TargetEntry *tlentry = (TargetEntry *) lfirst(temp);
75 
76 		if (equal(node, tlentry->expr))
77 			return tlentry;
78 	}
79 	return NULL;
80 }
81 
82 /*
83  * tlist_member_match_var
84  *	  Same as above, except that we match the provided Var on the basis
85  *	  of varno/varattno/varlevelsup/vartype only, rather than full equal().
86  *
87  * This is needed in some cases where we can't be sure of an exact typmod
88  * match.  For safety, though, we insist on vartype match.
89  */
90 static TargetEntry *
tlist_member_match_var(Var * var,List * targetlist)91 tlist_member_match_var(Var *var, List *targetlist)
92 {
93 	ListCell   *temp;
94 
95 	foreach(temp, targetlist)
96 	{
97 		TargetEntry *tlentry = (TargetEntry *) lfirst(temp);
98 		Var		   *tlvar = (Var *) tlentry->expr;
99 
100 		if (!tlvar || !IsA(tlvar, Var))
101 			continue;
102 		if (var->varno == tlvar->varno &&
103 			var->varattno == tlvar->varattno &&
104 			var->varlevelsup == tlvar->varlevelsup &&
105 			var->vartype == tlvar->vartype)
106 			return tlentry;
107 	}
108 	return NULL;
109 }
110 
111 /*
112  * add_to_flat_tlist
113  *		Add more items to a flattened tlist (if they're not already in it)
114  *
115  * 'tlist' is the flattened tlist
116  * 'exprs' is a list of expressions (usually, but not necessarily, Vars)
117  *
118  * Returns the extended tlist.
119  */
120 List *
add_to_flat_tlist(List * tlist,List * exprs)121 add_to_flat_tlist(List *tlist, List *exprs)
122 {
123 	int			next_resno = list_length(tlist) + 1;
124 	ListCell   *lc;
125 
126 	foreach(lc, exprs)
127 	{
128 		Expr	   *expr = (Expr *) lfirst(lc);
129 
130 		if (!tlist_member(expr, tlist))
131 		{
132 			TargetEntry *tle;
133 
134 			tle = makeTargetEntry(copyObject(expr), /* copy needed?? */
135 								  next_resno++,
136 								  NULL,
137 								  false);
138 			tlist = lappend(tlist, tle);
139 		}
140 	}
141 	return tlist;
142 }
143 
144 
145 /*
146  * get_tlist_exprs
147  *		Get just the expression subtrees of a tlist
148  *
149  * Resjunk columns are ignored unless includeJunk is true
150  */
151 List *
get_tlist_exprs(List * tlist,bool includeJunk)152 get_tlist_exprs(List *tlist, bool includeJunk)
153 {
154 	List	   *result = NIL;
155 	ListCell   *l;
156 
157 	foreach(l, tlist)
158 	{
159 		TargetEntry *tle = (TargetEntry *) lfirst(l);
160 
161 		if (tle->resjunk && !includeJunk)
162 			continue;
163 
164 		result = lappend(result, tle->expr);
165 	}
166 	return result;
167 }
168 
169 
170 /*
171  * count_nonjunk_tlist_entries
172  *		What it says ...
173  */
174 int
count_nonjunk_tlist_entries(List * tlist)175 count_nonjunk_tlist_entries(List *tlist)
176 {
177 	int			len = 0;
178 	ListCell   *l;
179 
180 	foreach(l, tlist)
181 	{
182 		TargetEntry *tle = (TargetEntry *) lfirst(l);
183 
184 		if (!tle->resjunk)
185 			len++;
186 	}
187 	return len;
188 }
189 
190 
191 /*
192  * tlist_same_exprs
193  *		Check whether two target lists contain the same expressions
194  *
195  * Note: this function is used to decide whether it's safe to jam a new tlist
196  * into a non-projection-capable plan node.  Obviously we can't do that unless
197  * the node's tlist shows it already returns the column values we want.
198  * However, we can ignore the TargetEntry attributes resname, ressortgroupref,
199  * resorigtbl, resorigcol, and resjunk, because those are only labelings that
200  * don't affect the row values computed by the node.  (Moreover, if we didn't
201  * ignore them, we'd frequently fail to make the desired optimization, since
202  * the planner tends to not bother to make resname etc. valid in intermediate
203  * plan nodes.)  Note that on success, the caller must still jam the desired
204  * tlist into the plan node, else it won't have the desired labeling fields.
205  */
206 bool
tlist_same_exprs(List * tlist1,List * tlist2)207 tlist_same_exprs(List *tlist1, List *tlist2)
208 {
209 	ListCell   *lc1,
210 			   *lc2;
211 
212 	if (list_length(tlist1) != list_length(tlist2))
213 		return false;			/* not same length, so can't match */
214 
215 	forboth(lc1, tlist1, lc2, tlist2)
216 	{
217 		TargetEntry *tle1 = (TargetEntry *) lfirst(lc1);
218 		TargetEntry *tle2 = (TargetEntry *) lfirst(lc2);
219 
220 		if (!equal(tle1->expr, tle2->expr))
221 			return false;
222 	}
223 
224 	return true;
225 }
226 
227 
228 /*
229  * Does tlist have same output datatypes as listed in colTypes?
230  *
231  * Resjunk columns are ignored if junkOK is true; otherwise presence of
232  * a resjunk column will always cause a 'false' result.
233  *
234  * Note: currently no callers care about comparing typmods.
235  */
236 bool
tlist_same_datatypes(List * tlist,List * colTypes,bool junkOK)237 tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
238 {
239 	ListCell   *l;
240 	ListCell   *curColType = list_head(colTypes);
241 
242 	foreach(l, tlist)
243 	{
244 		TargetEntry *tle = (TargetEntry *) lfirst(l);
245 
246 		if (tle->resjunk)
247 		{
248 			if (!junkOK)
249 				return false;
250 		}
251 		else
252 		{
253 			if (curColType == NULL)
254 				return false;	/* tlist longer than colTypes */
255 			if (exprType((Node *) tle->expr) != lfirst_oid(curColType))
256 				return false;
257 			curColType = lnext(colTypes, curColType);
258 		}
259 	}
260 	if (curColType != NULL)
261 		return false;			/* tlist shorter than colTypes */
262 	return true;
263 }
264 
265 /*
266  * Does tlist have same exposed collations as listed in colCollations?
267  *
268  * Identical logic to the above, but for collations.
269  */
270 bool
tlist_same_collations(List * tlist,List * colCollations,bool junkOK)271 tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
272 {
273 	ListCell   *l;
274 	ListCell   *curColColl = list_head(colCollations);
275 
276 	foreach(l, tlist)
277 	{
278 		TargetEntry *tle = (TargetEntry *) lfirst(l);
279 
280 		if (tle->resjunk)
281 		{
282 			if (!junkOK)
283 				return false;
284 		}
285 		else
286 		{
287 			if (curColColl == NULL)
288 				return false;	/* tlist longer than colCollations */
289 			if (exprCollation((Node *) tle->expr) != lfirst_oid(curColColl))
290 				return false;
291 			curColColl = lnext(colCollations, curColColl);
292 		}
293 	}
294 	if (curColColl != NULL)
295 		return false;			/* tlist shorter than colCollations */
296 	return true;
297 }
298 
299 /*
300  * apply_tlist_labeling
301  *		Apply the TargetEntry labeling attributes of src_tlist to dest_tlist
302  *
303  * This is useful for reattaching column names etc to a plan's final output
304  * targetlist.
305  */
306 void
apply_tlist_labeling(List * dest_tlist,List * src_tlist)307 apply_tlist_labeling(List *dest_tlist, List *src_tlist)
308 {
309 	ListCell   *ld,
310 			   *ls;
311 
312 	Assert(list_length(dest_tlist) == list_length(src_tlist));
313 	forboth(ld, dest_tlist, ls, src_tlist)
314 	{
315 		TargetEntry *dest_tle = (TargetEntry *) lfirst(ld);
316 		TargetEntry *src_tle = (TargetEntry *) lfirst(ls);
317 
318 		Assert(dest_tle->resno == src_tle->resno);
319 		dest_tle->resname = src_tle->resname;
320 		dest_tle->ressortgroupref = src_tle->ressortgroupref;
321 		dest_tle->resorigtbl = src_tle->resorigtbl;
322 		dest_tle->resorigcol = src_tle->resorigcol;
323 		dest_tle->resjunk = src_tle->resjunk;
324 	}
325 }
326 
327 
328 /*
329  * get_sortgroupref_tle
330  *		Find the targetlist entry matching the given SortGroupRef index,
331  *		and return it.
332  */
333 TargetEntry *
get_sortgroupref_tle(Index sortref,List * targetList)334 get_sortgroupref_tle(Index sortref, List *targetList)
335 {
336 	ListCell   *l;
337 
338 	foreach(l, targetList)
339 	{
340 		TargetEntry *tle = (TargetEntry *) lfirst(l);
341 
342 		if (tle->ressortgroupref == sortref)
343 			return tle;
344 	}
345 
346 	elog(ERROR, "ORDER/GROUP BY expression not found in targetlist");
347 	return NULL;				/* keep compiler quiet */
348 }
349 
350 /*
351  * get_sortgroupclause_tle
352  *		Find the targetlist entry matching the given SortGroupClause
353  *		by ressortgroupref, and return it.
354  */
355 TargetEntry *
get_sortgroupclause_tle(SortGroupClause * sgClause,List * targetList)356 get_sortgroupclause_tle(SortGroupClause *sgClause,
357 						List *targetList)
358 {
359 	return get_sortgroupref_tle(sgClause->tleSortGroupRef, targetList);
360 }
361 
362 /*
363  * get_sortgroupclause_expr
364  *		Find the targetlist entry matching the given SortGroupClause
365  *		by ressortgroupref, and return its expression.
366  */
367 Node *
get_sortgroupclause_expr(SortGroupClause * sgClause,List * targetList)368 get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
369 {
370 	TargetEntry *tle = get_sortgroupclause_tle(sgClause, targetList);
371 
372 	return (Node *) tle->expr;
373 }
374 
375 /*
376  * get_sortgrouplist_exprs
377  *		Given a list of SortGroupClauses, build a list
378  *		of the referenced targetlist expressions.
379  */
380 List *
get_sortgrouplist_exprs(List * sgClauses,List * targetList)381 get_sortgrouplist_exprs(List *sgClauses, List *targetList)
382 {
383 	List	   *result = NIL;
384 	ListCell   *l;
385 
386 	foreach(l, sgClauses)
387 	{
388 		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
389 		Node	   *sortexpr;
390 
391 		sortexpr = get_sortgroupclause_expr(sortcl, targetList);
392 		result = lappend(result, sortexpr);
393 	}
394 	return result;
395 }
396 
397 
398 /*****************************************************************************
399  *		Functions to extract data from a list of SortGroupClauses
400  *
401  * These don't really belong in tlist.c, but they are sort of related to the
402  * functions just above, and they don't seem to deserve their own file.
403  *****************************************************************************/
404 
405 /*
406  * get_sortgroupref_clause
407  *		Find the SortGroupClause matching the given SortGroupRef index,
408  *		and return it.
409  */
410 SortGroupClause *
get_sortgroupref_clause(Index sortref,List * clauses)411 get_sortgroupref_clause(Index sortref, List *clauses)
412 {
413 	ListCell   *l;
414 
415 	foreach(l, clauses)
416 	{
417 		SortGroupClause *cl = (SortGroupClause *) lfirst(l);
418 
419 		if (cl->tleSortGroupRef == sortref)
420 			return cl;
421 	}
422 
423 	elog(ERROR, "ORDER/GROUP BY expression not found in list");
424 	return NULL;				/* keep compiler quiet */
425 }
426 
427 /*
428  * get_sortgroupref_clause_noerr
429  *		As above, but return NULL rather than throwing an error if not found.
430  */
431 SortGroupClause *
get_sortgroupref_clause_noerr(Index sortref,List * clauses)432 get_sortgroupref_clause_noerr(Index sortref, List *clauses)
433 {
434 	ListCell   *l;
435 
436 	foreach(l, clauses)
437 	{
438 		SortGroupClause *cl = (SortGroupClause *) lfirst(l);
439 
440 		if (cl->tleSortGroupRef == sortref)
441 			return cl;
442 	}
443 
444 	return NULL;
445 }
446 
447 /*
448  * extract_grouping_ops - make an array of the equality operator OIDs
449  *		for a SortGroupClause list
450  */
451 Oid *
extract_grouping_ops(List * groupClause)452 extract_grouping_ops(List *groupClause)
453 {
454 	int			numCols = list_length(groupClause);
455 	int			colno = 0;
456 	Oid		   *groupOperators;
457 	ListCell   *glitem;
458 
459 	groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
460 
461 	foreach(glitem, groupClause)
462 	{
463 		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
464 
465 		groupOperators[colno] = groupcl->eqop;
466 		Assert(OidIsValid(groupOperators[colno]));
467 		colno++;
468 	}
469 
470 	return groupOperators;
471 }
472 
473 /*
474  * extract_grouping_collations - make an array of the grouping column collations
475  *		for a SortGroupClause list
476  */
477 Oid *
extract_grouping_collations(List * groupClause,List * tlist)478 extract_grouping_collations(List *groupClause, List *tlist)
479 {
480 	int			numCols = list_length(groupClause);
481 	int			colno = 0;
482 	Oid		   *grpCollations;
483 	ListCell   *glitem;
484 
485 	grpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
486 
487 	foreach(glitem, groupClause)
488 	{
489 		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
490 		TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
491 
492 		grpCollations[colno++] = exprCollation((Node *) tle->expr);
493 	}
494 
495 	return grpCollations;
496 }
497 
498 /*
499  * extract_grouping_cols - make an array of the grouping column resnos
500  *		for a SortGroupClause list
501  */
502 AttrNumber *
extract_grouping_cols(List * groupClause,List * tlist)503 extract_grouping_cols(List *groupClause, List *tlist)
504 {
505 	AttrNumber *grpColIdx;
506 	int			numCols = list_length(groupClause);
507 	int			colno = 0;
508 	ListCell   *glitem;
509 
510 	grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
511 
512 	foreach(glitem, groupClause)
513 	{
514 		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
515 		TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
516 
517 		grpColIdx[colno++] = tle->resno;
518 	}
519 
520 	return grpColIdx;
521 }
522 
523 /*
524  * grouping_is_sortable - is it possible to implement grouping list by sorting?
525  *
526  * This is easy since the parser will have included a sortop if one exists.
527  */
528 bool
grouping_is_sortable(List * groupClause)529 grouping_is_sortable(List *groupClause)
530 {
531 	ListCell   *glitem;
532 
533 	foreach(glitem, groupClause)
534 	{
535 		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
536 
537 		if (!OidIsValid(groupcl->sortop))
538 			return false;
539 	}
540 	return true;
541 }
542 
543 /*
544  * grouping_is_hashable - is it possible to implement grouping list by hashing?
545  *
546  * We rely on the parser to have set the hashable flag correctly.
547  */
548 bool
grouping_is_hashable(List * groupClause)549 grouping_is_hashable(List *groupClause)
550 {
551 	ListCell   *glitem;
552 
553 	foreach(glitem, groupClause)
554 	{
555 		SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
556 
557 		if (!groupcl->hashable)
558 			return false;
559 	}
560 	return true;
561 }
562 
563 
564 /*****************************************************************************
565  *		PathTarget manipulation functions
566  *
567  * PathTarget is a somewhat stripped-down version of a full targetlist; it
568  * omits all the TargetEntry decoration except (optionally) sortgroupref data,
569  * and it adds evaluation cost and output data width info.
570  *****************************************************************************/
571 
572 /*
573  * make_pathtarget_from_tlist
574  *	  Construct a PathTarget equivalent to the given targetlist.
575  *
576  * This leaves the cost and width fields as zeroes.  Most callers will want
577  * to use create_pathtarget(), so as to get those set.
578  */
579 PathTarget *
make_pathtarget_from_tlist(List * tlist)580 make_pathtarget_from_tlist(List *tlist)
581 {
582 	PathTarget *target = makeNode(PathTarget);
583 	int			i;
584 	ListCell   *lc;
585 
586 	target->sortgrouprefs = (Index *) palloc(list_length(tlist) * sizeof(Index));
587 
588 	i = 0;
589 	foreach(lc, tlist)
590 	{
591 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
592 
593 		target->exprs = lappend(target->exprs, tle->expr);
594 		target->sortgrouprefs[i] = tle->ressortgroupref;
595 		i++;
596 	}
597 
598 	/*
599 	 * Mark volatility as unknown.  The contain_volatile_functions function
600 	 * will determine if there are any volatile functions when called for the
601 	 * first time with this PathTarget.
602 	 */
603 	target->has_volatile_expr = VOLATILITY_UNKNOWN;
604 
605 	return target;
606 }
607 
608 /*
609  * make_tlist_from_pathtarget
610  *	  Construct a targetlist from a PathTarget.
611  */
612 List *
make_tlist_from_pathtarget(PathTarget * target)613 make_tlist_from_pathtarget(PathTarget *target)
614 {
615 	List	   *tlist = NIL;
616 	int			i;
617 	ListCell   *lc;
618 
619 	i = 0;
620 	foreach(lc, target->exprs)
621 	{
622 		Expr	   *expr = (Expr *) lfirst(lc);
623 		TargetEntry *tle;
624 
625 		tle = makeTargetEntry(expr,
626 							  i + 1,
627 							  NULL,
628 							  false);
629 		if (target->sortgrouprefs)
630 			tle->ressortgroupref = target->sortgrouprefs[i];
631 		tlist = lappend(tlist, tle);
632 		i++;
633 	}
634 
635 	return tlist;
636 }
637 
638 /*
639  * copy_pathtarget
640  *	  Copy a PathTarget.
641  *
642  * The new PathTarget has its own exprs List, but shares the underlying
643  * target expression trees with the old one.
644  */
645 PathTarget *
copy_pathtarget(PathTarget * src)646 copy_pathtarget(PathTarget *src)
647 {
648 	PathTarget *dst = makeNode(PathTarget);
649 
650 	/* Copy scalar fields */
651 	memcpy(dst, src, sizeof(PathTarget));
652 	/* Shallow-copy the expression list */
653 	dst->exprs = list_copy(src->exprs);
654 	/* Duplicate sortgrouprefs if any (if not, the memcpy handled this) */
655 	if (src->sortgrouprefs)
656 	{
657 		Size		nbytes = list_length(src->exprs) * sizeof(Index);
658 
659 		dst->sortgrouprefs = (Index *) palloc(nbytes);
660 		memcpy(dst->sortgrouprefs, src->sortgrouprefs, nbytes);
661 	}
662 	return dst;
663 }
664 
665 /*
666  * create_empty_pathtarget
667  *	  Create an empty (zero columns, zero cost) PathTarget.
668  */
669 PathTarget *
create_empty_pathtarget(void)670 create_empty_pathtarget(void)
671 {
672 	/* This is easy, but we don't want callers to hard-wire this ... */
673 	return makeNode(PathTarget);
674 }
675 
676 /*
677  * add_column_to_pathtarget
678  *		Append a target column to the PathTarget.
679  *
680  * As with make_pathtarget_from_tlist, we leave it to the caller to update
681  * the cost and width fields.
682  */
683 void
add_column_to_pathtarget(PathTarget * target,Expr * expr,Index sortgroupref)684 add_column_to_pathtarget(PathTarget *target, Expr *expr, Index sortgroupref)
685 {
686 	/* Updating the exprs list is easy ... */
687 	target->exprs = lappend(target->exprs, expr);
688 	/* ... the sortgroupref data, a bit less so */
689 	if (target->sortgrouprefs)
690 	{
691 		int			nexprs = list_length(target->exprs);
692 
693 		/* This might look inefficient, but actually it's usually cheap */
694 		target->sortgrouprefs = (Index *)
695 			repalloc(target->sortgrouprefs, nexprs * sizeof(Index));
696 		target->sortgrouprefs[nexprs - 1] = sortgroupref;
697 	}
698 	else if (sortgroupref)
699 	{
700 		/* Adding sortgroupref labeling to a previously unlabeled target */
701 		int			nexprs = list_length(target->exprs);
702 
703 		target->sortgrouprefs = (Index *) palloc0(nexprs * sizeof(Index));
704 		target->sortgrouprefs[nexprs - 1] = sortgroupref;
705 	}
706 
707 	/*
708 	 * Reset has_volatile_expr to UNKNOWN.  We just leave it up to
709 	 * contain_volatile_functions to set this properly again.  Technically we
710 	 * could save some effort here and just check the new Expr, but it seems
711 	 * better to keep the logic for setting this flag in one location rather
712 	 * than duplicating the logic here.
713 	 */
714 	if (target->has_volatile_expr == VOLATILITY_NOVOLATILE)
715 		target->has_volatile_expr = VOLATILITY_UNKNOWN;
716 }
717 
718 /*
719  * add_new_column_to_pathtarget
720  *		Append a target column to the PathTarget, but only if it's not
721  *		equal() to any pre-existing target expression.
722  *
723  * The caller cannot specify a sortgroupref, since it would be unclear how
724  * to merge that with a pre-existing column.
725  *
726  * As with make_pathtarget_from_tlist, we leave it to the caller to update
727  * the cost and width fields.
728  */
729 void
add_new_column_to_pathtarget(PathTarget * target,Expr * expr)730 add_new_column_to_pathtarget(PathTarget *target, Expr *expr)
731 {
732 	if (!list_member(target->exprs, expr))
733 		add_column_to_pathtarget(target, expr, 0);
734 }
735 
736 /*
737  * add_new_columns_to_pathtarget
738  *		Apply add_new_column_to_pathtarget() for each element of the list.
739  */
740 void
add_new_columns_to_pathtarget(PathTarget * target,List * exprs)741 add_new_columns_to_pathtarget(PathTarget *target, List *exprs)
742 {
743 	ListCell   *lc;
744 
745 	foreach(lc, exprs)
746 	{
747 		Expr	   *expr = (Expr *) lfirst(lc);
748 
749 		add_new_column_to_pathtarget(target, expr);
750 	}
751 }
752 
753 /*
754  * apply_pathtarget_labeling_to_tlist
755  *		Apply any sortgrouprefs in the PathTarget to matching tlist entries
756  *
757  * Here, we do not assume that the tlist entries are one-for-one with the
758  * PathTarget.  The intended use of this function is to deal with cases
759  * where createplan.c has decided to use some other tlist and we have
760  * to identify what matches exist.
761  */
762 void
apply_pathtarget_labeling_to_tlist(List * tlist,PathTarget * target)763 apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)
764 {
765 	int			i;
766 	ListCell   *lc;
767 
768 	/* Nothing to do if PathTarget has no sortgrouprefs data */
769 	if (target->sortgrouprefs == NULL)
770 		return;
771 
772 	i = 0;
773 	foreach(lc, target->exprs)
774 	{
775 		Expr	   *expr = (Expr *) lfirst(lc);
776 		TargetEntry *tle;
777 
778 		if (target->sortgrouprefs[i])
779 		{
780 			/*
781 			 * For Vars, use tlist_member_match_var's weakened matching rule;
782 			 * this allows us to deal with some cases where a set-returning
783 			 * function has been inlined, so that we now have more knowledge
784 			 * about what it returns than we did when the original Var was
785 			 * created.  Otherwise, use regular equal() to find the matching
786 			 * TLE.  (In current usage, only the Var case is actually needed;
787 			 * but it seems best to have sane behavior here for non-Vars too.)
788 			 */
789 			if (expr && IsA(expr, Var))
790 				tle = tlist_member_match_var((Var *) expr, tlist);
791 			else
792 				tle = tlist_member(expr, tlist);
793 
794 			/*
795 			 * Complain if noplace for the sortgrouprefs label, or if we'd
796 			 * have to label a column twice.  (The case where it already has
797 			 * the desired label probably can't happen, but we may as well
798 			 * allow for it.)
799 			 */
800 			if (!tle)
801 				elog(ERROR, "ORDER/GROUP BY expression not found in targetlist");
802 			if (tle->ressortgroupref != 0 &&
803 				tle->ressortgroupref != target->sortgrouprefs[i])
804 				elog(ERROR, "targetlist item has multiple sortgroupref labels");
805 
806 			tle->ressortgroupref = target->sortgrouprefs[i];
807 		}
808 		i++;
809 	}
810 }
811 
812 /*
813  * split_pathtarget_at_srfs
814  *		Split given PathTarget into multiple levels to position SRFs safely
815  *
816  * The executor can only handle set-returning functions that appear at the
817  * top level of the targetlist of a ProjectSet plan node.  If we have any SRFs
818  * that are not at top level, we need to split up the evaluation into multiple
819  * plan levels in which each level satisfies this constraint.  This function
820  * creates appropriate PathTarget(s) for each level.
821  *
822  * As an example, consider the tlist expression
823  *		x + srf1(srf2(y + z))
824  * This expression should appear as-is in the top PathTarget, but below that
825  * we must have a PathTarget containing
826  *		x, srf1(srf2(y + z))
827  * and below that, another PathTarget containing
828  *		x, srf2(y + z)
829  * and below that, another PathTarget containing
830  *		x, y, z
831  * When these tlists are processed by setrefs.c, subexpressions that match
832  * output expressions of the next lower tlist will be replaced by Vars,
833  * so that what the executor gets are tlists looking like
834  *		Var1 + Var2
835  *		Var1, srf1(Var2)
836  *		Var1, srf2(Var2 + Var3)
837  *		x, y, z
838  * which satisfy the desired property.
839  *
840  * Another example is
841  *		srf1(x), srf2(srf3(y))
842  * That must appear as-is in the top PathTarget, but below that we need
843  *		srf1(x), srf3(y)
844  * That is, each SRF must be computed at a level corresponding to the nesting
845  * depth of SRFs within its arguments.
846  *
847  * In some cases, a SRF has already been evaluated in some previous plan level
848  * and we shouldn't expand it again (that is, what we see in the target is
849  * already meant as a reference to a lower subexpression).  So, don't expand
850  * any tlist expressions that appear in input_target, if that's not NULL.
851  *
852  * It's also important that we preserve any sortgroupref annotation appearing
853  * in the given target, especially on expressions matching input_target items.
854  *
855  * The outputs of this function are two parallel lists, one a list of
856  * PathTargets and the other an integer list of bool flags indicating
857  * whether the corresponding PathTarget contains any evaluatable SRFs.
858  * The lists are given in the order they'd need to be evaluated in, with
859  * the "lowest" PathTarget first.  So the last list entry is always the
860  * originally given PathTarget, and any entries before it indicate evaluation
861  * levels that must be inserted below it.  The first list entry must not
862  * contain any SRFs (other than ones duplicating input_target entries), since
863  * it will typically be attached to a plan node that cannot evaluate SRFs.
864  *
865  * Note: using a list for the flags may seem like overkill, since there
866  * are only a few possible patterns for which levels contain SRFs.
867  * But this representation decouples callers from that knowledge.
868  */
869 void
split_pathtarget_at_srfs(PlannerInfo * root,PathTarget * target,PathTarget * input_target,List ** targets,List ** targets_contain_srfs)870 split_pathtarget_at_srfs(PlannerInfo *root,
871 						 PathTarget *target, PathTarget *input_target,
872 						 List **targets, List **targets_contain_srfs)
873 {
874 	split_pathtarget_context context;
875 	int			max_depth;
876 	bool		need_extra_projection;
877 	List	   *prev_level_tlist;
878 	int			lci;
879 	ListCell   *lc,
880 			   *lc1,
881 			   *lc2,
882 			   *lc3;
883 
884 	/*
885 	 * It's not unusual for planner.c to pass us two physically identical
886 	 * targets, in which case we can conclude without further ado that all
887 	 * expressions are available from the input.  (The logic below would
888 	 * arrive at the same conclusion, but much more tediously.)
889 	 */
890 	if (target == input_target)
891 	{
892 		*targets = list_make1(target);
893 		*targets_contain_srfs = list_make1_int(false);
894 		return;
895 	}
896 
897 	/* Pass any input_target exprs down to split_pathtarget_walker() */
898 	context.input_target_exprs = input_target ? input_target->exprs : NIL;
899 
900 	/*
901 	 * Initialize with empty level-zero lists, and no levels after that.
902 	 * (Note: we could dispense with representing level zero explicitly, since
903 	 * it will never receive any SRFs, but then we'd have to special-case that
904 	 * level when we get to building result PathTargets.  Level zero describes
905 	 * the SRF-free PathTarget that will be given to the input plan node.)
906 	 */
907 	context.level_srfs = list_make1(NIL);
908 	context.level_input_vars = list_make1(NIL);
909 	context.level_input_srfs = list_make1(NIL);
910 
911 	/* Initialize data we'll accumulate across all the target expressions */
912 	context.current_input_vars = NIL;
913 	context.current_input_srfs = NIL;
914 	max_depth = 0;
915 	need_extra_projection = false;
916 
917 	/* Scan each expression in the PathTarget looking for SRFs */
918 	lci = 0;
919 	foreach(lc, target->exprs)
920 	{
921 		Node	   *node = (Node *) lfirst(lc);
922 
923 		/* Tell split_pathtarget_walker about this expr's sortgroupref */
924 		context.current_sgref = get_pathtarget_sortgroupref(target, lci);
925 		lci++;
926 
927 		/*
928 		 * Find all SRFs and Vars (and Var-like nodes) in this expression, and
929 		 * enter them into appropriate lists within the context struct.
930 		 */
931 		context.current_depth = 0;
932 		split_pathtarget_walker(node, &context);
933 
934 		/* An expression containing no SRFs is of no further interest */
935 		if (context.current_depth == 0)
936 			continue;
937 
938 		/*
939 		 * Track max SRF nesting depth over the whole PathTarget.  Also, if
940 		 * this expression establishes a new max depth, we no longer care
941 		 * whether previous expressions contained nested SRFs; we can handle
942 		 * any required projection for them in the final ProjectSet node.
943 		 */
944 		if (max_depth < context.current_depth)
945 		{
946 			max_depth = context.current_depth;
947 			need_extra_projection = false;
948 		}
949 
950 		/*
951 		 * If any maximum-depth SRF is not at the top level of its expression,
952 		 * we'll need an extra Result node to compute the top-level scalar
953 		 * expression.
954 		 */
955 		if (max_depth == context.current_depth && !IS_SRF_CALL(node))
956 			need_extra_projection = true;
957 	}
958 
959 	/*
960 	 * If we found no SRFs needing evaluation (maybe they were all present in
961 	 * input_target, or maybe they were all removed by const-simplification),
962 	 * then no ProjectSet is needed; fall out.
963 	 */
964 	if (max_depth == 0)
965 	{
966 		*targets = list_make1(target);
967 		*targets_contain_srfs = list_make1_int(false);
968 		return;
969 	}
970 
971 	/*
972 	 * The Vars and SRF outputs needed at top level can be added to the last
973 	 * level_input lists if we don't need an extra projection step.  If we do
974 	 * need one, add a SRF-free level to the lists.
975 	 */
976 	if (need_extra_projection)
977 	{
978 		context.level_srfs = lappend(context.level_srfs, NIL);
979 		context.level_input_vars = lappend(context.level_input_vars,
980 										   context.current_input_vars);
981 		context.level_input_srfs = lappend(context.level_input_srfs,
982 										   context.current_input_srfs);
983 	}
984 	else
985 	{
986 		lc = list_nth_cell(context.level_input_vars, max_depth);
987 		lfirst(lc) = list_concat(lfirst(lc), context.current_input_vars);
988 		lc = list_nth_cell(context.level_input_srfs, max_depth);
989 		lfirst(lc) = list_concat(lfirst(lc), context.current_input_srfs);
990 	}
991 
992 	/*
993 	 * Now construct the output PathTargets.  The original target can be used
994 	 * as-is for the last one, but we need to construct a new SRF-free target
995 	 * representing what the preceding plan node has to emit, as well as a
996 	 * target for each intermediate ProjectSet node.
997 	 */
998 	*targets = *targets_contain_srfs = NIL;
999 	prev_level_tlist = NIL;
1000 
1001 	forthree(lc1, context.level_srfs,
1002 			 lc2, context.level_input_vars,
1003 			 lc3, context.level_input_srfs)
1004 	{
1005 		List	   *level_srfs = (List *) lfirst(lc1);
1006 		PathTarget *ntarget;
1007 
1008 		if (lnext(context.level_srfs, lc1) == NULL)
1009 		{
1010 			ntarget = target;
1011 		}
1012 		else
1013 		{
1014 			ntarget = create_empty_pathtarget();
1015 
1016 			/*
1017 			 * This target should actually evaluate any SRFs of the current
1018 			 * level, and it needs to propagate forward any Vars needed by
1019 			 * later levels, as well as SRFs computed earlier and needed by
1020 			 * later levels.
1021 			 */
1022 			add_sp_items_to_pathtarget(ntarget, level_srfs);
1023 			for_each_cell(lc, context.level_input_vars,
1024 						  lnext(context.level_input_vars, lc2))
1025 			{
1026 				List	   *input_vars = (List *) lfirst(lc);
1027 
1028 				add_sp_items_to_pathtarget(ntarget, input_vars);
1029 			}
1030 			for_each_cell(lc, context.level_input_srfs,
1031 						  lnext(context.level_input_srfs, lc3))
1032 			{
1033 				List	   *input_srfs = (List *) lfirst(lc);
1034 				ListCell   *lcx;
1035 
1036 				foreach(lcx, input_srfs)
1037 				{
1038 					split_pathtarget_item *item = lfirst(lcx);
1039 
1040 					if (list_member(prev_level_tlist, item->expr))
1041 						add_sp_item_to_pathtarget(ntarget, item);
1042 				}
1043 			}
1044 			set_pathtarget_cost_width(root, ntarget);
1045 		}
1046 
1047 		/*
1048 		 * Add current target and does-it-compute-SRFs flag to output lists.
1049 		 */
1050 		*targets = lappend(*targets, ntarget);
1051 		*targets_contain_srfs = lappend_int(*targets_contain_srfs,
1052 											(level_srfs != NIL));
1053 
1054 		/* Remember this level's output for next pass */
1055 		prev_level_tlist = ntarget->exprs;
1056 	}
1057 }
1058 
1059 /*
1060  * Recursively examine expressions for split_pathtarget_at_srfs.
1061  *
1062  * Note we make no effort here to prevent duplicate entries in the output
1063  * lists.  Duplicates will be gotten rid of later.
1064  */
1065 static bool
split_pathtarget_walker(Node * node,split_pathtarget_context * context)1066 split_pathtarget_walker(Node *node, split_pathtarget_context *context)
1067 {
1068 	if (node == NULL)
1069 		return false;
1070 
1071 	/*
1072 	 * A subexpression that matches an expression already computed in
1073 	 * input_target can be treated like a Var (which indeed it will be after
1074 	 * setrefs.c gets done with it), even if it's actually a SRF.  Record it
1075 	 * as being needed for the current expression, and ignore any
1076 	 * substructure.  (Note in particular that this preserves the identity of
1077 	 * any expressions that appear as sortgrouprefs in input_target.)
1078 	 */
1079 	if (list_member(context->input_target_exprs, node))
1080 	{
1081 		split_pathtarget_item *item = palloc(sizeof(split_pathtarget_item));
1082 
1083 		item->expr = node;
1084 		item->sortgroupref = context->current_sgref;
1085 		context->current_input_vars = lappend(context->current_input_vars,
1086 											  item);
1087 		return false;
1088 	}
1089 
1090 	/*
1091 	 * Vars and Var-like constructs are expected to be gotten from the input,
1092 	 * too.  We assume that these constructs cannot contain any SRFs (if one
1093 	 * does, there will be an executor failure from a misplaced SRF).
1094 	 */
1095 	if (IsA(node, Var) ||
1096 		IsA(node, PlaceHolderVar) ||
1097 		IsA(node, Aggref) ||
1098 		IsA(node, GroupingFunc) ||
1099 		IsA(node, WindowFunc))
1100 	{
1101 		split_pathtarget_item *item = palloc(sizeof(split_pathtarget_item));
1102 
1103 		item->expr = node;
1104 		item->sortgroupref = context->current_sgref;
1105 		context->current_input_vars = lappend(context->current_input_vars,
1106 											  item);
1107 		return false;
1108 	}
1109 
1110 	/*
1111 	 * If it's a SRF, recursively examine its inputs, determine its level, and
1112 	 * make appropriate entries in the output lists.
1113 	 */
1114 	if (IS_SRF_CALL(node))
1115 	{
1116 		split_pathtarget_item *item = palloc(sizeof(split_pathtarget_item));
1117 		List	   *save_input_vars = context->current_input_vars;
1118 		List	   *save_input_srfs = context->current_input_srfs;
1119 		int			save_current_depth = context->current_depth;
1120 		int			srf_depth;
1121 		ListCell   *lc;
1122 
1123 		item->expr = node;
1124 		item->sortgroupref = context->current_sgref;
1125 
1126 		context->current_input_vars = NIL;
1127 		context->current_input_srfs = NIL;
1128 		context->current_depth = 0;
1129 		context->current_sgref = 0; /* subexpressions are not sortgroup items */
1130 
1131 		(void) expression_tree_walker(node, split_pathtarget_walker,
1132 									  (void *) context);
1133 
1134 		/* Depth is one more than any SRF below it */
1135 		srf_depth = context->current_depth + 1;
1136 
1137 		/* If new record depth, initialize another level of output lists */
1138 		if (srf_depth >= list_length(context->level_srfs))
1139 		{
1140 			context->level_srfs = lappend(context->level_srfs, NIL);
1141 			context->level_input_vars = lappend(context->level_input_vars, NIL);
1142 			context->level_input_srfs = lappend(context->level_input_srfs, NIL);
1143 		}
1144 
1145 		/* Record this SRF as needing to be evaluated at appropriate level */
1146 		lc = list_nth_cell(context->level_srfs, srf_depth);
1147 		lfirst(lc) = lappend(lfirst(lc), item);
1148 
1149 		/* Record its inputs as being needed at the same level */
1150 		lc = list_nth_cell(context->level_input_vars, srf_depth);
1151 		lfirst(lc) = list_concat(lfirst(lc), context->current_input_vars);
1152 		lc = list_nth_cell(context->level_input_srfs, srf_depth);
1153 		lfirst(lc) = list_concat(lfirst(lc), context->current_input_srfs);
1154 
1155 		/*
1156 		 * Restore caller-level state and update it for presence of this SRF.
1157 		 * Notice we report the SRF itself as being needed for evaluation of
1158 		 * surrounding expression.
1159 		 */
1160 		context->current_input_vars = save_input_vars;
1161 		context->current_input_srfs = lappend(save_input_srfs, item);
1162 		context->current_depth = Max(save_current_depth, srf_depth);
1163 
1164 		/* We're done here */
1165 		return false;
1166 	}
1167 
1168 	/*
1169 	 * Otherwise, the node is a scalar (non-set) expression, so recurse to
1170 	 * examine its inputs.
1171 	 */
1172 	context->current_sgref = 0; /* subexpressions are not sortgroup items */
1173 	return expression_tree_walker(node, split_pathtarget_walker,
1174 								  (void *) context);
1175 }
1176 
1177 /*
1178  * Add a split_pathtarget_item to the PathTarget, unless a matching item is
1179  * already present.  This is like add_new_column_to_pathtarget, but allows
1180  * for sortgrouprefs to be handled.  An item having zero sortgroupref can
1181  * be merged with one that has a sortgroupref, acquiring the latter's
1182  * sortgroupref.
1183  *
1184  * Note that we don't worry about possibly adding duplicate sortgrouprefs
1185  * to the PathTarget.  That would be bad, but it should be impossible unless
1186  * the target passed to split_pathtarget_at_srfs already had duplicates.
1187  * As long as it didn't, we can have at most one split_pathtarget_item with
1188  * any particular nonzero sortgroupref.
1189  */
1190 static void
add_sp_item_to_pathtarget(PathTarget * target,split_pathtarget_item * item)1191 add_sp_item_to_pathtarget(PathTarget *target, split_pathtarget_item *item)
1192 {
1193 	int			lci;
1194 	ListCell   *lc;
1195 
1196 	/*
1197 	 * Look for a pre-existing entry that is equal() and does not have a
1198 	 * conflicting sortgroupref already.
1199 	 */
1200 	lci = 0;
1201 	foreach(lc, target->exprs)
1202 	{
1203 		Node	   *node = (Node *) lfirst(lc);
1204 		Index		sgref = get_pathtarget_sortgroupref(target, lci);
1205 
1206 		if ((item->sortgroupref == sgref ||
1207 			 item->sortgroupref == 0 ||
1208 			 sgref == 0) &&
1209 			equal(item->expr, node))
1210 		{
1211 			/* Found a match.  Assign item's sortgroupref if it has one. */
1212 			if (item->sortgroupref)
1213 			{
1214 				if (target->sortgrouprefs == NULL)
1215 				{
1216 					target->sortgrouprefs = (Index *)
1217 						palloc0(list_length(target->exprs) * sizeof(Index));
1218 				}
1219 				target->sortgrouprefs[lci] = item->sortgroupref;
1220 			}
1221 			return;
1222 		}
1223 		lci++;
1224 	}
1225 
1226 	/*
1227 	 * No match, so add item to PathTarget.  Copy the expr for safety.
1228 	 */
1229 	add_column_to_pathtarget(target, (Expr *) copyObject(item->expr),
1230 							 item->sortgroupref);
1231 }
1232 
1233 /*
1234  * Apply add_sp_item_to_pathtarget to each element of list.
1235  */
1236 static void
add_sp_items_to_pathtarget(PathTarget * target,List * items)1237 add_sp_items_to_pathtarget(PathTarget *target, List *items)
1238 {
1239 	ListCell   *lc;
1240 
1241 	foreach(lc, items)
1242 	{
1243 		split_pathtarget_item *item = lfirst(lc);
1244 
1245 		add_sp_item_to_pathtarget(target, item);
1246 	}
1247 }
1248