1 /*-------------------------------------------------------------------------
2  *
3  * parse_clause.c
4  *	  handle clauses in parser
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/parser/parse_clause.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "miscadmin.h"
19 
20 #include "access/heapam.h"
21 #include "access/tsmapi.h"
22 #include "catalog/catalog.h"
23 #include "catalog/heap.h"
24 #include "catalog/pg_am.h"
25 #include "catalog/pg_collation.h"
26 #include "catalog/pg_constraint_fn.h"
27 #include "catalog/pg_type.h"
28 #include "commands/defrem.h"
29 #include "nodes/makefuncs.h"
30 #include "nodes/nodeFuncs.h"
31 #include "optimizer/tlist.h"
32 #include "optimizer/var.h"
33 #include "parser/analyze.h"
34 #include "parser/parsetree.h"
35 #include "parser/parser.h"
36 #include "parser/parse_clause.h"
37 #include "parser/parse_coerce.h"
38 #include "parser/parse_collate.h"
39 #include "parser/parse_expr.h"
40 #include "parser/parse_func.h"
41 #include "parser/parse_oper.h"
42 #include "parser/parse_relation.h"
43 #include "parser/parse_target.h"
44 #include "parser/parse_type.h"
45 #include "rewrite/rewriteManip.h"
46 #include "utils/guc.h"
47 #include "utils/lsyscache.h"
48 #include "utils/rel.h"
49 
50 
51 /* Convenience macro for the most common makeNamespaceItem() case */
52 #define makeDefaultNSItem(rte)	makeNamespaceItem(rte, true, true, false, true)
53 
54 static void extractRemainingColumns(List *common_colnames,
55 						List *src_colnames, List *src_colvars,
56 						List **res_colnames, List **res_colvars);
57 static Node *transformJoinUsingClause(ParseState *pstate,
58 						 RangeTblEntry *leftRTE, RangeTblEntry *rightRTE,
59 						 List *leftVars, List *rightVars);
60 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
61 					  List *namespace);
62 static RangeTblEntry *getRTEForSpecialRelationTypes(ParseState *pstate,
63 							  RangeVar *rv);
64 static RangeTblEntry *transformTableEntry(ParseState *pstate, RangeVar *r);
65 static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
66 						RangeSubselect *r);
67 static RangeTblEntry *transformRangeFunction(ParseState *pstate,
68 					   RangeFunction *r);
69 static RangeTblEntry *transformRangeTableFunc(ParseState *pstate,
70 						RangeTableFunc *t);
71 static TableSampleClause *transformRangeTableSample(ParseState *pstate,
72 						  RangeTableSample *rts);
73 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
74 						RangeTblEntry **top_rte, int *top_rti,
75 						List **namespace);
76 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
77 				   Var *l_colvar, Var *r_colvar);
78 static ParseNamespaceItem *makeNamespaceItem(RangeTblEntry *rte,
79 				  bool rel_visible, bool cols_visible,
80 				  bool lateral_only, bool lateral_ok);
81 static void setNamespaceColumnVisibility(List *namespace, bool cols_visible);
82 static void setNamespaceLateralState(List *namespace,
83 						 bool lateral_only, bool lateral_ok);
84 static void checkExprIsVarFree(ParseState *pstate, Node *n,
85 				   const char *constructName);
86 static TargetEntry *findTargetlistEntrySQL92(ParseState *pstate, Node *node,
87 						 List **tlist, ParseExprKind exprKind);
88 static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node,
89 						 List **tlist, ParseExprKind exprKind);
90 static int get_matching_location(int sortgroupref,
91 					  List *sortgrouprefs, List *exprs);
92 static List *resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
93 						  Relation heapRel);
94 static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
95 					 List *grouplist, List *targetlist, int location);
96 static WindowClause *findWindowClause(List *wclist, const char *name);
97 static Node *transformFrameOffset(ParseState *pstate, int frameOptions,
98 					 Node *clause);
99 
100 
101 /*
102  * transformFromClause -
103  *	  Process the FROM clause and add items to the query's range table,
104  *	  joinlist, and namespace.
105  *
106  * Note: we assume that the pstate's p_rtable, p_joinlist, and p_namespace
107  * lists were initialized to NIL when the pstate was created.
108  * We will add onto any entries already present --- this is needed for rule
109  * processing, as well as for UPDATE and DELETE.
110  */
111 void
transformFromClause(ParseState * pstate,List * frmList)112 transformFromClause(ParseState *pstate, List *frmList)
113 {
114 	ListCell   *fl;
115 
116 	/*
117 	 * The grammar will have produced a list of RangeVars, RangeSubselects,
118 	 * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding
119 	 * entries to the rtable), check for duplicate refnames, and then add it
120 	 * to the joinlist and namespace.
121 	 *
122 	 * Note we must process the items left-to-right for proper handling of
123 	 * LATERAL references.
124 	 */
125 	foreach(fl, frmList)
126 	{
127 		Node	   *n = lfirst(fl);
128 		RangeTblEntry *rte;
129 		int			rtindex;
130 		List	   *namespace;
131 
132 		n = transformFromClauseItem(pstate, n,
133 									&rte,
134 									&rtindex,
135 									&namespace);
136 
137 		checkNameSpaceConflicts(pstate, pstate->p_namespace, namespace);
138 
139 		/* Mark the new namespace items as visible only to LATERAL */
140 		setNamespaceLateralState(namespace, true, true);
141 
142 		pstate->p_joinlist = lappend(pstate->p_joinlist, n);
143 		pstate->p_namespace = list_concat(pstate->p_namespace, namespace);
144 	}
145 
146 	/*
147 	 * We're done parsing the FROM list, so make all namespace items
148 	 * unconditionally visible.  Note that this will also reset lateral_only
149 	 * for any namespace items that were already present when we were called;
150 	 * but those should have been that way already.
151 	 */
152 	setNamespaceLateralState(pstate->p_namespace, false, true);
153 }
154 
155 /*
156  * setTargetTable
157  *	  Add the target relation of INSERT/UPDATE/DELETE to the range table,
158  *	  and make the special links to it in the ParseState.
159  *
160  *	  We also open the target relation and acquire a write lock on it.
161  *	  This must be done before processing the FROM list, in case the target
162  *	  is also mentioned as a source relation --- we want to be sure to grab
163  *	  the write lock before any read lock.
164  *
165  *	  If alsoSource is true, add the target to the query's joinlist and
166  *	  namespace.  For INSERT, we don't want the target to be joined to;
167  *	  it's a destination of tuples, not a source.   For UPDATE/DELETE,
168  *	  we do need to scan or join the target.  (NOTE: we do not bother
169  *	  to check for namespace conflict; we assume that the namespace was
170  *	  initially empty in these cases.)
171  *
172  *	  Finally, we mark the relation as requiring the permissions specified
173  *	  by requiredPerms.
174  *
175  *	  Returns the rangetable index of the target relation.
176  */
177 int
setTargetTable(ParseState * pstate,RangeVar * relation,bool inh,bool alsoSource,AclMode requiredPerms)178 setTargetTable(ParseState *pstate, RangeVar *relation,
179 			   bool inh, bool alsoSource, AclMode requiredPerms)
180 {
181 	RangeTblEntry *rte;
182 	int			rtindex;
183 
184 	/*
185 	 * ENRs hide tables of the same name, so we need to check for them first.
186 	 * In contrast, CTEs don't hide tables (for this purpose).
187 	 */
188 	if (relation->schemaname == NULL &&
189 		scanNameSpaceForENR(pstate, relation->relname))
190 		ereport(ERROR,
191 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
192 				 errmsg("relation \"%s\" cannot be the target of a modifying statement",
193 						relation->relname)));
194 
195 	/* Close old target; this could only happen for multi-action rules */
196 	if (pstate->p_target_relation != NULL)
197 		heap_close(pstate->p_target_relation, NoLock);
198 
199 	/*
200 	 * Open target rel and grab suitable lock (which we will hold till end of
201 	 * transaction).
202 	 *
203 	 * free_parsestate() will eventually do the corresponding heap_close(),
204 	 * but *not* release the lock.
205 	 */
206 	pstate->p_target_relation = parserOpenTable(pstate, relation,
207 												RowExclusiveLock);
208 
209 	/*
210 	 * Now build an RTE.
211 	 */
212 	rte = addRangeTableEntryForRelation(pstate, pstate->p_target_relation,
213 										relation->alias, inh, false);
214 	pstate->p_target_rangetblentry = rte;
215 
216 	/* assume new rte is at end */
217 	rtindex = list_length(pstate->p_rtable);
218 	Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
219 
220 	/*
221 	 * Override addRangeTableEntry's default ACL_SELECT permissions check, and
222 	 * instead mark target table as requiring exactly the specified
223 	 * permissions.
224 	 *
225 	 * If we find an explicit reference to the rel later during parse
226 	 * analysis, we will add the ACL_SELECT bit back again; see
227 	 * markVarForSelectPriv and its callers.
228 	 */
229 	rte->requiredPerms = requiredPerms;
230 
231 	/*
232 	 * If UPDATE/DELETE, add table to joinlist and namespace.
233 	 *
234 	 * Note: some callers know that they can find the new ParseNamespaceItem
235 	 * at the end of the pstate->p_namespace list.  This is a bit ugly but not
236 	 * worth complicating this function's signature for.
237 	 */
238 	if (alsoSource)
239 		addRTEtoQuery(pstate, rte, true, true, true);
240 
241 	return rtindex;
242 }
243 
244 /*
245  * Given a relation-options list (of DefElems), return true iff the specified
246  * table/result set should be created with OIDs. This needs to be done after
247  * parsing the query string because the return value can depend upon the
248  * default_with_oids GUC var.
249  *
250  * In some situations, we want to reject an OIDS option even if it's present.
251  * That's (rather messily) handled here rather than reloptions.c, because that
252  * code explicitly punts checking for oids to here.
253  */
254 bool
interpretOidsOption(List * defList,bool allowOids)255 interpretOidsOption(List *defList, bool allowOids)
256 {
257 	ListCell   *cell;
258 
259 	/* Scan list to see if OIDS was included */
260 	foreach(cell, defList)
261 	{
262 		DefElem    *def = (DefElem *) lfirst(cell);
263 
264 		if (def->defnamespace == NULL &&
265 			pg_strcasecmp(def->defname, "oids") == 0)
266 		{
267 			if (!allowOids)
268 				ereport(ERROR,
269 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
270 						 errmsg("unrecognized parameter \"%s\"",
271 								def->defname)));
272 			return defGetBoolean(def);
273 		}
274 	}
275 
276 	/* Force no-OIDS result if caller disallows OIDS. */
277 	if (!allowOids)
278 		return false;
279 
280 	/* OIDS option was not specified, so use default. */
281 	return default_with_oids;
282 }
283 
284 /*
285  * Extract all not-in-common columns from column lists of a source table
286  */
287 static void
extractRemainingColumns(List * common_colnames,List * src_colnames,List * src_colvars,List ** res_colnames,List ** res_colvars)288 extractRemainingColumns(List *common_colnames,
289 						List *src_colnames, List *src_colvars,
290 						List **res_colnames, List **res_colvars)
291 {
292 	List	   *new_colnames = NIL;
293 	List	   *new_colvars = NIL;
294 	ListCell   *lnames,
295 			   *lvars;
296 
297 	Assert(list_length(src_colnames) == list_length(src_colvars));
298 
299 	forboth(lnames, src_colnames, lvars, src_colvars)
300 	{
301 		char	   *colname = strVal(lfirst(lnames));
302 		bool		match = false;
303 		ListCell   *cnames;
304 
305 		foreach(cnames, common_colnames)
306 		{
307 			char	   *ccolname = strVal(lfirst(cnames));
308 
309 			if (strcmp(colname, ccolname) == 0)
310 			{
311 				match = true;
312 				break;
313 			}
314 		}
315 
316 		if (!match)
317 		{
318 			new_colnames = lappend(new_colnames, lfirst(lnames));
319 			new_colvars = lappend(new_colvars, lfirst(lvars));
320 		}
321 	}
322 
323 	*res_colnames = new_colnames;
324 	*res_colvars = new_colvars;
325 }
326 
327 /* transformJoinUsingClause()
328  *	  Build a complete ON clause from a partially-transformed USING list.
329  *	  We are given lists of nodes representing left and right match columns.
330  *	  Result is a transformed qualification expression.
331  */
332 static Node *
transformJoinUsingClause(ParseState * pstate,RangeTblEntry * leftRTE,RangeTblEntry * rightRTE,List * leftVars,List * rightVars)333 transformJoinUsingClause(ParseState *pstate,
334 						 RangeTblEntry *leftRTE, RangeTblEntry *rightRTE,
335 						 List *leftVars, List *rightVars)
336 {
337 	Node	   *result;
338 	List	   *andargs = NIL;
339 	ListCell   *lvars,
340 			   *rvars;
341 
342 	/*
343 	 * We cheat a little bit here by building an untransformed operator tree
344 	 * whose leaves are the already-transformed Vars.  This requires collusion
345 	 * from transformExpr(), which normally could be expected to complain
346 	 * about already-transformed subnodes.  However, this does mean that we
347 	 * have to mark the columns as requiring SELECT privilege for ourselves;
348 	 * transformExpr() won't do it.
349 	 */
350 	forboth(lvars, leftVars, rvars, rightVars)
351 	{
352 		Var		   *lvar = (Var *) lfirst(lvars);
353 		Var		   *rvar = (Var *) lfirst(rvars);
354 		A_Expr	   *e;
355 
356 		/* Require read access to the join variables */
357 		markVarForSelectPriv(pstate, lvar, leftRTE);
358 		markVarForSelectPriv(pstate, rvar, rightRTE);
359 
360 		/* Now create the lvar = rvar join condition */
361 		e = makeSimpleA_Expr(AEXPR_OP, "=",
362 							 (Node *) copyObject(lvar), (Node *) copyObject(rvar),
363 							 -1);
364 
365 		/* Prepare to combine into an AND clause, if multiple join columns */
366 		andargs = lappend(andargs, e);
367 	}
368 
369 	/* Only need an AND if there's more than one join column */
370 	if (list_length(andargs) == 1)
371 		result = (Node *) linitial(andargs);
372 	else
373 		result = (Node *) makeBoolExpr(AND_EXPR, andargs, -1);
374 
375 	/*
376 	 * Since the references are already Vars, and are certainly from the input
377 	 * relations, we don't have to go through the same pushups that
378 	 * transformJoinOnClause() does.  Just invoke transformExpr() to fix up
379 	 * the operators, and we're done.
380 	 */
381 	result = transformExpr(pstate, result, EXPR_KIND_JOIN_USING);
382 
383 	result = coerce_to_boolean(pstate, result, "JOIN/USING");
384 
385 	return result;
386 }
387 
388 /* transformJoinOnClause()
389  *	  Transform the qual conditions for JOIN/ON.
390  *	  Result is a transformed qualification expression.
391  */
392 static Node *
transformJoinOnClause(ParseState * pstate,JoinExpr * j,List * namespace)393 transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
394 {
395 	Node	   *result;
396 	List	   *save_namespace;
397 
398 	/*
399 	 * The namespace that the join expression should see is just the two
400 	 * subtrees of the JOIN plus any outer references from upper pstate
401 	 * levels.  Temporarily set this pstate's namespace accordingly.  (We need
402 	 * not check for refname conflicts, because transformFromClauseItem()
403 	 * already did.)  All namespace items are marked visible regardless of
404 	 * LATERAL state.
405 	 */
406 	setNamespaceLateralState(namespace, false, true);
407 
408 	save_namespace = pstate->p_namespace;
409 	pstate->p_namespace = namespace;
410 
411 	result = transformWhereClause(pstate, j->quals,
412 								  EXPR_KIND_JOIN_ON, "JOIN/ON");
413 
414 	pstate->p_namespace = save_namespace;
415 
416 	return result;
417 }
418 
419 /*
420  * transformTableEntry --- transform a RangeVar (simple relation reference)
421  */
422 static RangeTblEntry *
transformTableEntry(ParseState * pstate,RangeVar * r)423 transformTableEntry(ParseState *pstate, RangeVar *r)
424 {
425 	RangeTblEntry *rte;
426 
427 	/* We need only build a range table entry */
428 	rte = addRangeTableEntry(pstate, r, r->alias, r->inh, true);
429 
430 	return rte;
431 }
432 
433 /*
434  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
435  */
436 static RangeTblEntry *
transformRangeSubselect(ParseState * pstate,RangeSubselect * r)437 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
438 {
439 	Query	   *query;
440 	RangeTblEntry *rte;
441 
442 	/*
443 	 * We require user to supply an alias for a subselect, per SQL92. To relax
444 	 * this, we'd have to be prepared to gin up a unique alias for an
445 	 * unlabeled subselect.  (This is just elog, not ereport, because the
446 	 * grammar should have enforced it already.  It'd probably be better to
447 	 * report the error here, but we don't have a good error location here.)
448 	 */
449 	if (r->alias == NULL)
450 		elog(ERROR, "subquery in FROM must have an alias");
451 
452 	/*
453 	 * Set p_expr_kind to show this parse level is recursing to a subselect.
454 	 * We can't be nested within any expression, so don't need save-restore
455 	 * logic here.
456 	 */
457 	Assert(pstate->p_expr_kind == EXPR_KIND_NONE);
458 	pstate->p_expr_kind = EXPR_KIND_FROM_SUBSELECT;
459 
460 	/*
461 	 * If the subselect is LATERAL, make lateral_only names of this level
462 	 * visible to it.  (LATERAL can't nest within a single pstate level, so we
463 	 * don't need save/restore logic here.)
464 	 */
465 	Assert(!pstate->p_lateral_active);
466 	pstate->p_lateral_active = r->lateral;
467 
468 	/*
469 	 * Analyze and transform the subquery.
470 	 */
471 	query = parse_sub_analyze(r->subquery, pstate, NULL,
472 							  isLockedRefname(pstate, r->alias->aliasname),
473 							  true);
474 
475 	/* Restore state */
476 	pstate->p_lateral_active = false;
477 	pstate->p_expr_kind = EXPR_KIND_NONE;
478 
479 	/*
480 	 * Check that we got a SELECT.  Anything else should be impossible given
481 	 * restrictions of the grammar, but check anyway.
482 	 */
483 	if (!IsA(query, Query) ||
484 		query->commandType != CMD_SELECT)
485 		elog(ERROR, "unexpected non-SELECT command in subquery in FROM");
486 
487 	/*
488 	 * OK, build an RTE for the subquery.
489 	 */
490 	rte = addRangeTableEntryForSubquery(pstate,
491 										query,
492 										r->alias,
493 										r->lateral,
494 										true);
495 
496 	return rte;
497 }
498 
499 
500 /*
501  * transformRangeFunction --- transform a function call appearing in FROM
502  */
503 static RangeTblEntry *
transformRangeFunction(ParseState * pstate,RangeFunction * r)504 transformRangeFunction(ParseState *pstate, RangeFunction *r)
505 {
506 	List	   *funcexprs = NIL;
507 	List	   *funcnames = NIL;
508 	List	   *coldeflists = NIL;
509 	bool		is_lateral;
510 	RangeTblEntry *rte;
511 	ListCell   *lc;
512 
513 	/*
514 	 * We make lateral_only names of this level visible, whether or not the
515 	 * RangeFunction is explicitly marked LATERAL.  This is needed for SQL
516 	 * spec compliance in the case of UNNEST(), and seems useful on
517 	 * convenience grounds for all functions in FROM.
518 	 *
519 	 * (LATERAL can't nest within a single pstate level, so we don't need
520 	 * save/restore logic here.)
521 	 */
522 	Assert(!pstate->p_lateral_active);
523 	pstate->p_lateral_active = true;
524 
525 	/*
526 	 * Transform the raw expressions.
527 	 *
528 	 * While transforming, also save function names for possible use as alias
529 	 * and column names.  We use the same transformation rules as for a SELECT
530 	 * output expression.  For a FuncCall node, the result will be the
531 	 * function name, but it is possible for the grammar to hand back other
532 	 * node types.
533 	 *
534 	 * We have to get this info now, because FigureColname only works on raw
535 	 * parsetrees.  Actually deciding what to do with the names is left up to
536 	 * addRangeTableEntryForFunction.
537 	 *
538 	 * Likewise, collect column definition lists if there were any.  But
539 	 * complain if we find one here and the RangeFunction has one too.
540 	 */
541 	foreach(lc, r->functions)
542 	{
543 		List	   *pair = (List *) lfirst(lc);
544 		Node	   *fexpr;
545 		List	   *coldeflist;
546 		Node	   *newfexpr;
547 		Node	   *last_srf;
548 
549 		/* Disassemble the function-call/column-def-list pairs */
550 		Assert(list_length(pair) == 2);
551 		fexpr = (Node *) linitial(pair);
552 		coldeflist = (List *) lsecond(pair);
553 
554 		/*
555 		 * If we find a function call unnest() with more than one argument and
556 		 * no special decoration, transform it into separate unnest() calls on
557 		 * each argument.  This is a kluge, for sure, but it's less nasty than
558 		 * other ways of implementing the SQL-standard UNNEST() syntax.
559 		 *
560 		 * If there is any decoration (including a coldeflist), we don't
561 		 * transform, which probably means a no-such-function error later.  We
562 		 * could alternatively throw an error right now, but that doesn't seem
563 		 * tremendously helpful.  If someone is using any such decoration,
564 		 * then they're not using the SQL-standard syntax, and they're more
565 		 * likely expecting an un-tweaked function call.
566 		 *
567 		 * Note: the transformation changes a non-schema-qualified unnest()
568 		 * function name into schema-qualified pg_catalog.unnest().  This
569 		 * choice is also a bit debatable, but it seems reasonable to force
570 		 * use of built-in unnest() when we make this transformation.
571 		 */
572 		if (IsA(fexpr, FuncCall))
573 		{
574 			FuncCall   *fc = (FuncCall *) fexpr;
575 
576 			if (list_length(fc->funcname) == 1 &&
577 				strcmp(strVal(linitial(fc->funcname)), "unnest") == 0 &&
578 				list_length(fc->args) > 1 &&
579 				fc->agg_order == NIL &&
580 				fc->agg_filter == NULL &&
581 				!fc->agg_star &&
582 				!fc->agg_distinct &&
583 				!fc->func_variadic &&
584 				fc->over == NULL &&
585 				coldeflist == NIL)
586 			{
587 				ListCell   *lc;
588 
589 				foreach(lc, fc->args)
590 				{
591 					Node	   *arg = (Node *) lfirst(lc);
592 					FuncCall   *newfc;
593 
594 					last_srf = pstate->p_last_srf;
595 
596 					newfc = makeFuncCall(SystemFuncName("unnest"),
597 										 list_make1(arg),
598 										 fc->location);
599 
600 					newfexpr = transformExpr(pstate, (Node *) newfc,
601 											 EXPR_KIND_FROM_FUNCTION);
602 
603 					/* nodeFunctionscan.c requires SRFs to be at top level */
604 					if (pstate->p_last_srf != last_srf &&
605 						pstate->p_last_srf != newfexpr)
606 						ereport(ERROR,
607 								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
608 								 errmsg("set-returning functions must appear at top level of FROM"),
609 								 parser_errposition(pstate,
610 													exprLocation(pstate->p_last_srf))));
611 
612 					funcexprs = lappend(funcexprs, newfexpr);
613 
614 					funcnames = lappend(funcnames,
615 										FigureColname((Node *) newfc));
616 
617 					/* coldeflist is empty, so no error is possible */
618 
619 					coldeflists = lappend(coldeflists, coldeflist);
620 				}
621 				continue;		/* done with this function item */
622 			}
623 		}
624 
625 		/* normal case ... */
626 		last_srf = pstate->p_last_srf;
627 
628 		newfexpr = transformExpr(pstate, fexpr,
629 								 EXPR_KIND_FROM_FUNCTION);
630 
631 		/* nodeFunctionscan.c requires SRFs to be at top level */
632 		if (pstate->p_last_srf != last_srf &&
633 			pstate->p_last_srf != newfexpr)
634 			ereport(ERROR,
635 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
636 					 errmsg("set-returning functions must appear at top level of FROM"),
637 					 parser_errposition(pstate,
638 										exprLocation(pstate->p_last_srf))));
639 
640 		funcexprs = lappend(funcexprs, newfexpr);
641 
642 		funcnames = lappend(funcnames,
643 							FigureColname(fexpr));
644 
645 		if (coldeflist && r->coldeflist)
646 			ereport(ERROR,
647 					(errcode(ERRCODE_SYNTAX_ERROR),
648 					 errmsg("multiple column definition lists are not allowed for the same function"),
649 					 parser_errposition(pstate,
650 										exprLocation((Node *) r->coldeflist))));
651 
652 		coldeflists = lappend(coldeflists, coldeflist);
653 	}
654 
655 	pstate->p_lateral_active = false;
656 
657 	/*
658 	 * We must assign collations now so that the RTE exposes correct collation
659 	 * info for Vars created from it.
660 	 */
661 	assign_list_collations(pstate, funcexprs);
662 
663 	/*
664 	 * Install the top-level coldeflist if there was one (we already checked
665 	 * that there was no conflicting per-function coldeflist).
666 	 *
667 	 * We only allow this when there's a single function (even after UNNEST
668 	 * expansion) and no WITH ORDINALITY.  The reason for the latter
669 	 * restriction is that it's not real clear whether the ordinality column
670 	 * should be in the coldeflist, and users are too likely to make mistakes
671 	 * in one direction or the other.  Putting the coldeflist inside ROWS
672 	 * FROM() is much clearer in this case.
673 	 */
674 	if (r->coldeflist)
675 	{
676 		if (list_length(funcexprs) != 1)
677 		{
678 			if (r->is_rowsfrom)
679 				ereport(ERROR,
680 						(errcode(ERRCODE_SYNTAX_ERROR),
681 						 errmsg("ROWS FROM() with multiple functions cannot have a column definition list"),
682 						 errhint("Put a separate column definition list for each function inside ROWS FROM()."),
683 						 parser_errposition(pstate,
684 											exprLocation((Node *) r->coldeflist))));
685 			else
686 				ereport(ERROR,
687 						(errcode(ERRCODE_SYNTAX_ERROR),
688 						 errmsg("UNNEST() with multiple arguments cannot have a column definition list"),
689 						 errhint("Use separate UNNEST() calls inside ROWS FROM(), and attach a column definition list to each one."),
690 						 parser_errposition(pstate,
691 											exprLocation((Node *) r->coldeflist))));
692 		}
693 		if (r->ordinality)
694 			ereport(ERROR,
695 					(errcode(ERRCODE_SYNTAX_ERROR),
696 					 errmsg("WITH ORDINALITY cannot be used with a column definition list"),
697 					 errhint("Put the column definition list inside ROWS FROM()."),
698 					 parser_errposition(pstate,
699 										exprLocation((Node *) r->coldeflist))));
700 
701 		coldeflists = list_make1(r->coldeflist);
702 	}
703 
704 	/*
705 	 * Mark the RTE as LATERAL if the user said LATERAL explicitly, or if
706 	 * there are any lateral cross-references in it.
707 	 */
708 	is_lateral = r->lateral || contain_vars_of_level((Node *) funcexprs, 0);
709 
710 	/*
711 	 * OK, build an RTE for the function.
712 	 */
713 	rte = addRangeTableEntryForFunction(pstate,
714 										funcnames, funcexprs, coldeflists,
715 										r, is_lateral, true);
716 
717 	return rte;
718 }
719 
720 /*
721  * transformRangeTableFunc -
722  *			Transform a raw RangeTableFunc into TableFunc.
723  *
724  * Transform the namespace clauses, the document-generating expression, the
725  * row-generating expression, the column-generating expressions, and the
726  * default value expressions.
727  */
728 static RangeTblEntry *
transformRangeTableFunc(ParseState * pstate,RangeTableFunc * rtf)729 transformRangeTableFunc(ParseState *pstate, RangeTableFunc *rtf)
730 {
731 	TableFunc  *tf = makeNode(TableFunc);
732 	const char *constructName;
733 	Oid			docType;
734 	RangeTblEntry *rte;
735 	bool		is_lateral;
736 	ListCell   *col;
737 	char	  **names;
738 	int			colno;
739 
740 	/* Currently only XMLTABLE is supported */
741 	constructName = "XMLTABLE";
742 	docType = XMLOID;
743 
744 	/*
745 	 * We make lateral_only names of this level visible, whether or not the
746 	 * RangeTableFunc is explicitly marked LATERAL.  This is needed for SQL
747 	 * spec compliance and seems useful on convenience grounds for all
748 	 * functions in FROM.
749 	 *
750 	 * (LATERAL can't nest within a single pstate level, so we don't need
751 	 * save/restore logic here.)
752 	 */
753 	Assert(!pstate->p_lateral_active);
754 	pstate->p_lateral_active = true;
755 
756 	/* Transform and apply typecast to the row-generating expression ... */
757 	Assert(rtf->rowexpr != NULL);
758 	tf->rowexpr = coerce_to_specific_type(pstate,
759 										  transformExpr(pstate, rtf->rowexpr, EXPR_KIND_FROM_FUNCTION),
760 										  TEXTOID,
761 										  constructName);
762 	assign_expr_collations(pstate, tf->rowexpr);
763 
764 	/* ... and to the document itself */
765 	Assert(rtf->docexpr != NULL);
766 	tf->docexpr = coerce_to_specific_type(pstate,
767 										  transformExpr(pstate, rtf->docexpr, EXPR_KIND_FROM_FUNCTION),
768 										  docType,
769 										  constructName);
770 	assign_expr_collations(pstate, tf->docexpr);
771 
772 	/* undef ordinality column number */
773 	tf->ordinalitycol = -1;
774 
775 	/* Process column specs */
776 	names = palloc(sizeof(char *) * list_length(rtf->columns));
777 
778 	colno = 0;
779 	foreach(col, rtf->columns)
780 	{
781 		RangeTableFuncCol *rawc = (RangeTableFuncCol *) lfirst(col);
782 		Oid			typid;
783 		int32		typmod;
784 		Node	   *colexpr;
785 		Node	   *coldefexpr;
786 		int			j;
787 
788 		tf->colnames = lappend(tf->colnames,
789 							   makeString(pstrdup(rawc->colname)));
790 
791 		/*
792 		 * Determine the type and typmod for the new column. FOR ORDINALITY
793 		 * columns are INTEGER per spec; the others are user-specified.
794 		 */
795 		if (rawc->for_ordinality)
796 		{
797 			if (tf->ordinalitycol != -1)
798 				ereport(ERROR,
799 						(errcode(ERRCODE_SYNTAX_ERROR),
800 						 errmsg("only one FOR ORDINALITY column is allowed"),
801 						 parser_errposition(pstate, rawc->location)));
802 
803 			typid = INT4OID;
804 			typmod = -1;
805 			tf->ordinalitycol = colno;
806 		}
807 		else
808 		{
809 			if (rawc->typeName->setof)
810 				ereport(ERROR,
811 						(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
812 						 errmsg("column \"%s\" cannot be declared SETOF",
813 								rawc->colname),
814 						 parser_errposition(pstate, rawc->location)));
815 
816 			typenameTypeIdAndMod(pstate, rawc->typeName,
817 								 &typid, &typmod);
818 		}
819 
820 		tf->coltypes = lappend_oid(tf->coltypes, typid);
821 		tf->coltypmods = lappend_int(tf->coltypmods, typmod);
822 		tf->colcollations = lappend_oid(tf->colcollations,
823 										type_is_collatable(typid) ? DEFAULT_COLLATION_OID : InvalidOid);
824 
825 		/* Transform the PATH and DEFAULT expressions */
826 		if (rawc->colexpr)
827 		{
828 			colexpr = coerce_to_specific_type(pstate,
829 											  transformExpr(pstate, rawc->colexpr,
830 															EXPR_KIND_FROM_FUNCTION),
831 											  TEXTOID,
832 											  constructName);
833 			assign_expr_collations(pstate, colexpr);
834 		}
835 		else
836 			colexpr = NULL;
837 
838 		if (rawc->coldefexpr)
839 		{
840 			coldefexpr = coerce_to_specific_type_typmod(pstate,
841 														transformExpr(pstate, rawc->coldefexpr,
842 																	  EXPR_KIND_FROM_FUNCTION),
843 														typid, typmod,
844 														constructName);
845 			assign_expr_collations(pstate, coldefexpr);
846 		}
847 		else
848 			coldefexpr = NULL;
849 
850 		tf->colexprs = lappend(tf->colexprs, colexpr);
851 		tf->coldefexprs = lappend(tf->coldefexprs, coldefexpr);
852 
853 		if (rawc->is_not_null)
854 			tf->notnulls = bms_add_member(tf->notnulls, colno);
855 
856 		/* make sure column names are unique */
857 		for (j = 0; j < colno; j++)
858 			if (strcmp(names[j], rawc->colname) == 0)
859 				ereport(ERROR,
860 						(errcode(ERRCODE_SYNTAX_ERROR),
861 						 errmsg("column name \"%s\" is not unique",
862 								rawc->colname),
863 						 parser_errposition(pstate, rawc->location)));
864 		names[colno] = rawc->colname;
865 
866 		colno++;
867 	}
868 	pfree(names);
869 
870 	/* Namespaces, if any, also need to be transformed */
871 	if (rtf->namespaces != NIL)
872 	{
873 		ListCell   *ns;
874 		ListCell   *lc2;
875 		List	   *ns_uris = NIL;
876 		List	   *ns_names = NIL;
877 		bool		default_ns_seen = false;
878 
879 		foreach(ns, rtf->namespaces)
880 		{
881 			ResTarget  *r = (ResTarget *) lfirst(ns);
882 			Node	   *ns_uri;
883 
884 			Assert(IsA(r, ResTarget));
885 			ns_uri = transformExpr(pstate, r->val, EXPR_KIND_FROM_FUNCTION);
886 			ns_uri = coerce_to_specific_type(pstate, ns_uri,
887 											 TEXTOID, constructName);
888 			assign_expr_collations(pstate, ns_uri);
889 			ns_uris = lappend(ns_uris, ns_uri);
890 
891 			/* Verify consistency of name list: no dupes, only one DEFAULT */
892 			if (r->name != NULL)
893 			{
894 				foreach(lc2, ns_names)
895 				{
896 					Value	   *ns_node = (Value *) lfirst(lc2);
897 
898 					if (ns_node == NULL)
899 						continue;
900 					if (strcmp(strVal(ns_node), r->name) == 0)
901 						ereport(ERROR,
902 								(errcode(ERRCODE_SYNTAX_ERROR),
903 								 errmsg("namespace name \"%s\" is not unique",
904 										r->name),
905 								 parser_errposition(pstate, r->location)));
906 				}
907 			}
908 			else
909 			{
910 				if (default_ns_seen)
911 					ereport(ERROR,
912 							(errcode(ERRCODE_SYNTAX_ERROR),
913 							 errmsg("only one default namespace is allowed"),
914 							 parser_errposition(pstate, r->location)));
915 				default_ns_seen = true;
916 			}
917 
918 			/* We represent DEFAULT by a null pointer */
919 			ns_names = lappend(ns_names,
920 							   r->name ? makeString(r->name) : NULL);
921 		}
922 
923 		tf->ns_uris = ns_uris;
924 		tf->ns_names = ns_names;
925 	}
926 
927 	tf->location = rtf->location;
928 
929 	pstate->p_lateral_active = false;
930 
931 	/*
932 	 * Mark the RTE as LATERAL if the user said LATERAL explicitly, or if
933 	 * there are any lateral cross-references in it.
934 	 */
935 	is_lateral = rtf->lateral || contain_vars_of_level((Node *) tf, 0);
936 
937 	rte = addRangeTableEntryForTableFunc(pstate,
938 										 tf, rtf->alias, is_lateral, true);
939 
940 	return rte;
941 }
942 
943 /*
944  * transformRangeTableSample --- transform a TABLESAMPLE clause
945  *
946  * Caller has already transformed rts->relation, we just have to validate
947  * the remaining fields and create a TableSampleClause node.
948  */
949 static TableSampleClause *
transformRangeTableSample(ParseState * pstate,RangeTableSample * rts)950 transformRangeTableSample(ParseState *pstate, RangeTableSample *rts)
951 {
952 	TableSampleClause *tablesample;
953 	Oid			handlerOid;
954 	Oid			funcargtypes[1];
955 	TsmRoutine *tsm;
956 	List	   *fargs;
957 	ListCell   *larg,
958 			   *ltyp;
959 
960 	/*
961 	 * To validate the sample method name, look up the handler function, which
962 	 * has the same name, one dummy INTERNAL argument, and a result type of
963 	 * tsm_handler.  (Note: tablesample method names are not schema-qualified
964 	 * in the SQL standard; but since they are just functions to us, we allow
965 	 * schema qualification to resolve any potential ambiguity.)
966 	 */
967 	funcargtypes[0] = INTERNALOID;
968 
969 	handlerOid = LookupFuncName(rts->method, 1, funcargtypes, true);
970 
971 	/* we want error to complain about no-such-method, not no-such-function */
972 	if (!OidIsValid(handlerOid))
973 		ereport(ERROR,
974 				(errcode(ERRCODE_UNDEFINED_OBJECT),
975 				 errmsg("tablesample method %s does not exist",
976 						NameListToString(rts->method)),
977 				 parser_errposition(pstate, rts->location)));
978 
979 	/* check that handler has correct return type */
980 	if (get_func_rettype(handlerOid) != TSM_HANDLEROID)
981 		ereport(ERROR,
982 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
983 				 errmsg("function %s must return type %s",
984 						NameListToString(rts->method), "tsm_handler"),
985 				 parser_errposition(pstate, rts->location)));
986 
987 	/* OK, run the handler to get TsmRoutine, for argument type info */
988 	tsm = GetTsmRoutine(handlerOid);
989 
990 	tablesample = makeNode(TableSampleClause);
991 	tablesample->tsmhandler = handlerOid;
992 
993 	/* check user provided the expected number of arguments */
994 	if (list_length(rts->args) != list_length(tsm->parameterTypes))
995 		ereport(ERROR,
996 				(errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
997 				 errmsg_plural("tablesample method %s requires %d argument, not %d",
998 							   "tablesample method %s requires %d arguments, not %d",
999 							   list_length(tsm->parameterTypes),
1000 							   NameListToString(rts->method),
1001 							   list_length(tsm->parameterTypes),
1002 							   list_length(rts->args)),
1003 				 parser_errposition(pstate, rts->location)));
1004 
1005 	/*
1006 	 * Transform the arguments, typecasting them as needed.  Note we must also
1007 	 * assign collations now, because assign_query_collations() doesn't
1008 	 * examine any substructure of RTEs.
1009 	 */
1010 	fargs = NIL;
1011 	forboth(larg, rts->args, ltyp, tsm->parameterTypes)
1012 	{
1013 		Node	   *arg = (Node *) lfirst(larg);
1014 		Oid			argtype = lfirst_oid(ltyp);
1015 
1016 		arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
1017 		arg = coerce_to_specific_type(pstate, arg, argtype, "TABLESAMPLE");
1018 		assign_expr_collations(pstate, arg);
1019 		fargs = lappend(fargs, arg);
1020 	}
1021 	tablesample->args = fargs;
1022 
1023 	/* Process REPEATABLE (seed) */
1024 	if (rts->repeatable != NULL)
1025 	{
1026 		Node	   *arg;
1027 
1028 		if (!tsm->repeatable_across_queries)
1029 			ereport(ERROR,
1030 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1031 					 errmsg("tablesample method %s does not support REPEATABLE",
1032 							NameListToString(rts->method)),
1033 					 parser_errposition(pstate, rts->location)));
1034 
1035 		arg = transformExpr(pstate, rts->repeatable, EXPR_KIND_FROM_FUNCTION);
1036 		arg = coerce_to_specific_type(pstate, arg, FLOAT8OID, "REPEATABLE");
1037 		assign_expr_collations(pstate, arg);
1038 		tablesample->repeatable = (Expr *) arg;
1039 	}
1040 	else
1041 		tablesample->repeatable = NULL;
1042 
1043 	return tablesample;
1044 }
1045 
1046 /*
1047  * getRTEForSpecialRelationTypes
1048  *
1049  * If given RangeVar refers to a CTE or an EphemeralNamedRelation,
1050  * build and return an appropriate RTE, otherwise return NULL
1051  */
1052 static RangeTblEntry *
getRTEForSpecialRelationTypes(ParseState * pstate,RangeVar * rv)1053 getRTEForSpecialRelationTypes(ParseState *pstate, RangeVar *rv)
1054 {
1055 	CommonTableExpr *cte;
1056 	Index		levelsup;
1057 	RangeTblEntry *rte;
1058 
1059 	/*
1060 	 * if it is a qualified name, it can't be a CTE or tuplestore reference
1061 	 */
1062 	if (rv->schemaname)
1063 		return NULL;
1064 
1065 	cte = scanNameSpaceForCTE(pstate, rv->relname, &levelsup);
1066 	if (cte)
1067 		rte = addRangeTableEntryForCTE(pstate, cte, levelsup, rv, true);
1068 	else if (scanNameSpaceForENR(pstate, rv->relname))
1069 		rte = addRangeTableEntryForENR(pstate, rv, true);
1070 	else
1071 		rte = NULL;
1072 
1073 	return rte;
1074 }
1075 
1076 /*
1077  * transformFromClauseItem -
1078  *	  Transform a FROM-clause item, adding any required entries to the
1079  *	  range table list being built in the ParseState, and return the
1080  *	  transformed item ready to include in the joinlist.  Also build a
1081  *	  ParseNamespaceItem list describing the names exposed by this item.
1082  *	  This routine can recurse to handle SQL92 JOIN expressions.
1083  *
1084  * The function return value is the node to add to the jointree (a
1085  * RangeTblRef or JoinExpr).  Additional output parameters are:
1086  *
1087  * *top_rte: receives the RTE corresponding to the jointree item.
1088  * (We could extract this from the function return node, but it saves cycles
1089  * to pass it back separately.)
1090  *
1091  * *top_rti: receives the rangetable index of top_rte.  (Ditto.)
1092  *
1093  * *namespace: receives a List of ParseNamespaceItems for the RTEs exposed
1094  * as table/column names by this item.  (The lateral_only flags in these items
1095  * are indeterminate and should be explicitly set by the caller before use.)
1096  */
1097 static Node *
transformFromClauseItem(ParseState * pstate,Node * n,RangeTblEntry ** top_rte,int * top_rti,List ** namespace)1098 transformFromClauseItem(ParseState *pstate, Node *n,
1099 						RangeTblEntry **top_rte, int *top_rti,
1100 						List **namespace)
1101 {
1102 	if (IsA(n, RangeVar))
1103 	{
1104 		/* Plain relation reference, or perhaps a CTE reference */
1105 		RangeVar   *rv = (RangeVar *) n;
1106 		RangeTblRef *rtr;
1107 		RangeTblEntry *rte;
1108 		int			rtindex;
1109 
1110 		/* Check if it's a CTE or tuplestore reference */
1111 		rte = getRTEForSpecialRelationTypes(pstate, rv);
1112 
1113 		/* if not found above, must be a table reference */
1114 		if (!rte)
1115 			rte = transformTableEntry(pstate, rv);
1116 
1117 		/* assume new rte is at end */
1118 		rtindex = list_length(pstate->p_rtable);
1119 		Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
1120 		*top_rte = rte;
1121 		*top_rti = rtindex;
1122 		*namespace = list_make1(makeDefaultNSItem(rte));
1123 		rtr = makeNode(RangeTblRef);
1124 		rtr->rtindex = rtindex;
1125 		return (Node *) rtr;
1126 	}
1127 	else if (IsA(n, RangeSubselect))
1128 	{
1129 		/* sub-SELECT is like a plain relation */
1130 		RangeTblRef *rtr;
1131 		RangeTblEntry *rte;
1132 		int			rtindex;
1133 
1134 		rte = transformRangeSubselect(pstate, (RangeSubselect *) n);
1135 		/* assume new rte is at end */
1136 		rtindex = list_length(pstate->p_rtable);
1137 		Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
1138 		*top_rte = rte;
1139 		*top_rti = rtindex;
1140 		*namespace = list_make1(makeDefaultNSItem(rte));
1141 		rtr = makeNode(RangeTblRef);
1142 		rtr->rtindex = rtindex;
1143 		return (Node *) rtr;
1144 	}
1145 	else if (IsA(n, RangeFunction))
1146 	{
1147 		/* function is like a plain relation */
1148 		RangeTblRef *rtr;
1149 		RangeTblEntry *rte;
1150 		int			rtindex;
1151 
1152 		rte = transformRangeFunction(pstate, (RangeFunction *) n);
1153 		/* assume new rte is at end */
1154 		rtindex = list_length(pstate->p_rtable);
1155 		Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
1156 		*top_rte = rte;
1157 		*top_rti = rtindex;
1158 		*namespace = list_make1(makeDefaultNSItem(rte));
1159 		rtr = makeNode(RangeTblRef);
1160 		rtr->rtindex = rtindex;
1161 		return (Node *) rtr;
1162 	}
1163 	else if (IsA(n, RangeTableFunc))
1164 	{
1165 		/* table function is like a plain relation */
1166 		RangeTblRef *rtr;
1167 		RangeTblEntry *rte;
1168 		int			rtindex;
1169 
1170 		rte = transformRangeTableFunc(pstate, (RangeTableFunc *) n);
1171 		/* assume new rte is at end */
1172 		rtindex = list_length(pstate->p_rtable);
1173 		Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
1174 		*top_rte = rte;
1175 		*top_rti = rtindex;
1176 		*namespace = list_make1(makeDefaultNSItem(rte));
1177 		rtr = makeNode(RangeTblRef);
1178 		rtr->rtindex = rtindex;
1179 		return (Node *) rtr;
1180 	}
1181 	else if (IsA(n, RangeTableSample))
1182 	{
1183 		/* TABLESAMPLE clause (wrapping some other valid FROM node) */
1184 		RangeTableSample *rts = (RangeTableSample *) n;
1185 		Node	   *rel;
1186 		RangeTblRef *rtr;
1187 		RangeTblEntry *rte;
1188 
1189 		/* Recursively transform the contained relation */
1190 		rel = transformFromClauseItem(pstate, rts->relation,
1191 									  top_rte, top_rti, namespace);
1192 		/* Currently, grammar could only return a RangeVar as contained rel */
1193 		rtr = castNode(RangeTblRef, rel);
1194 		rte = rt_fetch(rtr->rtindex, pstate->p_rtable);
1195 		/* We only support this on plain relations and matviews */
1196 		if (rte->relkind != RELKIND_RELATION &&
1197 			rte->relkind != RELKIND_MATVIEW &&
1198 			rte->relkind != RELKIND_PARTITIONED_TABLE)
1199 			ereport(ERROR,
1200 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1201 					 errmsg("TABLESAMPLE clause can only be applied to tables and materialized views"),
1202 					 parser_errposition(pstate, exprLocation(rts->relation))));
1203 
1204 		/* Transform TABLESAMPLE details and attach to the RTE */
1205 		rte->tablesample = transformRangeTableSample(pstate, rts);
1206 		return (Node *) rtr;
1207 	}
1208 	else if (IsA(n, JoinExpr))
1209 	{
1210 		/* A newfangled join expression */
1211 		JoinExpr   *j = (JoinExpr *) n;
1212 		RangeTblEntry *l_rte;
1213 		RangeTblEntry *r_rte;
1214 		int			l_rtindex;
1215 		int			r_rtindex;
1216 		List	   *l_namespace,
1217 				   *r_namespace,
1218 				   *my_namespace,
1219 				   *l_colnames,
1220 				   *r_colnames,
1221 				   *res_colnames,
1222 				   *l_colvars,
1223 				   *r_colvars,
1224 				   *res_colvars;
1225 		bool		lateral_ok;
1226 		int			sv_namespace_length;
1227 		RangeTblEntry *rte;
1228 		int			k;
1229 
1230 		/*
1231 		 * Recursively process the left subtree, then the right.  We must do
1232 		 * it in this order for correct visibility of LATERAL references.
1233 		 */
1234 		j->larg = transformFromClauseItem(pstate, j->larg,
1235 										  &l_rte,
1236 										  &l_rtindex,
1237 										  &l_namespace);
1238 
1239 		/*
1240 		 * Make the left-side RTEs available for LATERAL access within the
1241 		 * right side, by temporarily adding them to the pstate's namespace
1242 		 * list.  Per SQL:2008, if the join type is not INNER or LEFT then the
1243 		 * left-side names must still be exposed, but it's an error to
1244 		 * reference them.  (Stupid design, but that's what it says.)  Hence,
1245 		 * we always push them into the namespace, but mark them as not
1246 		 * lateral_ok if the jointype is wrong.
1247 		 *
1248 		 * Notice that we don't require the merged namespace list to be
1249 		 * conflict-free.  See the comments for scanNameSpaceForRefname().
1250 		 *
1251 		 * NB: this coding relies on the fact that list_concat is not
1252 		 * destructive to its second argument.
1253 		 */
1254 		lateral_ok = (j->jointype == JOIN_INNER || j->jointype == JOIN_LEFT);
1255 		setNamespaceLateralState(l_namespace, true, lateral_ok);
1256 
1257 		sv_namespace_length = list_length(pstate->p_namespace);
1258 		pstate->p_namespace = list_concat(pstate->p_namespace, l_namespace);
1259 
1260 		/* And now we can process the RHS */
1261 		j->rarg = transformFromClauseItem(pstate, j->rarg,
1262 										  &r_rte,
1263 										  &r_rtindex,
1264 										  &r_namespace);
1265 
1266 		/* Remove the left-side RTEs from the namespace list again */
1267 		pstate->p_namespace = list_truncate(pstate->p_namespace,
1268 											sv_namespace_length);
1269 
1270 		/*
1271 		 * Check for conflicting refnames in left and right subtrees. Must do
1272 		 * this because higher levels will assume I hand back a self-
1273 		 * consistent namespace list.
1274 		 */
1275 		checkNameSpaceConflicts(pstate, l_namespace, r_namespace);
1276 
1277 		/*
1278 		 * Generate combined namespace info for possible use below.
1279 		 */
1280 		my_namespace = list_concat(l_namespace, r_namespace);
1281 
1282 		/*
1283 		 * Extract column name and var lists from both subtrees
1284 		 *
1285 		 * Note: expandRTE returns new lists, safe for me to modify
1286 		 */
1287 		expandRTE(l_rte, l_rtindex, 0, -1, false,
1288 				  &l_colnames, &l_colvars);
1289 		expandRTE(r_rte, r_rtindex, 0, -1, false,
1290 				  &r_colnames, &r_colvars);
1291 
1292 		/*
1293 		 * Natural join does not explicitly specify columns; must generate
1294 		 * columns to join. Need to run through the list of columns from each
1295 		 * table or join result and match up the column names. Use the first
1296 		 * table, and check every column in the second table for a match.
1297 		 * (We'll check that the matches were unique later on.) The result of
1298 		 * this step is a list of column names just like an explicitly-written
1299 		 * USING list.
1300 		 */
1301 		if (j->isNatural)
1302 		{
1303 			List	   *rlist = NIL;
1304 			ListCell   *lx,
1305 					   *rx;
1306 
1307 			Assert(j->usingClause == NIL);	/* shouldn't have USING() too */
1308 
1309 			foreach(lx, l_colnames)
1310 			{
1311 				char	   *l_colname = strVal(lfirst(lx));
1312 				Value	   *m_name = NULL;
1313 
1314 				foreach(rx, r_colnames)
1315 				{
1316 					char	   *r_colname = strVal(lfirst(rx));
1317 
1318 					if (strcmp(l_colname, r_colname) == 0)
1319 					{
1320 						m_name = makeString(l_colname);
1321 						break;
1322 					}
1323 				}
1324 
1325 				/* matched a right column? then keep as join column... */
1326 				if (m_name != NULL)
1327 					rlist = lappend(rlist, m_name);
1328 			}
1329 
1330 			j->usingClause = rlist;
1331 		}
1332 
1333 		/*
1334 		 * Now transform the join qualifications, if any.
1335 		 */
1336 		res_colnames = NIL;
1337 		res_colvars = NIL;
1338 
1339 		if (j->usingClause)
1340 		{
1341 			/*
1342 			 * JOIN/USING (or NATURAL JOIN, as transformed above). Transform
1343 			 * the list into an explicit ON-condition, and generate a list of
1344 			 * merged result columns.
1345 			 */
1346 			List	   *ucols = j->usingClause;
1347 			List	   *l_usingvars = NIL;
1348 			List	   *r_usingvars = NIL;
1349 			ListCell   *ucol;
1350 
1351 			Assert(j->quals == NULL);	/* shouldn't have ON() too */
1352 
1353 			foreach(ucol, ucols)
1354 			{
1355 				char	   *u_colname = strVal(lfirst(ucol));
1356 				ListCell   *col;
1357 				int			ndx;
1358 				int			l_index = -1;
1359 				int			r_index = -1;
1360 				Var		   *l_colvar,
1361 						   *r_colvar;
1362 
1363 				/* Check for USING(foo,foo) */
1364 				foreach(col, res_colnames)
1365 				{
1366 					char	   *res_colname = strVal(lfirst(col));
1367 
1368 					if (strcmp(res_colname, u_colname) == 0)
1369 						ereport(ERROR,
1370 								(errcode(ERRCODE_DUPLICATE_COLUMN),
1371 								 errmsg("column name \"%s\" appears more than once in USING clause",
1372 										u_colname)));
1373 				}
1374 
1375 				/* Find it in left input */
1376 				ndx = 0;
1377 				foreach(col, l_colnames)
1378 				{
1379 					char	   *l_colname = strVal(lfirst(col));
1380 
1381 					if (strcmp(l_colname, u_colname) == 0)
1382 					{
1383 						if (l_index >= 0)
1384 							ereport(ERROR,
1385 									(errcode(ERRCODE_AMBIGUOUS_COLUMN),
1386 									 errmsg("common column name \"%s\" appears more than once in left table",
1387 											u_colname)));
1388 						l_index = ndx;
1389 					}
1390 					ndx++;
1391 				}
1392 				if (l_index < 0)
1393 					ereport(ERROR,
1394 							(errcode(ERRCODE_UNDEFINED_COLUMN),
1395 							 errmsg("column \"%s\" specified in USING clause does not exist in left table",
1396 									u_colname)));
1397 
1398 				/* Find it in right input */
1399 				ndx = 0;
1400 				foreach(col, r_colnames)
1401 				{
1402 					char	   *r_colname = strVal(lfirst(col));
1403 
1404 					if (strcmp(r_colname, u_colname) == 0)
1405 					{
1406 						if (r_index >= 0)
1407 							ereport(ERROR,
1408 									(errcode(ERRCODE_AMBIGUOUS_COLUMN),
1409 									 errmsg("common column name \"%s\" appears more than once in right table",
1410 											u_colname)));
1411 						r_index = ndx;
1412 					}
1413 					ndx++;
1414 				}
1415 				if (r_index < 0)
1416 					ereport(ERROR,
1417 							(errcode(ERRCODE_UNDEFINED_COLUMN),
1418 							 errmsg("column \"%s\" specified in USING clause does not exist in right table",
1419 									u_colname)));
1420 
1421 				l_colvar = list_nth(l_colvars, l_index);
1422 				l_usingvars = lappend(l_usingvars, l_colvar);
1423 				r_colvar = list_nth(r_colvars, r_index);
1424 				r_usingvars = lappend(r_usingvars, r_colvar);
1425 
1426 				res_colnames = lappend(res_colnames, lfirst(ucol));
1427 				res_colvars = lappend(res_colvars,
1428 									  buildMergedJoinVar(pstate,
1429 														 j->jointype,
1430 														 l_colvar,
1431 														 r_colvar));
1432 			}
1433 
1434 			j->quals = transformJoinUsingClause(pstate,
1435 												l_rte,
1436 												r_rte,
1437 												l_usingvars,
1438 												r_usingvars);
1439 		}
1440 		else if (j->quals)
1441 		{
1442 			/* User-written ON-condition; transform it */
1443 			j->quals = transformJoinOnClause(pstate, j, my_namespace);
1444 		}
1445 		else
1446 		{
1447 			/* CROSS JOIN: no quals */
1448 		}
1449 
1450 		/* Add remaining columns from each side to the output columns */
1451 		extractRemainingColumns(res_colnames,
1452 								l_colnames, l_colvars,
1453 								&l_colnames, &l_colvars);
1454 		extractRemainingColumns(res_colnames,
1455 								r_colnames, r_colvars,
1456 								&r_colnames, &r_colvars);
1457 		res_colnames = list_concat(res_colnames, l_colnames);
1458 		res_colvars = list_concat(res_colvars, l_colvars);
1459 		res_colnames = list_concat(res_colnames, r_colnames);
1460 		res_colvars = list_concat(res_colvars, r_colvars);
1461 
1462 		/*
1463 		 * Check alias (AS clause), if any.
1464 		 */
1465 		if (j->alias)
1466 		{
1467 			if (j->alias->colnames != NIL)
1468 			{
1469 				if (list_length(j->alias->colnames) > list_length(res_colnames))
1470 					ereport(ERROR,
1471 							(errcode(ERRCODE_SYNTAX_ERROR),
1472 							 errmsg("column alias list for \"%s\" has too many entries",
1473 									j->alias->aliasname)));
1474 			}
1475 		}
1476 
1477 		/*
1478 		 * Now build an RTE for the result of the join
1479 		 */
1480 		rte = addRangeTableEntryForJoin(pstate,
1481 										res_colnames,
1482 										j->jointype,
1483 										res_colvars,
1484 										j->alias,
1485 										true);
1486 
1487 		/* assume new rte is at end */
1488 		j->rtindex = list_length(pstate->p_rtable);
1489 		Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
1490 
1491 		*top_rte = rte;
1492 		*top_rti = j->rtindex;
1493 
1494 		/* make a matching link to the JoinExpr for later use */
1495 		for (k = list_length(pstate->p_joinexprs) + 1; k < j->rtindex; k++)
1496 			pstate->p_joinexprs = lappend(pstate->p_joinexprs, NULL);
1497 		pstate->p_joinexprs = lappend(pstate->p_joinexprs, j);
1498 		Assert(list_length(pstate->p_joinexprs) == j->rtindex);
1499 
1500 		/*
1501 		 * Prepare returned namespace list.  If the JOIN has an alias then it
1502 		 * hides the contained RTEs completely; otherwise, the contained RTEs
1503 		 * are still visible as table names, but are not visible for
1504 		 * unqualified column-name access.
1505 		 *
1506 		 * Note: if there are nested alias-less JOINs, the lower-level ones
1507 		 * will remain in the list although they have neither p_rel_visible
1508 		 * nor p_cols_visible set.  We could delete such list items, but it's
1509 		 * unclear that it's worth expending cycles to do so.
1510 		 */
1511 		if (j->alias != NULL)
1512 			my_namespace = NIL;
1513 		else
1514 			setNamespaceColumnVisibility(my_namespace, false);
1515 
1516 		/*
1517 		 * The join RTE itself is always made visible for unqualified column
1518 		 * names.  It's visible as a relation name only if it has an alias.
1519 		 */
1520 		*namespace = lappend(my_namespace,
1521 							 makeNamespaceItem(rte,
1522 											   (j->alias != NULL),
1523 											   true,
1524 											   false,
1525 											   true));
1526 
1527 		return (Node *) j;
1528 	}
1529 	else
1530 		elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
1531 	return NULL;				/* can't get here, keep compiler quiet */
1532 }
1533 
1534 /*
1535  * buildMergedJoinVar -
1536  *	  generate a suitable replacement expression for a merged join column
1537  */
1538 static Node *
buildMergedJoinVar(ParseState * pstate,JoinType jointype,Var * l_colvar,Var * r_colvar)1539 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
1540 				   Var *l_colvar, Var *r_colvar)
1541 {
1542 	Oid			outcoltype;
1543 	int32		outcoltypmod;
1544 	Node	   *l_node,
1545 			   *r_node,
1546 			   *res_node;
1547 
1548 	/*
1549 	 * Choose output type if input types are dissimilar.
1550 	 */
1551 	outcoltype = l_colvar->vartype;
1552 	outcoltypmod = l_colvar->vartypmod;
1553 	if (outcoltype != r_colvar->vartype)
1554 	{
1555 		outcoltype = select_common_type(pstate,
1556 										list_make2(l_colvar, r_colvar),
1557 										"JOIN/USING",
1558 										NULL);
1559 		outcoltypmod = -1;		/* ie, unknown */
1560 	}
1561 	else if (outcoltypmod != r_colvar->vartypmod)
1562 	{
1563 		/* same type, but not same typmod */
1564 		outcoltypmod = -1;		/* ie, unknown */
1565 	}
1566 
1567 	/*
1568 	 * Insert coercion functions if needed.  Note that a difference in typmod
1569 	 * can only happen if input has typmod but outcoltypmod is -1. In that
1570 	 * case we insert a RelabelType to clearly mark that result's typmod is
1571 	 * not same as input.  We never need coerce_type_typmod.
1572 	 */
1573 	if (l_colvar->vartype != outcoltype)
1574 		l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
1575 							 outcoltype, outcoltypmod,
1576 							 COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
1577 	else if (l_colvar->vartypmod != outcoltypmod)
1578 		l_node = (Node *) makeRelabelType((Expr *) l_colvar,
1579 										  outcoltype, outcoltypmod,
1580 										  InvalidOid,	/* fixed below */
1581 										  COERCE_IMPLICIT_CAST);
1582 	else
1583 		l_node = (Node *) l_colvar;
1584 
1585 	if (r_colvar->vartype != outcoltype)
1586 		r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
1587 							 outcoltype, outcoltypmod,
1588 							 COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
1589 	else if (r_colvar->vartypmod != outcoltypmod)
1590 		r_node = (Node *) makeRelabelType((Expr *) r_colvar,
1591 										  outcoltype, outcoltypmod,
1592 										  InvalidOid,	/* fixed below */
1593 										  COERCE_IMPLICIT_CAST);
1594 	else
1595 		r_node = (Node *) r_colvar;
1596 
1597 	/*
1598 	 * Choose what to emit
1599 	 */
1600 	switch (jointype)
1601 	{
1602 		case JOIN_INNER:
1603 
1604 			/*
1605 			 * We can use either var; prefer non-coerced one if available.
1606 			 */
1607 			if (IsA(l_node, Var))
1608 				res_node = l_node;
1609 			else if (IsA(r_node, Var))
1610 				res_node = r_node;
1611 			else
1612 				res_node = l_node;
1613 			break;
1614 		case JOIN_LEFT:
1615 			/* Always use left var */
1616 			res_node = l_node;
1617 			break;
1618 		case JOIN_RIGHT:
1619 			/* Always use right var */
1620 			res_node = r_node;
1621 			break;
1622 		case JOIN_FULL:
1623 			{
1624 				/*
1625 				 * Here we must build a COALESCE expression to ensure that the
1626 				 * join output is non-null if either input is.
1627 				 */
1628 				CoalesceExpr *c = makeNode(CoalesceExpr);
1629 
1630 				c->coalescetype = outcoltype;
1631 				/* coalescecollid will get set below */
1632 				c->args = list_make2(l_node, r_node);
1633 				c->location = -1;
1634 				res_node = (Node *) c;
1635 				break;
1636 			}
1637 		default:
1638 			elog(ERROR, "unrecognized join type: %d", (int) jointype);
1639 			res_node = NULL;	/* keep compiler quiet */
1640 			break;
1641 	}
1642 
1643 	/*
1644 	 * Apply assign_expr_collations to fix up the collation info in the
1645 	 * coercion and CoalesceExpr nodes, if we made any.  This must be done now
1646 	 * so that the join node's alias vars show correct collation info.
1647 	 */
1648 	assign_expr_collations(pstate, res_node);
1649 
1650 	return res_node;
1651 }
1652 
1653 /*
1654  * makeNamespaceItem -
1655  *	  Convenience subroutine to construct a ParseNamespaceItem.
1656  */
1657 static ParseNamespaceItem *
makeNamespaceItem(RangeTblEntry * rte,bool rel_visible,bool cols_visible,bool lateral_only,bool lateral_ok)1658 makeNamespaceItem(RangeTblEntry *rte, bool rel_visible, bool cols_visible,
1659 				  bool lateral_only, bool lateral_ok)
1660 {
1661 	ParseNamespaceItem *nsitem;
1662 
1663 	nsitem = (ParseNamespaceItem *) palloc(sizeof(ParseNamespaceItem));
1664 	nsitem->p_rte = rte;
1665 	nsitem->p_rel_visible = rel_visible;
1666 	nsitem->p_cols_visible = cols_visible;
1667 	nsitem->p_lateral_only = lateral_only;
1668 	nsitem->p_lateral_ok = lateral_ok;
1669 	return nsitem;
1670 }
1671 
1672 /*
1673  * setNamespaceColumnVisibility -
1674  *	  Convenience subroutine to update cols_visible flags in a namespace list.
1675  */
1676 static void
setNamespaceColumnVisibility(List * namespace,bool cols_visible)1677 setNamespaceColumnVisibility(List *namespace, bool cols_visible)
1678 {
1679 	ListCell   *lc;
1680 
1681 	foreach(lc, namespace)
1682 	{
1683 		ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(lc);
1684 
1685 		nsitem->p_cols_visible = cols_visible;
1686 	}
1687 }
1688 
1689 /*
1690  * setNamespaceLateralState -
1691  *	  Convenience subroutine to update LATERAL flags in a namespace list.
1692  */
1693 static void
setNamespaceLateralState(List * namespace,bool lateral_only,bool lateral_ok)1694 setNamespaceLateralState(List *namespace, bool lateral_only, bool lateral_ok)
1695 {
1696 	ListCell   *lc;
1697 
1698 	foreach(lc, namespace)
1699 	{
1700 		ParseNamespaceItem *nsitem = (ParseNamespaceItem *) lfirst(lc);
1701 
1702 		nsitem->p_lateral_only = lateral_only;
1703 		nsitem->p_lateral_ok = lateral_ok;
1704 	}
1705 }
1706 
1707 
1708 /*
1709  * transformWhereClause -
1710  *	  Transform the qualification and make sure it is of type boolean.
1711  *	  Used for WHERE and allied clauses.
1712  *
1713  * constructName does not affect the semantics, but is used in error messages
1714  */
1715 Node *
transformWhereClause(ParseState * pstate,Node * clause,ParseExprKind exprKind,const char * constructName)1716 transformWhereClause(ParseState *pstate, Node *clause,
1717 					 ParseExprKind exprKind, const char *constructName)
1718 {
1719 	Node	   *qual;
1720 
1721 	if (clause == NULL)
1722 		return NULL;
1723 
1724 	qual = transformExpr(pstate, clause, exprKind);
1725 
1726 	qual = coerce_to_boolean(pstate, qual, constructName);
1727 
1728 	return qual;
1729 }
1730 
1731 
1732 /*
1733  * transformLimitClause -
1734  *	  Transform the expression and make sure it is of type bigint.
1735  *	  Used for LIMIT and allied clauses.
1736  *
1737  * Note: as of Postgres 8.2, LIMIT expressions are expected to yield int8,
1738  * rather than int4 as before.
1739  *
1740  * constructName does not affect the semantics, but is used in error messages
1741  */
1742 Node *
transformLimitClause(ParseState * pstate,Node * clause,ParseExprKind exprKind,const char * constructName)1743 transformLimitClause(ParseState *pstate, Node *clause,
1744 					 ParseExprKind exprKind, const char *constructName)
1745 {
1746 	Node	   *qual;
1747 
1748 	if (clause == NULL)
1749 		return NULL;
1750 
1751 	qual = transformExpr(pstate, clause, exprKind);
1752 
1753 	qual = coerce_to_specific_type(pstate, qual, INT8OID, constructName);
1754 
1755 	/* LIMIT can't refer to any variables of the current query */
1756 	checkExprIsVarFree(pstate, qual, constructName);
1757 
1758 	return qual;
1759 }
1760 
1761 /*
1762  * checkExprIsVarFree
1763  *		Check that given expr has no Vars of the current query level
1764  *		(aggregates and window functions should have been rejected already).
1765  *
1766  * This is used to check expressions that have to have a consistent value
1767  * across all rows of the query, such as a LIMIT.  Arguably it should reject
1768  * volatile functions, too, but we don't do that --- whatever value the
1769  * function gives on first execution is what you get.
1770  *
1771  * constructName does not affect the semantics, but is used in error messages
1772  */
1773 static void
checkExprIsVarFree(ParseState * pstate,Node * n,const char * constructName)1774 checkExprIsVarFree(ParseState *pstate, Node *n, const char *constructName)
1775 {
1776 	if (contain_vars_of_level(n, 0))
1777 	{
1778 		ereport(ERROR,
1779 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1780 		/* translator: %s is name of a SQL construct, eg LIMIT */
1781 				 errmsg("argument of %s must not contain variables",
1782 						constructName),
1783 				 parser_errposition(pstate,
1784 									locate_var_of_level(n, 0))));
1785 	}
1786 }
1787 
1788 
1789 /*
1790  * checkTargetlistEntrySQL92 -
1791  *	  Validate a targetlist entry found by findTargetlistEntrySQL92
1792  *
1793  * When we select a pre-existing tlist entry as a result of syntax such
1794  * as "GROUP BY 1", we have to make sure it is acceptable for use in the
1795  * indicated clause type; transformExpr() will have treated it as a regular
1796  * targetlist item.
1797  */
1798 static void
checkTargetlistEntrySQL92(ParseState * pstate,TargetEntry * tle,ParseExprKind exprKind)1799 checkTargetlistEntrySQL92(ParseState *pstate, TargetEntry *tle,
1800 						  ParseExprKind exprKind)
1801 {
1802 	switch (exprKind)
1803 	{
1804 		case EXPR_KIND_GROUP_BY:
1805 			/* reject aggregates and window functions */
1806 			if (pstate->p_hasAggs &&
1807 				contain_aggs_of_level((Node *) tle->expr, 0))
1808 				ereport(ERROR,
1809 						(errcode(ERRCODE_GROUPING_ERROR),
1810 				/* translator: %s is name of a SQL construct, eg GROUP BY */
1811 						 errmsg("aggregate functions are not allowed in %s",
1812 								ParseExprKindName(exprKind)),
1813 						 parser_errposition(pstate,
1814 											locate_agg_of_level((Node *) tle->expr, 0))));
1815 			if (pstate->p_hasWindowFuncs &&
1816 				contain_windowfuncs((Node *) tle->expr))
1817 				ereport(ERROR,
1818 						(errcode(ERRCODE_WINDOWING_ERROR),
1819 				/* translator: %s is name of a SQL construct, eg GROUP BY */
1820 						 errmsg("window functions are not allowed in %s",
1821 								ParseExprKindName(exprKind)),
1822 						 parser_errposition(pstate,
1823 											locate_windowfunc((Node *) tle->expr))));
1824 			break;
1825 		case EXPR_KIND_ORDER_BY:
1826 			/* no extra checks needed */
1827 			break;
1828 		case EXPR_KIND_DISTINCT_ON:
1829 			/* no extra checks needed */
1830 			break;
1831 		default:
1832 			elog(ERROR, "unexpected exprKind in checkTargetlistEntrySQL92");
1833 			break;
1834 	}
1835 }
1836 
1837 /*
1838  *	findTargetlistEntrySQL92 -
1839  *	  Returns the targetlist entry matching the given (untransformed) node.
1840  *	  If no matching entry exists, one is created and appended to the target
1841  *	  list as a "resjunk" node.
1842  *
1843  * This function supports the old SQL92 ORDER BY interpretation, where the
1844  * expression is an output column name or number.  If we fail to find a
1845  * match of that sort, we fall through to the SQL99 rules.  For historical
1846  * reasons, Postgres also allows this interpretation for GROUP BY, though
1847  * the standard never did.  However, for GROUP BY we prefer a SQL99 match.
1848  * This function is *not* used for WINDOW definitions.
1849  *
1850  * node		the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1851  * tlist	the target list (passed by reference so we can append to it)
1852  * exprKind identifies clause type being processed
1853  */
1854 static TargetEntry *
findTargetlistEntrySQL92(ParseState * pstate,Node * node,List ** tlist,ParseExprKind exprKind)1855 findTargetlistEntrySQL92(ParseState *pstate, Node *node, List **tlist,
1856 						 ParseExprKind exprKind)
1857 {
1858 	ListCell   *tl;
1859 
1860 	/*----------
1861 	 * Handle two special cases as mandated by the SQL92 spec:
1862 	 *
1863 	 * 1. Bare ColumnName (no qualifier or subscripts)
1864 	 *	  For a bare identifier, we search for a matching column name
1865 	 *	  in the existing target list.  Multiple matches are an error
1866 	 *	  unless they refer to identical values; for example,
1867 	 *	  we allow	SELECT a, a FROM table ORDER BY a
1868 	 *	  but not	SELECT a AS b, b FROM table ORDER BY b
1869 	 *	  If no match is found, we fall through and treat the identifier
1870 	 *	  as an expression.
1871 	 *	  For GROUP BY, it is incorrect to match the grouping item against
1872 	 *	  targetlist entries: according to SQL92, an identifier in GROUP BY
1873 	 *	  is a reference to a column name exposed by FROM, not to a target
1874 	 *	  list column.  However, many implementations (including pre-7.0
1875 	 *	  PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1876 	 *	  to see if the identifier matches any FROM column name, and only
1877 	 *	  try for a targetlist name if it doesn't.  This ensures that we
1878 	 *	  adhere to the spec in the case where the name could be both.
1879 	 *	  DISTINCT ON isn't in the standard, so we can do what we like there;
1880 	 *	  we choose to make it work like ORDER BY, on the rather flimsy
1881 	 *	  grounds that ordinary DISTINCT works on targetlist entries.
1882 	 *
1883 	 * 2. IntegerConstant
1884 	 *	  This means to use the n'th item in the existing target list.
1885 	 *	  Note that it would make no sense to order/group/distinct by an
1886 	 *	  actual constant, so this does not create a conflict with SQL99.
1887 	 *	  GROUP BY column-number is not allowed by SQL92, but since
1888 	 *	  the standard has no other behavior defined for this syntax,
1889 	 *	  we may as well accept this common extension.
1890 	 *
1891 	 * Note that pre-existing resjunk targets must not be used in either case,
1892 	 * since the user didn't write them in his SELECT list.
1893 	 *
1894 	 * If neither special case applies, fall through to treat the item as
1895 	 * an expression per SQL99.
1896 	 *----------
1897 	 */
1898 	if (IsA(node, ColumnRef) &&
1899 		list_length(((ColumnRef *) node)->fields) == 1 &&
1900 		IsA(linitial(((ColumnRef *) node)->fields), String))
1901 	{
1902 		char	   *name = strVal(linitial(((ColumnRef *) node)->fields));
1903 		int			location = ((ColumnRef *) node)->location;
1904 
1905 		if (exprKind == EXPR_KIND_GROUP_BY)
1906 		{
1907 			/*
1908 			 * In GROUP BY, we must prefer a match against a FROM-clause
1909 			 * column to one against the targetlist.  Look to see if there is
1910 			 * a matching column.  If so, fall through to use SQL99 rules.
1911 			 * NOTE: if name could refer ambiguously to more than one column
1912 			 * name exposed by FROM, colNameToVar will ereport(ERROR). That's
1913 			 * just what we want here.
1914 			 *
1915 			 * Small tweak for 7.4.3: ignore matches in upper query levels.
1916 			 * This effectively changes the search order for bare names to (1)
1917 			 * local FROM variables, (2) local targetlist aliases, (3) outer
1918 			 * FROM variables, whereas before it was (1) (3) (2). SQL92 and
1919 			 * SQL99 do not allow GROUPing BY an outer reference, so this
1920 			 * breaks no cases that are legal per spec, and it seems a more
1921 			 * self-consistent behavior.
1922 			 */
1923 			if (colNameToVar(pstate, name, true, location) != NULL)
1924 				name = NULL;
1925 		}
1926 
1927 		if (name != NULL)
1928 		{
1929 			TargetEntry *target_result = NULL;
1930 
1931 			foreach(tl, *tlist)
1932 			{
1933 				TargetEntry *tle = (TargetEntry *) lfirst(tl);
1934 
1935 				if (!tle->resjunk &&
1936 					strcmp(tle->resname, name) == 0)
1937 				{
1938 					if (target_result != NULL)
1939 					{
1940 						if (!equal(target_result->expr, tle->expr))
1941 							ereport(ERROR,
1942 									(errcode(ERRCODE_AMBIGUOUS_COLUMN),
1943 
1944 							/*------
1945 							  translator: first %s is name of a SQL construct, eg ORDER BY */
1946 									 errmsg("%s \"%s\" is ambiguous",
1947 											ParseExprKindName(exprKind),
1948 											name),
1949 									 parser_errposition(pstate, location)));
1950 					}
1951 					else
1952 						target_result = tle;
1953 					/* Stay in loop to check for ambiguity */
1954 				}
1955 			}
1956 			if (target_result != NULL)
1957 			{
1958 				/* return the first match, after suitable validation */
1959 				checkTargetlistEntrySQL92(pstate, target_result, exprKind);
1960 				return target_result;
1961 			}
1962 		}
1963 	}
1964 	if (IsA(node, A_Const))
1965 	{
1966 		Value	   *val = &((A_Const *) node)->val;
1967 		int			location = ((A_Const *) node)->location;
1968 		int			targetlist_pos = 0;
1969 		int			target_pos;
1970 
1971 		if (!IsA(val, Integer))
1972 			ereport(ERROR,
1973 					(errcode(ERRCODE_SYNTAX_ERROR),
1974 			/* translator: %s is name of a SQL construct, eg ORDER BY */
1975 					 errmsg("non-integer constant in %s",
1976 							ParseExprKindName(exprKind)),
1977 					 parser_errposition(pstate, location)));
1978 
1979 		target_pos = intVal(val);
1980 		foreach(tl, *tlist)
1981 		{
1982 			TargetEntry *tle = (TargetEntry *) lfirst(tl);
1983 
1984 			if (!tle->resjunk)
1985 			{
1986 				if (++targetlist_pos == target_pos)
1987 				{
1988 					/* return the unique match, after suitable validation */
1989 					checkTargetlistEntrySQL92(pstate, tle, exprKind);
1990 					return tle;
1991 				}
1992 			}
1993 		}
1994 		ereport(ERROR,
1995 				(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1996 		/* translator: %s is name of a SQL construct, eg ORDER BY */
1997 				 errmsg("%s position %d is not in select list",
1998 						ParseExprKindName(exprKind), target_pos),
1999 				 parser_errposition(pstate, location)));
2000 	}
2001 
2002 	/*
2003 	 * Otherwise, we have an expression, so process it per SQL99 rules.
2004 	 */
2005 	return findTargetlistEntrySQL99(pstate, node, tlist, exprKind);
2006 }
2007 
2008 /*
2009  *	findTargetlistEntrySQL99 -
2010  *	  Returns the targetlist entry matching the given (untransformed) node.
2011  *	  If no matching entry exists, one is created and appended to the target
2012  *	  list as a "resjunk" node.
2013  *
2014  * This function supports the SQL99 interpretation, wherein the expression
2015  * is just an ordinary expression referencing input column names.
2016  *
2017  * node		the ORDER BY, GROUP BY, etc expression to be matched
2018  * tlist	the target list (passed by reference so we can append to it)
2019  * exprKind identifies clause type being processed
2020  */
2021 static TargetEntry *
findTargetlistEntrySQL99(ParseState * pstate,Node * node,List ** tlist,ParseExprKind exprKind)2022 findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist,
2023 						 ParseExprKind exprKind)
2024 {
2025 	TargetEntry *target_result;
2026 	ListCell   *tl;
2027 	Node	   *expr;
2028 
2029 	/*
2030 	 * Convert the untransformed node to a transformed expression, and search
2031 	 * for a match in the tlist.  NOTE: it doesn't really matter whether there
2032 	 * is more than one match.  Also, we are willing to match an existing
2033 	 * resjunk target here, though the SQL92 cases above must ignore resjunk
2034 	 * targets.
2035 	 */
2036 	expr = transformExpr(pstate, node, exprKind);
2037 
2038 	foreach(tl, *tlist)
2039 	{
2040 		TargetEntry *tle = (TargetEntry *) lfirst(tl);
2041 		Node	   *texpr;
2042 
2043 		/*
2044 		 * Ignore any implicit cast on the existing tlist expression.
2045 		 *
2046 		 * This essentially allows the ORDER/GROUP/etc item to adopt the same
2047 		 * datatype previously selected for a textually-equivalent tlist item.
2048 		 * There can't be any implicit cast at top level in an ordinary SELECT
2049 		 * tlist at this stage, but the case does arise with ORDER BY in an
2050 		 * aggregate function.
2051 		 */
2052 		texpr = strip_implicit_coercions((Node *) tle->expr);
2053 
2054 		if (equal(expr, texpr))
2055 			return tle;
2056 	}
2057 
2058 	/*
2059 	 * If no matches, construct a new target entry which is appended to the
2060 	 * end of the target list.  This target is given resjunk = TRUE so that it
2061 	 * will not be projected into the final tuple.
2062 	 */
2063 	target_result = transformTargetEntry(pstate, node, expr, exprKind,
2064 										 NULL, true);
2065 
2066 	*tlist = lappend(*tlist, target_result);
2067 
2068 	return target_result;
2069 }
2070 
2071 /*-------------------------------------------------------------------------
2072  * Flatten out parenthesized sublists in grouping lists, and some cases
2073  * of nested grouping sets.
2074  *
2075  * Inside a grouping set (ROLLUP, CUBE, or GROUPING SETS), we expect the
2076  * content to be nested no more than 2 deep: i.e. ROLLUP((a,b),(c,d)) is
2077  * ok, but ROLLUP((a,(b,c)),d) is flattened to ((a,b,c),d), which we then
2078  * (later) normalize to ((a,b,c),(d)).
2079  *
2080  * CUBE or ROLLUP can be nested inside GROUPING SETS (but not the reverse),
2081  * and we leave that alone if we find it. But if we see GROUPING SETS inside
2082  * GROUPING SETS, we can flatten and normalize as follows:
2083  *	 GROUPING SETS (a, (b,c), GROUPING SETS ((c,d),(e)), (f,g))
2084  * becomes
2085  *	 GROUPING SETS ((a), (b,c), (c,d), (e), (f,g))
2086  *
2087  * This is per the spec's syntax transformations, but these are the only such
2088  * transformations we do in parse analysis, so that queries retain the
2089  * originally specified grouping set syntax for CUBE and ROLLUP as much as
2090  * possible when deparsed. (Full expansion of the result into a list of
2091  * grouping sets is left to the planner.)
2092  *
2093  * When we're done, the resulting list should contain only these possible
2094  * elements:
2095  *	 - an expression
2096  *	 - a CUBE or ROLLUP with a list of expressions nested 2 deep
2097  *	 - a GROUPING SET containing any of:
2098  *		- expression lists
2099  *		- empty grouping sets
2100  *		- CUBE or ROLLUP nodes with lists nested 2 deep
2101  * The return is a new list, but doesn't deep-copy the old nodes except for
2102  * GroupingSet nodes.
2103  *
2104  * As a side effect, flag whether the list has any GroupingSet nodes.
2105  *-------------------------------------------------------------------------
2106  */
2107 static Node *
flatten_grouping_sets(Node * expr,bool toplevel,bool * hasGroupingSets)2108 flatten_grouping_sets(Node *expr, bool toplevel, bool *hasGroupingSets)
2109 {
2110 	/* just in case of pathological input */
2111 	check_stack_depth();
2112 
2113 	if (expr == (Node *) NIL)
2114 		return (Node *) NIL;
2115 
2116 	switch (expr->type)
2117 	{
2118 		case T_RowExpr:
2119 			{
2120 				RowExpr    *r = (RowExpr *) expr;
2121 
2122 				if (r->row_format == COERCE_IMPLICIT_CAST)
2123 					return flatten_grouping_sets((Node *) r->args,
2124 												 false, NULL);
2125 			}
2126 			break;
2127 		case T_GroupingSet:
2128 			{
2129 				GroupingSet *gset = (GroupingSet *) expr;
2130 				ListCell   *l2;
2131 				List	   *result_set = NIL;
2132 
2133 				if (hasGroupingSets)
2134 					*hasGroupingSets = true;
2135 
2136 				/*
2137 				 * at the top level, we skip over all empty grouping sets; the
2138 				 * caller can supply the canonical GROUP BY () if nothing is
2139 				 * left.
2140 				 */
2141 
2142 				if (toplevel && gset->kind == GROUPING_SET_EMPTY)
2143 					return (Node *) NIL;
2144 
2145 				foreach(l2, gset->content)
2146 				{
2147 					Node	   *n1 = lfirst(l2);
2148 					Node	   *n2 = flatten_grouping_sets(n1, false, NULL);
2149 
2150 					if (IsA(n1, GroupingSet) &&
2151 						((GroupingSet *) n1)->kind == GROUPING_SET_SETS)
2152 					{
2153 						result_set = list_concat(result_set, (List *) n2);
2154 					}
2155 					else
2156 						result_set = lappend(result_set, n2);
2157 				}
2158 
2159 				/*
2160 				 * At top level, keep the grouping set node; but if we're in a
2161 				 * nested grouping set, then we need to concat the flattened
2162 				 * result into the outer list if it's simply nested.
2163 				 */
2164 
2165 				if (toplevel || (gset->kind != GROUPING_SET_SETS))
2166 				{
2167 					return (Node *) makeGroupingSet(gset->kind, result_set, gset->location);
2168 				}
2169 				else
2170 					return (Node *) result_set;
2171 			}
2172 		case T_List:
2173 			{
2174 				List	   *result = NIL;
2175 				ListCell   *l;
2176 
2177 				foreach(l, (List *) expr)
2178 				{
2179 					Node	   *n = flatten_grouping_sets(lfirst(l), toplevel, hasGroupingSets);
2180 
2181 					if (n != (Node *) NIL)
2182 					{
2183 						if (IsA(n, List))
2184 							result = list_concat(result, (List *) n);
2185 						else
2186 							result = lappend(result, n);
2187 					}
2188 				}
2189 
2190 				return (Node *) result;
2191 			}
2192 		default:
2193 			break;
2194 	}
2195 
2196 	return expr;
2197 }
2198 
2199 /*
2200  * Transform a single expression within a GROUP BY clause or grouping set.
2201  *
2202  * The expression is added to the targetlist if not already present, and to the
2203  * flatresult list (which will become the groupClause) if not already present
2204  * there.  The sortClause is consulted for operator and sort order hints.
2205  *
2206  * Returns the ressortgroupref of the expression.
2207  *
2208  * flatresult	reference to flat list of SortGroupClause nodes
2209  * seen_local	bitmapset of sortgrouprefs already seen at the local level
2210  * pstate		ParseState
2211  * gexpr		node to transform
2212  * targetlist	reference to TargetEntry list
2213  * sortClause	ORDER BY clause (SortGroupClause nodes)
2214  * exprKind		expression kind
2215  * useSQL99		SQL99 rather than SQL92 syntax
2216  * toplevel		false if within any grouping set
2217  */
2218 static Index
transformGroupClauseExpr(List ** flatresult,Bitmapset * seen_local,ParseState * pstate,Node * gexpr,List ** targetlist,List * sortClause,ParseExprKind exprKind,bool useSQL99,bool toplevel)2219 transformGroupClauseExpr(List **flatresult, Bitmapset *seen_local,
2220 						 ParseState *pstate, Node *gexpr,
2221 						 List **targetlist, List *sortClause,
2222 						 ParseExprKind exprKind, bool useSQL99, bool toplevel)
2223 {
2224 	TargetEntry *tle;
2225 	bool		found = false;
2226 
2227 	if (useSQL99)
2228 		tle = findTargetlistEntrySQL99(pstate, gexpr,
2229 									   targetlist, exprKind);
2230 	else
2231 		tle = findTargetlistEntrySQL92(pstate, gexpr,
2232 									   targetlist, exprKind);
2233 
2234 	if (tle->ressortgroupref > 0)
2235 	{
2236 		ListCell   *sl;
2237 
2238 		/*
2239 		 * Eliminate duplicates (GROUP BY x, x) but only at local level.
2240 		 * (Duplicates in grouping sets can affect the number of returned
2241 		 * rows, so can't be dropped indiscriminately.)
2242 		 *
2243 		 * Since we don't care about anything except the sortgroupref, we can
2244 		 * use a bitmapset rather than scanning lists.
2245 		 */
2246 		if (bms_is_member(tle->ressortgroupref, seen_local))
2247 			return 0;
2248 
2249 		/*
2250 		 * If we're already in the flat clause list, we don't need to consider
2251 		 * adding ourselves again.
2252 		 */
2253 		found = targetIsInSortList(tle, InvalidOid, *flatresult);
2254 		if (found)
2255 			return tle->ressortgroupref;
2256 
2257 		/*
2258 		 * If the GROUP BY tlist entry also appears in ORDER BY, copy operator
2259 		 * info from the (first) matching ORDER BY item.  This means that if
2260 		 * you write something like "GROUP BY foo ORDER BY foo USING <<<", the
2261 		 * GROUP BY operation silently takes on the equality semantics implied
2262 		 * by the ORDER BY.  There are two reasons to do this: it improves the
2263 		 * odds that we can implement both GROUP BY and ORDER BY with a single
2264 		 * sort step, and it allows the user to choose the equality semantics
2265 		 * used by GROUP BY, should she be working with a datatype that has
2266 		 * more than one equality operator.
2267 		 *
2268 		 * If we're in a grouping set, though, we force our requested ordering
2269 		 * to be NULLS LAST, because if we have any hope of using a sorted agg
2270 		 * for the job, we're going to be tacking on generated NULL values
2271 		 * after the corresponding groups. If the user demands nulls first,
2272 		 * another sort step is going to be inevitable, but that's the
2273 		 * planner's problem.
2274 		 */
2275 
2276 		foreach(sl, sortClause)
2277 		{
2278 			SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
2279 
2280 			if (sc->tleSortGroupRef == tle->ressortgroupref)
2281 			{
2282 				SortGroupClause *grpc = copyObject(sc);
2283 
2284 				if (!toplevel)
2285 					grpc->nulls_first = false;
2286 				*flatresult = lappend(*flatresult, grpc);
2287 				found = true;
2288 				break;
2289 			}
2290 		}
2291 	}
2292 
2293 	/*
2294 	 * If no match in ORDER BY, just add it to the result using default
2295 	 * sort/group semantics.
2296 	 */
2297 	if (!found)
2298 		*flatresult = addTargetToGroupList(pstate, tle,
2299 										   *flatresult, *targetlist,
2300 										   exprLocation(gexpr));
2301 
2302 	/*
2303 	 * _something_ must have assigned us a sortgroupref by now...
2304 	 */
2305 
2306 	return tle->ressortgroupref;
2307 }
2308 
2309 /*
2310  * Transform a list of expressions within a GROUP BY clause or grouping set.
2311  *
2312  * The list of expressions belongs to a single clause within which duplicates
2313  * can be safely eliminated.
2314  *
2315  * Returns an integer list of ressortgroupref values.
2316  *
2317  * flatresult	reference to flat list of SortGroupClause nodes
2318  * pstate		ParseState
2319  * list			nodes to transform
2320  * targetlist	reference to TargetEntry list
2321  * sortClause	ORDER BY clause (SortGroupClause nodes)
2322  * exprKind		expression kind
2323  * useSQL99		SQL99 rather than SQL92 syntax
2324  * toplevel		false if within any grouping set
2325  */
2326 static List *
transformGroupClauseList(List ** flatresult,ParseState * pstate,List * list,List ** targetlist,List * sortClause,ParseExprKind exprKind,bool useSQL99,bool toplevel)2327 transformGroupClauseList(List **flatresult,
2328 						 ParseState *pstate, List *list,
2329 						 List **targetlist, List *sortClause,
2330 						 ParseExprKind exprKind, bool useSQL99, bool toplevel)
2331 {
2332 	Bitmapset  *seen_local = NULL;
2333 	List	   *result = NIL;
2334 	ListCell   *gl;
2335 
2336 	foreach(gl, list)
2337 	{
2338 		Node	   *gexpr = (Node *) lfirst(gl);
2339 
2340 		Index		ref = transformGroupClauseExpr(flatresult,
2341 												   seen_local,
2342 												   pstate,
2343 												   gexpr,
2344 												   targetlist,
2345 												   sortClause,
2346 												   exprKind,
2347 												   useSQL99,
2348 												   toplevel);
2349 
2350 		if (ref > 0)
2351 		{
2352 			seen_local = bms_add_member(seen_local, ref);
2353 			result = lappend_int(result, ref);
2354 		}
2355 	}
2356 
2357 	return result;
2358 }
2359 
2360 /*
2361  * Transform a grouping set and (recursively) its content.
2362  *
2363  * The grouping set might be a GROUPING SETS node with other grouping sets
2364  * inside it, but SETS within SETS have already been flattened out before
2365  * reaching here.
2366  *
2367  * Returns the transformed node, which now contains SIMPLE nodes with lists
2368  * of ressortgrouprefs rather than expressions.
2369  *
2370  * flatresult	reference to flat list of SortGroupClause nodes
2371  * pstate		ParseState
2372  * gset			grouping set to transform
2373  * targetlist	reference to TargetEntry list
2374  * sortClause	ORDER BY clause (SortGroupClause nodes)
2375  * exprKind		expression kind
2376  * useSQL99		SQL99 rather than SQL92 syntax
2377  * toplevel		false if within any grouping set
2378  */
2379 static Node *
transformGroupingSet(List ** flatresult,ParseState * pstate,GroupingSet * gset,List ** targetlist,List * sortClause,ParseExprKind exprKind,bool useSQL99,bool toplevel)2380 transformGroupingSet(List **flatresult,
2381 					 ParseState *pstate, GroupingSet *gset,
2382 					 List **targetlist, List *sortClause,
2383 					 ParseExprKind exprKind, bool useSQL99, bool toplevel)
2384 {
2385 	ListCell   *gl;
2386 	List	   *content = NIL;
2387 
2388 	Assert(toplevel || gset->kind != GROUPING_SET_SETS);
2389 
2390 	foreach(gl, gset->content)
2391 	{
2392 		Node	   *n = lfirst(gl);
2393 
2394 		if (IsA(n, List))
2395 		{
2396 			List	   *l = transformGroupClauseList(flatresult,
2397 													 pstate, (List *) n,
2398 													 targetlist, sortClause,
2399 													 exprKind, useSQL99, false);
2400 
2401 			content = lappend(content, makeGroupingSet(GROUPING_SET_SIMPLE,
2402 													   l,
2403 													   exprLocation(n)));
2404 		}
2405 		else if (IsA(n, GroupingSet))
2406 		{
2407 			GroupingSet *gset2 = (GroupingSet *) lfirst(gl);
2408 
2409 			content = lappend(content, transformGroupingSet(flatresult,
2410 															pstate, gset2,
2411 															targetlist, sortClause,
2412 															exprKind, useSQL99, false));
2413 		}
2414 		else
2415 		{
2416 			Index		ref = transformGroupClauseExpr(flatresult,
2417 													   NULL,
2418 													   pstate,
2419 													   n,
2420 													   targetlist,
2421 													   sortClause,
2422 													   exprKind,
2423 													   useSQL99,
2424 													   false);
2425 
2426 			content = lappend(content, makeGroupingSet(GROUPING_SET_SIMPLE,
2427 													   list_make1_int(ref),
2428 													   exprLocation(n)));
2429 		}
2430 	}
2431 
2432 	/* Arbitrarily cap the size of CUBE, which has exponential growth */
2433 	if (gset->kind == GROUPING_SET_CUBE)
2434 	{
2435 		if (list_length(content) > 12)
2436 			ereport(ERROR,
2437 					(errcode(ERRCODE_TOO_MANY_COLUMNS),
2438 					 errmsg("CUBE is limited to 12 elements"),
2439 					 parser_errposition(pstate, gset->location)));
2440 	}
2441 
2442 	return (Node *) makeGroupingSet(gset->kind, content, gset->location);
2443 }
2444 
2445 
2446 /*
2447  * transformGroupClause -
2448  *	  transform a GROUP BY clause
2449  *
2450  * GROUP BY items will be added to the targetlist (as resjunk columns)
2451  * if not already present, so the targetlist must be passed by reference.
2452  *
2453  * This is also used for window PARTITION BY clauses (which act almost the
2454  * same, but are always interpreted per SQL99 rules).
2455  *
2456  * Grouping sets make this a lot more complex than it was. Our goal here is
2457  * twofold: we make a flat list of SortGroupClause nodes referencing each
2458  * distinct expression used for grouping, with those expressions added to the
2459  * targetlist if needed. At the same time, we build the groupingSets tree,
2460  * which stores only ressortgrouprefs as integer lists inside GroupingSet nodes
2461  * (possibly nested, but limited in depth: a GROUPING_SET_SETS node can contain
2462  * nested SIMPLE, CUBE or ROLLUP nodes, but not more sets - we flatten that
2463  * out; while CUBE and ROLLUP can contain only SIMPLE nodes).
2464  *
2465  * We skip much of the hard work if there are no grouping sets.
2466  *
2467  * One subtlety is that the groupClause list can end up empty while the
2468  * groupingSets list is not; this happens if there are only empty grouping
2469  * sets, or an explicit GROUP BY (). This has the same effect as specifying
2470  * aggregates or a HAVING clause with no GROUP BY; the output is one row per
2471  * grouping set even if the input is empty.
2472  *
2473  * Returns the transformed (flat) groupClause.
2474  *
2475  * pstate		ParseState
2476  * grouplist	clause to transform
2477  * groupingSets reference to list to contain the grouping set tree
2478  * targetlist	reference to TargetEntry list
2479  * sortClause	ORDER BY clause (SortGroupClause nodes)
2480  * exprKind		expression kind
2481  * useSQL99		SQL99 rather than SQL92 syntax
2482  */
2483 List *
transformGroupClause(ParseState * pstate,List * grouplist,List ** groupingSets,List ** targetlist,List * sortClause,ParseExprKind exprKind,bool useSQL99)2484 transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets,
2485 					 List **targetlist, List *sortClause,
2486 					 ParseExprKind exprKind, bool useSQL99)
2487 {
2488 	List	   *result = NIL;
2489 	List	   *flat_grouplist;
2490 	List	   *gsets = NIL;
2491 	ListCell   *gl;
2492 	bool		hasGroupingSets = false;
2493 	Bitmapset  *seen_local = NULL;
2494 
2495 	/*
2496 	 * Recursively flatten implicit RowExprs. (Technically this is only needed
2497 	 * for GROUP BY, per the syntax rules for grouping sets, but we do it
2498 	 * anyway.)
2499 	 */
2500 	flat_grouplist = (List *) flatten_grouping_sets((Node *) grouplist,
2501 													true,
2502 													&hasGroupingSets);
2503 
2504 	/*
2505 	 * If the list is now empty, but hasGroupingSets is true, it's because we
2506 	 * elided redundant empty grouping sets. Restore a single empty grouping
2507 	 * set to leave a canonical form: GROUP BY ()
2508 	 */
2509 
2510 	if (flat_grouplist == NIL && hasGroupingSets)
2511 	{
2512 		flat_grouplist = list_make1(makeGroupingSet(GROUPING_SET_EMPTY,
2513 													NIL,
2514 													exprLocation((Node *) grouplist)));
2515 	}
2516 
2517 	foreach(gl, flat_grouplist)
2518 	{
2519 		Node	   *gexpr = (Node *) lfirst(gl);
2520 
2521 		if (IsA(gexpr, GroupingSet))
2522 		{
2523 			GroupingSet *gset = (GroupingSet *) gexpr;
2524 
2525 			switch (gset->kind)
2526 			{
2527 				case GROUPING_SET_EMPTY:
2528 					gsets = lappend(gsets, gset);
2529 					break;
2530 				case GROUPING_SET_SIMPLE:
2531 					/* can't happen */
2532 					Assert(false);
2533 					break;
2534 				case GROUPING_SET_SETS:
2535 				case GROUPING_SET_CUBE:
2536 				case GROUPING_SET_ROLLUP:
2537 					gsets = lappend(gsets,
2538 									transformGroupingSet(&result,
2539 														 pstate, gset,
2540 														 targetlist, sortClause,
2541 														 exprKind, useSQL99, true));
2542 					break;
2543 			}
2544 		}
2545 		else
2546 		{
2547 			Index		ref = transformGroupClauseExpr(&result, seen_local,
2548 													   pstate, gexpr,
2549 													   targetlist, sortClause,
2550 													   exprKind, useSQL99, true);
2551 
2552 			if (ref > 0)
2553 			{
2554 				seen_local = bms_add_member(seen_local, ref);
2555 				if (hasGroupingSets)
2556 					gsets = lappend(gsets,
2557 									makeGroupingSet(GROUPING_SET_SIMPLE,
2558 													list_make1_int(ref),
2559 													exprLocation(gexpr)));
2560 			}
2561 		}
2562 	}
2563 
2564 	/* parser should prevent this */
2565 	Assert(gsets == NIL || groupingSets != NULL);
2566 
2567 	if (groupingSets)
2568 		*groupingSets = gsets;
2569 
2570 	return result;
2571 }
2572 
2573 /*
2574  * transformSortClause -
2575  *	  transform an ORDER BY clause
2576  *
2577  * ORDER BY items will be added to the targetlist (as resjunk columns)
2578  * if not already present, so the targetlist must be passed by reference.
2579  *
2580  * This is also used for window and aggregate ORDER BY clauses (which act
2581  * almost the same, but are always interpreted per SQL99 rules).
2582  */
2583 List *
transformSortClause(ParseState * pstate,List * orderlist,List ** targetlist,ParseExprKind exprKind,bool useSQL99)2584 transformSortClause(ParseState *pstate,
2585 					List *orderlist,
2586 					List **targetlist,
2587 					ParseExprKind exprKind,
2588 					bool useSQL99)
2589 {
2590 	List	   *sortlist = NIL;
2591 	ListCell   *olitem;
2592 
2593 	foreach(olitem, orderlist)
2594 	{
2595 		SortBy	   *sortby = (SortBy *) lfirst(olitem);
2596 		TargetEntry *tle;
2597 
2598 		if (useSQL99)
2599 			tle = findTargetlistEntrySQL99(pstate, sortby->node,
2600 										   targetlist, exprKind);
2601 		else
2602 			tle = findTargetlistEntrySQL92(pstate, sortby->node,
2603 										   targetlist, exprKind);
2604 
2605 		sortlist = addTargetToSortList(pstate, tle,
2606 									   sortlist, *targetlist, sortby);
2607 	}
2608 
2609 	return sortlist;
2610 }
2611 
2612 /*
2613  * transformWindowDefinitions -
2614  *		transform window definitions (WindowDef to WindowClause)
2615  */
2616 List *
transformWindowDefinitions(ParseState * pstate,List * windowdefs,List ** targetlist)2617 transformWindowDefinitions(ParseState *pstate,
2618 						   List *windowdefs,
2619 						   List **targetlist)
2620 {
2621 	List	   *result = NIL;
2622 	Index		winref = 0;
2623 	ListCell   *lc;
2624 
2625 	foreach(lc, windowdefs)
2626 	{
2627 		WindowDef  *windef = (WindowDef *) lfirst(lc);
2628 		WindowClause *refwc = NULL;
2629 		List	   *partitionClause;
2630 		List	   *orderClause;
2631 		WindowClause *wc;
2632 
2633 		winref++;
2634 
2635 		/*
2636 		 * Check for duplicate window names.
2637 		 */
2638 		if (windef->name &&
2639 			findWindowClause(result, windef->name) != NULL)
2640 			ereport(ERROR,
2641 					(errcode(ERRCODE_WINDOWING_ERROR),
2642 					 errmsg("window \"%s\" is already defined", windef->name),
2643 					 parser_errposition(pstate, windef->location)));
2644 
2645 		/*
2646 		 * If it references a previous window, look that up.
2647 		 */
2648 		if (windef->refname)
2649 		{
2650 			refwc = findWindowClause(result, windef->refname);
2651 			if (refwc == NULL)
2652 				ereport(ERROR,
2653 						(errcode(ERRCODE_UNDEFINED_OBJECT),
2654 						 errmsg("window \"%s\" does not exist",
2655 								windef->refname),
2656 						 parser_errposition(pstate, windef->location)));
2657 		}
2658 
2659 		/*
2660 		 * Transform PARTITION and ORDER specs, if any.  These are treated
2661 		 * almost exactly like top-level GROUP BY and ORDER BY clauses,
2662 		 * including the special handling of nondefault operator semantics.
2663 		 */
2664 		orderClause = transformSortClause(pstate,
2665 										  windef->orderClause,
2666 										  targetlist,
2667 										  EXPR_KIND_WINDOW_ORDER,
2668 										  true /* force SQL99 rules */ );
2669 		partitionClause = transformGroupClause(pstate,
2670 											   windef->partitionClause,
2671 											   NULL,
2672 											   targetlist,
2673 											   orderClause,
2674 											   EXPR_KIND_WINDOW_PARTITION,
2675 											   true /* force SQL99 rules */ );
2676 
2677 		/*
2678 		 * And prepare the new WindowClause.
2679 		 */
2680 		wc = makeNode(WindowClause);
2681 		wc->name = windef->name;
2682 		wc->refname = windef->refname;
2683 
2684 		/*
2685 		 * Per spec, a windowdef that references a previous one copies the
2686 		 * previous partition clause (and mustn't specify its own).  It can
2687 		 * specify its own ordering clause, but only if the previous one had
2688 		 * none.  It always specifies its own frame clause, and the previous
2689 		 * one must not have a frame clause.  Yeah, it's bizarre that each of
2690 		 * these cases works differently, but SQL:2008 says so; see 7.11
2691 		 * <window clause> syntax rule 10 and general rule 1.  The frame
2692 		 * clause rule is especially bizarre because it makes "OVER foo"
2693 		 * different from "OVER (foo)", and requires the latter to throw an
2694 		 * error if foo has a nondefault frame clause.  Well, ours not to
2695 		 * reason why, but we do go out of our way to throw a useful error
2696 		 * message for such cases.
2697 		 */
2698 		if (refwc)
2699 		{
2700 			if (partitionClause)
2701 				ereport(ERROR,
2702 						(errcode(ERRCODE_WINDOWING_ERROR),
2703 						 errmsg("cannot override PARTITION BY clause of window \"%s\"",
2704 								windef->refname),
2705 						 parser_errposition(pstate, windef->location)));
2706 			wc->partitionClause = copyObject(refwc->partitionClause);
2707 		}
2708 		else
2709 			wc->partitionClause = partitionClause;
2710 		if (refwc)
2711 		{
2712 			if (orderClause && refwc->orderClause)
2713 				ereport(ERROR,
2714 						(errcode(ERRCODE_WINDOWING_ERROR),
2715 						 errmsg("cannot override ORDER BY clause of window \"%s\"",
2716 								windef->refname),
2717 						 parser_errposition(pstate, windef->location)));
2718 			if (orderClause)
2719 			{
2720 				wc->orderClause = orderClause;
2721 				wc->copiedOrder = false;
2722 			}
2723 			else
2724 			{
2725 				wc->orderClause = copyObject(refwc->orderClause);
2726 				wc->copiedOrder = true;
2727 			}
2728 		}
2729 		else
2730 		{
2731 			wc->orderClause = orderClause;
2732 			wc->copiedOrder = false;
2733 		}
2734 		if (refwc && refwc->frameOptions != FRAMEOPTION_DEFAULTS)
2735 		{
2736 			/*
2737 			 * Use this message if this is a WINDOW clause, or if it's an OVER
2738 			 * clause that includes ORDER BY or framing clauses.  (We already
2739 			 * rejected PARTITION BY above, so no need to check that.)
2740 			 */
2741 			if (windef->name ||
2742 				orderClause || windef->frameOptions != FRAMEOPTION_DEFAULTS)
2743 				ereport(ERROR,
2744 						(errcode(ERRCODE_WINDOWING_ERROR),
2745 						 errmsg("cannot copy window \"%s\" because it has a frame clause",
2746 								windef->refname),
2747 						 parser_errposition(pstate, windef->location)));
2748 			/* Else this clause is just OVER (foo), so say this: */
2749 			ereport(ERROR,
2750 					(errcode(ERRCODE_WINDOWING_ERROR),
2751 					 errmsg("cannot copy window \"%s\" because it has a frame clause",
2752 							windef->refname),
2753 					 errhint("Omit the parentheses in this OVER clause."),
2754 					 parser_errposition(pstate, windef->location)));
2755 		}
2756 		wc->frameOptions = windef->frameOptions;
2757 		/* Process frame offset expressions */
2758 		wc->startOffset = transformFrameOffset(pstate, wc->frameOptions,
2759 											   windef->startOffset);
2760 		wc->endOffset = transformFrameOffset(pstate, wc->frameOptions,
2761 											 windef->endOffset);
2762 		wc->winref = winref;
2763 
2764 		result = lappend(result, wc);
2765 	}
2766 
2767 	return result;
2768 }
2769 
2770 /*
2771  * transformDistinctClause -
2772  *	  transform a DISTINCT clause
2773  *
2774  * Since we may need to add items to the query's targetlist, that list
2775  * is passed by reference.
2776  *
2777  * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
2778  * possible into the distinctClause.  This avoids a possible need to re-sort,
2779  * and allows the user to choose the equality semantics used by DISTINCT,
2780  * should she be working with a datatype that has more than one equality
2781  * operator.
2782  *
2783  * is_agg is true if we are transforming an aggregate(DISTINCT ...)
2784  * function call.  This does not affect any behavior, only the phrasing
2785  * of error messages.
2786  */
2787 List *
transformDistinctClause(ParseState * pstate,List ** targetlist,List * sortClause,bool is_agg)2788 transformDistinctClause(ParseState *pstate,
2789 						List **targetlist, List *sortClause, bool is_agg)
2790 {
2791 	List	   *result = NIL;
2792 	ListCell   *slitem;
2793 	ListCell   *tlitem;
2794 
2795 	/*
2796 	 * The distinctClause should consist of all ORDER BY items followed by all
2797 	 * other non-resjunk targetlist items.  There must not be any resjunk
2798 	 * ORDER BY items --- that would imply that we are sorting by a value that
2799 	 * isn't necessarily unique within a DISTINCT group, so the results
2800 	 * wouldn't be well-defined.  This construction ensures we follow the rule
2801 	 * that sortClause and distinctClause match; in fact the sortClause will
2802 	 * always be a prefix of distinctClause.
2803 	 *
2804 	 * Note a corner case: the same TLE could be in the ORDER BY list multiple
2805 	 * times with different sortops.  We have to include it in the
2806 	 * distinctClause the same way to preserve the prefix property. The net
2807 	 * effect will be that the TLE value will be made unique according to both
2808 	 * sortops.
2809 	 */
2810 	foreach(slitem, sortClause)
2811 	{
2812 		SortGroupClause *scl = (SortGroupClause *) lfirst(slitem);
2813 		TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
2814 
2815 		if (tle->resjunk)
2816 			ereport(ERROR,
2817 					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
2818 					 is_agg ?
2819 					 errmsg("in an aggregate with DISTINCT, ORDER BY expressions must appear in argument list") :
2820 					 errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list"),
2821 					 parser_errposition(pstate,
2822 										exprLocation((Node *) tle->expr))));
2823 		result = lappend(result, copyObject(scl));
2824 	}
2825 
2826 	/*
2827 	 * Now add any remaining non-resjunk tlist items, using default sort/group
2828 	 * semantics for their data types.
2829 	 */
2830 	foreach(tlitem, *targetlist)
2831 	{
2832 		TargetEntry *tle = (TargetEntry *) lfirst(tlitem);
2833 
2834 		if (tle->resjunk)
2835 			continue;			/* ignore junk */
2836 		result = addTargetToGroupList(pstate, tle,
2837 									  result, *targetlist,
2838 									  exprLocation((Node *) tle->expr));
2839 	}
2840 
2841 	/*
2842 	 * Complain if we found nothing to make DISTINCT.  Returning an empty list
2843 	 * would cause the parsed Query to look like it didn't have DISTINCT, with
2844 	 * results that would probably surprise the user.  Note: this case is
2845 	 * presently impossible for aggregates because of grammar restrictions,
2846 	 * but we check anyway.
2847 	 */
2848 	if (result == NIL)
2849 		ereport(ERROR,
2850 				(errcode(ERRCODE_SYNTAX_ERROR),
2851 				 is_agg ?
2852 				 errmsg("an aggregate with DISTINCT must have at least one argument") :
2853 				 errmsg("SELECT DISTINCT must have at least one column")));
2854 
2855 	return result;
2856 }
2857 
2858 /*
2859  * transformDistinctOnClause -
2860  *	  transform a DISTINCT ON clause
2861  *
2862  * Since we may need to add items to the query's targetlist, that list
2863  * is passed by reference.
2864  *
2865  * As with GROUP BY, we absorb the sorting semantics of ORDER BY as much as
2866  * possible into the distinctClause.  This avoids a possible need to re-sort,
2867  * and allows the user to choose the equality semantics used by DISTINCT,
2868  * should she be working with a datatype that has more than one equality
2869  * operator.
2870  */
2871 List *
transformDistinctOnClause(ParseState * pstate,List * distinctlist,List ** targetlist,List * sortClause)2872 transformDistinctOnClause(ParseState *pstate, List *distinctlist,
2873 						  List **targetlist, List *sortClause)
2874 {
2875 	List	   *result = NIL;
2876 	List	   *sortgrouprefs = NIL;
2877 	bool		skipped_sortitem;
2878 	ListCell   *lc;
2879 	ListCell   *lc2;
2880 
2881 	/*
2882 	 * Add all the DISTINCT ON expressions to the tlist (if not already
2883 	 * present, they are added as resjunk items).  Assign sortgroupref numbers
2884 	 * to them, and make a list of these numbers.  (NB: we rely below on the
2885 	 * sortgrouprefs list being one-for-one with the original distinctlist.
2886 	 * Also notice that we could have duplicate DISTINCT ON expressions and
2887 	 * hence duplicate entries in sortgrouprefs.)
2888 	 */
2889 	foreach(lc, distinctlist)
2890 	{
2891 		Node	   *dexpr = (Node *) lfirst(lc);
2892 		int			sortgroupref;
2893 		TargetEntry *tle;
2894 
2895 		tle = findTargetlistEntrySQL92(pstate, dexpr, targetlist,
2896 									   EXPR_KIND_DISTINCT_ON);
2897 		sortgroupref = assignSortGroupRef(tle, *targetlist);
2898 		sortgrouprefs = lappend_int(sortgrouprefs, sortgroupref);
2899 	}
2900 
2901 	/*
2902 	 * If the user writes both DISTINCT ON and ORDER BY, adopt the sorting
2903 	 * semantics from ORDER BY items that match DISTINCT ON items, and also
2904 	 * adopt their column sort order.  We insist that the distinctClause and
2905 	 * sortClause match, so throw error if we find the need to add any more
2906 	 * distinctClause items after we've skipped an ORDER BY item that wasn't
2907 	 * in DISTINCT ON.
2908 	 */
2909 	skipped_sortitem = false;
2910 	foreach(lc, sortClause)
2911 	{
2912 		SortGroupClause *scl = (SortGroupClause *) lfirst(lc);
2913 
2914 		if (list_member_int(sortgrouprefs, scl->tleSortGroupRef))
2915 		{
2916 			if (skipped_sortitem)
2917 				ereport(ERROR,
2918 						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
2919 						 errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"),
2920 						 parser_errposition(pstate,
2921 											get_matching_location(scl->tleSortGroupRef,
2922 																  sortgrouprefs,
2923 																  distinctlist))));
2924 			else
2925 				result = lappend(result, copyObject(scl));
2926 		}
2927 		else
2928 			skipped_sortitem = true;
2929 	}
2930 
2931 	/*
2932 	 * Now add any remaining DISTINCT ON items, using default sort/group
2933 	 * semantics for their data types.  (Note: this is pretty questionable; if
2934 	 * the ORDER BY list doesn't include all the DISTINCT ON items and more
2935 	 * besides, you certainly aren't using DISTINCT ON in the intended way,
2936 	 * and you probably aren't going to get consistent results.  It might be
2937 	 * better to throw an error or warning here.  But historically we've
2938 	 * allowed it, so keep doing so.)
2939 	 */
2940 	forboth(lc, distinctlist, lc2, sortgrouprefs)
2941 	{
2942 		Node	   *dexpr = (Node *) lfirst(lc);
2943 		int			sortgroupref = lfirst_int(lc2);
2944 		TargetEntry *tle = get_sortgroupref_tle(sortgroupref, *targetlist);
2945 
2946 		if (targetIsInSortList(tle, InvalidOid, result))
2947 			continue;			/* already in list (with some semantics) */
2948 		if (skipped_sortitem)
2949 			ereport(ERROR,
2950 					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
2951 					 errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions"),
2952 					 parser_errposition(pstate, exprLocation(dexpr))));
2953 		result = addTargetToGroupList(pstate, tle,
2954 									  result, *targetlist,
2955 									  exprLocation(dexpr));
2956 	}
2957 
2958 	/*
2959 	 * An empty result list is impossible here because of grammar
2960 	 * restrictions.
2961 	 */
2962 	Assert(result != NIL);
2963 
2964 	return result;
2965 }
2966 
2967 /*
2968  * get_matching_location
2969  *		Get the exprLocation of the exprs member corresponding to the
2970  *		(first) member of sortgrouprefs that equals sortgroupref.
2971  *
2972  * This is used so that we can point at a troublesome DISTINCT ON entry.
2973  * (Note that we need to use the original untransformed DISTINCT ON list
2974  * item, as whatever TLE it corresponds to will very possibly have a
2975  * parse location pointing to some matching entry in the SELECT list
2976  * or ORDER BY list.)
2977  */
2978 static int
get_matching_location(int sortgroupref,List * sortgrouprefs,List * exprs)2979 get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs)
2980 {
2981 	ListCell   *lcs;
2982 	ListCell   *lce;
2983 
2984 	forboth(lcs, sortgrouprefs, lce, exprs)
2985 	{
2986 		if (lfirst_int(lcs) == sortgroupref)
2987 			return exprLocation((Node *) lfirst(lce));
2988 	}
2989 	/* if no match, caller blew it */
2990 	elog(ERROR, "get_matching_location: no matching sortgroupref");
2991 	return -1;					/* keep compiler quiet */
2992 }
2993 
2994 /*
2995  * resolve_unique_index_expr
2996  *		Infer a unique index from a list of indexElems, for ON
2997  *		CONFLICT clause
2998  *
2999  * Perform parse analysis of expressions and columns appearing within ON
3000  * CONFLICT clause.  During planning, the returned list of expressions is used
3001  * to infer which unique index to use.
3002  */
3003 static List *
resolve_unique_index_expr(ParseState * pstate,InferClause * infer,Relation heapRel)3004 resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
3005 						  Relation heapRel)
3006 {
3007 	List	   *result = NIL;
3008 	ListCell   *l;
3009 
3010 	foreach(l, infer->indexElems)
3011 	{
3012 		IndexElem  *ielem = (IndexElem *) lfirst(l);
3013 		InferenceElem *pInfer = makeNode(InferenceElem);
3014 		Node	   *parse;
3015 
3016 		/*
3017 		 * Raw grammar re-uses CREATE INDEX infrastructure for unique index
3018 		 * inference clause, and so will accept opclasses by name and so on.
3019 		 *
3020 		 * Make no attempt to match ASC or DESC ordering or NULLS FIRST/NULLS
3021 		 * LAST ordering, since those are not significant for inference
3022 		 * purposes (any unique index matching the inference specification in
3023 		 * other regards is accepted indifferently).  Actively reject this as
3024 		 * wrong-headed.
3025 		 */
3026 		if (ielem->ordering != SORTBY_DEFAULT)
3027 			ereport(ERROR,
3028 					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
3029 					 errmsg("ASC/DESC is not allowed in ON CONFLICT clause"),
3030 					 parser_errposition(pstate,
3031 										exprLocation((Node *) infer))));
3032 		if (ielem->nulls_ordering != SORTBY_NULLS_DEFAULT)
3033 			ereport(ERROR,
3034 					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
3035 					 errmsg("NULLS FIRST/LAST is not allowed in ON CONFLICT clause"),
3036 					 parser_errposition(pstate,
3037 										exprLocation((Node *) infer))));
3038 
3039 		if (!ielem->expr)
3040 		{
3041 			/* Simple index attribute */
3042 			ColumnRef  *n;
3043 
3044 			/*
3045 			 * Grammar won't have built raw expression for us in event of
3046 			 * plain column reference.  Create one directly, and perform
3047 			 * expression transformation.  Planner expects this, and performs
3048 			 * its own normalization for the purposes of matching against
3049 			 * pg_index.
3050 			 */
3051 			n = makeNode(ColumnRef);
3052 			n->fields = list_make1(makeString(ielem->name));
3053 			/* Location is approximately that of inference specification */
3054 			n->location = infer->location;
3055 			parse = (Node *) n;
3056 		}
3057 		else
3058 		{
3059 			/* Do parse transformation of the raw expression */
3060 			parse = (Node *) ielem->expr;
3061 		}
3062 
3063 		/*
3064 		 * transformExpr() should have already rejected subqueries,
3065 		 * aggregates, and window functions, based on the EXPR_KIND_ for an
3066 		 * index expression.  Expressions returning sets won't have been
3067 		 * rejected, but don't bother doing so here; there should be no
3068 		 * available expression unique index to match any such expression
3069 		 * against anyway.
3070 		 */
3071 		pInfer->expr = transformExpr(pstate, parse, EXPR_KIND_INDEX_EXPRESSION);
3072 
3073 		/* Perform lookup of collation and operator class as required */
3074 		if (!ielem->collation)
3075 			pInfer->infercollid = InvalidOid;
3076 		else
3077 			pInfer->infercollid = LookupCollation(pstate, ielem->collation,
3078 												  exprLocation(pInfer->expr));
3079 
3080 		if (!ielem->opclass)
3081 			pInfer->inferopclass = InvalidOid;
3082 		else
3083 			pInfer->inferopclass = get_opclass_oid(BTREE_AM_OID,
3084 												   ielem->opclass, false);
3085 
3086 		result = lappend(result, pInfer);
3087 	}
3088 
3089 	return result;
3090 }
3091 
3092 /*
3093  * transformOnConflictArbiter -
3094  *		transform arbiter expressions in an ON CONFLICT clause.
3095  *
3096  * Transformed expressions used to infer one unique index relation to serve as
3097  * an ON CONFLICT arbiter.  Partial unique indexes may be inferred using WHERE
3098  * clause from inference specification clause.
3099  */
3100 void
transformOnConflictArbiter(ParseState * pstate,OnConflictClause * onConflictClause,List ** arbiterExpr,Node ** arbiterWhere,Oid * constraint)3101 transformOnConflictArbiter(ParseState *pstate,
3102 						   OnConflictClause *onConflictClause,
3103 						   List **arbiterExpr, Node **arbiterWhere,
3104 						   Oid *constraint)
3105 {
3106 	InferClause *infer = onConflictClause->infer;
3107 
3108 	*arbiterExpr = NIL;
3109 	*arbiterWhere = NULL;
3110 	*constraint = InvalidOid;
3111 
3112 	if (onConflictClause->action == ONCONFLICT_UPDATE && !infer)
3113 		ereport(ERROR,
3114 				(errcode(ERRCODE_SYNTAX_ERROR),
3115 				 errmsg("ON CONFLICT DO UPDATE requires inference specification or constraint name"),
3116 				 errhint("For example, ON CONFLICT (column_name)."),
3117 				 parser_errposition(pstate,
3118 									exprLocation((Node *) onConflictClause))));
3119 
3120 	/*
3121 	 * To simplify certain aspects of its design, speculative insertion into
3122 	 * system catalogs is disallowed
3123 	 */
3124 	if (IsCatalogRelation(pstate->p_target_relation))
3125 		ereport(ERROR,
3126 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3127 				 errmsg("ON CONFLICT is not supported with system catalog tables"),
3128 				 parser_errposition(pstate,
3129 									exprLocation((Node *) onConflictClause))));
3130 
3131 	/* Same applies to table used by logical decoding as catalog table */
3132 	if (RelationIsUsedAsCatalogTable(pstate->p_target_relation))
3133 		ereport(ERROR,
3134 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3135 				 errmsg("ON CONFLICT is not supported on table \"%s\" used as a catalog table",
3136 						RelationGetRelationName(pstate->p_target_relation)),
3137 				 parser_errposition(pstate,
3138 									exprLocation((Node *) onConflictClause))));
3139 
3140 	/* ON CONFLICT DO NOTHING does not require an inference clause */
3141 	if (infer)
3142 	{
3143 		List	   *save_namespace;
3144 
3145 		/*
3146 		 * While we process the arbiter expressions, accept only non-qualified
3147 		 * references to the target table. Hide any other relations.
3148 		 */
3149 		save_namespace = pstate->p_namespace;
3150 		pstate->p_namespace = NIL;
3151 		addRTEtoQuery(pstate, pstate->p_target_rangetblentry,
3152 					  false, false, true);
3153 
3154 		if (infer->indexElems)
3155 			*arbiterExpr = resolve_unique_index_expr(pstate, infer,
3156 													 pstate->p_target_relation);
3157 
3158 		/*
3159 		 * Handling inference WHERE clause (for partial unique index
3160 		 * inference)
3161 		 */
3162 		if (infer->whereClause)
3163 			*arbiterWhere = transformExpr(pstate, infer->whereClause,
3164 										  EXPR_KIND_INDEX_PREDICATE);
3165 
3166 		pstate->p_namespace = save_namespace;
3167 
3168 		/*
3169 		 * If the arbiter is specified by constraint name, get the constraint
3170 		 * OID and mark the constrained columns as requiring SELECT privilege,
3171 		 * in the same way as would have happened if the arbiter had been
3172 		 * specified by explicit reference to the constraint's index columns.
3173 		 */
3174 		if (infer->conname)
3175 		{
3176 			Oid			relid = RelationGetRelid(pstate->p_target_relation);
3177 			RangeTblEntry *rte = pstate->p_target_rangetblentry;
3178 			Bitmapset  *conattnos;
3179 
3180 			conattnos = get_relation_constraint_attnos(relid, infer->conname,
3181 													   false, constraint);
3182 
3183 			/* Make sure the rel as a whole is marked for SELECT access */
3184 			rte->requiredPerms |= ACL_SELECT;
3185 			/* Mark the constrained columns as requiring SELECT access */
3186 			rte->selectedCols = bms_add_members(rte->selectedCols, conattnos);
3187 		}
3188 	}
3189 
3190 	/*
3191 	 * It's convenient to form a list of expressions based on the
3192 	 * representation used by CREATE INDEX, since the same restrictions are
3193 	 * appropriate (e.g. on subqueries).  However, from here on, a dedicated
3194 	 * primnode representation is used for inference elements, and so
3195 	 * assign_query_collations() can be trusted to do the right thing with the
3196 	 * post parse analysis query tree inference clause representation.
3197 	 */
3198 }
3199 
3200 /*
3201  * addTargetToSortList
3202  *		If the given targetlist entry isn't already in the SortGroupClause
3203  *		list, add it to the end of the list, using the given sort ordering
3204  *		info.
3205  *
3206  * Returns the updated SortGroupClause list.
3207  */
3208 List *
addTargetToSortList(ParseState * pstate,TargetEntry * tle,List * sortlist,List * targetlist,SortBy * sortby)3209 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
3210 					List *sortlist, List *targetlist, SortBy *sortby)
3211 {
3212 	Oid			restype = exprType((Node *) tle->expr);
3213 	Oid			sortop;
3214 	Oid			eqop;
3215 	bool		hashable;
3216 	bool		reverse;
3217 	int			location;
3218 	ParseCallbackState pcbstate;
3219 
3220 	/* if tlist item is an UNKNOWN literal, change it to TEXT */
3221 	if (restype == UNKNOWNOID)
3222 	{
3223 		tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
3224 										 restype, TEXTOID, -1,
3225 										 COERCION_IMPLICIT,
3226 										 COERCE_IMPLICIT_CAST,
3227 										 -1);
3228 		restype = TEXTOID;
3229 	}
3230 
3231 	/*
3232 	 * Rather than clutter the API of get_sort_group_operators and the other
3233 	 * functions we're about to use, make use of error context callback to
3234 	 * mark any error reports with a parse position.  We point to the operator
3235 	 * location if present, else to the expression being sorted.  (NB: use the
3236 	 * original untransformed expression here; the TLE entry might well point
3237 	 * at a duplicate expression in the regular SELECT list.)
3238 	 */
3239 	location = sortby->location;
3240 	if (location < 0)
3241 		location = exprLocation(sortby->node);
3242 	setup_parser_errposition_callback(&pcbstate, pstate, location);
3243 
3244 	/* determine the sortop, eqop, and directionality */
3245 	switch (sortby->sortby_dir)
3246 	{
3247 		case SORTBY_DEFAULT:
3248 		case SORTBY_ASC:
3249 			get_sort_group_operators(restype,
3250 									 true, true, false,
3251 									 &sortop, &eqop, NULL,
3252 									 &hashable);
3253 			reverse = false;
3254 			break;
3255 		case SORTBY_DESC:
3256 			get_sort_group_operators(restype,
3257 									 false, true, true,
3258 									 NULL, &eqop, &sortop,
3259 									 &hashable);
3260 			reverse = true;
3261 			break;
3262 		case SORTBY_USING:
3263 			Assert(sortby->useOp != NIL);
3264 			sortop = compatible_oper_opid(sortby->useOp,
3265 										  restype,
3266 										  restype,
3267 										  false);
3268 
3269 			/*
3270 			 * Verify it's a valid ordering operator, fetch the corresponding
3271 			 * equality operator, and determine whether to consider it like
3272 			 * ASC or DESC.
3273 			 */
3274 			eqop = get_equality_op_for_ordering_op(sortop, &reverse);
3275 			if (!OidIsValid(eqop))
3276 				ereport(ERROR,
3277 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
3278 						 errmsg("operator %s is not a valid ordering operator",
3279 								strVal(llast(sortby->useOp))),
3280 						 errhint("Ordering operators must be \"<\" or \">\" members of btree operator families.")));
3281 
3282 			/*
3283 			 * Also see if the equality operator is hashable.
3284 			 */
3285 			hashable = op_hashjoinable(eqop, restype);
3286 			break;
3287 		default:
3288 			elog(ERROR, "unrecognized sortby_dir: %d", sortby->sortby_dir);
3289 			sortop = InvalidOid;	/* keep compiler quiet */
3290 			eqop = InvalidOid;
3291 			hashable = false;
3292 			reverse = false;
3293 			break;
3294 	}
3295 
3296 	cancel_parser_errposition_callback(&pcbstate);
3297 
3298 	/* avoid making duplicate sortlist entries */
3299 	if (!targetIsInSortList(tle, sortop, sortlist))
3300 	{
3301 		SortGroupClause *sortcl = makeNode(SortGroupClause);
3302 
3303 		sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
3304 
3305 		sortcl->eqop = eqop;
3306 		sortcl->sortop = sortop;
3307 		sortcl->hashable = hashable;
3308 
3309 		switch (sortby->sortby_nulls)
3310 		{
3311 			case SORTBY_NULLS_DEFAULT:
3312 				/* NULLS FIRST is default for DESC; other way for ASC */
3313 				sortcl->nulls_first = reverse;
3314 				break;
3315 			case SORTBY_NULLS_FIRST:
3316 				sortcl->nulls_first = true;
3317 				break;
3318 			case SORTBY_NULLS_LAST:
3319 				sortcl->nulls_first = false;
3320 				break;
3321 			default:
3322 				elog(ERROR, "unrecognized sortby_nulls: %d",
3323 					 sortby->sortby_nulls);
3324 				break;
3325 		}
3326 
3327 		sortlist = lappend(sortlist, sortcl);
3328 	}
3329 
3330 	return sortlist;
3331 }
3332 
3333 /*
3334  * addTargetToGroupList
3335  *		If the given targetlist entry isn't already in the SortGroupClause
3336  *		list, add it to the end of the list, using default sort/group
3337  *		semantics.
3338  *
3339  * This is very similar to addTargetToSortList, except that we allow the
3340  * case where only a grouping (equality) operator can be found, and that
3341  * the TLE is considered "already in the list" if it appears there with any
3342  * sorting semantics.
3343  *
3344  * location is the parse location to be fingered in event of trouble.  Note
3345  * that we can't rely on exprLocation(tle->expr), because that might point
3346  * to a SELECT item that matches the GROUP BY item; it'd be pretty confusing
3347  * to report such a location.
3348  *
3349  * Returns the updated SortGroupClause list.
3350  */
3351 static List *
addTargetToGroupList(ParseState * pstate,TargetEntry * tle,List * grouplist,List * targetlist,int location)3352 addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
3353 					 List *grouplist, List *targetlist, int location)
3354 {
3355 	Oid			restype = exprType((Node *) tle->expr);
3356 
3357 	/* if tlist item is an UNKNOWN literal, change it to TEXT */
3358 	if (restype == UNKNOWNOID)
3359 	{
3360 		tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
3361 										 restype, TEXTOID, -1,
3362 										 COERCION_IMPLICIT,
3363 										 COERCE_IMPLICIT_CAST,
3364 										 -1);
3365 		restype = TEXTOID;
3366 	}
3367 
3368 	/* avoid making duplicate grouplist entries */
3369 	if (!targetIsInSortList(tle, InvalidOid, grouplist))
3370 	{
3371 		SortGroupClause *grpcl = makeNode(SortGroupClause);
3372 		Oid			sortop;
3373 		Oid			eqop;
3374 		bool		hashable;
3375 		ParseCallbackState pcbstate;
3376 
3377 		setup_parser_errposition_callback(&pcbstate, pstate, location);
3378 
3379 		/* determine the eqop and optional sortop */
3380 		get_sort_group_operators(restype,
3381 								 false, true, false,
3382 								 &sortop, &eqop, NULL,
3383 								 &hashable);
3384 
3385 		cancel_parser_errposition_callback(&pcbstate);
3386 
3387 		grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
3388 		grpcl->eqop = eqop;
3389 		grpcl->sortop = sortop;
3390 		grpcl->nulls_first = false; /* OK with or without sortop */
3391 		grpcl->hashable = hashable;
3392 
3393 		grouplist = lappend(grouplist, grpcl);
3394 	}
3395 
3396 	return grouplist;
3397 }
3398 
3399 /*
3400  * assignSortGroupRef
3401  *	  Assign the targetentry an unused ressortgroupref, if it doesn't
3402  *	  already have one.  Return the assigned or pre-existing refnumber.
3403  *
3404  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
3405  */
3406 Index
assignSortGroupRef(TargetEntry * tle,List * tlist)3407 assignSortGroupRef(TargetEntry *tle, List *tlist)
3408 {
3409 	Index		maxRef;
3410 	ListCell   *l;
3411 
3412 	if (tle->ressortgroupref)	/* already has one? */
3413 		return tle->ressortgroupref;
3414 
3415 	/* easiest way to pick an unused refnumber: max used + 1 */
3416 	maxRef = 0;
3417 	foreach(l, tlist)
3418 	{
3419 		Index		ref = ((TargetEntry *) lfirst(l))->ressortgroupref;
3420 
3421 		if (ref > maxRef)
3422 			maxRef = ref;
3423 	}
3424 	tle->ressortgroupref = maxRef + 1;
3425 	return tle->ressortgroupref;
3426 }
3427 
3428 /*
3429  * targetIsInSortList
3430  *		Is the given target item already in the sortlist?
3431  *		If sortop is not InvalidOid, also test for a match to the sortop.
3432  *
3433  * It is not an oversight that this function ignores the nulls_first flag.
3434  * We check sortop when determining if an ORDER BY item is redundant with
3435  * earlier ORDER BY items, because it's conceivable that "ORDER BY
3436  * foo USING <, foo USING <<<" is not redundant, if <<< distinguishes
3437  * values that < considers equal.  We need not check nulls_first
3438  * however, because a lower-order column with the same sortop but
3439  * opposite nulls direction is redundant.  Also, we can consider
3440  * ORDER BY foo ASC, foo DESC redundant, so check for a commutator match.
3441  *
3442  * Works for both ordering and grouping lists (sortop would normally be
3443  * InvalidOid when considering grouping).  Note that the main reason we need
3444  * this routine (and not just a quick test for nonzeroness of ressortgroupref)
3445  * is that a TLE might be in only one of the lists.
3446  */
3447 bool
targetIsInSortList(TargetEntry * tle,Oid sortop,List * sortList)3448 targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList)
3449 {
3450 	Index		ref = tle->ressortgroupref;
3451 	ListCell   *l;
3452 
3453 	/* no need to scan list if tle has no marker */
3454 	if (ref == 0)
3455 		return false;
3456 
3457 	foreach(l, sortList)
3458 	{
3459 		SortGroupClause *scl = (SortGroupClause *) lfirst(l);
3460 
3461 		if (scl->tleSortGroupRef == ref &&
3462 			(sortop == InvalidOid ||
3463 			 sortop == scl->sortop ||
3464 			 sortop == get_commutator(scl->sortop)))
3465 			return true;
3466 	}
3467 	return false;
3468 }
3469 
3470 /*
3471  * findWindowClause
3472  *		Find the named WindowClause in the list, or return NULL if not there
3473  */
3474 static WindowClause *
findWindowClause(List * wclist,const char * name)3475 findWindowClause(List *wclist, const char *name)
3476 {
3477 	ListCell   *l;
3478 
3479 	foreach(l, wclist)
3480 	{
3481 		WindowClause *wc = (WindowClause *) lfirst(l);
3482 
3483 		if (wc->name && strcmp(wc->name, name) == 0)
3484 			return wc;
3485 	}
3486 
3487 	return NULL;
3488 }
3489 
3490 /*
3491  * transformFrameOffset
3492  *		Process a window frame offset expression
3493  */
3494 static Node *
transformFrameOffset(ParseState * pstate,int frameOptions,Node * clause)3495 transformFrameOffset(ParseState *pstate, int frameOptions, Node *clause)
3496 {
3497 	const char *constructName = NULL;
3498 	Node	   *node;
3499 
3500 	/* Quick exit if no offset expression */
3501 	if (clause == NULL)
3502 		return NULL;
3503 
3504 	if (frameOptions & FRAMEOPTION_ROWS)
3505 	{
3506 		/* Transform the raw expression tree */
3507 		node = transformExpr(pstate, clause, EXPR_KIND_WINDOW_FRAME_ROWS);
3508 
3509 		/*
3510 		 * Like LIMIT clause, simply coerce to int8
3511 		 */
3512 		constructName = "ROWS";
3513 		node = coerce_to_specific_type(pstate, node, INT8OID, constructName);
3514 	}
3515 	else if (frameOptions & FRAMEOPTION_RANGE)
3516 	{
3517 		/* Transform the raw expression tree */
3518 		node = transformExpr(pstate, clause, EXPR_KIND_WINDOW_FRAME_RANGE);
3519 
3520 		/*
3521 		 * this needs a lot of thought to decide how to support in the context
3522 		 * of Postgres' extensible datatype framework
3523 		 */
3524 		constructName = "RANGE";
3525 		/* error was already thrown by gram.y, this is just a backstop */
3526 		elog(ERROR, "window frame with value offset is not implemented");
3527 	}
3528 	else
3529 	{
3530 		Assert(false);
3531 		node = NULL;
3532 	}
3533 
3534 	/* Disallow variables in frame offsets */
3535 	checkExprIsVarFree(pstate, node, constructName);
3536 
3537 	return node;
3538 }
3539