1 /*-------------------------------------------------------------------------
2  *
3  * parse_collate.c
4  *		Routines for assigning collation information.
5  *
6  * We choose to handle collation analysis in a post-pass over the output
7  * of expression parse analysis.  This is because we need more state to
8  * perform this processing than is needed in the finished tree.  If we
9  * did it on-the-fly while building the tree, all that state would have
10  * to be kept in expression node trees permanently.  This way, the extra
11  * storage is just local variables in this recursive routine.
12  *
13  * The info that is actually saved in the finished tree is:
14  * 1. The output collation of each expression node, or InvalidOid if it
15  * returns a noncollatable data type.  This can also be InvalidOid if the
16  * result type is collatable but the collation is indeterminate.
17  * 2. The collation to be used in executing each function.  InvalidOid means
18  * that there are no collatable inputs or their collation is indeterminate.
19  * This value is only stored in node types that might call collation-using
20  * functions.
21  *
22  * You might think we could get away with storing only one collation per
23  * node, but the two concepts really need to be kept distinct.  Otherwise
24  * it's too confusing when a function produces a collatable output type but
25  * has no collatable inputs or produces noncollatable output from collatable
26  * inputs.
27  *
28  * Cases with indeterminate collation might result in an error being thrown
29  * at runtime.  If we knew exactly which functions require collation
30  * information, we could throw those errors at parse time instead.
31  *
32  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
33  * Portions Copyright (c) 1994, Regents of the University of California
34  *
35  *
36  * IDENTIFICATION
37  *	  src/backend/parser/parse_collate.c
38  *
39  *-------------------------------------------------------------------------
40  */
41 #include "postgres.h"
42 
43 #include "catalog/pg_aggregate.h"
44 #include "catalog/pg_collation.h"
45 #include "nodes/makefuncs.h"
46 #include "nodes/nodeFuncs.h"
47 #include "parser/parse_collate.h"
48 #include "utils/lsyscache.h"
49 
50 
/*
 * CollateStrength
 *		How a collation choice was arrived at (the SQL standard's term for
 *		this is "derivation").
 *
 * The enumerators are declared in increasing order of precedence so that
 * plain integer comparisons between two strengths are meaningful.  Note that
 * the standard does not appear to distinguish NONE from CONFLICT.
 */
typedef enum
{
	/* expression is of a noncollatable datatype */
	COLLATE_NONE,
	/* collation was derived implicitly */
	COLLATE_IMPLICIT,
	/* we had a conflict of implicit collations */
	COLLATE_CONFLICT,
	/* collation was derived explicitly */
	COLLATE_EXPLICIT
} CollateStrength;
63 
64 typedef struct
65 {
66 	ParseState *pstate;			/* parse state (for error reporting) */
67 	Oid			collation;		/* OID of current collation, if any */
68 	CollateStrength strength;	/* strength of current collation choice */
69 	int			location;		/* location of expr that set collation */
70 	/* Remaining fields are only valid when strength == COLLATE_CONFLICT */
71 	Oid			collation2;		/* OID of conflicting collation */
72 	int			location2;		/* location of expr that set collation2 */
73 } assign_collations_context;
74 
75 static bool assign_query_collations_walker(Node *node, ParseState *pstate);
76 static bool assign_collations_walker(Node *node,
77 						 assign_collations_context *context);
78 static void merge_collation_state(Oid collation,
79 					  CollateStrength strength,
80 					  int location,
81 					  Oid collation2,
82 					  int location2,
83 					  assign_collations_context *context);
84 static void assign_aggregate_collations(Aggref *aggref,
85 							assign_collations_context *loccontext);
86 static void assign_ordered_set_collations(Aggref *aggref,
87 							  assign_collations_context *loccontext);
88 static void assign_hypothetical_collations(Aggref *aggref,
89 							   assign_collations_context *loccontext);
90 
91 
92 /*
93  * assign_query_collations()
94  *		Mark all expressions in the given Query with collation information.
95  *
96  * This should be applied to each Query after completion of parse analysis
97  * for expressions.  Note that we do not recurse into sub-Queries, since
98  * those should have been processed when built.
99  */
100 void
assign_query_collations(ParseState * pstate,Query * query)101 assign_query_collations(ParseState *pstate, Query *query)
102 {
103 	/*
104 	 * We just use query_tree_walker() to visit all the contained expressions.
105 	 * We can skip the rangetable and CTE subqueries, though, since RTEs and
106 	 * subqueries had better have been processed already (else Vars referring
107 	 * to them would not get created with the right collation).
108 	 */
109 	(void) query_tree_walker(query,
110 							 assign_query_collations_walker,
111 							 (void *) pstate,
112 							 QTW_IGNORE_RANGE_TABLE |
113 							 QTW_IGNORE_CTE_SUBQUERIES);
114 }
115 
116 /*
117  * Walker for assign_query_collations
118  *
119  * Each expression found by query_tree_walker is processed independently.
120  * Note that query_tree_walker may pass us a whole List, such as the
121  * targetlist, in which case each subexpression must be processed
122  * independently --- we don't want to bleat if two different targetentries
123  * have different collations.
124  */
125 static bool
assign_query_collations_walker(Node * node,ParseState * pstate)126 assign_query_collations_walker(Node *node, ParseState *pstate)
127 {
128 	/* Need do nothing for empty subexpressions */
129 	if (node == NULL)
130 		return false;
131 
132 	/*
133 	 * We don't want to recurse into a set-operations tree; it's already been
134 	 * fully processed in transformSetOperationStmt.
135 	 */
136 	if (IsA(node, SetOperationStmt))
137 		return false;
138 
139 	if (IsA(node, List))
140 		assign_list_collations(pstate, (List *) node);
141 	else
142 		assign_expr_collations(pstate, node);
143 
144 	return false;
145 }
146 
147 /*
148  * assign_list_collations()
149  *		Mark all nodes in the list of expressions with collation information.
150  *
151  * The list member expressions are processed independently; they do not have
152  * to share a common collation.
153  */
154 void
assign_list_collations(ParseState * pstate,List * exprs)155 assign_list_collations(ParseState *pstate, List *exprs)
156 {
157 	ListCell   *lc;
158 
159 	foreach(lc, exprs)
160 	{
161 		Node	   *node = (Node *) lfirst(lc);
162 
163 		assign_expr_collations(pstate, node);
164 	}
165 }
166 
167 /*
168  * assign_expr_collations()
169  *		Mark all nodes in the given expression tree with collation information.
170  *
171  * This is exported for the benefit of various utility commands that process
172  * expressions without building a complete Query.  It should be applied after
173  * calling transformExpr() plus any expression-modifying operations such as
174  * coerce_to_boolean().
175  */
176 void
assign_expr_collations(ParseState * pstate,Node * expr)177 assign_expr_collations(ParseState *pstate, Node *expr)
178 {
179 	assign_collations_context context;
180 
181 	/* initialize context for tree walk */
182 	context.pstate = pstate;
183 	context.collation = InvalidOid;
184 	context.strength = COLLATE_NONE;
185 	context.location = -1;
186 
187 	/* and away we go */
188 	(void) assign_collations_walker(expr, &context);
189 }
190 
191 /*
192  * select_common_collation()
193  *		Identify a common collation for a list of expressions.
194  *
195  * The expressions should all return the same datatype, else this is not
196  * terribly meaningful.
197  *
198  * none_ok means that it is permitted to return InvalidOid, indicating that
199  * no common collation could be identified, even for collatable datatypes.
200  * Otherwise, an error is thrown for conflict of implicit collations.
201  *
202  * In theory, none_ok = true reflects the rules of SQL standard clause "Result
203  * of data type combinations", none_ok = false reflects the rules of clause
204  * "Collation determination" (in some cases invoked via "Grouping
205  * operations").
206  */
207 Oid
select_common_collation(ParseState * pstate,List * exprs,bool none_ok)208 select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
209 {
210 	assign_collations_context context;
211 
212 	/* initialize context for tree walk */
213 	context.pstate = pstate;
214 	context.collation = InvalidOid;
215 	context.strength = COLLATE_NONE;
216 	context.location = -1;
217 
218 	/* and away we go */
219 	(void) assign_collations_walker((Node *) exprs, &context);
220 
221 	/* deal with collation conflict */
222 	if (context.strength == COLLATE_CONFLICT)
223 	{
224 		if (none_ok)
225 			return InvalidOid;
226 		ereport(ERROR,
227 				(errcode(ERRCODE_COLLATION_MISMATCH),
228 				 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
229 						get_collation_name(context.collation),
230 						get_collation_name(context.collation2)),
231 				 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
232 				 parser_errposition(context.pstate, context.location2)));
233 	}
234 
235 	/*
236 	 * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
237 	 * that's okay because it must mean none of the expressions returned
238 	 * collatable datatypes.
239 	 */
240 	return context.collation;
241 }
242 
243 /*
244  * assign_collations_walker()
245  *		Recursive guts of collation processing.
246  *
247  * Nodes with no children (eg, Vars, Consts, Params) must have been marked
248  * when built.  All upper-level nodes are marked here.
249  *
250  * Note: if this is invoked directly on a List, it will attempt to infer a
251  * common collation for all the list members.  In particular, it will throw
252  * error if there are conflicting explicit collations for different members.
253  */
254 static bool
assign_collations_walker(Node * node,assign_collations_context * context)255 assign_collations_walker(Node *node, assign_collations_context *context)
256 {
257 	assign_collations_context loccontext;
258 	Oid			collation;
259 	CollateStrength strength;
260 	int			location;
261 
262 	/* Need do nothing for empty subexpressions */
263 	if (node == NULL)
264 		return false;
265 
266 	/*
267 	 * Prepare for recursion.  For most node types, though not all, the first
268 	 * thing we do is recurse to process all nodes below this one. Each level
269 	 * of the tree has its own local context.
270 	 */
271 	loccontext.pstate = context->pstate;
272 	loccontext.collation = InvalidOid;
273 	loccontext.strength = COLLATE_NONE;
274 	loccontext.location = -1;
275 	/* Set these fields just to suppress uninitialized-value warnings: */
276 	loccontext.collation2 = InvalidOid;
277 	loccontext.location2 = -1;
278 
279 	/*
280 	 * Recurse if appropriate, then determine the collation for this node.
281 	 *
282 	 * Note: the general cases are at the bottom of the switch, after various
283 	 * special cases.
284 	 */
285 	switch (nodeTag(node))
286 	{
287 		case T_CollateExpr:
288 			{
289 				/*
290 				 * COLLATE sets an explicitly derived collation, regardless of
291 				 * what the child state is.  But we must recurse to set up
292 				 * collation info below here.
293 				 */
294 				CollateExpr *expr = (CollateExpr *) node;
295 
296 				(void) expression_tree_walker(node,
297 											  assign_collations_walker,
298 											  (void *) &loccontext);
299 
300 				collation = expr->collOid;
301 				Assert(OidIsValid(collation));
302 				strength = COLLATE_EXPLICIT;
303 				location = expr->location;
304 			}
305 			break;
306 		case T_FieldSelect:
307 			{
308 				/*
309 				 * For FieldSelect, the result has the field's declared
310 				 * collation, independently of what happened in the arguments.
311 				 * (The immediate argument must be composite and thus not
312 				 * collatable, anyhow.)  The field's collation was already
313 				 * looked up and saved in the node.
314 				 */
315 				FieldSelect *expr = (FieldSelect *) node;
316 
317 				/* ... but first, recurse */
318 				(void) expression_tree_walker(node,
319 											  assign_collations_walker,
320 											  (void *) &loccontext);
321 
322 				if (OidIsValid(expr->resultcollid))
323 				{
324 					/* Node's result type is collatable. */
325 					/* Pass up field's collation as an implicit choice. */
326 					collation = expr->resultcollid;
327 					strength = COLLATE_IMPLICIT;
328 					location = exprLocation(node);
329 				}
330 				else
331 				{
332 					/* Node's result type isn't collatable. */
333 					collation = InvalidOid;
334 					strength = COLLATE_NONE;
335 					location = -1;	/* won't be used */
336 				}
337 			}
338 			break;
339 		case T_RowExpr:
340 			{
341 				/*
342 				 * RowExpr is a special case because the subexpressions are
343 				 * independent: we don't want to complain if some of them have
344 				 * incompatible explicit collations.
345 				 */
346 				RowExpr    *expr = (RowExpr *) node;
347 
348 				assign_list_collations(context->pstate, expr->args);
349 
350 				/*
351 				 * Since the result is always composite and therefore never
352 				 * has a collation, we can just stop here: this node has no
353 				 * impact on the collation of its parent.
354 				 */
355 				return false;	/* done */
356 			}
357 		case T_RowCompareExpr:
358 			{
359 				/*
360 				 * For RowCompare, we have to find the common collation of
361 				 * each pair of input columns and build a list.  If we can't
362 				 * find a common collation, we just put InvalidOid into the
363 				 * list, which may or may not cause an error at runtime.
364 				 */
365 				RowCompareExpr *expr = (RowCompareExpr *) node;
366 				List	   *colls = NIL;
367 				ListCell   *l;
368 				ListCell   *r;
369 
370 				forboth(l, expr->largs, r, expr->rargs)
371 				{
372 					Node	   *le = (Node *) lfirst(l);
373 					Node	   *re = (Node *) lfirst(r);
374 					Oid			coll;
375 
376 					coll = select_common_collation(context->pstate,
377 												   list_make2(le, re),
378 												   true);
379 					colls = lappend_oid(colls, coll);
380 				}
381 				expr->inputcollids = colls;
382 
383 				/*
384 				 * Since the result is always boolean and therefore never has
385 				 * a collation, we can just stop here: this node has no impact
386 				 * on the collation of its parent.
387 				 */
388 				return false;	/* done */
389 			}
390 		case T_CoerceToDomain:
391 			{
392 				/*
393 				 * If the domain declaration included a non-default COLLATE
394 				 * spec, then use that collation as the output collation of
395 				 * the coercion.  Otherwise allow the input collation to
396 				 * bubble up.  (The input should be of the domain's base type,
397 				 * therefore we don't need to worry about it not being
398 				 * collatable when the domain is.)
399 				 */
400 				CoerceToDomain *expr = (CoerceToDomain *) node;
401 				Oid			typcollation = get_typcollation(expr->resulttype);
402 
403 				/* ... but first, recurse */
404 				(void) expression_tree_walker(node,
405 											  assign_collations_walker,
406 											  (void *) &loccontext);
407 
408 				if (OidIsValid(typcollation))
409 				{
410 					/* Node's result type is collatable. */
411 					if (typcollation == DEFAULT_COLLATION_OID)
412 					{
413 						/* Collation state bubbles up from child. */
414 						collation = loccontext.collation;
415 						strength = loccontext.strength;
416 						location = loccontext.location;
417 					}
418 					else
419 					{
420 						/* Use domain's collation as an implicit choice. */
421 						collation = typcollation;
422 						strength = COLLATE_IMPLICIT;
423 						location = exprLocation(node);
424 					}
425 				}
426 				else
427 				{
428 					/* Node's result type isn't collatable. */
429 					collation = InvalidOid;
430 					strength = COLLATE_NONE;
431 					location = -1;	/* won't be used */
432 				}
433 
434 				/*
435 				 * Save the state into the expression node.  We know it
436 				 * doesn't care about input collation.
437 				 */
438 				if (strength == COLLATE_CONFLICT)
439 					exprSetCollation(node, InvalidOid);
440 				else
441 					exprSetCollation(node, collation);
442 			}
443 			break;
444 		case T_TargetEntry:
445 			(void) expression_tree_walker(node,
446 										  assign_collations_walker,
447 										  (void *) &loccontext);
448 
449 			/*
450 			 * TargetEntry can have only one child, and should bubble that
451 			 * state up to its parent.  We can't use the general-case code
452 			 * below because exprType and friends don't work on TargetEntry.
453 			 */
454 			collation = loccontext.collation;
455 			strength = loccontext.strength;
456 			location = loccontext.location;
457 
458 			/*
459 			 * Throw error if the collation is indeterminate for a TargetEntry
460 			 * that is a sort/group target.  We prefer to do this now, instead
461 			 * of leaving the comparison functions to fail at runtime, because
462 			 * we can give a syntax error pointer to help locate the problem.
463 			 * There are some cases where there might not be a failure, for
464 			 * example if the planner chooses to use hash aggregation instead
465 			 * of sorting for grouping; but it seems better to predictably
466 			 * throw an error.  (Compare transformSetOperationTree, which will
467 			 * throw error for indeterminate collation of set-op columns, even
468 			 * though the planner might be able to implement the set-op
469 			 * without sorting.)
470 			 */
471 			if (strength == COLLATE_CONFLICT &&
472 				((TargetEntry *) node)->ressortgroupref != 0)
473 				ereport(ERROR,
474 						(errcode(ERRCODE_COLLATION_MISMATCH),
475 						 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
476 								get_collation_name(loccontext.collation),
477 								get_collation_name(loccontext.collation2)),
478 						 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
479 						 parser_errposition(context->pstate,
480 											loccontext.location2)));
481 			break;
482 		case T_InferenceElem:
483 		case T_RangeTblRef:
484 		case T_JoinExpr:
485 		case T_FromExpr:
486 		case T_OnConflictExpr:
487 		case T_SortGroupClause:
488 			(void) expression_tree_walker(node,
489 										  assign_collations_walker,
490 										  (void *) &loccontext);
491 
492 			/*
493 			 * When we're invoked on a query's jointree, we don't need to do
494 			 * anything with join nodes except recurse through them to process
495 			 * WHERE/ON expressions.  So just stop here.  Likewise, we don't
496 			 * need to do anything when invoked on sort/group lists.
497 			 */
498 			return false;
499 		case T_Query:
500 			{
501 				/*
502 				 * We get here when we're invoked on the Query belonging to a
503 				 * SubLink.  Act as though the Query returns its first output
504 				 * column, which indeed is what it does for EXPR_SUBLINK and
505 				 * ARRAY_SUBLINK cases.  In the cases where the SubLink
506 				 * returns boolean, this info will be ignored.  Special case:
507 				 * in EXISTS, the Query might return no columns, in which case
508 				 * we need do nothing.
509 				 *
510 				 * We needn't recurse, since the Query is already processed.
511 				 */
512 				Query	   *qtree = (Query *) node;
513 				TargetEntry *tent;
514 
515 				if (qtree->targetList == NIL)
516 					return false;
517 				tent = linitial_node(TargetEntry, qtree->targetList);
518 				if (tent->resjunk)
519 					return false;
520 
521 				collation = exprCollation((Node *) tent->expr);
522 				/* collation doesn't change if it's converted to array */
523 				strength = COLLATE_IMPLICIT;
524 				location = exprLocation((Node *) tent->expr);
525 			}
526 			break;
527 		case T_List:
528 			(void) expression_tree_walker(node,
529 										  assign_collations_walker,
530 										  (void *) &loccontext);
531 
532 			/*
533 			 * When processing a list, collation state just bubbles up from
534 			 * the list elements.
535 			 */
536 			collation = loccontext.collation;
537 			strength = loccontext.strength;
538 			location = loccontext.location;
539 			break;
540 
541 		case T_Var:
542 		case T_Const:
543 		case T_Param:
544 		case T_CoerceToDomainValue:
545 		case T_CaseTestExpr:
546 		case T_SetToDefault:
547 		case T_CurrentOfExpr:
548 
549 			/*
550 			 * General case for childless expression nodes.  These should
551 			 * already have a collation assigned; it is not this function's
552 			 * responsibility to look into the catalogs for base-case
553 			 * information.
554 			 */
555 			collation = exprCollation(node);
556 
557 			/*
558 			 * Note: in most cases, there will be an assigned collation
559 			 * whenever type_is_collatable(exprType(node)); but an exception
560 			 * occurs for a Var referencing a subquery output column for which
561 			 * a unique collation was not determinable.  That may lead to a
562 			 * runtime failure if a collation-sensitive function is applied to
563 			 * the Var.
564 			 */
565 
566 			if (OidIsValid(collation))
567 				strength = COLLATE_IMPLICIT;
568 			else
569 				strength = COLLATE_NONE;
570 			location = exprLocation(node);
571 			break;
572 
573 		default:
574 			{
575 				/*
576 				 * General case for most expression nodes with children. First
577 				 * recurse, then figure out what to assign to this node.
578 				 */
579 				Oid			typcollation;
580 
581 				/*
582 				 * For most node types, we want to treat all the child
583 				 * expressions alike; but there are a few exceptions, hence
584 				 * this inner switch.
585 				 */
586 				switch (nodeTag(node))
587 				{
588 					case T_Aggref:
589 						{
590 							/*
591 							 * Aggref is messy enough that we give it its own
592 							 * function, in fact three of them.  The FILTER
593 							 * clause is independent of the rest of the
594 							 * aggregate, however, so it can be processed
595 							 * separately.
596 							 */
597 							Aggref	   *aggref = (Aggref *) node;
598 
599 							switch (aggref->aggkind)
600 							{
601 								case AGGKIND_NORMAL:
602 									assign_aggregate_collations(aggref,
603 																&loccontext);
604 									break;
605 								case AGGKIND_ORDERED_SET:
606 									assign_ordered_set_collations(aggref,
607 																  &loccontext);
608 									break;
609 								case AGGKIND_HYPOTHETICAL:
610 									assign_hypothetical_collations(aggref,
611 																   &loccontext);
612 									break;
613 								default:
614 									elog(ERROR, "unrecognized aggkind: %d",
615 										 (int) aggref->aggkind);
616 							}
617 
618 							assign_expr_collations(context->pstate,
619 												   (Node *) aggref->aggfilter);
620 						}
621 						break;
622 					case T_WindowFunc:
623 						{
624 							/*
625 							 * WindowFunc requires special processing only for
626 							 * its aggfilter clause, as for aggregates.
627 							 */
628 							WindowFunc *wfunc = (WindowFunc *) node;
629 
630 							(void) assign_collations_walker((Node *) wfunc->args,
631 															&loccontext);
632 
633 							assign_expr_collations(context->pstate,
634 												   (Node *) wfunc->aggfilter);
635 						}
636 						break;
637 					case T_CaseExpr:
638 						{
639 							/*
640 							 * CaseExpr is a special case because we do not
641 							 * want to recurse into the test expression (if
642 							 * any).  It was already marked with collations
643 							 * during transformCaseExpr, and furthermore its
644 							 * collation is not relevant to the result of the
645 							 * CASE --- only the output expressions are.
646 							 */
647 							CaseExpr   *expr = (CaseExpr *) node;
648 							ListCell   *lc;
649 
650 							foreach(lc, expr->args)
651 							{
652 								CaseWhen   *when = lfirst_node(CaseWhen, lc);
653 
654 								/*
655 								 * The condition expressions mustn't affect
656 								 * the CASE's result collation either; but
657 								 * since they are known to yield boolean, it's
658 								 * safe to recurse directly on them --- they
659 								 * won't change loccontext.
660 								 */
661 								(void) assign_collations_walker((Node *) when->expr,
662 																&loccontext);
663 								(void) assign_collations_walker((Node *) when->result,
664 																&loccontext);
665 							}
666 							(void) assign_collations_walker((Node *) expr->defresult,
667 															&loccontext);
668 						}
669 						break;
670 					default:
671 
672 						/*
673 						 * Normal case: all child expressions contribute
674 						 * equally to loccontext.
675 						 */
676 						(void) expression_tree_walker(node,
677 													  assign_collations_walker,
678 													  (void *) &loccontext);
679 						break;
680 				}
681 
682 				/*
683 				 * Now figure out what collation to assign to this node.
684 				 */
685 				typcollation = get_typcollation(exprType(node));
686 				if (OidIsValid(typcollation))
687 				{
688 					/* Node's result is collatable; what about its input? */
689 					if (loccontext.strength > COLLATE_NONE)
690 					{
691 						/* Collation state bubbles up from children. */
692 						collation = loccontext.collation;
693 						strength = loccontext.strength;
694 						location = loccontext.location;
695 					}
696 					else
697 					{
698 						/*
699 						 * Collatable output produced without any collatable
700 						 * input.  Use the type's collation (which is usually
701 						 * DEFAULT_COLLATION_OID, but might be different for a
702 						 * domain).
703 						 */
704 						collation = typcollation;
705 						strength = COLLATE_IMPLICIT;
706 						location = exprLocation(node);
707 					}
708 				}
709 				else
710 				{
711 					/* Node's result type isn't collatable. */
712 					collation = InvalidOid;
713 					strength = COLLATE_NONE;
714 					location = -1;	/* won't be used */
715 				}
716 
717 				/*
718 				 * Save the result collation into the expression node. If the
719 				 * state is COLLATE_CONFLICT, we'll set the collation to
720 				 * InvalidOid, which might result in an error at runtime.
721 				 */
722 				if (strength == COLLATE_CONFLICT)
723 					exprSetCollation(node, InvalidOid);
724 				else
725 					exprSetCollation(node, collation);
726 
727 				/*
728 				 * Likewise save the input collation, which is the one that
729 				 * any function called by this node should use.
730 				 */
731 				if (loccontext.strength == COLLATE_CONFLICT)
732 					exprSetInputCollation(node, InvalidOid);
733 				else
734 					exprSetInputCollation(node, loccontext.collation);
735 			}
736 			break;
737 	}
738 
739 	/*
740 	 * Now, merge my information into my parent's state.
741 	 */
742 	merge_collation_state(collation,
743 						  strength,
744 						  location,
745 						  loccontext.collation2,
746 						  loccontext.location2,
747 						  context);
748 
749 	return false;
750 }
751 
752 /*
753  * Merge collation state of a subexpression into the context for its parent.
754  */
755 static void
merge_collation_state(Oid collation,CollateStrength strength,int location,Oid collation2,int location2,assign_collations_context * context)756 merge_collation_state(Oid collation,
757 					  CollateStrength strength,
758 					  int location,
759 					  Oid collation2,
760 					  int location2,
761 					  assign_collations_context *context)
762 {
763 	/*
764 	 * If the collation strength for this node is different from what's
765 	 * already in *context, then this node either dominates or is dominated by
766 	 * earlier siblings.
767 	 */
768 	if (strength > context->strength)
769 	{
770 		/* Override previous parent state */
771 		context->collation = collation;
772 		context->strength = strength;
773 		context->location = location;
774 		/* Bubble up error info if applicable */
775 		if (strength == COLLATE_CONFLICT)
776 		{
777 			context->collation2 = collation2;
778 			context->location2 = location2;
779 		}
780 	}
781 	else if (strength == context->strength)
782 	{
783 		/* Merge, or detect error if there's a collation conflict */
784 		switch (strength)
785 		{
786 			case COLLATE_NONE:
787 				/* Nothing + nothing is still nothing */
788 				break;
789 			case COLLATE_IMPLICIT:
790 				if (collation != context->collation)
791 				{
792 					/*
793 					 * Non-default implicit collation always beats default.
794 					 */
795 					if (context->collation == DEFAULT_COLLATION_OID)
796 					{
797 						/* Override previous parent state */
798 						context->collation = collation;
799 						context->strength = strength;
800 						context->location = location;
801 					}
802 					else if (collation != DEFAULT_COLLATION_OID)
803 					{
804 						/*
805 						 * Oops, we have a conflict.  We cannot throw error
806 						 * here, since the conflict could be resolved by a
807 						 * later sibling CollateExpr, or the parent might not
808 						 * care about collation anyway.  Return enough info to
809 						 * throw the error later, if needed.
810 						 */
811 						context->strength = COLLATE_CONFLICT;
812 						context->collation2 = collation;
813 						context->location2 = location;
814 					}
815 				}
816 				break;
817 			case COLLATE_CONFLICT:
818 				/* We're still conflicted ... */
819 				break;
820 			case COLLATE_EXPLICIT:
821 				if (collation != context->collation)
822 				{
823 					/*
824 					 * Oops, we have a conflict of explicit COLLATE clauses.
825 					 * Here we choose to throw error immediately; that is what
826 					 * the SQL standard says to do, and there's no good reason
827 					 * to be less strict.
828 					 */
829 					ereport(ERROR,
830 							(errcode(ERRCODE_COLLATION_MISMATCH),
831 							 errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
832 									get_collation_name(context->collation),
833 									get_collation_name(collation)),
834 							 parser_errposition(context->pstate, location)));
835 				}
836 				break;
837 		}
838 	}
839 }
840 
841 /*
842  * Aggref is a special case because expressions used only for ordering
843  * shouldn't be taken to conflict with each other or with regular args,
844  * indeed shouldn't affect the aggregate's result collation at all.
845  * We handle this by applying assign_expr_collations() to them rather than
846  * passing down our loccontext.
847  *
848  * Note that we recurse to each TargetEntry, not directly to its contained
849  * expression, so that the case above for T_TargetEntry will complain if we
850  * can't resolve a collation for an ORDER BY item (whether or not it is also
851  * a normal aggregate arg).
852  *
853  * We need not recurse into the aggorder or aggdistinct lists, because those
854  * contain only SortGroupClause nodes which we need not process.
855  */
856 static void
assign_aggregate_collations(Aggref * aggref,assign_collations_context * loccontext)857 assign_aggregate_collations(Aggref *aggref,
858 							assign_collations_context *loccontext)
859 {
860 	ListCell   *lc;
861 
862 	/* Plain aggregates have no direct args */
863 	Assert(aggref->aggdirectargs == NIL);
864 
865 	/* Process aggregated args, holding resjunk ones at arm's length */
866 	foreach(lc, aggref->args)
867 	{
868 		TargetEntry *tle = lfirst_node(TargetEntry, lc);
869 
870 		if (tle->resjunk)
871 			assign_expr_collations(loccontext->pstate, (Node *) tle);
872 		else
873 			(void) assign_collations_walker((Node *) tle, loccontext);
874 	}
875 }
876 
877 /*
878  * For ordered-set aggregates, it's somewhat unclear how best to proceed.
879  * The spec-defined inverse distribution functions have only one sort column
880  * and don't return collatable types, but this is clearly too restrictive in
881  * the general case.  Our solution is to consider that the aggregate's direct
882  * arguments contribute normally to determination of the aggregate's own
883  * collation, while aggregated arguments contribute only when the aggregate
884  * is designed to have exactly one aggregated argument (i.e., it has a single
885  * aggregated argument and is non-variadic).  If it can have more than one
886  * aggregated argument, we process the aggregated arguments as independent
887  * sort columns.  This avoids throwing error for something like
888  *		agg(...) within group (order by x collate "foo", y collate "bar")
889  * while also guaranteeing that variadic aggregates don't change in behavior
890  * depending on how many sort columns a particular call happens to have.
891  *
892  * Otherwise this is much like the plain-aggregate case.
893  */
894 static void
assign_ordered_set_collations(Aggref * aggref,assign_collations_context * loccontext)895 assign_ordered_set_collations(Aggref *aggref,
896 							  assign_collations_context *loccontext)
897 {
898 	bool		merge_sort_collations;
899 	ListCell   *lc;
900 
901 	/* Merge sort collations to parent only if there can be only one */
902 	merge_sort_collations = (list_length(aggref->args) == 1 &&
903 							 get_func_variadictype(aggref->aggfnoid) == InvalidOid);
904 
905 	/* Direct args, if any, are normal children of the Aggref node */
906 	(void) assign_collations_walker((Node *) aggref->aggdirectargs,
907 									loccontext);
908 
909 	/* Process aggregated args appropriately */
910 	foreach(lc, aggref->args)
911 	{
912 		TargetEntry *tle = lfirst_node(TargetEntry, lc);
913 
914 		if (merge_sort_collations)
915 			(void) assign_collations_walker((Node *) tle, loccontext);
916 		else
917 			assign_expr_collations(loccontext->pstate, (Node *) tle);
918 	}
919 }
920 
921 /*
922  * Hypothetical-set aggregates are even more special: per spec, we need to
923  * unify the collations of each pair of hypothetical and aggregated args.
924  * And we need to force the choice of collation down into the sort column
925  * to ensure that the sort happens with the chosen collation.  Other than
926  * that, the behavior is like regular ordered-set aggregates.  Note that
927  * hypothetical direct arguments contribute to the aggregate collation
928  * only when their partner aggregated arguments do.
929  */
930 static void
assign_hypothetical_collations(Aggref * aggref,assign_collations_context * loccontext)931 assign_hypothetical_collations(Aggref *aggref,
932 							   assign_collations_context *loccontext)
933 {
934 	ListCell   *h_cell = list_head(aggref->aggdirectargs);
935 	ListCell   *s_cell = list_head(aggref->args);
936 	bool		merge_sort_collations;
937 	int			extra_args;
938 
939 	/* Merge sort collations to parent only if there can be only one */
940 	merge_sort_collations = (list_length(aggref->args) == 1 &&
941 							 get_func_variadictype(aggref->aggfnoid) == InvalidOid);
942 
943 	/* Process any non-hypothetical direct args */
944 	extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
945 	Assert(extra_args >= 0);
946 	while (extra_args-- > 0)
947 	{
948 		(void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
949 		h_cell = lnext(h_cell);
950 	}
951 
952 	/* Scan hypothetical args and aggregated args in parallel */
953 	while (h_cell && s_cell)
954 	{
955 		Node	   *h_arg = (Node *) lfirst(h_cell);
956 		TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell);
957 		assign_collations_context paircontext;
958 
959 		/*
960 		 * Assign collations internally in this pair of expressions, then
961 		 * choose a common collation for them.  This should match
962 		 * select_common_collation(), but we can't use that function as-is
963 		 * because we need access to the whole collation state so we can
964 		 * bubble it up to the aggregate function's level.
965 		 */
966 		paircontext.pstate = loccontext->pstate;
967 		paircontext.collation = InvalidOid;
968 		paircontext.strength = COLLATE_NONE;
969 		paircontext.location = -1;
970 		/* Set these fields just to suppress uninitialized-value warnings: */
971 		paircontext.collation2 = InvalidOid;
972 		paircontext.location2 = -1;
973 
974 		(void) assign_collations_walker(h_arg, &paircontext);
975 		(void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
976 
977 		/* deal with collation conflict */
978 		if (paircontext.strength == COLLATE_CONFLICT)
979 			ereport(ERROR,
980 					(errcode(ERRCODE_COLLATION_MISMATCH),
981 					 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
982 							get_collation_name(paircontext.collation),
983 							get_collation_name(paircontext.collation2)),
984 					 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
985 					 parser_errposition(paircontext.pstate,
986 										paircontext.location2)));
987 
988 		/*
989 		 * At this point paircontext.collation can be InvalidOid only if the
990 		 * type is not collatable; no need to do anything in that case.  If we
991 		 * do have to change the sort column's collation, do it by inserting a
992 		 * RelabelType node into the sort column TLE.
993 		 *
994 		 * XXX This is pretty grotty for a couple of reasons:
995 		 * assign_collations_walker isn't supposed to be changing the
996 		 * expression structure like this, and a parse-time change of
997 		 * collation ought to be signaled by a CollateExpr not a RelabelType
998 		 * (the use of RelabelType for collation marking is supposed to be a
999 		 * planner/executor thing only).  But we have no better alternative.
1000 		 * In particular, injecting a CollateExpr could result in the
1001 		 * expression being interpreted differently after dump/reload, since
1002 		 * we might be effectively promoting an implicit collation to
1003 		 * explicit.  This kluge is relying on ruleutils.c not printing a
1004 		 * COLLATE clause for a RelabelType, and probably on some other
1005 		 * fragile behaviors.
1006 		 */
1007 		if (OidIsValid(paircontext.collation) &&
1008 			paircontext.collation != exprCollation((Node *) s_tle->expr))
1009 		{
1010 			s_tle->expr = (Expr *)
1011 				makeRelabelType(s_tle->expr,
1012 								exprType((Node *) s_tle->expr),
1013 								exprTypmod((Node *) s_tle->expr),
1014 								paircontext.collation,
1015 								COERCE_IMPLICIT_CAST);
1016 		}
1017 
1018 		/*
1019 		 * If appropriate, merge this column's collation state up to the
1020 		 * aggregate function.
1021 		 */
1022 		if (merge_sort_collations)
1023 			merge_collation_state(paircontext.collation,
1024 								  paircontext.strength,
1025 								  paircontext.location,
1026 								  paircontext.collation2,
1027 								  paircontext.location2,
1028 								  loccontext);
1029 
1030 		h_cell = lnext(h_cell);
1031 		s_cell = lnext(s_cell);
1032 	}
1033 	Assert(h_cell == NULL && s_cell == NULL);
1034 }
1035