/*-------------------------------------------------------------------------
 *
 * parse_collate.c
 *		Routines for assigning collation information.
 *
 * We choose to handle collation analysis in a post-pass over the output
 * of expression parse analysis.  This is because we need more state to
 * perform this processing than is needed in the finished tree.  If we
 * did it on-the-fly while building the tree, all that state would have
 * to be kept in expression node trees permanently.  This way, the extra
 * storage is just local variables in this recursive routine.
 *
 * The info that is actually saved in the finished tree is:
 * 1. The output collation of each expression node, or InvalidOid if it
 * returns a noncollatable data type.  This can also be InvalidOid if the
 * result type is collatable but the collation is indeterminate.
 * 2. The collation to be used in executing each function.  InvalidOid means
 * that there are no collatable inputs or their collation is indeterminate.
 * This value is only stored in node types that might call collation-using
 * functions.
 *
 * You might think we could get away with storing only one collation per
 * node, but the two concepts really need to be kept distinct.  Otherwise
 * it's too confusing when a function produces a collatable output type but
 * has no collatable inputs or produces noncollatable output from collatable
 * inputs.
 *
 * Cases with indeterminate collation might result in an error being thrown
 * at runtime.  If we knew exactly which functions require collation
 * information, we could throw those errors at parse time instead.
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/parser/parse_collate.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "catalog/pg_aggregate.h"
#include "catalog/pg_collation.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "parser/parse_collate.h"
#include "utils/lsyscache.h"


/*
 * Collation strength (the SQL standard calls this "derivation").
 *
 * The order of the values is significant: merge_collation_state() compares
 * strengths numerically, and a sibling with a larger strength overrides the
 * state accumulated from siblings with a smaller one.  Do not reorder.
 *
 * Note: the standard doesn't seem to distinguish between NONE and CONFLICT.
 */
typedef enum
{
	COLLATE_NONE,				/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT,			/* collation was derived implicitly */
	COLLATE_CONFLICT,			/* we had a conflict of implicit collations */
	COLLATE_EXPLICIT			/* collation was derived explicitly */
} CollateStrength;

64 typedef struct
65 {
66 	ParseState *pstate;			/* parse state (for error reporting) */
67 	Oid			collation;		/* OID of current collation, if any */
68 	CollateStrength strength;	/* strength of current collation choice */
69 	int			location;		/* location of expr that set collation */
70 	/* Remaining fields are only valid when strength == COLLATE_CONFLICT */
71 	Oid			collation2;		/* OID of conflicting collation */
72 	int			location2;		/* location of expr that set collation2 */
73 } assign_collations_context;
74 
75 static bool assign_query_collations_walker(Node *node, ParseState *pstate);
76 static bool assign_collations_walker(Node *node,
77 						 assign_collations_context *context);
78 static void merge_collation_state(Oid collation,
79 					  CollateStrength strength,
80 					  int location,
81 					  Oid collation2,
82 					  int location2,
83 					  assign_collations_context *context);
84 static void assign_aggregate_collations(Aggref *aggref,
85 							assign_collations_context *loccontext);
86 static void assign_ordered_set_collations(Aggref *aggref,
87 							  assign_collations_context *loccontext);
88 static void assign_hypothetical_collations(Aggref *aggref,
89 							   assign_collations_context *loccontext);
90 
91 
92 /*
93  * assign_query_collations()
94  *		Mark all expressions in the given Query with collation information.
95  *
96  * This should be applied to each Query after completion of parse analysis
97  * for expressions.  Note that we do not recurse into sub-Queries, since
98  * those should have been processed when built.
99  */
100 void
assign_query_collations(ParseState * pstate,Query * query)101 assign_query_collations(ParseState *pstate, Query *query)
102 {
103 	/*
104 	 * We just use query_tree_walker() to visit all the contained expressions.
105 	 * We can skip the rangetable and CTE subqueries, though, since RTEs and
106 	 * subqueries had better have been processed already (else Vars referring
107 	 * to them would not get created with the right collation).
108 	 */
109 	(void) query_tree_walker(query,
110 							 assign_query_collations_walker,
111 							 (void *) pstate,
112 							 QTW_IGNORE_RANGE_TABLE |
113 							 QTW_IGNORE_CTE_SUBQUERIES);
114 }
115 
116 /*
117  * Walker for assign_query_collations
118  *
119  * Each expression found by query_tree_walker is processed independently.
120  * Note that query_tree_walker may pass us a whole List, such as the
121  * targetlist, in which case each subexpression must be processed
122  * independently --- we don't want to bleat if two different targetentries
123  * have different collations.
124  */
125 static bool
assign_query_collations_walker(Node * node,ParseState * pstate)126 assign_query_collations_walker(Node *node, ParseState *pstate)
127 {
128 	/* Need do nothing for empty subexpressions */
129 	if (node == NULL)
130 		return false;
131 
132 	/*
133 	 * We don't want to recurse into a set-operations tree; it's already been
134 	 * fully processed in transformSetOperationStmt.
135 	 */
136 	if (IsA(node, SetOperationStmt))
137 		return false;
138 
139 	if (IsA(node, List))
140 		assign_list_collations(pstate, (List *) node);
141 	else
142 		assign_expr_collations(pstate, node);
143 
144 	return false;
145 }
146 
147 /*
148  * assign_list_collations()
149  *		Mark all nodes in the list of expressions with collation information.
150  *
151  * The list member expressions are processed independently; they do not have
152  * to share a common collation.
153  */
154 void
assign_list_collations(ParseState * pstate,List * exprs)155 assign_list_collations(ParseState *pstate, List *exprs)
156 {
157 	ListCell   *lc;
158 
159 	foreach(lc, exprs)
160 	{
161 		Node	   *node = (Node *) lfirst(lc);
162 
163 		assign_expr_collations(pstate, node);
164 	}
165 }
166 
167 /*
168  * assign_expr_collations()
169  *		Mark all nodes in the given expression tree with collation information.
170  *
171  * This is exported for the benefit of various utility commands that process
172  * expressions without building a complete Query.  It should be applied after
173  * calling transformExpr() plus any expression-modifying operations such as
174  * coerce_to_boolean().
175  */
176 void
assign_expr_collations(ParseState * pstate,Node * expr)177 assign_expr_collations(ParseState *pstate, Node *expr)
178 {
179 	assign_collations_context context;
180 
181 	/* initialize context for tree walk */
182 	context.pstate = pstate;
183 	context.collation = InvalidOid;
184 	context.strength = COLLATE_NONE;
185 	context.location = -1;
186 
187 	/* and away we go */
188 	(void) assign_collations_walker(expr, &context);
189 }
190 
191 /*
192  * select_common_collation()
193  *		Identify a common collation for a list of expressions.
194  *
195  * The expressions should all return the same datatype, else this is not
196  * terribly meaningful.
197  *
198  * none_ok means that it is permitted to return InvalidOid, indicating that
199  * no common collation could be identified, even for collatable datatypes.
200  * Otherwise, an error is thrown for conflict of implicit collations.
201  *
202  * In theory, none_ok = true reflects the rules of SQL standard clause "Result
203  * of data type combinations", none_ok = false reflects the rules of clause
204  * "Collation determination" (in some cases invoked via "Grouping
205  * operations").
206  */
207 Oid
select_common_collation(ParseState * pstate,List * exprs,bool none_ok)208 select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
209 {
210 	assign_collations_context context;
211 
212 	/* initialize context for tree walk */
213 	context.pstate = pstate;
214 	context.collation = InvalidOid;
215 	context.strength = COLLATE_NONE;
216 	context.location = -1;
217 
218 	/* and away we go */
219 	(void) assign_collations_walker((Node *) exprs, &context);
220 
221 	/* deal with collation conflict */
222 	if (context.strength == COLLATE_CONFLICT)
223 	{
224 		if (none_ok)
225 			return InvalidOid;
226 		ereport(ERROR,
227 				(errcode(ERRCODE_COLLATION_MISMATCH),
228 				 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
229 						get_collation_name(context.collation),
230 						get_collation_name(context.collation2)),
231 				 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
232 				 parser_errposition(context.pstate, context.location2)));
233 	}
234 
235 	/*
236 	 * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
237 	 * that's okay because it must mean none of the expressions returned
238 	 * collatable datatypes.
239 	 */
240 	return context.collation;
241 }
242 
243 /*
244  * assign_collations_walker()
245  *		Recursive guts of collation processing.
246  *
247  * Nodes with no children (eg, Vars, Consts, Params) must have been marked
248  * when built.  All upper-level nodes are marked here.
249  *
250  * Note: if this is invoked directly on a List, it will attempt to infer a
251  * common collation for all the list members.  In particular, it will throw
252  * error if there are conflicting explicit collations for different members.
253  */
254 static bool
assign_collations_walker(Node * node,assign_collations_context * context)255 assign_collations_walker(Node *node, assign_collations_context *context)
256 {
257 	assign_collations_context loccontext;
258 	Oid			collation;
259 	CollateStrength strength;
260 	int			location;
261 
262 	/* Need do nothing for empty subexpressions */
263 	if (node == NULL)
264 		return false;
265 
266 	/*
267 	 * Prepare for recursion.  For most node types, though not all, the first
268 	 * thing we do is recurse to process all nodes below this one. Each level
269 	 * of the tree has its own local context.
270 	 */
271 	loccontext.pstate = context->pstate;
272 	loccontext.collation = InvalidOid;
273 	loccontext.strength = COLLATE_NONE;
274 	loccontext.location = -1;
275 	/* Set these fields just to suppress uninitialized-value warnings: */
276 	loccontext.collation2 = InvalidOid;
277 	loccontext.location2 = -1;
278 
279 	/*
280 	 * Recurse if appropriate, then determine the collation for this node.
281 	 *
282 	 * Note: the general cases are at the bottom of the switch, after various
283 	 * special cases.
284 	 */
285 	switch (nodeTag(node))
286 	{
287 		case T_CollateExpr:
288 			{
289 				/*
290 				 * COLLATE sets an explicitly derived collation, regardless of
291 				 * what the child state is.  But we must recurse to set up
292 				 * collation info below here.
293 				 */
294 				CollateExpr *expr = (CollateExpr *) node;
295 
296 				(void) expression_tree_walker(node,
297 											  assign_collations_walker,
298 											  (void *) &loccontext);
299 
300 				collation = expr->collOid;
301 				Assert(OidIsValid(collation));
302 				strength = COLLATE_EXPLICIT;
303 				location = expr->location;
304 			}
305 			break;
306 		case T_FieldSelect:
307 			{
308 				/*
309 				 * For FieldSelect, the result has the field's declared
310 				 * collation, independently of what happened in the arguments.
311 				 * (The immediate argument must be composite and thus not
312 				 * collatable, anyhow.)  The field's collation was already
313 				 * looked up and saved in the node.
314 				 */
315 				FieldSelect *expr = (FieldSelect *) node;
316 
317 				/* ... but first, recurse */
318 				(void) expression_tree_walker(node,
319 											  assign_collations_walker,
320 											  (void *) &loccontext);
321 
322 				if (OidIsValid(expr->resultcollid))
323 				{
324 					/* Node's result type is collatable. */
325 					/* Pass up field's collation as an implicit choice. */
326 					collation = expr->resultcollid;
327 					strength = COLLATE_IMPLICIT;
328 					location = exprLocation(node);
329 				}
330 				else
331 				{
332 					/* Node's result type isn't collatable. */
333 					collation = InvalidOid;
334 					strength = COLLATE_NONE;
335 					location = -1;		/* won't be used */
336 				}
337 			}
338 			break;
339 		case T_RowExpr:
340 			{
341 				/*
342 				 * RowExpr is a special case because the subexpressions are
343 				 * independent: we don't want to complain if some of them have
344 				 * incompatible explicit collations.
345 				 */
346 				RowExpr    *expr = (RowExpr *) node;
347 
348 				assign_list_collations(context->pstate, expr->args);
349 
350 				/*
351 				 * Since the result is always composite and therefore never
352 				 * has a collation, we can just stop here: this node has no
353 				 * impact on the collation of its parent.
354 				 */
355 				return false;	/* done */
356 			}
357 		case T_RowCompareExpr:
358 			{
359 				/*
360 				 * For RowCompare, we have to find the common collation of
361 				 * each pair of input columns and build a list.  If we can't
362 				 * find a common collation, we just put InvalidOid into the
363 				 * list, which may or may not cause an error at runtime.
364 				 */
365 				RowCompareExpr *expr = (RowCompareExpr *) node;
366 				List	   *colls = NIL;
367 				ListCell   *l;
368 				ListCell   *r;
369 
370 				forboth(l, expr->largs, r, expr->rargs)
371 				{
372 					Node	   *le = (Node *) lfirst(l);
373 					Node	   *re = (Node *) lfirst(r);
374 					Oid			coll;
375 
376 					coll = select_common_collation(context->pstate,
377 												   list_make2(le, re),
378 												   true);
379 					colls = lappend_oid(colls, coll);
380 				}
381 				expr->inputcollids = colls;
382 
383 				/*
384 				 * Since the result is always boolean and therefore never has
385 				 * a collation, we can just stop here: this node has no impact
386 				 * on the collation of its parent.
387 				 */
388 				return false;	/* done */
389 			}
390 		case T_CoerceToDomain:
391 			{
392 				/*
393 				 * If the domain declaration included a non-default COLLATE
394 				 * spec, then use that collation as the output collation of
395 				 * the coercion.  Otherwise allow the input collation to
396 				 * bubble up.  (The input should be of the domain's base type,
397 				 * therefore we don't need to worry about it not being
398 				 * collatable when the domain is.)
399 				 */
400 				CoerceToDomain *expr = (CoerceToDomain *) node;
401 				Oid			typcollation = get_typcollation(expr->resulttype);
402 
403 				/* ... but first, recurse */
404 				(void) expression_tree_walker(node,
405 											  assign_collations_walker,
406 											  (void *) &loccontext);
407 
408 				if (OidIsValid(typcollation))
409 				{
410 					/* Node's result type is collatable. */
411 					if (typcollation == DEFAULT_COLLATION_OID)
412 					{
413 						/* Collation state bubbles up from child. */
414 						collation = loccontext.collation;
415 						strength = loccontext.strength;
416 						location = loccontext.location;
417 					}
418 					else
419 					{
420 						/* Use domain's collation as an implicit choice. */
421 						collation = typcollation;
422 						strength = COLLATE_IMPLICIT;
423 						location = exprLocation(node);
424 					}
425 				}
426 				else
427 				{
428 					/* Node's result type isn't collatable. */
429 					collation = InvalidOid;
430 					strength = COLLATE_NONE;
431 					location = -1;		/* won't be used */
432 				}
433 
434 				/*
435 				 * Save the state into the expression node.  We know it
436 				 * doesn't care about input collation.
437 				 */
438 				if (strength == COLLATE_CONFLICT)
439 					exprSetCollation(node, InvalidOid);
440 				else
441 					exprSetCollation(node, collation);
442 			}
443 			break;
444 		case T_TargetEntry:
445 			(void) expression_tree_walker(node,
446 										  assign_collations_walker,
447 										  (void *) &loccontext);
448 
449 			/*
450 			 * TargetEntry can have only one child, and should bubble that
451 			 * state up to its parent.  We can't use the general-case code
452 			 * below because exprType and friends don't work on TargetEntry.
453 			 */
454 			collation = loccontext.collation;
455 			strength = loccontext.strength;
456 			location = loccontext.location;
457 
458 			/*
459 			 * Throw error if the collation is indeterminate for a TargetEntry
460 			 * that is a sort/group target.  We prefer to do this now, instead
461 			 * of leaving the comparison functions to fail at runtime, because
462 			 * we can give a syntax error pointer to help locate the problem.
463 			 * There are some cases where there might not be a failure, for
464 			 * example if the planner chooses to use hash aggregation instead
465 			 * of sorting for grouping; but it seems better to predictably
466 			 * throw an error.  (Compare transformSetOperationTree, which will
467 			 * throw error for indeterminate collation of set-op columns, even
468 			 * though the planner might be able to implement the set-op
469 			 * without sorting.)
470 			 */
471 			if (strength == COLLATE_CONFLICT &&
472 				((TargetEntry *) node)->ressortgroupref != 0)
473 				ereport(ERROR,
474 						(errcode(ERRCODE_COLLATION_MISMATCH),
475 						 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
476 								get_collation_name(loccontext.collation),
477 								get_collation_name(loccontext.collation2)),
478 						 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
479 						 parser_errposition(context->pstate,
480 											loccontext.location2)));
481 			break;
482 		case T_InferenceElem:
483 		case T_RangeTblRef:
484 		case T_JoinExpr:
485 		case T_FromExpr:
486 		case T_OnConflictExpr:
487 		case T_SortGroupClause:
488 			(void) expression_tree_walker(node,
489 										  assign_collations_walker,
490 										  (void *) &loccontext);
491 
492 			/*
493 			 * When we're invoked on a query's jointree, we don't need to do
494 			 * anything with join nodes except recurse through them to process
495 			 * WHERE/ON expressions.  So just stop here.  Likewise, we don't
496 			 * need to do anything when invoked on sort/group lists.
497 			 */
498 			return false;
499 		case T_Query:
500 			{
501 				/*
502 				 * We get here when we're invoked on the Query belonging to a
503 				 * SubLink.  Act as though the Query returns its first output
504 				 * column, which indeed is what it does for EXPR_SUBLINK and
505 				 * ARRAY_SUBLINK cases.  In the cases where the SubLink
506 				 * returns boolean, this info will be ignored.  Special case:
507 				 * in EXISTS, the Query might return no columns, in which case
508 				 * we need do nothing.
509 				 *
510 				 * We needn't recurse, since the Query is already processed.
511 				 */
512 				Query	   *qtree = (Query *) node;
513 				TargetEntry *tent;
514 
515 				if (qtree->targetList == NIL)
516 					return false;
517 				tent = (TargetEntry *) linitial(qtree->targetList);
518 				Assert(IsA(tent, TargetEntry));
519 				if (tent->resjunk)
520 					return false;
521 
522 				collation = exprCollation((Node *) tent->expr);
523 				/* collation doesn't change if it's converted to array */
524 				strength = COLLATE_IMPLICIT;
525 				location = exprLocation((Node *) tent->expr);
526 			}
527 			break;
528 		case T_List:
529 			(void) expression_tree_walker(node,
530 										  assign_collations_walker,
531 										  (void *) &loccontext);
532 
533 			/*
534 			 * When processing a list, collation state just bubbles up from
535 			 * the list elements.
536 			 */
537 			collation = loccontext.collation;
538 			strength = loccontext.strength;
539 			location = loccontext.location;
540 			break;
541 
542 		case T_Var:
543 		case T_Const:
544 		case T_Param:
545 		case T_CoerceToDomainValue:
546 		case T_CaseTestExpr:
547 		case T_SetToDefault:
548 		case T_CurrentOfExpr:
549 
550 			/*
551 			 * General case for childless expression nodes.  These should
552 			 * already have a collation assigned; it is not this function's
553 			 * responsibility to look into the catalogs for base-case
554 			 * information.
555 			 */
556 			collation = exprCollation(node);
557 
558 			/*
559 			 * Note: in most cases, there will be an assigned collation
560 			 * whenever type_is_collatable(exprType(node)); but an exception
561 			 * occurs for a Var referencing a subquery output column for which
562 			 * a unique collation was not determinable.  That may lead to a
563 			 * runtime failure if a collation-sensitive function is applied to
564 			 * the Var.
565 			 */
566 
567 			if (OidIsValid(collation))
568 				strength = COLLATE_IMPLICIT;
569 			else
570 				strength = COLLATE_NONE;
571 			location = exprLocation(node);
572 			break;
573 
574 		default:
575 			{
576 				/*
577 				 * General case for most expression nodes with children. First
578 				 * recurse, then figure out what to assign to this node.
579 				 */
580 				Oid			typcollation;
581 
582 				/*
583 				 * For most node types, we want to treat all the child
584 				 * expressions alike; but there are a few exceptions, hence
585 				 * this inner switch.
586 				 */
587 				switch (nodeTag(node))
588 				{
589 					case T_Aggref:
590 						{
591 							/*
592 							 * Aggref is messy enough that we give it its own
593 							 * function, in fact three of them.  The FILTER
594 							 * clause is independent of the rest of the
595 							 * aggregate, however, so it can be processed
596 							 * separately.
597 							 */
598 							Aggref	   *aggref = (Aggref *) node;
599 
600 							switch (aggref->aggkind)
601 							{
602 								case AGGKIND_NORMAL:
603 									assign_aggregate_collations(aggref,
604 																&loccontext);
605 									break;
606 								case AGGKIND_ORDERED_SET:
607 									assign_ordered_set_collations(aggref,
608 																&loccontext);
609 									break;
610 								case AGGKIND_HYPOTHETICAL:
611 									assign_hypothetical_collations(aggref,
612 																&loccontext);
613 									break;
614 								default:
615 									elog(ERROR, "unrecognized aggkind: %d",
616 										 (int) aggref->aggkind);
617 							}
618 
619 							assign_expr_collations(context->pstate,
620 												 (Node *) aggref->aggfilter);
621 						}
622 						break;
623 					case T_WindowFunc:
624 						{
625 							/*
626 							 * WindowFunc requires special processing only for
627 							 * its aggfilter clause, as for aggregates.
628 							 */
629 							WindowFunc *wfunc = (WindowFunc *) node;
630 
631 							(void) assign_collations_walker((Node *) wfunc->args,
632 															&loccontext);
633 
634 							assign_expr_collations(context->pstate,
635 												   (Node *) wfunc->aggfilter);
636 						}
637 						break;
638 					case T_CaseExpr:
639 						{
640 							/*
641 							 * CaseExpr is a special case because we do not
642 							 * want to recurse into the test expression (if
643 							 * any).  It was already marked with collations
644 							 * during transformCaseExpr, and furthermore its
645 							 * collation is not relevant to the result of the
646 							 * CASE --- only the output expressions are.
647 							 */
648 							CaseExpr   *expr = (CaseExpr *) node;
649 							ListCell   *lc;
650 
651 							foreach(lc, expr->args)
652 							{
653 								CaseWhen   *when = (CaseWhen *) lfirst(lc);
654 
655 								Assert(IsA(when, CaseWhen));
656 
657 								/*
658 								 * The condition expressions mustn't affect
659 								 * the CASE's result collation either; but
660 								 * since they are known to yield boolean, it's
661 								 * safe to recurse directly on them --- they
662 								 * won't change loccontext.
663 								 */
664 								(void) assign_collations_walker((Node *) when->expr,
665 																&loccontext);
666 								(void) assign_collations_walker((Node *) when->result,
667 																&loccontext);
668 							}
669 							(void) assign_collations_walker((Node *) expr->defresult,
670 															&loccontext);
671 						}
672 						break;
673 					default:
674 
675 						/*
676 						 * Normal case: all child expressions contribute
677 						 * equally to loccontext.
678 						 */
679 						(void) expression_tree_walker(node,
680 													assign_collations_walker,
681 													  (void *) &loccontext);
682 						break;
683 				}
684 
685 				/*
686 				 * Now figure out what collation to assign to this node.
687 				 */
688 				typcollation = get_typcollation(exprType(node));
689 				if (OidIsValid(typcollation))
690 				{
691 					/* Node's result is collatable; what about its input? */
692 					if (loccontext.strength > COLLATE_NONE)
693 					{
694 						/* Collation state bubbles up from children. */
695 						collation = loccontext.collation;
696 						strength = loccontext.strength;
697 						location = loccontext.location;
698 					}
699 					else
700 					{
701 						/*
702 						 * Collatable output produced without any collatable
703 						 * input.  Use the type's collation (which is usually
704 						 * DEFAULT_COLLATION_OID, but might be different for a
705 						 * domain).
706 						 */
707 						collation = typcollation;
708 						strength = COLLATE_IMPLICIT;
709 						location = exprLocation(node);
710 					}
711 				}
712 				else
713 				{
714 					/* Node's result type isn't collatable. */
715 					collation = InvalidOid;
716 					strength = COLLATE_NONE;
717 					location = -1;		/* won't be used */
718 				}
719 
720 				/*
721 				 * Save the result collation into the expression node. If the
722 				 * state is COLLATE_CONFLICT, we'll set the collation to
723 				 * InvalidOid, which might result in an error at runtime.
724 				 */
725 				if (strength == COLLATE_CONFLICT)
726 					exprSetCollation(node, InvalidOid);
727 				else
728 					exprSetCollation(node, collation);
729 
730 				/*
731 				 * Likewise save the input collation, which is the one that
732 				 * any function called by this node should use.
733 				 */
734 				if (loccontext.strength == COLLATE_CONFLICT)
735 					exprSetInputCollation(node, InvalidOid);
736 				else
737 					exprSetInputCollation(node, loccontext.collation);
738 			}
739 			break;
740 	}
741 
742 	/*
743 	 * Now, merge my information into my parent's state.
744 	 */
745 	merge_collation_state(collation,
746 						  strength,
747 						  location,
748 						  loccontext.collation2,
749 						  loccontext.location2,
750 						  context);
751 
752 	return false;
753 }
754 
755 /*
756  * Merge collation state of a subexpression into the context for its parent.
757  */
758 static void
merge_collation_state(Oid collation,CollateStrength strength,int location,Oid collation2,int location2,assign_collations_context * context)759 merge_collation_state(Oid collation,
760 					  CollateStrength strength,
761 					  int location,
762 					  Oid collation2,
763 					  int location2,
764 					  assign_collations_context *context)
765 {
766 	/*
767 	 * If the collation strength for this node is different from what's
768 	 * already in *context, then this node either dominates or is dominated by
769 	 * earlier siblings.
770 	 */
771 	if (strength > context->strength)
772 	{
773 		/* Override previous parent state */
774 		context->collation = collation;
775 		context->strength = strength;
776 		context->location = location;
777 		/* Bubble up error info if applicable */
778 		if (strength == COLLATE_CONFLICT)
779 		{
780 			context->collation2 = collation2;
781 			context->location2 = location2;
782 		}
783 	}
784 	else if (strength == context->strength)
785 	{
786 		/* Merge, or detect error if there's a collation conflict */
787 		switch (strength)
788 		{
789 			case COLLATE_NONE:
790 				/* Nothing + nothing is still nothing */
791 				break;
792 			case COLLATE_IMPLICIT:
793 				if (collation != context->collation)
794 				{
795 					/*
796 					 * Non-default implicit collation always beats default.
797 					 */
798 					if (context->collation == DEFAULT_COLLATION_OID)
799 					{
800 						/* Override previous parent state */
801 						context->collation = collation;
802 						context->strength = strength;
803 						context->location = location;
804 					}
805 					else if (collation != DEFAULT_COLLATION_OID)
806 					{
807 						/*
808 						 * Ooops, we have a conflict.  We cannot throw error
809 						 * here, since the conflict could be resolved by a
810 						 * later sibling CollateExpr, or the parent might not
811 						 * care about collation anyway.  Return enough info to
812 						 * throw the error later, if needed.
813 						 */
814 						context->strength = COLLATE_CONFLICT;
815 						context->collation2 = collation;
816 						context->location2 = location;
817 					}
818 				}
819 				break;
820 			case COLLATE_CONFLICT:
821 				/* We're still conflicted ... */
822 				break;
823 			case COLLATE_EXPLICIT:
824 				if (collation != context->collation)
825 				{
826 					/*
827 					 * Ooops, we have a conflict of explicit COLLATE clauses.
828 					 * Here we choose to throw error immediately; that is what
829 					 * the SQL standard says to do, and there's no good reason
830 					 * to be less strict.
831 					 */
832 					ereport(ERROR,
833 							(errcode(ERRCODE_COLLATION_MISMATCH),
834 							 errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
835 									get_collation_name(context->collation),
836 									get_collation_name(collation)),
837 							 parser_errposition(context->pstate, location)));
838 				}
839 				break;
840 		}
841 	}
842 }
843 
844 /*
845  * Aggref is a special case because expressions used only for ordering
846  * shouldn't be taken to conflict with each other or with regular args,
847  * indeed shouldn't affect the aggregate's result collation at all.
848  * We handle this by applying assign_expr_collations() to them rather than
849  * passing down our loccontext.
850  *
851  * Note that we recurse to each TargetEntry, not directly to its contained
852  * expression, so that the case above for T_TargetEntry will complain if we
853  * can't resolve a collation for an ORDER BY item (whether or not it is also
854  * a normal aggregate arg).
855  *
856  * We need not recurse into the aggorder or aggdistinct lists, because those
857  * contain only SortGroupClause nodes which we need not process.
858  */
859 static void
assign_aggregate_collations(Aggref * aggref,assign_collations_context * loccontext)860 assign_aggregate_collations(Aggref *aggref,
861 							assign_collations_context *loccontext)
862 {
863 	ListCell   *lc;
864 
865 	/* Plain aggregates have no direct args */
866 	Assert(aggref->aggdirectargs == NIL);
867 
868 	/* Process aggregated args, holding resjunk ones at arm's length */
869 	foreach(lc, aggref->args)
870 	{
871 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
872 
873 		Assert(IsA(tle, TargetEntry));
874 		if (tle->resjunk)
875 			assign_expr_collations(loccontext->pstate, (Node *) tle);
876 		else
877 			(void) assign_collations_walker((Node *) tle, loccontext);
878 	}
879 }
880 
881 /*
882  * For ordered-set aggregates, it's somewhat unclear how best to proceed.
883  * The spec-defined inverse distribution functions have only one sort column
884  * and don't return collatable types, but this is clearly too restrictive in
885  * the general case.  Our solution is to consider that the aggregate's direct
886  * arguments contribute normally to determination of the aggregate's own
887  * collation, while aggregated arguments contribute only when the aggregate
888  * is designed to have exactly one aggregated argument (i.e., it has a single
889  * aggregated argument and is non-variadic).  If it can have more than one
890  * aggregated argument, we process the aggregated arguments as independent
891  * sort columns.  This avoids throwing error for something like
892  *		agg(...) within group (order by x collate "foo", y collate "bar")
893  * while also guaranteeing that variadic aggregates don't change in behavior
894  * depending on how many sort columns a particular call happens to have.
895  *
896  * Otherwise this is much like the plain-aggregate case.
897  */
898 static void
assign_ordered_set_collations(Aggref * aggref,assign_collations_context * loccontext)899 assign_ordered_set_collations(Aggref *aggref,
900 							  assign_collations_context *loccontext)
901 {
902 	bool		merge_sort_collations;
903 	ListCell   *lc;
904 
905 	/* Merge sort collations to parent only if there can be only one */
906 	merge_sort_collations = (list_length(aggref->args) == 1 &&
907 					  get_func_variadictype(aggref->aggfnoid) == InvalidOid);
908 
909 	/* Direct args, if any, are normal children of the Aggref node */
910 	(void) assign_collations_walker((Node *) aggref->aggdirectargs,
911 									loccontext);
912 
913 	/* Process aggregated args appropriately */
914 	foreach(lc, aggref->args)
915 	{
916 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
917 
918 		Assert(IsA(tle, TargetEntry));
919 		if (merge_sort_collations)
920 			(void) assign_collations_walker((Node *) tle, loccontext);
921 		else
922 			assign_expr_collations(loccontext->pstate, (Node *) tle);
923 	}
924 }
925 
926 /*
927  * Hypothetical-set aggregates are even more special: per spec, we need to
928  * unify the collations of each pair of hypothetical and aggregated args.
929  * And we need to force the choice of collation down into the sort column
930  * to ensure that the sort happens with the chosen collation.  Other than
931  * that, the behavior is like regular ordered-set aggregates.  Note that
932  * hypothetical direct arguments contribute to the aggregate collation
933  * only when their partner aggregated arguments do.
934  */
935 static void
assign_hypothetical_collations(Aggref * aggref,assign_collations_context * loccontext)936 assign_hypothetical_collations(Aggref *aggref,
937 							   assign_collations_context *loccontext)
938 {
939 	ListCell   *h_cell = list_head(aggref->aggdirectargs);
940 	ListCell   *s_cell = list_head(aggref->args);
941 	bool		merge_sort_collations;
942 	int			extra_args;
943 
944 	/* Merge sort collations to parent only if there can be only one */
945 	merge_sort_collations = (list_length(aggref->args) == 1 &&
946 					  get_func_variadictype(aggref->aggfnoid) == InvalidOid);
947 
948 	/* Process any non-hypothetical direct args */
949 	extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
950 	Assert(extra_args >= 0);
951 	while (extra_args-- > 0)
952 	{
953 		(void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
954 		h_cell = lnext(h_cell);
955 	}
956 
957 	/* Scan hypothetical args and aggregated args in parallel */
958 	while (h_cell && s_cell)
959 	{
960 		Node	   *h_arg = (Node *) lfirst(h_cell);
961 		TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell);
962 		assign_collations_context paircontext;
963 
964 		/*
965 		 * Assign collations internally in this pair of expressions, then
966 		 * choose a common collation for them.  This should match
967 		 * select_common_collation(), but we can't use that function as-is
968 		 * because we need access to the whole collation state so we can
969 		 * bubble it up to the aggregate function's level.
970 		 */
971 		paircontext.pstate = loccontext->pstate;
972 		paircontext.collation = InvalidOid;
973 		paircontext.strength = COLLATE_NONE;
974 		paircontext.location = -1;
975 		/* Set these fields just to suppress uninitialized-value warnings: */
976 		paircontext.collation2 = InvalidOid;
977 		paircontext.location2 = -1;
978 
979 		(void) assign_collations_walker(h_arg, &paircontext);
980 		(void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
981 
982 		/* deal with collation conflict */
983 		if (paircontext.strength == COLLATE_CONFLICT)
984 			ereport(ERROR,
985 					(errcode(ERRCODE_COLLATION_MISMATCH),
986 					 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
987 							get_collation_name(paircontext.collation),
988 							get_collation_name(paircontext.collation2)),
989 					 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
990 					 parser_errposition(paircontext.pstate,
991 										paircontext.location2)));
992 
993 		/*
994 		 * At this point paircontext.collation can be InvalidOid only if the
995 		 * type is not collatable; no need to do anything in that case.  If we
996 		 * do have to change the sort column's collation, do it by inserting a
997 		 * RelabelType node into the sort column TLE.
998 		 *
999 		 * XXX This is pretty grotty for a couple of reasons:
1000 		 * assign_collations_walker isn't supposed to be changing the
1001 		 * expression structure like this, and a parse-time change of
1002 		 * collation ought to be signaled by a CollateExpr not a RelabelType
1003 		 * (the use of RelabelType for collation marking is supposed to be a
1004 		 * planner/executor thing only).  But we have no better alternative.
1005 		 * In particular, injecting a CollateExpr could result in the
1006 		 * expression being interpreted differently after dump/reload, since
1007 		 * we might be effectively promoting an implicit collation to
1008 		 * explicit.  This kluge is relying on ruleutils.c not printing a
1009 		 * COLLATE clause for a RelabelType, and probably on some other
1010 		 * fragile behaviors.
1011 		 */
1012 		if (OidIsValid(paircontext.collation) &&
1013 			paircontext.collation != exprCollation((Node *) s_tle->expr))
1014 		{
1015 			s_tle->expr = (Expr *)
1016 				makeRelabelType(s_tle->expr,
1017 								exprType((Node *) s_tle->expr),
1018 								exprTypmod((Node *) s_tle->expr),
1019 								paircontext.collation,
1020 								COERCE_IMPLICIT_CAST);
1021 		}
1022 
1023 		/*
1024 		 * If appropriate, merge this column's collation state up to the
1025 		 * aggregate function.
1026 		 */
1027 		if (merge_sort_collations)
1028 			merge_collation_state(paircontext.collation,
1029 								  paircontext.strength,
1030 								  paircontext.location,
1031 								  paircontext.collation2,
1032 								  paircontext.location2,
1033 								  loccontext);
1034 
1035 		h_cell = lnext(h_cell);
1036 		s_cell = lnext(s_cell);
1037 	}
1038 	Assert(h_cell == NULL && s_cell == NULL);
1039 }
1040