1 /*------------------------------------------------------------------------- 2 * 3 * parse_collate.c 4 * Routines for assigning collation information. 5 * 6 * We choose to handle collation analysis in a post-pass over the output 7 * of expression parse analysis. This is because we need more state to 8 * perform this processing than is needed in the finished tree. If we 9 * did it on-the-fly while building the tree, all that state would have 10 * to be kept in expression node trees permanently. This way, the extra 11 * storage is just local variables in this recursive routine. 12 * 13 * The info that is actually saved in the finished tree is: 14 * 1. The output collation of each expression node, or InvalidOid if it 15 * returns a noncollatable data type. This can also be InvalidOid if the 16 * result type is collatable but the collation is indeterminate. 17 * 2. The collation to be used in executing each function. InvalidOid means 18 * that there are no collatable inputs or their collation is indeterminate. 19 * This value is only stored in node types that might call collation-using 20 * functions. 21 * 22 * You might think we could get away with storing only one collation per 23 * node, but the two concepts really need to be kept distinct. Otherwise 24 * it's too confusing when a function produces a collatable output type but 25 * has no collatable inputs or produces noncollatable output from collatable 26 * inputs. 27 * 28 * Cases with indeterminate collation might result in an error being thrown 29 * at runtime. If we knew exactly which functions require collation 30 * information, we could throw those errors at parse time instead. 31 * 32 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 33 * Portions Copyright (c) 1994, Regents of the University of California 34 * 35 * 36 * IDENTIFICATION 37 * src/backend/parser/parse_collate.c 38 * 39 *------------------------------------------------------------------------- 40 */ 41 #include "postgres.h" 42 43 #include "catalog/pg_aggregate.h" 44 #include "catalog/pg_collation.h" 45 #include "nodes/makefuncs.h" 46 #include "nodes/nodeFuncs.h" 47 #include "parser/parse_collate.h" 48 #include "utils/lsyscache.h" 49 50 51 /* 52 * Collation strength (the SQL standard calls this "derivation"). Order is 53 * chosen to allow comparisons to work usefully. Note: the standard doesn't 54 * seem to distinguish between NONE and CONFLICT. 55 */ 56 typedef enum 57 { 58 COLLATE_NONE, /* expression is of a noncollatable datatype */ 59 COLLATE_IMPLICIT, /* collation was derived implicitly */ 60 COLLATE_CONFLICT, /* we had a conflict of implicit collations */ 61 COLLATE_EXPLICIT /* collation was derived explicitly */ 62 } CollateStrength; 63 64 typedef struct 65 { 66 ParseState *pstate; /* parse state (for error reporting) */ 67 Oid collation; /* OID of current collation, if any */ 68 CollateStrength strength; /* strength of current collation choice */ 69 int location; /* location of expr that set collation */ 70 /* Remaining fields are only valid when strength == COLLATE_CONFLICT */ 71 Oid collation2; /* OID of conflicting collation */ 72 int location2; /* location of expr that set collation2 */ 73 } assign_collations_context; 74 75 static bool assign_query_collations_walker(Node *node, ParseState *pstate); 76 static bool assign_collations_walker(Node *node, 77 assign_collations_context *context); 78 static void merge_collation_state(Oid collation, 79 CollateStrength strength, 80 int location, 81 Oid collation2, 82 int location2, 83 assign_collations_context *context); 84 static void assign_aggregate_collations(Aggref *aggref, 85 assign_collations_context *loccontext); 86 static void assign_ordered_set_collations(Aggref *aggref, 87 assign_collations_context *loccontext); 88 static void assign_hypothetical_collations(Aggref *aggref, 89 assign_collations_context *loccontext); 90 91 92 /* 93 * assign_query_collations() 94 * Mark all expressions in the given Query with collation information. 95 * 96 * This should be applied to each Query after completion of parse analysis 97 * for expressions. Note that we do not recurse into sub-Queries, since 98 * those should have been processed when built. 99 */ 100 void 101 assign_query_collations(ParseState *pstate, Query *query) 102 { 103 /* 104 * We just use query_tree_walker() to visit all the contained expressions. 105 * We can skip the rangetable and CTE subqueries, though, since RTEs and 106 * subqueries had better have been processed already (else Vars referring 107 * to them would not get created with the right collation). 108 */ 109 (void) query_tree_walker(query, 110 assign_query_collations_walker, 111 (void *) pstate, 112 QTW_IGNORE_RANGE_TABLE | 113 QTW_IGNORE_CTE_SUBQUERIES); 114 } 115 116 /* 117 * Walker for assign_query_collations 118 * 119 * Each expression found by query_tree_walker is processed independently. 120 * Note that query_tree_walker may pass us a whole List, such as the 121 * targetlist, in which case each subexpression must be processed 122 * independently --- we don't want to bleat if two different targetentries 123 * have different collations. 124 */ 125 static bool 126 assign_query_collations_walker(Node *node, ParseState *pstate) 127 { 128 /* Need do nothing for empty subexpressions */ 129 if (node == NULL) 130 return false; 131 132 /* 133 * We don't want to recurse into a set-operations tree; it's already been 134 * fully processed in transformSetOperationStmt. 135 */ 136 if (IsA(node, SetOperationStmt)) 137 return false; 138 139 if (IsA(node, List)) 140 assign_list_collations(pstate, (List *) node); 141 else 142 assign_expr_collations(pstate, node); 143 144 return false; 145 } 146 147 /* 148 * assign_list_collations() 149 * Mark all nodes in the list of expressions with collation information. 150 * 151 * The list member expressions are processed independently; they do not have 152 * to share a common collation. 153 */ 154 void 155 assign_list_collations(ParseState *pstate, List *exprs) 156 { 157 ListCell *lc; 158 159 foreach(lc, exprs) 160 { 161 Node *node = (Node *) lfirst(lc); 162 163 assign_expr_collations(pstate, node); 164 } 165 } 166 167 /* 168 * assign_expr_collations() 169 * Mark all nodes in the given expression tree with collation information. 170 * 171 * This is exported for the benefit of various utility commands that process 172 * expressions without building a complete Query. It should be applied after 173 * calling transformExpr() plus any expression-modifying operations such as 174 * coerce_to_boolean(). 175 */ 176 void 177 assign_expr_collations(ParseState *pstate, Node *expr) 178 { 179 assign_collations_context context; 180 181 /* initialize context for tree walk */ 182 context.pstate = pstate; 183 context.collation = InvalidOid; 184 context.strength = COLLATE_NONE; 185 context.location = -1; 186 187 /* and away we go */ 188 (void) assign_collations_walker(expr, &context); 189 } 190 191 /* 192 * select_common_collation() 193 * Identify a common collation for a list of expressions. 194 * 195 * The expressions should all return the same datatype, else this is not 196 * terribly meaningful. 197 * 198 * none_ok means that it is permitted to return InvalidOid, indicating that 199 * no common collation could be identified, even for collatable datatypes. 200 * Otherwise, an error is thrown for conflict of implicit collations. 201 * 202 * In theory, none_ok = true reflects the rules of SQL standard clause "Result 203 * of data type combinations", none_ok = false reflects the rules of clause 204 * "Collation determination" (in some cases invoked via "Grouping 205 * operations"). 206 */ 207 Oid 208 select_common_collation(ParseState *pstate, List *exprs, bool none_ok) 209 { 210 assign_collations_context context; 211 212 /* initialize context for tree walk */ 213 context.pstate = pstate; 214 context.collation = InvalidOid; 215 context.strength = COLLATE_NONE; 216 context.location = -1; 217 218 /* and away we go */ 219 (void) assign_collations_walker((Node *) exprs, &context); 220 221 /* deal with collation conflict */ 222 if (context.strength == COLLATE_CONFLICT) 223 { 224 if (none_ok) 225 return InvalidOid; 226 ereport(ERROR, 227 (errcode(ERRCODE_COLLATION_MISMATCH), 228 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"", 229 get_collation_name(context.collation), 230 get_collation_name(context.collation2)), 231 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."), 232 parser_errposition(context.pstate, context.location2))); 233 } 234 235 /* 236 * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but 237 * that's okay because it must mean none of the expressions returned 238 * collatable datatypes. 239 */ 240 return context.collation; 241 } 242 243 /* 244 * assign_collations_walker() 245 * Recursive guts of collation processing. 246 * 247 * Nodes with no children (eg, Vars, Consts, Params) must have been marked 248 * when built. All upper-level nodes are marked here. 249 * 250 * Note: if this is invoked directly on a List, it will attempt to infer a 251 * common collation for all the list members. In particular, it will throw 252 * error if there are conflicting explicit collations for different members. 253 */ 254 static bool 255 assign_collations_walker(Node *node, assign_collations_context *context) 256 { 257 assign_collations_context loccontext; 258 Oid collation; 259 CollateStrength strength; 260 int location; 261 262 /* Need do nothing for empty subexpressions */ 263 if (node == NULL) 264 return false; 265 266 /* 267 * Prepare for recursion. For most node types, though not all, the first 268 * thing we do is recurse to process all nodes below this one. Each level 269 * of the tree has its own local context. 270 */ 271 loccontext.pstate = context->pstate; 272 loccontext.collation = InvalidOid; 273 loccontext.strength = COLLATE_NONE; 274 loccontext.location = -1; 275 /* Set these fields just to suppress uninitialized-value warnings: */ 276 loccontext.collation2 = InvalidOid; 277 loccontext.location2 = -1; 278 279 /* 280 * Recurse if appropriate, then determine the collation for this node. 281 * 282 * Note: the general cases are at the bottom of the switch, after various 283 * special cases. 284 */ 285 switch (nodeTag(node)) 286 { 287 case T_CollateExpr: 288 { 289 /* 290 * COLLATE sets an explicitly derived collation, regardless of 291 * what the child state is. But we must recurse to set up 292 * collation info below here. 293 */ 294 CollateExpr *expr = (CollateExpr *) node; 295 296 (void) expression_tree_walker(node, 297 assign_collations_walker, 298 (void *) &loccontext); 299 300 collation = expr->collOid; 301 Assert(OidIsValid(collation)); 302 strength = COLLATE_EXPLICIT; 303 location = expr->location; 304 } 305 break; 306 case T_FieldSelect: 307 { 308 /* 309 * For FieldSelect, the result has the field's declared 310 * collation, independently of what happened in the arguments. 311 * (The immediate argument must be composite and thus not 312 * collatable, anyhow.) The field's collation was already 313 * looked up and saved in the node. 314 */ 315 FieldSelect *expr = (FieldSelect *) node; 316 317 /* ... but first, recurse */ 318 (void) expression_tree_walker(node, 319 assign_collations_walker, 320 (void *) &loccontext); 321 322 if (OidIsValid(expr->resultcollid)) 323 { 324 /* Node's result type is collatable. */ 325 /* Pass up field's collation as an implicit choice. */ 326 collation = expr->resultcollid; 327 strength = COLLATE_IMPLICIT; 328 location = exprLocation(node); 329 } 330 else 331 { 332 /* Node's result type isn't collatable. */ 333 collation = InvalidOid; 334 strength = COLLATE_NONE; 335 location = -1; /* won't be used */ 336 } 337 } 338 break; 339 case T_RowExpr: 340 { 341 /* 342 * RowExpr is a special case because the subexpressions are 343 * independent: we don't want to complain if some of them have 344 * incompatible explicit collations. 345 */ 346 RowExpr *expr = (RowExpr *) node; 347 348 assign_list_collations(context->pstate, expr->args); 349 350 /* 351 * Since the result is always composite and therefore never 352 * has a collation, we can just stop here: this node has no 353 * impact on the collation of its parent. 354 */ 355 return false; /* done */ 356 } 357 case T_RowCompareExpr: 358 { 359 /* 360 * For RowCompare, we have to find the common collation of 361 * each pair of input columns and build a list. If we can't 362 * find a common collation, we just put InvalidOid into the 363 * list, which may or may not cause an error at runtime. 364 */ 365 RowCompareExpr *expr = (RowCompareExpr *) node; 366 List *colls = NIL; 367 ListCell *l; 368 ListCell *r; 369 370 forboth(l, expr->largs, r, expr->rargs) 371 { 372 Node *le = (Node *) lfirst(l); 373 Node *re = (Node *) lfirst(r); 374 Oid coll; 375 376 coll = select_common_collation(context->pstate, 377 list_make2(le, re), 378 true); 379 colls = lappend_oid(colls, coll); 380 } 381 expr->inputcollids = colls; 382 383 /* 384 * Since the result is always boolean and therefore never has 385 * a collation, we can just stop here: this node has no impact 386 * on the collation of its parent. 387 */ 388 return false; /* done */ 389 } 390 case T_CoerceToDomain: 391 { 392 /* 393 * If the domain declaration included a non-default COLLATE 394 * spec, then use that collation as the output collation of 395 * the coercion. Otherwise allow the input collation to 396 * bubble up. (The input should be of the domain's base type, 397 * therefore we don't need to worry about it not being 398 * collatable when the domain is.) 399 */ 400 CoerceToDomain *expr = (CoerceToDomain *) node; 401 Oid typcollation = get_typcollation(expr->resulttype); 402 403 /* ... but first, recurse */ 404 (void) expression_tree_walker(node, 405 assign_collations_walker, 406 (void *) &loccontext); 407 408 if (OidIsValid(typcollation)) 409 { 410 /* Node's result type is collatable. */ 411 if (typcollation == DEFAULT_COLLATION_OID) 412 { 413 /* Collation state bubbles up from child. */ 414 collation = loccontext.collation; 415 strength = loccontext.strength; 416 location = loccontext.location; 417 } 418 else 419 { 420 /* Use domain's collation as an implicit choice. */ 421 collation = typcollation; 422 strength = COLLATE_IMPLICIT; 423 location = exprLocation(node); 424 } 425 } 426 else 427 { 428 /* Node's result type isn't collatable. */ 429 collation = InvalidOid; 430 strength = COLLATE_NONE; 431 location = -1; /* won't be used */ 432 } 433 434 /* 435 * Save the state into the expression node. We know it 436 * doesn't care about input collation. 437 */ 438 if (strength == COLLATE_CONFLICT) 439 exprSetCollation(node, InvalidOid); 440 else 441 exprSetCollation(node, collation); 442 } 443 break; 444 case T_TargetEntry: 445 (void) expression_tree_walker(node, 446 assign_collations_walker, 447 (void *) &loccontext); 448 449 /* 450 * TargetEntry can have only one child, and should bubble that 451 * state up to its parent. We can't use the general-case code 452 * below because exprType and friends don't work on TargetEntry. 453 */ 454 collation = loccontext.collation; 455 strength = loccontext.strength; 456 location = loccontext.location; 457 458 /* 459 * Throw error if the collation is indeterminate for a TargetEntry 460 * that is a sort/group target. We prefer to do this now, instead 461 * of leaving the comparison functions to fail at runtime, because 462 * we can give a syntax error pointer to help locate the problem. 463 * There are some cases where there might not be a failure, for 464 * example if the planner chooses to use hash aggregation instead 465 * of sorting for grouping; but it seems better to predictably 466 * throw an error. (Compare transformSetOperationTree, which will 467 * throw error for indeterminate collation of set-op columns, even 468 * though the planner might be able to implement the set-op 469 * without sorting.) 470 */ 471 if (strength == COLLATE_CONFLICT && 472 ((TargetEntry *) node)->ressortgroupref != 0) 473 ereport(ERROR, 474 (errcode(ERRCODE_COLLATION_MISMATCH), 475 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"", 476 get_collation_name(loccontext.collation), 477 get_collation_name(loccontext.collation2)), 478 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."), 479 parser_errposition(context->pstate, 480 loccontext.location2))); 481 break; 482 case T_InferenceElem: 483 case T_RangeTblRef: 484 case T_JoinExpr: 485 case T_FromExpr: 486 case T_OnConflictExpr: 487 case T_SortGroupClause: 488 (void) expression_tree_walker(node, 489 assign_collations_walker, 490 (void *) &loccontext); 491 492 /* 493 * When we're invoked on a query's jointree, we don't need to do 494 * anything with join nodes except recurse through them to process 495 * WHERE/ON expressions. So just stop here. Likewise, we don't 496 * need to do anything when invoked on sort/group lists. 497 */ 498 return false; 499 case T_Query: 500 { 501 /* 502 * We get here when we're invoked on the Query belonging to a 503 * SubLink. Act as though the Query returns its first output 504 * column, which indeed is what it does for EXPR_SUBLINK and 505 * ARRAY_SUBLINK cases. In the cases where the SubLink 506 * returns boolean, this info will be ignored. Special case: 507 * in EXISTS, the Query might return no columns, in which case 508 * we need do nothing. 509 * 510 * We needn't recurse, since the Query is already processed. 511 */ 512 Query *qtree = (Query *) node; 513 TargetEntry *tent; 514 515 if (qtree->targetList == NIL) 516 return false; 517 tent = linitial_node(TargetEntry, qtree->targetList); 518 if (tent->resjunk) 519 return false; 520 521 collation = exprCollation((Node *) tent->expr); 522 /* collation doesn't change if it's converted to array */ 523 strength = COLLATE_IMPLICIT; 524 location = exprLocation((Node *) tent->expr); 525 } 526 break; 527 case T_List: 528 (void) expression_tree_walker(node, 529 assign_collations_walker, 530 (void *) &loccontext); 531 532 /* 533 * When processing a list, collation state just bubbles up from 534 * the list elements. 535 */ 536 collation = loccontext.collation; 537 strength = loccontext.strength; 538 location = loccontext.location; 539 break; 540 541 case T_Var: 542 case T_Const: 543 case T_Param: 544 case T_CoerceToDomainValue: 545 case T_CaseTestExpr: 546 case T_SetToDefault: 547 case T_CurrentOfExpr: 548 549 /* 550 * General case for childless expression nodes. These should 551 * already have a collation assigned; it is not this function's 552 * responsibility to look into the catalogs for base-case 553 * information. 554 */ 555 collation = exprCollation(node); 556 557 /* 558 * Note: in most cases, there will be an assigned collation 559 * whenever type_is_collatable(exprType(node)); but an exception 560 * occurs for a Var referencing a subquery output column for which 561 * a unique collation was not determinable. That may lead to a 562 * runtime failure if a collation-sensitive function is applied to 563 * the Var. 564 */ 565 566 if (OidIsValid(collation)) 567 strength = COLLATE_IMPLICIT; 568 else 569 strength = COLLATE_NONE; 570 location = exprLocation(node); 571 break; 572 573 default: 574 { 575 /* 576 * General case for most expression nodes with children. First 577 * recurse, then figure out what to assign to this node. 578 */ 579 Oid typcollation; 580 581 /* 582 * For most node types, we want to treat all the child 583 * expressions alike; but there are a few exceptions, hence 584 * this inner switch. 585 */ 586 switch (nodeTag(node)) 587 { 588 case T_Aggref: 589 { 590 /* 591 * Aggref is messy enough that we give it its own 592 * function, in fact three of them. The FILTER 593 * clause is independent of the rest of the 594 * aggregate, however, so it can be processed 595 * separately. 596 */ 597 Aggref *aggref = (Aggref *) node; 598 599 switch (aggref->aggkind) 600 { 601 case AGGKIND_NORMAL: 602 assign_aggregate_collations(aggref, 603 &loccontext); 604 break; 605 case AGGKIND_ORDERED_SET: 606 assign_ordered_set_collations(aggref, 607 &loccontext); 608 break; 609 case AGGKIND_HYPOTHETICAL: 610 assign_hypothetical_collations(aggref, 611 &loccontext); 612 break; 613 default: 614 elog(ERROR, "unrecognized aggkind: %d", 615 (int) aggref->aggkind); 616 } 617 618 assign_expr_collations(context->pstate, 619 (Node *) aggref->aggfilter); 620 } 621 break; 622 case T_WindowFunc: 623 { 624 /* 625 * WindowFunc requires special processing only for 626 * its aggfilter clause, as for aggregates. 627 */ 628 WindowFunc *wfunc = (WindowFunc *) node; 629 630 (void) assign_collations_walker((Node *) wfunc->args, 631 &loccontext); 632 633 assign_expr_collations(context->pstate, 634 (Node *) wfunc->aggfilter); 635 } 636 break; 637 case T_CaseExpr: 638 { 639 /* 640 * CaseExpr is a special case because we do not 641 * want to recurse into the test expression (if 642 * any). It was already marked with collations 643 * during transformCaseExpr, and furthermore its 644 * collation is not relevant to the result of the 645 * CASE --- only the output expressions are. 646 */ 647 CaseExpr *expr = (CaseExpr *) node; 648 ListCell *lc; 649 650 foreach(lc, expr->args) 651 { 652 CaseWhen *when = lfirst_node(CaseWhen, lc); 653 654 /* 655 * The condition expressions mustn't affect 656 * the CASE's result collation either; but 657 * since they are known to yield boolean, it's 658 * safe to recurse directly on them --- they 659 * won't change loccontext. 660 */ 661 (void) assign_collations_walker((Node *) when->expr, 662 &loccontext); 663 (void) assign_collations_walker((Node *) when->result, 664 &loccontext); 665 } 666 (void) assign_collations_walker((Node *) expr->defresult, 667 &loccontext); 668 } 669 break; 670 default: 671 672 /* 673 * Normal case: all child expressions contribute 674 * equally to loccontext. 675 */ 676 (void) expression_tree_walker(node, 677 assign_collations_walker, 678 (void *) &loccontext); 679 break; 680 } 681 682 /* 683 * Now figure out what collation to assign to this node. 684 */ 685 typcollation = get_typcollation(exprType(node)); 686 if (OidIsValid(typcollation)) 687 { 688 /* Node's result is collatable; what about its input? */ 689 if (loccontext.strength > COLLATE_NONE) 690 { 691 /* Collation state bubbles up from children. */ 692 collation = loccontext.collation; 693 strength = loccontext.strength; 694 location = loccontext.location; 695 } 696 else 697 { 698 /* 699 * Collatable output produced without any collatable 700 * input. Use the type's collation (which is usually 701 * DEFAULT_COLLATION_OID, but might be different for a 702 * domain). 703 */ 704 collation = typcollation; 705 strength = COLLATE_IMPLICIT; 706 location = exprLocation(node); 707 } 708 } 709 else 710 { 711 /* Node's result type isn't collatable. */ 712 collation = InvalidOid; 713 strength = COLLATE_NONE; 714 location = -1; /* won't be used */ 715 } 716 717 /* 718 * Save the result collation into the expression node. If the 719 * state is COLLATE_CONFLICT, we'll set the collation to 720 * InvalidOid, which might result in an error at runtime. 721 */ 722 if (strength == COLLATE_CONFLICT) 723 exprSetCollation(node, InvalidOid); 724 else 725 exprSetCollation(node, collation); 726 727 /* 728 * Likewise save the input collation, which is the one that 729 * any function called by this node should use. 730 */ 731 if (loccontext.strength == COLLATE_CONFLICT) 732 exprSetInputCollation(node, InvalidOid); 733 else 734 exprSetInputCollation(node, loccontext.collation); 735 } 736 break; 737 } 738 739 /* 740 * Now, merge my information into my parent's state. 741 */ 742 merge_collation_state(collation, 743 strength, 744 location, 745 loccontext.collation2, 746 loccontext.location2, 747 context); 748 749 return false; 750 } 751 752 /* 753 * Merge collation state of a subexpression into the context for its parent. 754 */ 755 static void 756 merge_collation_state(Oid collation, 757 CollateStrength strength, 758 int location, 759 Oid collation2, 760 int location2, 761 assign_collations_context *context) 762 { 763 /* 764 * If the collation strength for this node is different from what's 765 * already in *context, then this node either dominates or is dominated by 766 * earlier siblings. 767 */ 768 if (strength > context->strength) 769 { 770 /* Override previous parent state */ 771 context->collation = collation; 772 context->strength = strength; 773 context->location = location; 774 /* Bubble up error info if applicable */ 775 if (strength == COLLATE_CONFLICT) 776 { 777 context->collation2 = collation2; 778 context->location2 = location2; 779 } 780 } 781 else if (strength == context->strength) 782 { 783 /* Merge, or detect error if there's a collation conflict */ 784 switch (strength) 785 { 786 case COLLATE_NONE: 787 /* Nothing + nothing is still nothing */ 788 break; 789 case COLLATE_IMPLICIT: 790 if (collation != context->collation) 791 { 792 /* 793 * Non-default implicit collation always beats default. 794 */ 795 if (context->collation == DEFAULT_COLLATION_OID) 796 { 797 /* Override previous parent state */ 798 context->collation = collation; 799 context->strength = strength; 800 context->location = location; 801 } 802 else if (collation != DEFAULT_COLLATION_OID) 803 { 804 /* 805 * Oops, we have a conflict. We cannot throw error 806 * here, since the conflict could be resolved by a 807 * later sibling CollateExpr, or the parent might not 808 * care about collation anyway. Return enough info to 809 * throw the error later, if needed. 810 */ 811 context->strength = COLLATE_CONFLICT; 812 context->collation2 = collation; 813 context->location2 = location; 814 } 815 } 816 break; 817 case COLLATE_CONFLICT: 818 /* We're still conflicted ... */ 819 break; 820 case COLLATE_EXPLICIT: 821 if (collation != context->collation) 822 { 823 /* 824 * Oops, we have a conflict of explicit COLLATE clauses. 825 * Here we choose to throw error immediately; that is what 826 * the SQL standard says to do, and there's no good reason 827 * to be less strict. 828 */ 829 ereport(ERROR, 830 (errcode(ERRCODE_COLLATION_MISMATCH), 831 errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"", 832 get_collation_name(context->collation), 833 get_collation_name(collation)), 834 parser_errposition(context->pstate, location))); 835 } 836 break; 837 } 838 } 839 } 840 841 /* 842 * Aggref is a special case because expressions used only for ordering 843 * shouldn't be taken to conflict with each other or with regular args, 844 * indeed shouldn't affect the aggregate's result collation at all. 845 * We handle this by applying assign_expr_collations() to them rather than 846 * passing down our loccontext. 847 * 848 * Note that we recurse to each TargetEntry, not directly to its contained 849 * expression, so that the case above for T_TargetEntry will complain if we 850 * can't resolve a collation for an ORDER BY item (whether or not it is also 851 * a normal aggregate arg). 852 * 853 * We need not recurse into the aggorder or aggdistinct lists, because those 854 * contain only SortGroupClause nodes which we need not process. 855 */ 856 static void 857 assign_aggregate_collations(Aggref *aggref, 858 assign_collations_context *loccontext) 859 { 860 ListCell *lc; 861 862 /* Plain aggregates have no direct args */ 863 Assert(aggref->aggdirectargs == NIL); 864 865 /* Process aggregated args, holding resjunk ones at arm's length */ 866 foreach(lc, aggref->args) 867 { 868 TargetEntry *tle = lfirst_node(TargetEntry, lc); 869 870 if (tle->resjunk) 871 assign_expr_collations(loccontext->pstate, (Node *) tle); 872 else 873 (void) assign_collations_walker((Node *) tle, loccontext); 874 } 875 } 876 877 /* 878 * For ordered-set aggregates, it's somewhat unclear how best to proceed. 879 * The spec-defined inverse distribution functions have only one sort column 880 * and don't return collatable types, but this is clearly too restrictive in 881 * the general case. Our solution is to consider that the aggregate's direct 882 * arguments contribute normally to determination of the aggregate's own 883 * collation, while aggregated arguments contribute only when the aggregate 884 * is designed to have exactly one aggregated argument (i.e., it has a single 885 * aggregated argument and is non-variadic). If it can have more than one 886 * aggregated argument, we process the aggregated arguments as independent 887 * sort columns. This avoids throwing error for something like 888 * agg(...) within group (order by x collate "foo", y collate "bar") 889 * while also guaranteeing that variadic aggregates don't change in behavior 890 * depending on how many sort columns a particular call happens to have. 891 * 892 * Otherwise this is much like the plain-aggregate case. 893 */ 894 static void 895 assign_ordered_set_collations(Aggref *aggref, 896 assign_collations_context *loccontext) 897 { 898 bool merge_sort_collations; 899 ListCell *lc; 900 901 /* Merge sort collations to parent only if there can be only one */ 902 merge_sort_collations = (list_length(aggref->args) == 1 && 903 get_func_variadictype(aggref->aggfnoid) == InvalidOid); 904 905 /* Direct args, if any, are normal children of the Aggref node */ 906 (void) assign_collations_walker((Node *) aggref->aggdirectargs, 907 loccontext); 908 909 /* Process aggregated args appropriately */ 910 foreach(lc, aggref->args) 911 { 912 TargetEntry *tle = lfirst_node(TargetEntry, lc); 913 914 if (merge_sort_collations) 915 (void) assign_collations_walker((Node *) tle, loccontext); 916 else 917 assign_expr_collations(loccontext->pstate, (Node *) tle); 918 } 919 } 920 921 /* 922 * Hypothetical-set aggregates are even more special: per spec, we need to 923 * unify the collations of each pair of hypothetical and aggregated args. 924 * And we need to force the choice of collation down into the sort column 925 * to ensure that the sort happens with the chosen collation. Other than 926 * that, the behavior is like regular ordered-set aggregates. Note that 927 * hypothetical direct arguments contribute to the aggregate collation 928 * only when their partner aggregated arguments do. 929 */ 930 static void 931 assign_hypothetical_collations(Aggref *aggref, 932 assign_collations_context *loccontext) 933 { 934 ListCell *h_cell = list_head(aggref->aggdirectargs); 935 ListCell *s_cell = list_head(aggref->args); 936 bool merge_sort_collations; 937 int extra_args; 938 939 /* Merge sort collations to parent only if there can be only one */ 940 merge_sort_collations = (list_length(aggref->args) == 1 && 941 get_func_variadictype(aggref->aggfnoid) == InvalidOid); 942 943 /* Process any non-hypothetical direct args */ 944 extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args); 945 Assert(extra_args >= 0); 946 while (extra_args-- > 0) 947 { 948 (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext); 949 h_cell = lnext(h_cell); 950 } 951 952 /* Scan hypothetical args and aggregated args in parallel */ 953 while (h_cell && s_cell) 954 { 955 Node *h_arg = (Node *) lfirst(h_cell); 956 TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell); 957 assign_collations_context paircontext; 958 959 /* 960 * Assign collations internally in this pair of expressions, then 961 * choose a common collation for them. This should match 962 * select_common_collation(), but we can't use that function as-is 963 * because we need access to the whole collation state so we can 964 * bubble it up to the aggregate function's level. 965 */ 966 paircontext.pstate = loccontext->pstate; 967 paircontext.collation = InvalidOid; 968 paircontext.strength = COLLATE_NONE; 969 paircontext.location = -1; 970 /* Set these fields just to suppress uninitialized-value warnings: */ 971 paircontext.collation2 = InvalidOid; 972 paircontext.location2 = -1; 973 974 (void) assign_collations_walker(h_arg, &paircontext); 975 (void) assign_collations_walker((Node *) s_tle->expr, &paircontext); 976 977 /* deal with collation conflict */ 978 if (paircontext.strength == COLLATE_CONFLICT) 979 ereport(ERROR, 980 (errcode(ERRCODE_COLLATION_MISMATCH), 981 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"", 982 get_collation_name(paircontext.collation), 983 get_collation_name(paircontext.collation2)), 984 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."), 985 parser_errposition(paircontext.pstate, 986 paircontext.location2))); 987 988 /* 989 * At this point paircontext.collation can be InvalidOid only if the 990 * type is not collatable; no need to do anything in that case. If we 991 * do have to change the sort column's collation, do it by inserting a 992 * RelabelType node into the sort column TLE. 993 * 994 * XXX This is pretty grotty for a couple of reasons: 995 * assign_collations_walker isn't supposed to be changing the 996 * expression structure like this, and a parse-time change of 997 * collation ought to be signaled by a CollateExpr not a RelabelType 998 * (the use of RelabelType for collation marking is supposed to be a 999 * planner/executor thing only). But we have no better alternative. 1000 * In particular, injecting a CollateExpr could result in the 1001 * expression being interpreted differently after dump/reload, since 1002 * we might be effectively promoting an implicit collation to 1003 * explicit. This kluge is relying on ruleutils.c not printing a 1004 * COLLATE clause for a RelabelType, and probably on some other 1005 * fragile behaviors. 1006 */ 1007 if (OidIsValid(paircontext.collation) && 1008 paircontext.collation != exprCollation((Node *) s_tle->expr)) 1009 { 1010 s_tle->expr = (Expr *) 1011 makeRelabelType(s_tle->expr, 1012 exprType((Node *) s_tle->expr), 1013 exprTypmod((Node *) s_tle->expr), 1014 paircontext.collation, 1015 COERCE_IMPLICIT_CAST); 1016 } 1017 1018 /* 1019 * If appropriate, merge this column's collation state up to the 1020 * aggregate function. 1021 */ 1022 if (merge_sort_collations) 1023 merge_collation_state(paircontext.collation, 1024 paircontext.strength, 1025 paircontext.location, 1026 paircontext.collation2, 1027 paircontext.location2, 1028 loccontext); 1029 1030 h_cell = lnext(h_cell); 1031 s_cell = lnext(s_cell); 1032 } 1033 Assert(h_cell == NULL && s_cell == NULL); 1034 } 1035