1 /*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities
4 *
5 * Portions Copyright (c) 2002-2017, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12 /*----------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
17 *
18 * LC_MESSAGES is settable at run time and will take effect
19 * immediately.
20 *
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 * settable at run-time. However, we don't actually set those locale
23 * categories permanently. This would have bizarre effects like no
24 * longer accepting standard floating-point literals in some locales.
25 * Instead, we only set these locale categories briefly when needed,
26 * cache the required information obtained from localeconv() or
27 * strftime(), and then set the locale categories back to "C".
28 * The cached information is only used by the formatting functions
29 * (to_char, etc.) and the money type. For the user, this should all be
30 * transparent.
31 *
32 * !!! NOW HEAR THIS !!!
33 *
34 * We've been bitten repeatedly by this bug, so let's try to keep it in
35 * mind in future: on some platforms, the locale functions return pointers
36 * to static data that will be overwritten by any later locale function.
37 * Thus, for example, the obvious-looking sequence
38 * save = setlocale(category, NULL);
39 * if (!setlocale(category, value))
40 * fail = true;
41 * setlocale(category, save);
42 * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43 * will change the memory save is pointing at. To do this sort of thing
44 * safely, you *must* pstrdup what setlocale returns the first time.
45 *
46 * The POSIX locale standard is available here:
47 *
48 * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49 *----------
50 */
51
52
53 #include "postgres.h"
54
55 #include <time.h>
56
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "catalog/pg_control.h"
60 #include "mb/pg_wchar.h"
61 #include "utils/builtins.h"
62 #include "utils/hsearch.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_locale.h"
66 #include "utils/syscache.h"
67
68 #ifdef USE_ICU
69 #include <unicode/ucnv.h>
70 /* ICU might have a different definition of "bool", don't buy it */
71 #ifdef bool
72 #undef bool
73 #endif
74 #endif
75
#ifdef WIN32
/*
 * This Windows file defines StrNCpy. We don't need it here, so we undefine
 * our own definition first to keep the compiler quiet, and undefine it again
 * after the file is included, so we don't accidentally use theirs.
 *
 * Note the second #undef must spell the macro exactly "StrNCpy"; a
 * differently-cased name would silently leave the Windows macro in place.
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
88
/*
 * Capacity reserved for each localized day/month name fetched in this file
 * (a char count for strftime() output, a wchar_t count for wcsftime()).
 */
#define		MAX_L10N_DATA		80


/* GUC settings */
char	   *locale_messages;
char	   *locale_monetary;
char	   *locale_numeric;
char	   *locale_time;

/*
 * lc_time localization cache
 *
 * Filled in by cache_locale_time(); each entry is a string allocated in
 * TopMemoryContext and already converted to the database encoding.
 */
char	   *localized_abbrev_days[7];
char	   *localized_full_days[7];
char	   *localized_abbrev_months[12];
char	   *localized_full_months[12];

/*
 * indicates whether locale information cache is valid
 *
 * CurrentLocaleConvValid guards the result of PGLC_localeconv() and is reset
 * by the lc_monetary/lc_numeric assign hooks; CurrentLCTimeValid guards the
 * lc_time cache above and is reset by the lc_time assign hook.
 */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;

/*
 * Environment variable storage area
 *
 * pg_perm_setlocale() formats "LC_XXX=value" into these buffers and hands
 * them to putenv(), so they have to be static storage rather than locals.
 */

#define LC_ENV_BUFSIZE (NAMEDATALEN + 20)

static char lc_collate_envbuf[LC_ENV_BUFSIZE];
static char lc_ctype_envbuf[LC_ENV_BUFSIZE];

#ifdef LC_MESSAGES
static char lc_messages_envbuf[LC_ENV_BUFSIZE];
#endif
static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
static char lc_time_envbuf[LC_ENV_BUFSIZE];

/* Cache for collation-related knowledge */

typedef struct
{
	Oid			collid;			/* hash key: pg_collation OID */
	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
	bool		flags_valid;	/* true if above flags are valid */
	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
} collation_cache_entry;

/*
 * Hash table of collation_cache_entry, keyed by collid.  NOTE(review): it is
 * not created or used in the part of the file visible here; presumably it is
 * built lazily by the collation lookup code -- confirm there.
 */
static HTAB *collation_cache = NULL;


#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *);	/* MSVC specific */
#endif
139
140
141 /*
142 * pg_perm_setlocale
143 *
144 * This wraps the libc function setlocale(), with two additions. First, when
145 * changing LC_CTYPE, update gettext's encoding for the current message
146 * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
147 * not on Windows. Second, if the operation is successful, the corresponding
148 * LC_XXX environment variable is set to match. By setting the environment
149 * variable, we ensure that any subsequent use of setlocale(..., "") will
150 * preserve the settings made through this routine. Of course, LC_ALL must
151 * also be unset to fully ensure that, but that has to be done elsewhere after
152 * all the individual LC_XXX variables have been set correctly. (Thank you
153 * Perl for making this kluge necessary.)
154 */
155 char *
pg_perm_setlocale(int category,const char * locale)156 pg_perm_setlocale(int category, const char *locale)
157 {
158 char *result;
159 const char *envvar;
160 char *envbuf;
161
162 #ifndef WIN32
163 result = setlocale(category, locale);
164 #else
165
166 /*
167 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
168 * the given value is good and set it in the environment variables. We
169 * must ignore attempts to set to "", which means "keep using the old
170 * environment value".
171 */
172 #ifdef LC_MESSAGES
173 if (category == LC_MESSAGES)
174 {
175 result = (char *) locale;
176 if (locale == NULL || locale[0] == '\0')
177 return result;
178 }
179 else
180 #endif
181 result = setlocale(category, locale);
182 #endif /* WIN32 */
183
184 if (result == NULL)
185 return result; /* fall out immediately on failure */
186
187 /*
188 * Use the right encoding in translated messages. Under ENABLE_NLS, let
189 * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
190 * format strings are ASCII, but database-encoding strings may enter the
191 * message via %s. This makes the overall message encoding equal to the
192 * database encoding.
193 */
194 if (category == LC_CTYPE)
195 {
196 static char save_lc_ctype[LC_ENV_BUFSIZE];
197
198 /* copy setlocale() return value before callee invokes it again */
199 strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
200 result = save_lc_ctype;
201
202 #ifdef ENABLE_NLS
203 SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
204 #else
205 SetMessageEncoding(GetDatabaseEncoding());
206 #endif
207 }
208
209 switch (category)
210 {
211 case LC_COLLATE:
212 envvar = "LC_COLLATE";
213 envbuf = lc_collate_envbuf;
214 break;
215 case LC_CTYPE:
216 envvar = "LC_CTYPE";
217 envbuf = lc_ctype_envbuf;
218 break;
219 #ifdef LC_MESSAGES
220 case LC_MESSAGES:
221 envvar = "LC_MESSAGES";
222 envbuf = lc_messages_envbuf;
223 #ifdef WIN32
224 result = IsoLocaleName(locale);
225 if (result == NULL)
226 result = (char *) locale;
227 elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
228 #endif /* WIN32 */
229 break;
230 #endif /* LC_MESSAGES */
231 case LC_MONETARY:
232 envvar = "LC_MONETARY";
233 envbuf = lc_monetary_envbuf;
234 break;
235 case LC_NUMERIC:
236 envvar = "LC_NUMERIC";
237 envbuf = lc_numeric_envbuf;
238 break;
239 case LC_TIME:
240 envvar = "LC_TIME";
241 envbuf = lc_time_envbuf;
242 break;
243 default:
244 elog(FATAL, "unrecognized LC category: %d", category);
245 envvar = NULL; /* keep compiler quiet */
246 envbuf = NULL;
247 return NULL;
248 }
249
250 snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
251
252 if (putenv(envbuf))
253 return NULL;
254
255 return result;
256 }
257
258
259 /*
260 * Is the locale name valid for the locale category?
261 *
262 * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
263 * canonical name is stored there. This is especially useful for figuring out
264 * what locale name "" means (ie, the server environment value). (Actually,
265 * it seems that on most implementations that's the only thing it's good for;
266 * we could wish that setlocale gave back a canonically spelled version of
267 * the locale name, but typically it doesn't.)
268 */
269 bool
check_locale(int category,const char * locale,char ** canonname)270 check_locale(int category, const char *locale, char **canonname)
271 {
272 char *save;
273 char *res;
274
275 if (canonname)
276 *canonname = NULL; /* in case of failure */
277
278 save = setlocale(category, NULL);
279 if (!save)
280 return false; /* won't happen, we hope */
281
282 /* save may be pointing at a modifiable scratch variable, see above. */
283 save = pstrdup(save);
284
285 /* set the locale with setlocale, to see if it accepts it. */
286 res = setlocale(category, locale);
287
288 /* save canonical name if requested. */
289 if (res && canonname)
290 *canonname = pstrdup(res);
291
292 /* restore old value. */
293 if (!setlocale(category, save))
294 elog(WARNING, "failed to restore old locale \"%s\"", save);
295 pfree(save);
296
297 return (res != NULL);
298 }
299
300
301 /*
302 * GUC check/assign hooks
303 *
304 * For most locale categories, the assign hook doesn't actually set the locale
305 * permanently, just reset flags so that the next use will cache the
306 * appropriate values. (See explanation at the top of this file.)
307 *
308 * Note: we accept value = "" as selecting the postmaster's environment
309 * value, whatever it was (so long as the environment setting is legal).
310 * This will have been locked down by an earlier call to pg_perm_setlocale.
311 */
312 bool
check_locale_monetary(char ** newval,void ** extra,GucSource source)313 check_locale_monetary(char **newval, void **extra, GucSource source)
314 {
315 return check_locale(LC_MONETARY, *newval, NULL);
316 }
317
318 void
assign_locale_monetary(const char * newval,void * extra)319 assign_locale_monetary(const char *newval, void *extra)
320 {
321 CurrentLocaleConvValid = false;
322 }
323
324 bool
check_locale_numeric(char ** newval,void ** extra,GucSource source)325 check_locale_numeric(char **newval, void **extra, GucSource source)
326 {
327 return check_locale(LC_NUMERIC, *newval, NULL);
328 }
329
330 void
assign_locale_numeric(const char * newval,void * extra)331 assign_locale_numeric(const char *newval, void *extra)
332 {
333 CurrentLocaleConvValid = false;
334 }
335
336 bool
check_locale_time(char ** newval,void ** extra,GucSource source)337 check_locale_time(char **newval, void **extra, GucSource source)
338 {
339 return check_locale(LC_TIME, *newval, NULL);
340 }
341
342 void
assign_locale_time(const char * newval,void * extra)343 assign_locale_time(const char *newval, void *extra)
344 {
345 CurrentLCTimeValid = false;
346 }
347
348 /*
349 * We allow LC_MESSAGES to actually be set globally.
350 *
351 * Note: we normally disallow value = "" because it wouldn't have consistent
352 * semantics (it'd effectively just use the previous value). However, this
353 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
354 * not even if the attempted setting fails due to invalid environment value.
355 * The idea there is just to accept the environment setting *if possible*
356 * during startup, until we can read the proper value from postgresql.conf.
357 */
358 bool
check_locale_messages(char ** newval,void ** extra,GucSource source)359 check_locale_messages(char **newval, void **extra, GucSource source)
360 {
361 if (**newval == '\0')
362 {
363 if (source == PGC_S_DEFAULT)
364 return true;
365 else
366 return false;
367 }
368
369 /*
370 * LC_MESSAGES category does not exist everywhere, but accept it anyway
371 *
372 * On Windows, we can't even check the value, so accept blindly
373 */
374 #if defined(LC_MESSAGES) && !defined(WIN32)
375 return check_locale(LC_MESSAGES, *newval, NULL);
376 #else
377 return true;
378 #endif
379 }
380
/*
 * GUC assign hook for lc_messages.
 *
 * Unlike the other locale GUCs, this one is really installed process-wide,
 * via pg_perm_setlocale().
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * Platforms without an LC_MESSAGES category simply do nothing here.
	 * Where it exists, a failure to install the value is deliberately
	 * ignored, so the result of pg_perm_setlocale() is discarded.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
392
393
/*
 * Release every malloc'd string hanging off a struct lconv; the struct
 * itself is left alone and its pointers are not reset.  Members that are
 * NULL are fine.  This must never throw elog(ERROR).
 *
 * The set of members handled here has to stay in sync with
 * struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* free(NULL) is defined to do nothing, so no per-member guard is needed */

	/* LC_NUMERIC members */
	free(s->decimal_point);
	free(s->thousands_sep);
	free(s->grouping);

	/* LC_MONETARY members */
	free(s->int_curr_symbol);
	free(s->currency_symbol);
	free(s->mon_decimal_point);
	free(s->mon_thousands_sep);
	free(s->mon_grouping);
	free(s->positive_sign);
	free(s->negative_sign);
}
422
/*
 * Report whether every string member of a struct lconv that we care about
 * is non-NULL.  The members tested must be the same ones that
 * free_struct_lconv() releases.
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
452
453
454 /*
455 * Convert the strdup'd string at *str from the specified encoding to the
456 * database encoding.
457 */
458 static void
db_encoding_convert(int encoding,char ** str)459 db_encoding_convert(int encoding, char **str)
460 {
461 char *pstr;
462 char *mstr;
463
464 /* convert the string to the database encoding */
465 pstr = pg_any_to_server(*str, strlen(*str), encoding);
466 if (pstr == *str)
467 return; /* no conversion happened */
468
469 /* need it malloc'd not palloc'd */
470 mstr = strdup(pstr);
471 if (mstr == NULL)
472 ereport(ERROR,
473 (errcode(ERRCODE_OUT_OF_MEMORY),
474 errmsg("out of memory")));
475
476 /* replace old string */
477 free(*str);
478 *str = mstr;
479
480 pfree(pstr);
481 }
482
483
/*
 * Return the POSIX lconv struct (contains number/money formatting
 * information) with locale information for all categories.
 *
 * The numeric members are taken from the lc_numeric GUC's locale and the
 * monetary members from the lc_monetary GUC's locale.  All string members are
 * malloc'd copies that have been converted to the database encoding.
 *
 * The result points at function-static storage.  It is computed on first use
 * and reused until CurrentLocaleConvValid is cleared, at which point the next
 * call frees the old strings and rebuilds the struct.
 *
 * Raises ERROR if the current locale cannot be queried, on out-of-memory, or
 * if encoding conversion fails; raises FATAL if the prevailing locale cannot
 * be restored.
 */
struct lconv *
PGLC_localeconv(void)
{
	static struct lconv CurrentLocaleConv;
	static bool CurrentLocaleConvAllocated = false;
	struct lconv *extlconv;
	struct lconv worklconv;
	char	   *save_lc_monetary;
	char	   *save_lc_numeric;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* Did we do it already? */
	if (CurrentLocaleConvValid)
		return &CurrentLocaleConv;

	/* Free any already-allocated storage */
	if (CurrentLocaleConvAllocated)
	{
		free_struct_lconv(&CurrentLocaleConv);
		CurrentLocaleConvAllocated = false;
	}

	/*
	 * This is tricky because we really don't want to risk throwing error
	 * while the locale is set to other than our usual settings.  Therefore,
	 * the process is: collect the usual settings, set locale to special
	 * setting, copy relevant data into worklconv using strdup(), restore
	 * normal settings, convert data to desired encoding, and finally stash
	 * the collected data in CurrentLocaleConv.  This makes it safe if we
	 * throw an error during encoding conversion or run out of memory anywhere
	 * in the process.  All data pointed to by struct lconv members is
	 * allocated with strdup, to avoid premature elog(ERROR) and to allow
	 * using a single cleanup routine.
	 */
	memset(&worklconv, 0, sizeof(worklconv));

	/*
	 * Save prevailing values of monetary and numeric locales.  Each result
	 * is pstrdup'd at once because a later setlocale() call may overwrite
	 * the memory the earlier one returned.
	 */
	save_lc_monetary = setlocale(LC_MONETARY, NULL);
	if (!save_lc_monetary)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_monetary = pstrdup(save_lc_monetary);

	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
	if (!save_lc_numeric)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_numeric = pstrdup(save_lc_numeric);

#ifdef WIN32

	/*
	 * The POSIX standard explicitly says that it is undefined what happens if
	 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
	 * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
	 * believe that localeconv() should return strings that are encoded in the
	 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
	 * once we have successfully collected the localeconv() results, we will
	 * convert them from that codeset to the desired server encoding.
	 *
	 * Windows, of course, resolutely does things its own way; on that
	 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
	 * results.  Hence, we must temporarily set that category as well.
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* Here begins the critical section where we must not throw error */

	/* use numeric to set the ctype */
	setlocale(LC_CTYPE, locale_numeric);
#endif

	/*
	 * Get formatting information for numeric.  The setlocale() result is not
	 * checked here; reporting an error is not allowed at this point.
	 */
	setlocale(LC_NUMERIC, locale_numeric);
	extlconv = localeconv();

	/*
	 * Must copy data now in case setlocale() overwrites it.  A failed
	 * strdup() leaves NULL behind, which is detected after the locale has
	 * been restored.
	 */
	worklconv.decimal_point = strdup(extlconv->decimal_point);
	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
	worklconv.grouping = strdup(extlconv->grouping);

#ifdef WIN32
	/* use monetary to set the ctype */
	setlocale(LC_CTYPE, locale_monetary);
#endif

	/* Get formatting information for monetary */
	setlocale(LC_MONETARY, locale_monetary);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
	worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
	worklconv.mon_grouping = strdup(extlconv->mon_grouping);
	worklconv.positive_sign = strdup(extlconv->positive_sign);
	worklconv.negative_sign = strdup(extlconv->negative_sign);
	/* Copy scalar fields as well */
	worklconv.int_frac_digits = extlconv->int_frac_digits;
	worklconv.frac_digits = extlconv->frac_digits;
	worklconv.p_cs_precedes = extlconv->p_cs_precedes;
	worklconv.p_sep_by_space = extlconv->p_sep_by_space;
	worklconv.n_cs_precedes = extlconv->n_cs_precedes;
	worklconv.n_sep_by_space = extlconv->n_sep_by_space;
	worklconv.p_sign_posn = extlconv->p_sign_posn;
	worklconv.n_sign_posn = extlconv->n_sign_posn;

	/*
	 * Restore the prevailing locale settings; failure to do so is fatal.
	 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
	 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
	 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
	 * are almost certainly "C", there's really no reason that restoring those
	 * should fail.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_MONETARY, save_lc_monetary))
		elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
	if (!setlocale(LC_NUMERIC, save_lc_numeric))
		elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);

	/*
	 * At this point we've done our best to clean up, and can call functions
	 * that might possibly throw errors with a clean conscience.  But let's
	 * make sure we don't leak any already-strdup'd fields in worklconv.
	 */
	PG_TRY();
	{
		int			encoding;

		/* Release the pstrdup'd locale names */
		pfree(save_lc_monetary);
		pfree(save_lc_numeric);
#ifdef WIN32
		pfree(save_lc_ctype);
#endif

		/* If any of the preceding strdup calls failed, complain now. */
		if (!struct_lconv_is_valid(&worklconv))
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));

		/*
		 * Now we must perform encoding conversion from whatever's associated
		 * with the locales into the database encoding.  If we can't identify
		 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
		 * use PG_SQL_ASCII, which will result in just validating that the
		 * strings are OK in the database encoding.
		 */
		encoding = pg_get_encoding_from_locale(locale_numeric, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.decimal_point);
		db_encoding_convert(encoding, &worklconv.thousands_sep);
		/* grouping is not text and does not require conversion */

		encoding = pg_get_encoding_from_locale(locale_monetary, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.int_curr_symbol);
		db_encoding_convert(encoding, &worklconv.currency_symbol);
		db_encoding_convert(encoding, &worklconv.mon_decimal_point);
		db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
		/* mon_grouping is not text and does not require conversion */
		db_encoding_convert(encoding, &worklconv.positive_sign);
		db_encoding_convert(encoding, &worklconv.negative_sign);
	}
	PG_CATCH();
	{
		/* give back everything strdup'd so far, then propagate the error */
		free_struct_lconv(&worklconv);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/*
	 * Everything is good, so save the results.  The struct assignment hands
	 * ownership of the malloc'd strings over to CurrentLocaleConv.
	 */
	CurrentLocaleConv = worklconv;
	CurrentLocaleConvAllocated = true;
	CurrentLocaleConvValid = true;
	return &CurrentLocaleConv;
}
682
683 #ifdef WIN32
684 /*
685 * On Windows, strftime() returns its output in encoding CP_ACP (the default
686 * operating system codepage for the computer), which is likely different
687 * from SERVER_ENCODING. This is especially important in Japanese versions
688 * of Windows which will use SJIS encoding, which we don't support as a
689 * server encoding.
690 *
691 * So, instead of using strftime(), use wcsftime() to return the value in
692 * wide characters (internally UTF16) and then convert to UTF8, which we
693 * know how to handle directly.
694 *
695 * Note that this only affects the calls to strftime() in this file, which are
696 * used to get the locale-aware strings. Other parts of the backend use
697 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
698 */
699 static size_t
strftime_win32(char * dst,size_t dstlen,const char * format,const struct tm * tm)700 strftime_win32(char *dst, size_t dstlen,
701 const char *format, const struct tm *tm)
702 {
703 size_t len;
704 wchar_t wformat[8]; /* formats used below need 3 chars */
705 wchar_t wbuf[MAX_L10N_DATA];
706
707 /*
708 * Get a wchar_t version of the format string. We only actually use
709 * plain-ASCII formats in this file, so we can say that they're UTF8.
710 */
711 len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
712 wformat, lengthof(wformat));
713 if (len == 0)
714 elog(ERROR, "could not convert format string from UTF-8: error code %lu",
715 GetLastError());
716
717 len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
718 if (len == 0)
719 {
720 /*
721 * wcsftime failed, possibly because the result would not fit in
722 * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
723 */
724 return 0;
725 }
726
727 len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
728 NULL, NULL);
729 if (len == 0)
730 elog(ERROR, "could not convert string to UTF-8: error code %lu",
731 GetLastError());
732
733 dst[len] = '\0';
734
735 return len;
736 }
737
738 /* redefine strftime() */
739 #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
740 #endif /* WIN32 */
741
742 /*
743 * Subroutine for cache_locale_time().
744 * Convert the given string from encoding "encoding" to the database
745 * encoding, and store the result at *dst, replacing any previous value.
746 */
747 static void
cache_single_string(char ** dst,const char * src,int encoding)748 cache_single_string(char **dst, const char *src, int encoding)
749 {
750 char *ptr;
751 char *olddst;
752
753 /* Convert the string to the database encoding, or validate it's OK */
754 ptr = pg_any_to_server(src, strlen(src), encoding);
755
756 /* Store the string in long-lived storage, replacing any previous value */
757 olddst = *dst;
758 *dst = MemoryContextStrdup(TopMemoryContext, ptr);
759 if (olddst)
760 pfree(olddst);
761
762 /* Might as well clean up any palloc'd conversion result, too */
763 if (ptr != src)
764 pfree(ptr);
765 }
766
/*
 * Update the lc_time localization cache variables if needed.
 *
 * Does nothing when CurrentLCTimeValid is already set.  Otherwise it
 * temporarily switches LC_TIME (and, on Windows, LC_CTYPE) to the lc_time
 * GUC's locale, collects the abbreviated and full names of all seven
 * weekdays and twelve months with strftime(), restores the previous locale,
 * and stores the names, converted to the database encoding, in
 * localized_abbrev_days[], localized_full_days[], localized_abbrev_months[]
 * and localized_full_months[].
 *
 * Raises ERROR if the current locale cannot be queried, if strftime() fails,
 * or if encoding conversion fails; raises FATAL if the prevailing locale
 * cannot be restored.
 */
void
cache_locale_time(void)
{
	/* one MAX_L10N_DATA-sized slot per string: 2 forms x (7 days + 12 months) */
	char		buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
	char	   *bufptr;
	time_t		timenow;
	struct tm  *timeinfo;
	bool		strftimefail = false;
	int			encoding;
	int			i;
	char	   *save_lc_time;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* did we do this already? */
	if (CurrentLCTimeValid)
		return;

	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

	/*
	 * As in PGLC_localeconv(), it's critical that we not throw error while
	 * libc's locale settings have nondefault values.  Hence, we just call
	 * strftime() within the critical section, and then convert and save its
	 * results afterwards.
	 */

	/* Save prevailing value of time locale */
	save_lc_time = setlocale(LC_TIME, NULL);
	if (!save_lc_time)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_time = pstrdup(save_lc_time);

#ifdef WIN32

	/*
	 * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
	 * must set it here.  This code looks the same as what PGLC_localeconv()
	 * does, but the underlying reason is different: this does NOT determine
	 * the encoding we'll get back from strftime_win32().
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* use lc_time to set the ctype */
	setlocale(LC_CTYPE, locale_time);
#endif

	/* critical section starts here: no errors may be thrown until restore */
	setlocale(LC_TIME, locale_time);

	/* We use times close to current time as data for strftime(). */
	timenow = time(NULL);
	timeinfo = localtime(&timenow);

	/* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
	bufptr = buf;

	/*
	 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
	 * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
	 * error.)  An implementation might report errors (e.g. ENOMEM) by
	 * returning 0 (or, less plausibly, a negative value) and setting errno.
	 * Report errno just in case the implementation did that, but clear it in
	 * advance of the calls so we don't emit a stale, unrelated errno.
	 */
	errno = 0;

	/*
	 * localized days
	 *
	 * Slots are filled in the order abbreviated name, full name for each
	 * weekday; the readback loop below consumes them in the same order.
	 * Failures are only recorded, since we cannot report them yet.
	 */
	for (i = 0; i < 7; i++)
	{
		timeinfo->tm_wday = i;
		if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/* localized months, again abbreviated name first, then full name */
	for (i = 0; i < 12; i++)
	{
		timeinfo->tm_mon = i;
		timeinfo->tm_mday = 1;	/* make sure we don't have invalid date */
		if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/*
	 * Restore the prevailing locale settings; as in PGLC_localeconv(),
	 * failure to do so is fatal.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_TIME, save_lc_time))
		elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);

	/*
	 * At this point we've done our best to clean up, and can throw errors, or
	 * call functions that might throw errors, with a clean conscience.
	 */
	if (strftimefail)
		elog(ERROR, "strftime() failed: %m");

	/* Release the pstrdup'd locale names */
	pfree(save_lc_time);
#ifdef WIN32
	pfree(save_lc_ctype);
#endif

#ifndef WIN32

	/*
	 * As in PGLC_localeconv(), we must convert strftime()'s output from the
	 * encoding implied by LC_TIME to the database encoding.  If we can't
	 * identify the LC_TIME encoding, just perform encoding validation.
	 */
	encoding = pg_get_encoding_from_locale(locale_time, true);
	if (encoding < 0)
		encoding = PG_SQL_ASCII;

#else

	/*
	 * On Windows, strftime_win32() always returns UTF8 data, so convert from
	 * that if necessary.
	 */
	encoding = PG_UTF8;

#endif							/* WIN32 */

	/* walk buf[] again in the same slot order used when filling it */
	bufptr = buf;

	/* localized days */
	for (i = 0; i < 7; i++)
	{
		cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	/* localized months */
	for (i = 0; i < 12; i++)
	{
		cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	/* only mark the cache valid once every string has been stored */
	CurrentLCTimeValid = true;
}
934
935
936 #if defined(WIN32) && defined(LC_MESSAGES)
937 /*
938 * Convert a Windows setlocale() argument to a Unix-style one.
939 *
940 * Regardless of platform, we install message catalogs under a Unix-style
941 * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
942 * following that style will elicit localized interface strings.
943 *
944 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
945 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
946 * case-insensitive. setlocale() returns the fully-qualified form; for
947 * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
948 * setlocale() and _create_locale() select a "locale identifier"[1] and store
949 * it in an undocumented _locale_t field. From that LCID, we can retrieve the
950 * ISO 639 language and the ISO 3166 country. Character encoding does not
951 * matter, because the server and client encodings govern that.
952 *
953 * Windows Vista introduced the "locale name" concept[2], closely following
954 * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
955 * Studio 2012, setlocale() accepts locale names in addition to the strings it
956 * accepted historically. It does not standardize them; setlocale("Th-tH")
957 * returns "Th-tH". setlocale(category, "") still returns a traditional
958 * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
959 * content to carry locale names instead of locale identifiers.
960 *
 * Visual Studio 2015 should still be able to do the same as Visual Studio
 * 2012, but the declaration of locale_name is missing from _locale_t, which
 * makes that code fail to compile.  We therefore fall back to enumerating
 * all system locales with EnumSystemLocalesEx() to find the required locale
 * name.  If the input argument is already in Unix style, we can get the ISO
 * locale name directly by calling GetLocaleInfoEx() with an LCType of
 * LOCALE_SNAME.
968 *
969 * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
970 * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
971 * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
972 * localized messages. In particular, every lc_messages setting that initdb
973 * can select automatically will yield only C-locale messages. XXX This could
974 * be fixed by running the fully-qualified locale name through a lookup table.
975 *
976 * This function returns a pointer to a static buffer bearing the converted
977 * name or NULL if conversion fails.
978 *
979 * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
980 * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
981 */
982
983 #if _MSC_VER >= 1900
984 /*
985 * Callback function for EnumSystemLocalesEx() in get_iso_localename().
986 *
987 * This function enumerates all system locales, searching for one that matches
988 * an input with the format: <Language>[_<Country>], e.g.
989 * English[_United States]
990 *
991 * The input is a three wchar_t array as an LPARAM. The first element is the
992 * locale_name we want to match, the second element is an allocated buffer
993 * where the Unix-style locale is copied if a match is found, and the third
994 * element is the search status, 1 if a match was found, 0 otherwise.
995 */
996 static BOOL CALLBACK
search_locale_enum(LPWSTR pStr,DWORD dwFlags,LPARAM lparam)997 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
998 {
999 wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
1000 wchar_t **argv;
1001
1002 (void) (dwFlags);
1003
1004 argv = (wchar_t **) lparam;
1005 *argv[2] = (wchar_t) 0;
1006
1007 memset(test_locale, 0, sizeof(test_locale));
1008
1009 /* Get the name of the <Language> in English */
1010 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1011 test_locale, LOCALE_NAME_MAX_LENGTH))
1012 {
1013 /*
1014 * If the enumerated locale does not have a hyphen ("en") OR the
1015 * lc_message input does not have an underscore ("English"), we only
1016 * need to compare the <Language> tags.
1017 */
1018 if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1019 {
1020 if (_wcsicmp(argv[0], test_locale) == 0)
1021 {
1022 wcscpy(argv[1], pStr);
1023 *argv[2] = (wchar_t) 1;
1024 return FALSE;
1025 }
1026 }
1027
1028 /*
1029 * We have to compare a full <Language>_<Country> tag, so we append
1030 * the underscore and name of the country/region in English, e.g.
1031 * "English_United States".
1032 */
1033 else
1034 {
1035 size_t len;
1036
1037 wcscat(test_locale, L"_");
1038 len = wcslen(test_locale);
1039 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1040 test_locale + len,
1041 LOCALE_NAME_MAX_LENGTH - len))
1042 {
1043 if (_wcsicmp(argv[0], test_locale) == 0)
1044 {
1045 wcscpy(argv[1], pStr);
1046 *argv[2] = (wchar_t) 1;
1047 return FALSE;
1048 }
1049 }
1050 }
1051 }
1052
1053 return TRUE;
1054 }
1055
1056 /*
1057 * This function converts a Windows locale name to an ISO formatted version
1058 * for Visual Studio 2015 or greater.
1059 *
1060 * Returns NULL, if no valid conversion was found.
1061 */
1062 static char *
get_iso_localename(const char * winlocname)1063 get_iso_localename(const char *winlocname)
1064 {
1065 wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1066 wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1067 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1068 char *period;
1069 int len;
1070 int ret_val;
1071
1072 /*
1073 * Valid locales have the following syntax:
1074 * <Language>[_<Country>[.<CodePage>]]
1075 *
1076 * GetLocaleInfoEx can only take locale name without code-page and for the
1077 * purpose of this API the code-page doesn't matter.
1078 */
1079 period = strchr(winlocname, '.');
1080 if (period != NULL)
1081 len = period - winlocname;
1082 else
1083 len = pg_mbstrlen(winlocname);
1084
1085 memset(wc_locale_name, 0, sizeof(wc_locale_name));
1086 memset(buffer, 0, sizeof(buffer));
1087 MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1088 LOCALE_NAME_MAX_LENGTH);
1089
1090 /*
1091 * If the lc_messages is already an Unix-style string, we have a direct
1092 * match with LOCALE_SNAME, e.g. en-US, en_US.
1093 */
1094 ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1095 LOCALE_NAME_MAX_LENGTH);
1096 if (!ret_val)
1097 {
1098 /*
1099 * Search for a locale in the system that matches language and country
1100 * name.
1101 */
1102 wchar_t *argv[3];
1103
1104 argv[0] = wc_locale_name;
1105 argv[1] = buffer;
1106 argv[2] = (wchar_t *) &ret_val;
1107 EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1108 NULL);
1109 }
1110
1111 if (ret_val)
1112 {
1113 size_t rc;
1114 char *hyphen;
1115
1116 /* Locale names use only ASCII, any conversion locale suffices. */
1117 rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1118 if (rc == -1 || rc == sizeof(iso_lc_messages))
1119 return NULL;
1120
1121 /*
1122 * Simply replace the hyphen with an underscore. See comments in
1123 * IsoLocaleName.
1124 */
1125 hyphen = strchr(iso_lc_messages, '-');
1126 if (hyphen)
1127 *hyphen = '_';
1128
1129 return iso_lc_messages;
1130 }
1131
1132 return NULL;
1133 }
1134 #endif /* _MSC_VER >= 1900 */
1135
/*
 * Convert a Windows setlocale() argument (winlocname) to a Unix-style
 * LL[_CC] name.
 *
 * Returns a pointer to a static buffer bearing the converted name, or NULL if
 * conversion fails or the compiler/runtime is not supported.  The strategy is
 * chosen at compile time from _MSC_VER:
 *	>= 1900: delegate to get_iso_localename()
 *	>= 1700: read the locale name stored in the _locale_t
 *	>= 1400: read the LCID stored in the _locale_t and ask Windows for the
 *			 ISO 639 language and ISO 3166 country
 *	otherwise: always NULL
 */
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)			/* VC8.0 or later */
	static char iso_lc_messages[32];
	_locale_t	loct = NULL;

	/* Names matching "c" or "posix" under pg_strcasecmp() map to "C" */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

#if (_MSC_VER >= 1900)			/* Visual Studio 2015 or later */
	return get_iso_localename(winlocname);
#else
	loct = _create_locale(LC_CTYPE, winlocname);
	if (loct != NULL)
	{
#if (_MSC_VER >= 1700)			/* Visual Studio 2012 or later */
		size_t		rc;
		char	   *hyphen;

		/* Locale names use only ASCII, any conversion locale suffices. */
		rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
						sizeof(iso_lc_messages), NULL);
		/* The name has been copied out, so the locale object can go now */
		_free_locale(loct);
		/* Fail on conversion error or if the result filled the buffer */
		if (rc == -1 || rc == sizeof(iso_lc_messages))
			return NULL;

		/*
		 * Since the message catalogs sit on a case-insensitive filesystem, we
		 * need not standardize letter case here.  So long as we do not ship
		 * message catalogs for which it would matter, we also need not
		 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
		 * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
		 *
		 * Note that the locale name can be less-specific than the value we
		 * would derive under earlier Visual Studio releases.  For example,
		 * French_France.1252 yields just "fr".  This does not affect any of
		 * the country-specific message catalogs available as of this writing
		 * (pt_BR, zh_CN, zh_TW).
		 */
		hyphen = strchr(iso_lc_messages, '-');
		if (hyphen)
			*hyphen = '_';
#else
		char		isolang[32],
					isocrty[32];
		LCID		lcid;

		/* A zero LCID is replaced by the US English locale identifier */
		lcid = loct->locinfo->lc_handle[LC_CTYPE];
		if (lcid == 0)
			lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
		_free_locale(loct);

		/* Fetch the ISO 639 language and ISO 3166 country for that LCID */
		if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
			return NULL;
		if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
			return NULL;
		/* Assemble "LL_CC"; the size passed leaves the last byte unused */
		snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
#endif
		return iso_lc_messages;
	}
	/* _create_locale() did not accept the name */
	return NULL;
#endif							/* Visual Studio 2015 or later */
#else
	return NULL;				/* Not supported on this version of msvc/mingw */
#endif							/* _MSC_VER >= 1400 */
}
1207 #endif /* WIN32 && LC_MESSAGES */
1208
1209
1210 /*
1211 * Detect aging strxfrm() implementations that, in a subset of locales, write
1212 * past the specified buffer length. Affected users must update OS packages
1213 * before using PostgreSQL 9.5 or later.
1214 *
1215 * Assume that the bug can come and go from one postmaster startup to another
1216 * due to physical replication among diverse machines. Assume that the bug's
1217 * presence will not change during the life of a particular postmaster. Given
1218 * those assumptions, call this no less than once per postmaster startup per
1219 * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
1220 * there is no need to consider pg_collation locales.
1221 */
1222 void
check_strxfrm_bug(void)1223 check_strxfrm_bug(void)
1224 {
1225 char buf[32];
1226 const int canary = 0x7F;
1227 bool ok = true;
1228
1229 /*
1230 * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1231 * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1232 * below that range.
1233 *
1234 * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1235 * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1236 * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1237 * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1238 */
1239 buf[7] = canary;
1240 (void) strxfrm(buf, "ab", 7);
1241 if (buf[7] != canary)
1242 ok = false;
1243
1244 /*
1245 * illumos bug #1594 was present in the source tree from 2010-10-11 to
1246 * 2012-02-01. Given an ASCII string of any length and length limit 1,
1247 * affected systems ignore the length limit and modify a number of bytes
1248 * one less than the return value. The problem inputs for this bug do not
1249 * overlap those for the Solaris bug, hence a distinct test.
1250 *
1251 * Affected systems include smartos-20110926T021612Z. Affected locales
1252 * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1253 */
1254 buf[1] = canary;
1255 (void) strxfrm(buf, "a", 1);
1256 if (buf[1] != canary)
1257 ok = false;
1258
1259 if (!ok)
1260 ereport(ERROR,
1261 (errcode(ERRCODE_SYSTEM_ERROR),
1262 errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1263 setlocale(LC_COLLATE, NULL)),
1264 errhint("Apply system library package updates.")));
1265 }
1266
1267
1268 /*
1269 * Cache mechanism for collation information.
1270 *
1271 * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1272 * (or POSIX), so we can optimize a few code paths in various places.
1273 * For the built-in C and POSIX collations, we can know that without even
1274 * doing a cache lookup, but we want to support aliases for C/POSIX too.
1275 * For the "default" collation, there are separate static cache variables,
1276 * since consulting the pg_collation catalog doesn't tell us what we need.
1277 *
1278 * Also, if a pg_locale_t has been requested for a collation, we cache that
1279 * for the life of a backend.
1280 *
1281 * Note that some code relies on the flags not reporting false negatives
1282 * (that is, saying it's not C when it is). For example, char2wchar()
1283 * could fail if the locale is C, so str_tolower() shouldn't call it
1284 * in that case.
1285 *
1286 * Note that we currently lack any way to flush the cache. Since we don't
1287 * support ALTER COLLATION, this is OK. The worst case is that someone
1288 * drops a collation, and a useless cache entry hangs around in existing
1289 * backends.
1290 */
1291
1292 static collation_cache_entry *
lookup_collation_cache(Oid collation,bool set_flags)1293 lookup_collation_cache(Oid collation, bool set_flags)
1294 {
1295 collation_cache_entry *cache_entry;
1296 bool found;
1297
1298 Assert(OidIsValid(collation));
1299 Assert(collation != DEFAULT_COLLATION_OID);
1300
1301 if (collation_cache == NULL)
1302 {
1303 /* First time through, initialize the hash table */
1304 HASHCTL ctl;
1305
1306 memset(&ctl, 0, sizeof(ctl));
1307 ctl.keysize = sizeof(Oid);
1308 ctl.entrysize = sizeof(collation_cache_entry);
1309 collation_cache = hash_create("Collation cache", 100, &ctl,
1310 HASH_ELEM | HASH_BLOBS);
1311 }
1312
1313 cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1314 if (!found)
1315 {
1316 /*
1317 * Make sure cache entry is marked invalid, in case we fail before
1318 * setting things.
1319 */
1320 cache_entry->flags_valid = false;
1321 cache_entry->locale = 0;
1322 }
1323
1324 if (set_flags && !cache_entry->flags_valid)
1325 {
1326 /* Attempt to set the flags */
1327 HeapTuple tp;
1328 Form_pg_collation collform;
1329 const char *collcollate;
1330 const char *collctype;
1331
1332 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1333 if (!HeapTupleIsValid(tp))
1334 elog(ERROR, "cache lookup failed for collation %u", collation);
1335 collform = (Form_pg_collation) GETSTRUCT(tp);
1336
1337 collcollate = NameStr(collform->collcollate);
1338 collctype = NameStr(collform->collctype);
1339
1340 cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1341 (strcmp(collcollate, "POSIX") == 0));
1342 cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1343 (strcmp(collctype, "POSIX") == 0));
1344
1345 cache_entry->flags_valid = true;
1346
1347 ReleaseSysCache(tp);
1348 }
1349
1350 return cache_entry;
1351 }
1352
1353
1354 /*
1355 * Detect whether collation's LC_COLLATE property is C
1356 */
1357 bool
lc_collate_is_c(Oid collation)1358 lc_collate_is_c(Oid collation)
1359 {
1360 /*
1361 * If we're asked about "collation 0", return false, so that the code will
1362 * go into the non-C path and report that the collation is bogus.
1363 */
1364 if (!OidIsValid(collation))
1365 return false;
1366
1367 /*
1368 * If we're asked about the default collation, we have to inquire of the C
1369 * library. Cache the result so we only have to compute it once.
1370 */
1371 if (collation == DEFAULT_COLLATION_OID)
1372 {
1373 static int result = -1;
1374 char *localeptr;
1375
1376 if (result >= 0)
1377 return (bool) result;
1378 localeptr = setlocale(LC_COLLATE, NULL);
1379 if (!localeptr)
1380 elog(ERROR, "invalid LC_COLLATE setting");
1381
1382 if (strcmp(localeptr, "C") == 0)
1383 result = true;
1384 else if (strcmp(localeptr, "POSIX") == 0)
1385 result = true;
1386 else
1387 result = false;
1388 return (bool) result;
1389 }
1390
1391 /*
1392 * If we're asked about the built-in C/POSIX collations, we know that.
1393 */
1394 if (collation == C_COLLATION_OID ||
1395 collation == POSIX_COLLATION_OID)
1396 return true;
1397
1398 /*
1399 * Otherwise, we have to consult pg_collation, but we cache that.
1400 */
1401 return (lookup_collation_cache(collation, true))->collate_is_c;
1402 }
1403
1404 /*
1405 * Detect whether collation's LC_CTYPE property is C
1406 */
1407 bool
lc_ctype_is_c(Oid collation)1408 lc_ctype_is_c(Oid collation)
1409 {
1410 /*
1411 * If we're asked about "collation 0", return false, so that the code will
1412 * go into the non-C path and report that the collation is bogus.
1413 */
1414 if (!OidIsValid(collation))
1415 return false;
1416
1417 /*
1418 * If we're asked about the default collation, we have to inquire of the C
1419 * library. Cache the result so we only have to compute it once.
1420 */
1421 if (collation == DEFAULT_COLLATION_OID)
1422 {
1423 static int result = -1;
1424 char *localeptr;
1425
1426 if (result >= 0)
1427 return (bool) result;
1428 localeptr = setlocale(LC_CTYPE, NULL);
1429 if (!localeptr)
1430 elog(ERROR, "invalid LC_CTYPE setting");
1431
1432 if (strcmp(localeptr, "C") == 0)
1433 result = true;
1434 else if (strcmp(localeptr, "POSIX") == 0)
1435 result = true;
1436 else
1437 result = false;
1438 return (bool) result;
1439 }
1440
1441 /*
1442 * If we're asked about the built-in C/POSIX collations, we know that.
1443 */
1444 if (collation == C_COLLATION_OID ||
1445 collation == POSIX_COLLATION_OID)
1446 return true;
1447
1448 /*
1449 * Otherwise, we have to consult pg_collation, but we cache that.
1450 */
1451 return (lookup_collation_cache(collation, true))->ctype_is_c;
1452 }
1453
1454
/*
 * Raise the ERROR for a locale that newlocale()/_create_locale() refused to
 * create.  Reads errno as left behind by the failed call.
 */
#ifdef HAVE_LOCALE_T
static void
report_newlocale_failure(const char *localename)
{
	int			failure_errno;

	/*
	 * _create_locale() on Windows gives no useful error indication, and
	 * BSD-derived platforms don't bother setting errno either (although
	 * POSIX is pretty clear that newlocale should).  An untouched errno is
	 * therefore reported as ENOENT.
	 *
	 * Keep a private copy as well, since the auxiliary functions evaluated
	 * inside ereport() might change errno.
	 */
	failure_errno = (errno != 0) ? errno : ENOENT;
	errno = failure_errno;

	/*
	 * Here ENOENT stands for "no such locale", not "no such file"; the
	 * errdetail spells that out.
	 */
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("could not create locale \"%s\": %m",
					localename),
			 (failure_errno == ENOENT ?
			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
						localename) : 0)));
}
#endif							/* HAVE_LOCALE_T */
1486
1487
1488 /*
1489 * Create a locale_t from a collation OID. Results are cached for the
1490 * lifetime of the backend. Thus, do not free the result with freelocale().
1491 *
1492 * As a special optimization, the default/database collation returns 0.
1493 * Callers should then revert to the non-locale_t-enabled code path.
1494 * In fact, they shouldn't call this function at all when they are dealing
1495 * with the default locale. That can save quite a bit in hotspots.
1496 * Also, callers should avoid calling this before going down a C/POSIX
1497 * fastpath, because such a fastpath should work even on platforms without
1498 * locale_t support in the C library.
1499 *
1500 * For simplicity, we always generate COLLATE + CTYPE even though we
1501 * might only need one of them. Since this is called only once per session,
1502 * it shouldn't cost much.
1503 */
1504 pg_locale_t
pg_newlocale_from_collation(Oid collid)1505 pg_newlocale_from_collation(Oid collid)
1506 {
1507 collation_cache_entry *cache_entry;
1508
1509 /* Callers must pass a valid OID */
1510 Assert(OidIsValid(collid));
1511
1512 /* Return 0 for "default" collation, just in case caller forgets */
1513 if (collid == DEFAULT_COLLATION_OID)
1514 return (pg_locale_t) 0;
1515
1516 cache_entry = lookup_collation_cache(collid, false);
1517
1518 if (cache_entry->locale == 0)
1519 {
1520 /* We haven't computed this yet in this session, so do it */
1521 HeapTuple tp;
1522 Form_pg_collation collform;
1523 const char *collcollate;
1524 const char *collctype pg_attribute_unused();
1525 struct pg_locale_struct result;
1526 pg_locale_t resultp;
1527 Datum collversion;
1528 bool isnull;
1529
1530 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1531 if (!HeapTupleIsValid(tp))
1532 elog(ERROR, "cache lookup failed for collation %u", collid);
1533 collform = (Form_pg_collation) GETSTRUCT(tp);
1534
1535 collcollate = NameStr(collform->collcollate);
1536 collctype = NameStr(collform->collctype);
1537
1538 /* We'll fill in the result struct locally before allocating memory */
1539 memset(&result, 0, sizeof(result));
1540 result.provider = collform->collprovider;
1541
1542 if (collform->collprovider == COLLPROVIDER_LIBC)
1543 {
1544 #ifdef HAVE_LOCALE_T
1545 locale_t loc;
1546
1547 if (strcmp(collcollate, collctype) == 0)
1548 {
1549 /* Normal case where they're the same */
1550 errno = 0;
1551 #ifndef WIN32
1552 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1553 NULL);
1554 #else
1555 loc = _create_locale(LC_ALL, collcollate);
1556 #endif
1557 if (!loc)
1558 report_newlocale_failure(collcollate);
1559 }
1560 else
1561 {
1562 #ifndef WIN32
1563 /* We need two newlocale() steps */
1564 locale_t loc1;
1565
1566 errno = 0;
1567 loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1568 if (!loc1)
1569 report_newlocale_failure(collcollate);
1570 errno = 0;
1571 loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1572 if (!loc)
1573 report_newlocale_failure(collctype);
1574 #else
1575
1576 /*
1577 * XXX The _create_locale() API doesn't appear to support
1578 * this. Could perhaps be worked around by changing
1579 * pg_locale_t to contain two separate fields.
1580 */
1581 ereport(ERROR,
1582 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1583 errmsg("collations with different collate and ctype values are not supported on this platform")));
1584 #endif
1585 }
1586
1587 result.info.lt = loc;
1588 #else /* not HAVE_LOCALE_T */
1589 /* platform that doesn't support locale_t */
1590 ereport(ERROR,
1591 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1592 errmsg("collation provider LIBC is not supported on this platform")));
1593 #endif /* not HAVE_LOCALE_T */
1594 }
1595 else if (collform->collprovider == COLLPROVIDER_ICU)
1596 {
1597 #ifdef USE_ICU
1598 UCollator *collator;
1599 UErrorCode status;
1600
1601 if (strcmp(collcollate, collctype) != 0)
1602 ereport(ERROR,
1603 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1604 errmsg("collations with different collate and ctype values are not supported by ICU")));
1605
1606 status = U_ZERO_ERROR;
1607 collator = ucol_open(collcollate, &status);
1608 if (U_FAILURE(status))
1609 ereport(ERROR,
1610 (errmsg("could not open collator for locale \"%s\": %s",
1611 collcollate, u_errorName(status))));
1612
1613 /* We will leak this string if we get an error below :-( */
1614 result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1615 collcollate);
1616 result.info.icu.ucol = collator;
1617 #else /* not USE_ICU */
1618 /* could get here if a collation was created by a build with ICU */
1619 ereport(ERROR,
1620 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1621 errmsg("ICU is not supported in this build"), \
1622 errhint("You need to rebuild PostgreSQL using --with-icu.")));
1623 #endif /* not USE_ICU */
1624 }
1625
1626 collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1627 &isnull);
1628 if (!isnull)
1629 {
1630 char *actual_versionstr;
1631 char *collversionstr;
1632
1633 actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1634 if (!actual_versionstr)
1635 {
1636 /*
1637 * This could happen when specifying a version in CREATE
1638 * COLLATION for a libc locale, or manually creating a mess in
1639 * the catalogs.
1640 */
1641 ereport(ERROR,
1642 (errmsg("collation \"%s\" has no actual version, but a version was specified",
1643 NameStr(collform->collname))));
1644 }
1645 collversionstr = TextDatumGetCString(collversion);
1646
1647 if (strcmp(actual_versionstr, collversionstr) != 0)
1648 ereport(WARNING,
1649 (errmsg("collation \"%s\" has version mismatch",
1650 NameStr(collform->collname)),
1651 errdetail("The collation in the database was created using version %s, "
1652 "but the operating system provides version %s.",
1653 collversionstr, actual_versionstr),
1654 errhint("Rebuild all objects affected by this collation and run "
1655 "ALTER COLLATION %s REFRESH VERSION, "
1656 "or build PostgreSQL with the right library version.",
1657 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1658 NameStr(collform->collname)))));
1659 }
1660
1661 ReleaseSysCache(tp);
1662
1663 /* We'll keep the pg_locale_t structures in TopMemoryContext */
1664 resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1665 *resultp = result;
1666
1667 cache_entry->locale = resultp;
1668 }
1669
1670 return cache_entry->locale;
1671 }
1672
/*
 * Ask the collation provider's library which version of the collation named
 * collcollate it currently implements.
 *
 * The result is a palloc'd string, or NULL when the provider has no notion
 * of versions.  A given provider must be consistent about this: it either
 * returns a non-NULL string for every collcollate or NULL for every one,
 * never a mixture.
 */
char *
get_collation_actual_version(char collprovider, const char *collcollate)
{
#ifdef USE_ICU
	if (collprovider == COLLPROVIDER_ICU)
	{
		char		verstr[U_MAX_VERSION_STRING_LENGTH];
		UVersionInfo verinfo;
		UErrorCode	err = U_ZERO_ERROR;
		UCollator  *ucol;

		/* Open the collator just long enough to read its version */
		ucol = ucol_open(collcollate, &err);
		if (U_FAILURE(err))
			ereport(ERROR,
					(errmsg("could not open collator for locale \"%s\": %s",
							collcollate, u_errorName(err))));
		ucol_getVersion(ucol, verinfo);
		ucol_close(ucol);

		u_versionToString(verinfo, verstr);
		return pstrdup(verstr);
	}
#endif

	/* No other provider reports a version */
	return NULL;
}
1712
1713
1714 #ifdef USE_ICU
1715 /*
1716 * Converter object for converting between ICU's UChar strings and C strings
1717 * in database encoding. Since the database encoding doesn't change, we only
1718 * need one of these per session.
1719 */
1720 static UConverter *icu_converter = NULL;
1721
1722 static void
init_icu_converter(void)1723 init_icu_converter(void)
1724 {
1725 const char *icu_encoding_name;
1726 UErrorCode status;
1727 UConverter *conv;
1728
1729 if (icu_converter)
1730 return;
1731
1732 icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1733
1734 status = U_ZERO_ERROR;
1735 conv = ucnv_open(icu_encoding_name, &status);
1736 if (U_FAILURE(status))
1737 ereport(ERROR,
1738 (errmsg("could not open ICU converter for encoding \"%s\": %s",
1739 icu_encoding_name, u_errorName(status))));
1740
1741 icu_converter = conv;
1742 }
1743
1744 /*
1745 * Convert a string in the database encoding into a string of UChars.
1746 *
1747 * The source string at buff is of length nbytes
1748 * (it needn't be nul-terminated)
1749 *
1750 * *buff_uchar receives a pointer to the palloc'd result string, and
1751 * the function's result is the number of UChars generated.
1752 *
1753 * The result string is nul-terminated, though most callers rely on the
1754 * result length instead.
1755 */
1756 int32_t
icu_to_uchar(UChar ** buff_uchar,const char * buff,size_t nbytes)1757 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1758 {
1759 UErrorCode status;
1760 int32_t len_uchar;
1761
1762 init_icu_converter();
1763
1764 status = U_ZERO_ERROR;
1765 len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1766 buff, nbytes, &status);
1767 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1768 ereport(ERROR,
1769 (errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1770
1771 *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1772
1773 status = U_ZERO_ERROR;
1774 len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1775 buff, nbytes, &status);
1776 if (U_FAILURE(status))
1777 ereport(ERROR,
1778 (errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1779
1780 return len_uchar;
1781 }
1782
1783 /*
1784 * Convert a string of UChars into the database encoding.
1785 *
1786 * The source string at buff_uchar is of length len_uchar
1787 * (it needn't be nul-terminated)
1788 *
1789 * *result receives a pointer to the palloc'd result string, and the
1790 * function's result is the number of bytes generated (not counting nul).
1791 *
1792 * The result string is nul-terminated.
1793 */
1794 int32_t
icu_from_uchar(char ** result,const UChar * buff_uchar,int32_t len_uchar)1795 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1796 {
1797 UErrorCode status;
1798 int32_t len_result;
1799
1800 init_icu_converter();
1801
1802 status = U_ZERO_ERROR;
1803 len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1804 buff_uchar, len_uchar, &status);
1805 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1806 ereport(ERROR,
1807 (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1808
1809 *result = palloc(len_result + 1);
1810
1811 status = U_ZERO_ERROR;
1812 len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1813 buff_uchar, len_uchar, &status);
1814 if (U_FAILURE(status))
1815 ereport(ERROR,
1816 (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1817
1818 return len_result;
1819 }
1820 #endif /* USE_ICU */
1821
1822 /*
1823 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1824 * Therefore we keep them here rather than with the mbutils code.
1825 */
1826
1827 #ifdef USE_WIDE_UPPER_LOWER
1828
/*
 * wchar2char --- convert wide characters to multibyte format
 *
 * This has the same API as the standard wcstombs_l() function; in particular,
 * tolen is the maximum number of bytes to store at *to, and *from must be
 * zero-terminated.  The output will be zero-terminated iff there is room.
 *
 * locale is either 0, meaning the default locale, or a libc-provider
 * pg_locale_t.  Returns the number of bytes stored, not counting any
 * terminator, or (size_t) -1 if the conversion fails.  A tolen of zero
 * returns 0 without touching *to.
 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
{
	size_t		result;

	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);

	/* No room for any output at all */
	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * for some reason mbstowcs and wcstombs won't do this for us, so we use
	 * WideCharToMultiByte() here (and MultiByteToWideChar() for the reverse
	 * direction).
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		/* Source length -1: the input is treated as zero-terminated */
		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
									 NULL, NULL);
		/* A zero return is failure */
		if (result <= 0)
			result = -1;
		else
		{
			Assert(result <= tolen);
			/* Microsoft counts the zero terminator in the result */
			result--;
		}
	}
	else
#endif							/* WIN32 */
	/* NB: on WIN32 this "if" is the else-branch of the UTF8 test above */
	if (locale == (pg_locale_t) 0)
	{
		/* Use wcstombs directly for the default locale */
		result = wcstombs(to, from, tolen);
	}
	else
	{
#ifdef HAVE_LOCALE_T
#ifdef HAVE_WCSTOMBS_L
		/* Use wcstombs_l for nondefault locales */
		result = wcstombs_l(to, from, tolen, locale->info.lt);
#else							/* !HAVE_WCSTOMBS_L */
		/* We have to temporarily set the locale as current ... ugh */
		locale_t	save_locale = uselocale(locale->info.lt);

		result = wcstombs(to, from, tolen);

		/* Put back whatever locale was current before */
		uselocale(save_locale);
#endif							/* HAVE_WCSTOMBS_L */
#else							/* !HAVE_LOCALE_T */
		/* Can't have locale != 0 without HAVE_LOCALE_T */
		elog(ERROR, "wcstombs_l is not available");
		result = 0;				/* keep compiler quiet */
#endif							/* HAVE_LOCALE_T */
	}

	return result;
}
1897
1898 /*
1899 * char2wchar --- convert multibyte characters to wide characters
1900 *
1901 * This has almost the API of mbstowcs_l(), except that *from need not be
1902 * null-terminated; instead, the number of input bytes is specified as
1903 * fromlen. Also, we ereport() rather than returning -1 for invalid
1904 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1905 * The output will be zero-terminated iff there is room.
1906 */
1907 size_t
char2wchar(wchar_t * to,size_t tolen,const char * from,size_t fromlen,pg_locale_t locale)1908 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1909 pg_locale_t locale)
1910 {
1911 size_t result;
1912
1913 Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1914
1915 if (tolen == 0)
1916 return 0;
1917
1918 #ifdef WIN32
1919 /* See WIN32 "Unicode" comment above */
1920 if (GetDatabaseEncoding() == PG_UTF8)
1921 {
1922 /* Win32 API does not work for zero-length input */
1923 if (fromlen == 0)
1924 result = 0;
1925 else
1926 {
1927 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1928 /* A zero return is failure */
1929 if (result == 0)
1930 result = -1;
1931 }
1932
1933 if (result != -1)
1934 {
1935 Assert(result < tolen);
1936 /* Append trailing null wchar (MultiByteToWideChar() does not) */
1937 to[result] = 0;
1938 }
1939 }
1940 else
1941 #endif /* WIN32 */
1942 {
1943 /* mbstowcs requires ending '\0' */
1944 char *str = pnstrdup(from, fromlen);
1945
1946 if (locale == (pg_locale_t) 0)
1947 {
1948 /* Use mbstowcs directly for the default locale */
1949 result = mbstowcs(to, str, tolen);
1950 }
1951 else
1952 {
1953 #ifdef HAVE_LOCALE_T
1954 #ifdef HAVE_MBSTOWCS_L
1955 /* Use mbstowcs_l for nondefault locales */
1956 result = mbstowcs_l(to, str, tolen, locale->info.lt);
1957 #else /* !HAVE_MBSTOWCS_L */
1958 /* We have to temporarily set the locale as current ... ugh */
1959 locale_t save_locale = uselocale(locale->info.lt);
1960
1961 result = mbstowcs(to, str, tolen);
1962
1963 uselocale(save_locale);
1964 #endif /* HAVE_MBSTOWCS_L */
1965 #else /* !HAVE_LOCALE_T */
1966 /* Can't have locale != 0 without HAVE_LOCALE_T */
1967 elog(ERROR, "mbstowcs_l is not available");
1968 result = 0; /* keep compiler quiet */
1969 #endif /* HAVE_LOCALE_T */
1970 }
1971
1972 pfree(str);
1973 }
1974
1975 if (result == -1)
1976 {
1977 /*
1978 * Invalid multibyte character encountered. We try to give a useful
1979 * error message by letting pg_verifymbstr check the string. But it's
1980 * possible that the string is OK to us, and not OK to mbstowcs ---
1981 * this suggests that the LC_CTYPE locale is different from the
1982 * database encoding. Give a generic error message if verifymbstr
1983 * can't find anything wrong.
1984 */
1985 pg_verifymbstr(from, fromlen, false); /* might not return */
1986 /* but if it does ... */
1987 ereport(ERROR,
1988 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1989 errmsg("invalid multibyte character for locale"),
1990 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1991 }
1992
1993 return result;
1994 }
1995
1996 #endif /* USE_WIDE_UPPER_LOWER */
1997