1 /*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities
4 *
5 * Portions Copyright (c) 2002-2018, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12 /*----------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
17 *
18 * LC_MESSAGES is settable at run time and will take effect
19 * immediately.
20 *
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 * settable at run-time. However, we don't actually set those locale
23 * categories permanently. This would have bizarre effects like no
24 * longer accepting standard floating-point literals in some locales.
25 * Instead, we only set these locale categories briefly when needed,
26 * cache the required information obtained from localeconv() or
27 * strftime(), and then set the locale categories back to "C".
28 * The cached information is only used by the formatting functions
29 * (to_char, etc.) and the money type. For the user, this should all be
30 * transparent.
31 *
32 * !!! NOW HEAR THIS !!!
33 *
34 * We've been bitten repeatedly by this bug, so let's try to keep it in
35 * mind in future: on some platforms, the locale functions return pointers
36 * to static data that will be overwritten by any later locale function.
37 * Thus, for example, the obvious-looking sequence
38 * save = setlocale(category, NULL);
39 * if (!setlocale(category, value))
40 * fail = true;
41 * setlocale(category, save);
42 * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43 * will change the memory save is pointing at. To do this sort of thing
44 * safely, you *must* pstrdup what setlocale returns the first time.
45 *
46 * The POSIX locale standard is available here:
47 *
48 * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49 *----------
50 */
51
52
53 #include "postgres.h"
54
55 #include <time.h>
56
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "catalog/pg_control.h"
60 #include "mb/pg_wchar.h"
61 #include "utils/builtins.h"
62 #include "utils/hsearch.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_locale.h"
66 #include "utils/syscache.h"
67
68 #ifdef USE_ICU
69 #include <unicode/ucnv.h>
70 #endif
71
#ifdef WIN32
/*
 * This Windows file defines StrNCpy.  We don't need it here, so we undefine
 * our own definition first to keep the compiler quiet about a redefinition,
 * and undefine it again after the file is included, so we don't accidentally
 * use theirs.
 *
 * (The second #undef previously misspelled the macro name, so the Windows
 * definition was never actually removed.)
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
84
85 #define MAX_L10N_DATA 80
86
87
88 /* GUC settings */
89 char *locale_messages;
90 char *locale_monetary;
91 char *locale_numeric;
92 char *locale_time;
93
94 /* lc_time localization cache */
95 char *localized_abbrev_days[7];
96 char *localized_full_days[7];
97 char *localized_abbrev_months[12];
98 char *localized_full_months[12];
99
100 /* indicates whether locale information cache is valid */
101 static bool CurrentLocaleConvValid = false;
102 static bool CurrentLCTimeValid = false;
103
104 /* Environment variable storage area */
105
106 #define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
107
108 static char lc_collate_envbuf[LC_ENV_BUFSIZE];
109 static char lc_ctype_envbuf[LC_ENV_BUFSIZE];
110
111 #ifdef LC_MESSAGES
112 static char lc_messages_envbuf[LC_ENV_BUFSIZE];
113 #endif
114 static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
115 static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
116 static char lc_time_envbuf[LC_ENV_BUFSIZE];
117
118 /* Cache for collation-related knowledge */
119
120 typedef struct
121 {
122 Oid collid; /* hash key: pg_collation OID */
123 bool collate_is_c; /* is collation's LC_COLLATE C? */
124 bool ctype_is_c; /* is collation's LC_CTYPE C? */
125 bool flags_valid; /* true if above flags are valid */
126 pg_locale_t locale; /* locale_t struct, or 0 if not valid */
127 } collation_cache_entry;
128
129 static HTAB *collation_cache = NULL;
130
131
132 #if defined(WIN32) && defined(LC_MESSAGES)
133 static char *IsoLocaleName(const char *); /* MSVC specific */
134 #endif
135
136
137 /*
138 * pg_perm_setlocale
139 *
140 * This wraps the libc function setlocale(), with two additions. First, when
141 * changing LC_CTYPE, update gettext's encoding for the current message
142 * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
143 * not on Windows. Second, if the operation is successful, the corresponding
144 * LC_XXX environment variable is set to match. By setting the environment
145 * variable, we ensure that any subsequent use of setlocale(..., "") will
146 * preserve the settings made through this routine. Of course, LC_ALL must
147 * also be unset to fully ensure that, but that has to be done elsewhere after
148 * all the individual LC_XXX variables have been set correctly. (Thank you
149 * Perl for making this kluge necessary.)
150 */
151 char *
pg_perm_setlocale(int category,const char * locale)152 pg_perm_setlocale(int category, const char *locale)
153 {
154 char *result;
155 const char *envvar;
156 char *envbuf;
157
158 #ifndef WIN32
159 result = setlocale(category, locale);
160 #else
161
162 /*
163 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
164 * the given value is good and set it in the environment variables. We
165 * must ignore attempts to set to "", which means "keep using the old
166 * environment value".
167 */
168 #ifdef LC_MESSAGES
169 if (category == LC_MESSAGES)
170 {
171 result = (char *) locale;
172 if (locale == NULL || locale[0] == '\0')
173 return result;
174 }
175 else
176 #endif
177 result = setlocale(category, locale);
178 #endif /* WIN32 */
179
180 if (result == NULL)
181 return result; /* fall out immediately on failure */
182
183 /*
184 * Use the right encoding in translated messages. Under ENABLE_NLS, let
185 * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
186 * format strings are ASCII, but database-encoding strings may enter the
187 * message via %s. This makes the overall message encoding equal to the
188 * database encoding.
189 */
190 if (category == LC_CTYPE)
191 {
192 static char save_lc_ctype[LC_ENV_BUFSIZE];
193
194 /* copy setlocale() return value before callee invokes it again */
195 strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
196 result = save_lc_ctype;
197
198 #ifdef ENABLE_NLS
199 SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
200 #else
201 SetMessageEncoding(GetDatabaseEncoding());
202 #endif
203 }
204
205 switch (category)
206 {
207 case LC_COLLATE:
208 envvar = "LC_COLLATE";
209 envbuf = lc_collate_envbuf;
210 break;
211 case LC_CTYPE:
212 envvar = "LC_CTYPE";
213 envbuf = lc_ctype_envbuf;
214 break;
215 #ifdef LC_MESSAGES
216 case LC_MESSAGES:
217 envvar = "LC_MESSAGES";
218 envbuf = lc_messages_envbuf;
219 #ifdef WIN32
220 result = IsoLocaleName(locale);
221 if (result == NULL)
222 result = (char *) locale;
223 elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
224 #endif /* WIN32 */
225 break;
226 #endif /* LC_MESSAGES */
227 case LC_MONETARY:
228 envvar = "LC_MONETARY";
229 envbuf = lc_monetary_envbuf;
230 break;
231 case LC_NUMERIC:
232 envvar = "LC_NUMERIC";
233 envbuf = lc_numeric_envbuf;
234 break;
235 case LC_TIME:
236 envvar = "LC_TIME";
237 envbuf = lc_time_envbuf;
238 break;
239 default:
240 elog(FATAL, "unrecognized LC category: %d", category);
241 envvar = NULL; /* keep compiler quiet */
242 envbuf = NULL;
243 return NULL;
244 }
245
246 snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
247
248 if (putenv(envbuf))
249 return NULL;
250
251 return result;
252 }
253
254
255 /*
256 * Is the locale name valid for the locale category?
257 *
258 * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
259 * canonical name is stored there. This is especially useful for figuring out
260 * what locale name "" means (ie, the server environment value). (Actually,
261 * it seems that on most implementations that's the only thing it's good for;
262 * we could wish that setlocale gave back a canonically spelled version of
263 * the locale name, but typically it doesn't.)
264 */
265 bool
check_locale(int category,const char * locale,char ** canonname)266 check_locale(int category, const char *locale, char **canonname)
267 {
268 char *save;
269 char *res;
270
271 if (canonname)
272 *canonname = NULL; /* in case of failure */
273
274 save = setlocale(category, NULL);
275 if (!save)
276 return false; /* won't happen, we hope */
277
278 /* save may be pointing at a modifiable scratch variable, see above. */
279 save = pstrdup(save);
280
281 /* set the locale with setlocale, to see if it accepts it. */
282 res = setlocale(category, locale);
283
284 /* save canonical name if requested. */
285 if (res && canonname)
286 *canonname = pstrdup(res);
287
288 /* restore old value. */
289 if (!setlocale(category, save))
290 elog(WARNING, "failed to restore old locale \"%s\"", save);
291 pfree(save);
292
293 return (res != NULL);
294 }
295
296
297 /*
298 * GUC check/assign hooks
299 *
300 * For most locale categories, the assign hook doesn't actually set the locale
301 * permanently, just reset flags so that the next use will cache the
302 * appropriate values. (See explanation at the top of this file.)
303 *
304 * Note: we accept value = "" as selecting the postmaster's environment
305 * value, whatever it was (so long as the environment setting is legal).
306 * This will have been locked down by an earlier call to pg_perm_setlocale.
307 */
308 bool
check_locale_monetary(char ** newval,void ** extra,GucSource source)309 check_locale_monetary(char **newval, void **extra, GucSource source)
310 {
311 return check_locale(LC_MONETARY, *newval, NULL);
312 }
313
314 void
assign_locale_monetary(const char * newval,void * extra)315 assign_locale_monetary(const char *newval, void *extra)
316 {
317 CurrentLocaleConvValid = false;
318 }
319
320 bool
check_locale_numeric(char ** newval,void ** extra,GucSource source)321 check_locale_numeric(char **newval, void **extra, GucSource source)
322 {
323 return check_locale(LC_NUMERIC, *newval, NULL);
324 }
325
326 void
assign_locale_numeric(const char * newval,void * extra)327 assign_locale_numeric(const char *newval, void *extra)
328 {
329 CurrentLocaleConvValid = false;
330 }
331
332 bool
check_locale_time(char ** newval,void ** extra,GucSource source)333 check_locale_time(char **newval, void **extra, GucSource source)
334 {
335 return check_locale(LC_TIME, *newval, NULL);
336 }
337
338 void
assign_locale_time(const char * newval,void * extra)339 assign_locale_time(const char *newval, void *extra)
340 {
341 CurrentLCTimeValid = false;
342 }
343
344 /*
345 * We allow LC_MESSAGES to actually be set globally.
346 *
347 * Note: we normally disallow value = "" because it wouldn't have consistent
348 * semantics (it'd effectively just use the previous value). However, this
349 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
350 * not even if the attempted setting fails due to invalid environment value.
351 * The idea there is just to accept the environment setting *if possible*
352 * during startup, until we can read the proper value from postgresql.conf.
353 */
354 bool
check_locale_messages(char ** newval,void ** extra,GucSource source)355 check_locale_messages(char **newval, void **extra, GucSource source)
356 {
357 if (**newval == '\0')
358 {
359 if (source == PGC_S_DEFAULT)
360 return true;
361 else
362 return false;
363 }
364
365 /*
366 * LC_MESSAGES category does not exist everywhere, but accept it anyway
367 *
368 * On Windows, we can't even check the value, so accept blindly
369 */
370 #if defined(LC_MESSAGES) && !defined(WIN32)
371 return check_locale(LC_MESSAGES, *newval, NULL);
372 #else
373 return true;
374 #endif
375 }
376
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * Where the LC_MESSAGES category exists, install the new value for the
	 * whole process.  The result is deliberately discarded: a failure here
	 * is ignored, as explained in the notes preceding
	 * check_locale_messages().  Where the category does not exist, the
	 * setting is accepted but has no effect.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
388
389
/*
 * Release the malloc'd strings referenced by a struct lconv.  The struct
 * itself is not freed, and its pointer members are left as they were.
 *
 * Members that are NULL are fine: free(NULL) is a no-op.
 *
 * It's important that this not throw elog(ERROR).
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* numeric formatting */
	free(s->decimal_point);
	free(s->thousands_sep);
	free(s->grouping);

	/* monetary formatting */
	free(s->int_curr_symbol);
	free(s->currency_symbol);
	free(s->mon_decimal_point);
	free(s->mon_thousands_sep);
	free(s->mon_grouping);
	free(s->positive_sign);
	free(s->negative_sign);
}
418
/*
 * Report whether every string member of a struct lconv that we care about
 * is non-NULL.  Returns true only if all of them are set.
 *
 * The set of members examined must match free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
448
449
450 /*
451 * Convert the strdup'd string at *str from the specified encoding to the
452 * database encoding.
453 */
454 static void
db_encoding_convert(int encoding,char ** str)455 db_encoding_convert(int encoding, char **str)
456 {
457 char *pstr;
458 char *mstr;
459
460 /* convert the string to the database encoding */
461 pstr = pg_any_to_server(*str, strlen(*str), encoding);
462 if (pstr == *str)
463 return; /* no conversion happened */
464
465 /* need it malloc'd not palloc'd */
466 mstr = strdup(pstr);
467 if (mstr == NULL)
468 ereport(ERROR,
469 (errcode(ERRCODE_OUT_OF_MEMORY),
470 errmsg("out of memory")));
471
472 /* replace old string */
473 free(*str);
474 *str = mstr;
475
476 pfree(pstr);
477 }
478
479
480 /*
481 * Return the POSIX lconv struct (contains number/money formatting
482 * information) with locale information for all categories.
483 */
484 struct lconv *
PGLC_localeconv(void)485 PGLC_localeconv(void)
486 {
487 static struct lconv CurrentLocaleConv;
488 static bool CurrentLocaleConvAllocated = false;
489 struct lconv *extlconv;
490 struct lconv worklconv;
491 char *save_lc_monetary;
492 char *save_lc_numeric;
493 #ifdef WIN32
494 char *save_lc_ctype;
495 #endif
496
497 /* Did we do it already? */
498 if (CurrentLocaleConvValid)
499 return &CurrentLocaleConv;
500
501 /* Free any already-allocated storage */
502 if (CurrentLocaleConvAllocated)
503 {
504 free_struct_lconv(&CurrentLocaleConv);
505 CurrentLocaleConvAllocated = false;
506 }
507
508 /*
509 * This is tricky because we really don't want to risk throwing error
510 * while the locale is set to other than our usual settings. Therefore,
511 * the process is: collect the usual settings, set locale to special
512 * setting, copy relevant data into worklconv using strdup(), restore
513 * normal settings, convert data to desired encoding, and finally stash
514 * the collected data in CurrentLocaleConv. This makes it safe if we
515 * throw an error during encoding conversion or run out of memory anywhere
516 * in the process. All data pointed to by struct lconv members is
517 * allocated with strdup, to avoid premature elog(ERROR) and to allow
518 * using a single cleanup routine.
519 */
520 memset(&worklconv, 0, sizeof(worklconv));
521
522 /* Save prevailing values of monetary and numeric locales */
523 save_lc_monetary = setlocale(LC_MONETARY, NULL);
524 if (!save_lc_monetary)
525 elog(ERROR, "setlocale(NULL) failed");
526 save_lc_monetary = pstrdup(save_lc_monetary);
527
528 save_lc_numeric = setlocale(LC_NUMERIC, NULL);
529 if (!save_lc_numeric)
530 elog(ERROR, "setlocale(NULL) failed");
531 save_lc_numeric = pstrdup(save_lc_numeric);
532
533 #ifdef WIN32
534
535 /*
536 * The POSIX standard explicitly says that it is undefined what happens if
537 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
538 * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
539 * believe that localeconv() should return strings that are encoded in the
540 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
541 * once we have successfully collected the localeconv() results, we will
542 * convert them from that codeset to the desired server encoding.
543 *
544 * Windows, of course, resolutely does things its own way; on that
545 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
546 * results. Hence, we must temporarily set that category as well.
547 */
548
549 /* Save prevailing value of ctype locale */
550 save_lc_ctype = setlocale(LC_CTYPE, NULL);
551 if (!save_lc_ctype)
552 elog(ERROR, "setlocale(NULL) failed");
553 save_lc_ctype = pstrdup(save_lc_ctype);
554
555 /* Here begins the critical section where we must not throw error */
556
557 /* use numeric to set the ctype */
558 setlocale(LC_CTYPE, locale_numeric);
559 #endif
560
561 /* Get formatting information for numeric */
562 setlocale(LC_NUMERIC, locale_numeric);
563 extlconv = localeconv();
564
565 /* Must copy data now in case setlocale() overwrites it */
566 worklconv.decimal_point = strdup(extlconv->decimal_point);
567 worklconv.thousands_sep = strdup(extlconv->thousands_sep);
568 worklconv.grouping = strdup(extlconv->grouping);
569
570 #ifdef WIN32
571 /* use monetary to set the ctype */
572 setlocale(LC_CTYPE, locale_monetary);
573 #endif
574
575 /* Get formatting information for monetary */
576 setlocale(LC_MONETARY, locale_monetary);
577 extlconv = localeconv();
578
579 /* Must copy data now in case setlocale() overwrites it */
580 worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
581 worklconv.currency_symbol = strdup(extlconv->currency_symbol);
582 worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
583 worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
584 worklconv.mon_grouping = strdup(extlconv->mon_grouping);
585 worklconv.positive_sign = strdup(extlconv->positive_sign);
586 worklconv.negative_sign = strdup(extlconv->negative_sign);
587 /* Copy scalar fields as well */
588 worklconv.int_frac_digits = extlconv->int_frac_digits;
589 worklconv.frac_digits = extlconv->frac_digits;
590 worklconv.p_cs_precedes = extlconv->p_cs_precedes;
591 worklconv.p_sep_by_space = extlconv->p_sep_by_space;
592 worklconv.n_cs_precedes = extlconv->n_cs_precedes;
593 worklconv.n_sep_by_space = extlconv->n_sep_by_space;
594 worklconv.p_sign_posn = extlconv->p_sign_posn;
595 worklconv.n_sign_posn = extlconv->n_sign_posn;
596
597 /*
598 * Restore the prevailing locale settings; failure to do so is fatal.
599 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
600 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
601 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
602 * are almost certainly "C", there's really no reason that restoring those
603 * should fail.
604 */
605 #ifdef WIN32
606 if (!setlocale(LC_CTYPE, save_lc_ctype))
607 elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
608 #endif
609 if (!setlocale(LC_MONETARY, save_lc_monetary))
610 elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
611 if (!setlocale(LC_NUMERIC, save_lc_numeric))
612 elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
613
614 /*
615 * At this point we've done our best to clean up, and can call functions
616 * that might possibly throw errors with a clean conscience. But let's
617 * make sure we don't leak any already-strdup'd fields in worklconv.
618 */
619 PG_TRY();
620 {
621 int encoding;
622
623 /* Release the pstrdup'd locale names */
624 pfree(save_lc_monetary);
625 pfree(save_lc_numeric);
626 #ifdef WIN32
627 pfree(save_lc_ctype);
628 #endif
629
630 /* If any of the preceding strdup calls failed, complain now. */
631 if (!struct_lconv_is_valid(&worklconv))
632 ereport(ERROR,
633 (errcode(ERRCODE_OUT_OF_MEMORY),
634 errmsg("out of memory")));
635
636 /*
637 * Now we must perform encoding conversion from whatever's associated
638 * with the locales into the database encoding. If we can't identify
639 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
640 * use PG_SQL_ASCII, which will result in just validating that the
641 * strings are OK in the database encoding.
642 */
643 encoding = pg_get_encoding_from_locale(locale_numeric, true);
644 if (encoding < 0)
645 encoding = PG_SQL_ASCII;
646
647 db_encoding_convert(encoding, &worklconv.decimal_point);
648 db_encoding_convert(encoding, &worklconv.thousands_sep);
649 /* grouping is not text and does not require conversion */
650
651 encoding = pg_get_encoding_from_locale(locale_monetary, true);
652 if (encoding < 0)
653 encoding = PG_SQL_ASCII;
654
655 db_encoding_convert(encoding, &worklconv.int_curr_symbol);
656 db_encoding_convert(encoding, &worklconv.currency_symbol);
657 db_encoding_convert(encoding, &worklconv.mon_decimal_point);
658 db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
659 /* mon_grouping is not text and does not require conversion */
660 db_encoding_convert(encoding, &worklconv.positive_sign);
661 db_encoding_convert(encoding, &worklconv.negative_sign);
662 }
663 PG_CATCH();
664 {
665 free_struct_lconv(&worklconv);
666 PG_RE_THROW();
667 }
668 PG_END_TRY();
669
670 /*
671 * Everything is good, so save the results.
672 */
673 CurrentLocaleConv = worklconv;
674 CurrentLocaleConvAllocated = true;
675 CurrentLocaleConvValid = true;
676 return &CurrentLocaleConv;
677 }
678
#ifdef WIN32
/*
 * Windows replacement for strftime() that always produces UTF8 output.
 *
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Returns the number of bytes stored in dst, not counting the terminating
 * NUL, or 0 if wcsftime() failed (dst contents are then unspecified).
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * NOTE(review): the elog(ERROR) calls below can fire while
 * cache_locale_time() has LC_TIME/LC_CTYPE temporarily switched, which that
 * function says it must avoid -- confirm whether this is acceptable.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		len;
	wchar_t		wfmt[8];		/* formats used below need 3 chars */
	wchar_t		wresult[MAX_L10N_DATA];

	/*
	 * Widen the format string.  Only plain-ASCII formats are used in this
	 * file, so it is safe to declare them to be UTF8.
	 */
	len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
							  wfmt, lengthof(wfmt));
	if (len == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	len = wcsftime(wresult, MAX_L10N_DATA, wfmt, tm);
	if (len == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	/* Narrow the result to UTF8, leaving room for the terminator */
	len = WideCharToMultiByte(CP_UTF8, 0, wresult, len, dst, dstlen - 1,
							  NULL, NULL);
	if (len == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	dst[len] = '\0';

	return len;
}

/* from here on in this file, strftime() means the wrapper above */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
737
738 /*
739 * Subroutine for cache_locale_time().
740 * Convert the given string from encoding "encoding" to the database
741 * encoding, and store the result at *dst, replacing any previous value.
742 */
743 static void
cache_single_string(char ** dst,const char * src,int encoding)744 cache_single_string(char **dst, const char *src, int encoding)
745 {
746 char *ptr;
747 char *olddst;
748
749 /* Convert the string to the database encoding, or validate it's OK */
750 ptr = pg_any_to_server(src, strlen(src), encoding);
751
752 /* Store the string in long-lived storage, replacing any previous value */
753 olddst = *dst;
754 *dst = MemoryContextStrdup(TopMemoryContext, ptr);
755 if (olddst)
756 pfree(olddst);
757
758 /* Might as well clean up any palloc'd conversion result, too */
759 if (ptr != src)
760 pfree(ptr);
761 }
762
763 /*
764 * Update the lc_time localization cache variables if needed.
765 */
766 void
cache_locale_time(void)767 cache_locale_time(void)
768 {
769 char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
770 char *bufptr;
771 time_t timenow;
772 struct tm *timeinfo;
773 bool strftimefail = false;
774 int encoding;
775 int i;
776 char *save_lc_time;
777 #ifdef WIN32
778 char *save_lc_ctype;
779 #endif
780
781 /* did we do this already? */
782 if (CurrentLCTimeValid)
783 return;
784
785 elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
786
787 /*
788 * As in PGLC_localeconv(), it's critical that we not throw error while
789 * libc's locale settings have nondefault values. Hence, we just call
790 * strftime() within the critical section, and then convert and save its
791 * results afterwards.
792 */
793
794 /* Save prevailing value of time locale */
795 save_lc_time = setlocale(LC_TIME, NULL);
796 if (!save_lc_time)
797 elog(ERROR, "setlocale(NULL) failed");
798 save_lc_time = pstrdup(save_lc_time);
799
800 #ifdef WIN32
801
802 /*
803 * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
804 * must set it here. This code looks the same as what PGLC_localeconv()
805 * does, but the underlying reason is different: this does NOT determine
806 * the encoding we'll get back from strftime_win32().
807 */
808
809 /* Save prevailing value of ctype locale */
810 save_lc_ctype = setlocale(LC_CTYPE, NULL);
811 if (!save_lc_ctype)
812 elog(ERROR, "setlocale(NULL) failed");
813 save_lc_ctype = pstrdup(save_lc_ctype);
814
815 /* use lc_time to set the ctype */
816 setlocale(LC_CTYPE, locale_time);
817 #endif
818
819 setlocale(LC_TIME, locale_time);
820
821 /* We use times close to current time as data for strftime(). */
822 timenow = time(NULL);
823 timeinfo = localtime(&timenow);
824
825 /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
826 bufptr = buf;
827
828 /*
829 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
830 * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
831 * error.) An implementation might report errors (e.g. ENOMEM) by
832 * returning 0 (or, less plausibly, a negative value) and setting errno.
833 * Report errno just in case the implementation did that, but clear it in
834 * advance of the calls so we don't emit a stale, unrelated errno.
835 */
836 errno = 0;
837
838 /* localized days */
839 for (i = 0; i < 7; i++)
840 {
841 timeinfo->tm_wday = i;
842 if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
843 strftimefail = true;
844 bufptr += MAX_L10N_DATA;
845 if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
846 strftimefail = true;
847 bufptr += MAX_L10N_DATA;
848 }
849
850 /* localized months */
851 for (i = 0; i < 12; i++)
852 {
853 timeinfo->tm_mon = i;
854 timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
855 if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
856 strftimefail = true;
857 bufptr += MAX_L10N_DATA;
858 if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
859 strftimefail = true;
860 bufptr += MAX_L10N_DATA;
861 }
862
863 /*
864 * Restore the prevailing locale settings; as in PGLC_localeconv(),
865 * failure to do so is fatal.
866 */
867 #ifdef WIN32
868 if (!setlocale(LC_CTYPE, save_lc_ctype))
869 elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
870 #endif
871 if (!setlocale(LC_TIME, save_lc_time))
872 elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
873
874 /*
875 * At this point we've done our best to clean up, and can throw errors, or
876 * call functions that might throw errors, with a clean conscience.
877 */
878 if (strftimefail)
879 elog(ERROR, "strftime() failed: %m");
880
881 /* Release the pstrdup'd locale names */
882 pfree(save_lc_time);
883 #ifdef WIN32
884 pfree(save_lc_ctype);
885 #endif
886
887 #ifndef WIN32
888
889 /*
890 * As in PGLC_localeconv(), we must convert strftime()'s output from the
891 * encoding implied by LC_TIME to the database encoding. If we can't
892 * identify the LC_TIME encoding, just perform encoding validation.
893 */
894 encoding = pg_get_encoding_from_locale(locale_time, true);
895 if (encoding < 0)
896 encoding = PG_SQL_ASCII;
897
898 #else
899
900 /*
901 * On Windows, strftime_win32() always returns UTF8 data, so convert from
902 * that if necessary.
903 */
904 encoding = PG_UTF8;
905
906 #endif /* WIN32 */
907
908 bufptr = buf;
909
910 /* localized days */
911 for (i = 0; i < 7; i++)
912 {
913 cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
914 bufptr += MAX_L10N_DATA;
915 cache_single_string(&localized_full_days[i], bufptr, encoding);
916 bufptr += MAX_L10N_DATA;
917 }
918
919 /* localized months */
920 for (i = 0; i < 12; i++)
921 {
922 cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
923 bufptr += MAX_L10N_DATA;
924 cache_single_string(&localized_full_months[i], bufptr, encoding);
925 bufptr += MAX_L10N_DATA;
926 }
927
928 CurrentLCTimeValid = true;
929 }
930
931
932 #if defined(WIN32) && defined(LC_MESSAGES)
933 /*
934 * Convert a Windows setlocale() argument to a Unix-style one.
935 *
936 * Regardless of platform, we install message catalogs under a Unix-style
937 * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
938 * following that style will elicit localized interface strings.
939 *
940 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
941 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
942 * case-insensitive. setlocale() returns the fully-qualified form; for
943 * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
944 * setlocale() and _create_locale() select a "locale identifier"[1] and store
945 * it in an undocumented _locale_t field. From that LCID, we can retrieve the
946 * ISO 639 language and the ISO 3166 country. Character encoding does not
947 * matter, because the server and client encodings govern that.
948 *
949 * Windows Vista introduced the "locale name" concept[2], closely following
950 * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
951 * Studio 2012, setlocale() accepts locale names in addition to the strings it
952 * accepted historically. It does not standardize them; setlocale("Th-tH")
953 * returns "Th-tH". setlocale(category, "") still returns a traditional
954 * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
955 * content to carry locale names instead of locale identifiers.
956 *
 * Visual Studio 2015 should still be able to do the same as Visual Studio
 * 2012, but the declaration of locale_name is missing from _locale_t, so
 * code relying on it fails to compile.  Hence, for Visual Studio 2015 and
 * later, we instead enumerate all system locales with EnumSystemLocalesEx()
 * to find the required locale name.  If the input argument is already in
 * Unix style, we can get the ISO locale name directly by calling
 * GetLocaleInfoEx() with LCType LOCALE_SNAME.
964 *
965 * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
966 * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
967 * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
968 * localized messages. In particular, every lc_messages setting that initdb
969 * can select automatically will yield only C-locale messages. XXX This could
970 * be fixed by running the fully-qualified locale name through a lookup table.
971 *
972 * This function returns a pointer to a static buffer bearing the converted
973 * name or NULL if conversion fails.
974 *
975 * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
976 * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
977 */
978
979 #if _MSC_VER >= 1900
980 /*
981 * Callback function for EnumSystemLocalesEx() in get_iso_localename().
982 *
983 * This function enumerates all system locales, searching for one that matches
984 * an input with the format: <Language>[_<Country>], e.g.
985 * English[_United States]
986 *
 * The input is an array of three wchar_t pointers, passed as an LPARAM.  The
 * first element is the locale_name we want to match, the second element is
 * an allocated buffer where the Unix-style locale is copied if a match is
 * found, and the third element points to the search status: 1 if a match was
 * found, 0 otherwise.
991 */
992 static BOOL CALLBACK
search_locale_enum(LPWSTR pStr,DWORD dwFlags,LPARAM lparam)993 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
994 {
995 wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
996 wchar_t **argv;
997
998 (void) (dwFlags);
999
1000 argv = (wchar_t **) lparam;
1001 *argv[2] = (wchar_t) 0;
1002
1003 memset(test_locale, 0, sizeof(test_locale));
1004
1005 /* Get the name of the <Language> in English */
1006 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1007 test_locale, LOCALE_NAME_MAX_LENGTH))
1008 {
1009 /*
1010 * If the enumerated locale does not have a hyphen ("en") OR the
1011 * lc_message input does not have an underscore ("English"), we only
1012 * need to compare the <Language> tags.
1013 */
1014 if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1015 {
1016 if (_wcsicmp(argv[0], test_locale) == 0)
1017 {
1018 wcscpy(argv[1], pStr);
1019 *argv[2] = (wchar_t) 1;
1020 return FALSE;
1021 }
1022 }
1023
1024 /*
1025 * We have to compare a full <Language>_<Country> tag, so we append
1026 * the underscore and name of the country/region in English, e.g.
1027 * "English_United States".
1028 */
1029 else
1030 {
1031 size_t len;
1032
1033 wcscat(test_locale, L"_");
1034 len = wcslen(test_locale);
1035 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1036 test_locale + len,
1037 LOCALE_NAME_MAX_LENGTH - len))
1038 {
1039 if (_wcsicmp(argv[0], test_locale) == 0)
1040 {
1041 wcscpy(argv[1], pStr);
1042 *argv[2] = (wchar_t) 1;
1043 return FALSE;
1044 }
1045 }
1046 }
1047 }
1048
1049 return TRUE;
1050 }
1051
1052 /*
1053 * This function converts a Windows locale name to an ISO formatted version
1054 * for Visual Studio 2015 or greater.
1055 *
1056 * Returns NULL, if no valid conversion was found.
1057 */
1058 static char *
get_iso_localename(const char * winlocname)1059 get_iso_localename(const char *winlocname)
1060 {
1061 wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1062 wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1063 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1064 char *period;
1065 int len;
1066 int ret_val;
1067
1068 /*
1069 * Valid locales have the following syntax:
1070 * <Language>[_<Country>[.<CodePage>]]
1071 *
1072 * GetLocaleInfoEx can only take locale name without code-page and for the
1073 * purpose of this API the code-page doesn't matter.
1074 */
1075 period = strchr(winlocname, '.');
1076 if (period != NULL)
1077 len = period - winlocname;
1078 else
1079 len = pg_mbstrlen(winlocname);
1080
1081 memset(wc_locale_name, 0, sizeof(wc_locale_name));
1082 memset(buffer, 0, sizeof(buffer));
1083 MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1084 LOCALE_NAME_MAX_LENGTH);
1085
1086 /*
1087 * If the lc_messages is already an Unix-style string, we have a direct
1088 * match with LOCALE_SNAME, e.g. en-US, en_US.
1089 */
1090 ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1091 LOCALE_NAME_MAX_LENGTH);
1092 if (!ret_val)
1093 {
1094 /*
1095 * Search for a locale in the system that matches language and country
1096 * name.
1097 */
1098 wchar_t *argv[3];
1099
1100 argv[0] = wc_locale_name;
1101 argv[1] = buffer;
1102 argv[2] = (wchar_t *) &ret_val;
1103 EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1104 NULL);
1105 }
1106
1107 if (ret_val)
1108 {
1109 size_t rc;
1110 char *hyphen;
1111
1112 /* Locale names use only ASCII, any conversion locale suffices. */
1113 rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1114 if (rc == -1 || rc == sizeof(iso_lc_messages))
1115 return NULL;
1116
1117 /*
1118 * Simply replace the hyphen with an underscore. See comments in
1119 * IsoLocaleName.
1120 */
1121 hyphen = strchr(iso_lc_messages, '-');
1122 if (hyphen)
1123 *hyphen = '_';
1124
1125 return iso_lc_messages;
1126 }
1127
1128 return NULL;
1129 }
1130 #endif /* _MSC_VER >= 1900 */
1131
/*
 * Convert a Windows setlocale() argument to a Unix-style locale name.
 *
 * Returns a pointer to a static buffer holding the converted name, or NULL
 * if the conversion fails or is not supported by the compiler in use.  The
 * buffer is overwritten by later calls.
 *
 * Which conversion strategy is compiled in depends on _MSC_VER:
 *	>= 1900: delegate to get_iso_localename()
 *	>= 1700: read the locale name stored in the _locale_t
 *	>= 1400: read the LCID stored in the _locale_t and query its ISO names
 *	otherwise: always return NULL
 */
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)			/* VC8.0 or later */
	static char iso_lc_messages[32];
	_locale_t	loct = NULL;

	/* "C" and "POSIX" (compared with pg_strcasecmp) both map to "C" */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

#if (_MSC_VER >= 1900)			/* Visual Studio 2015 or later */
	return get_iso_localename(winlocname);
#else
	loct = _create_locale(LC_CTYPE, winlocname);
	if (loct != NULL)
	{
#if (_MSC_VER >= 1700)			/* Visual Studio 2012 or later */
		size_t		rc;
		char	   *hyphen;

		/* Locale names use only ASCII, any conversion locale suffices. */
		rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
						sizeof(iso_lc_messages), NULL);
		/* The locale object is no longer needed once the name is copied */
		_free_locale(loct);
		/* Fail on conversion error or if the result filled the whole buffer */
		if (rc == -1 || rc == sizeof(iso_lc_messages))
			return NULL;

		/*
		 * Since the message catalogs sit on a case-insensitive filesystem, we
		 * need not standardize letter case here.  So long as we do not ship
		 * message catalogs for which it would matter, we also need not
		 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
		 * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
		 *
		 * Note that the locale name can be less-specific than the value we
		 * would derive under earlier Visual Studio releases.  For example,
		 * French_France.1252 yields just "fr".  This does not affect any of
		 * the country-specific message catalogs available as of this writing
		 * (pt_BR, zh_CN, zh_TW).
		 */
		hyphen = strchr(iso_lc_messages, '-');
		if (hyphen)
			*hyphen = '_';
#else
		char		isolang[32],
					isocrty[32];
		LCID		lcid;

		/* A zero LCID is replaced by the en-US locale identifier */
		lcid = loct->locinfo->lc_handle[LC_CTYPE];
		if (lcid == 0)
			lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
		_free_locale(loct);

		/* Build "<ISO 639 language>_<ISO 3166 country>" from the LCID */
		if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
			return NULL;
		if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
			return NULL;
		snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
#endif
		return iso_lc_messages;
	}
	/* _create_locale() did not recognize the name */
	return NULL;
#endif							/* _MSC_VER >= 1900 */
#else
	return NULL;				/* Not supported on this version of msvc/mingw */
#endif							/* _MSC_VER >= 1400 */
}
1203 #endif /* WIN32 && LC_MESSAGES */
1204
1205
1206 /*
1207 * Detect aging strxfrm() implementations that, in a subset of locales, write
1208 * past the specified buffer length. Affected users must update OS packages
1209 * before using PostgreSQL 9.5 or later.
1210 *
1211 * Assume that the bug can come and go from one postmaster startup to another
1212 * due to physical replication among diverse machines. Assume that the bug's
1213 * presence will not change during the life of a particular postmaster. Given
1214 * those assumptions, call this no less than once per postmaster startup per
1215 * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
1216 * there is no need to consider pg_collation locales.
1217 */
1218 void
check_strxfrm_bug(void)1219 check_strxfrm_bug(void)
1220 {
1221 char buf[32];
1222 const int canary = 0x7F;
1223 bool ok = true;
1224
1225 /*
1226 * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1227 * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1228 * below that range.
1229 *
1230 * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1231 * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1232 * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1233 * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1234 */
1235 buf[7] = canary;
1236 (void) strxfrm(buf, "ab", 7);
1237 if (buf[7] != canary)
1238 ok = false;
1239
1240 /*
1241 * illumos bug #1594 was present in the source tree from 2010-10-11 to
1242 * 2012-02-01. Given an ASCII string of any length and length limit 1,
1243 * affected systems ignore the length limit and modify a number of bytes
1244 * one less than the return value. The problem inputs for this bug do not
1245 * overlap those for the Solaris bug, hence a distinct test.
1246 *
1247 * Affected systems include smartos-20110926T021612Z. Affected locales
1248 * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1249 */
1250 buf[1] = canary;
1251 (void) strxfrm(buf, "a", 1);
1252 if (buf[1] != canary)
1253 ok = false;
1254
1255 if (!ok)
1256 ereport(ERROR,
1257 (errcode(ERRCODE_SYSTEM_ERROR),
1258 errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1259 setlocale(LC_COLLATE, NULL)),
1260 errhint("Apply system library package updates.")));
1261 }
1262
1263
1264 /*
1265 * Cache mechanism for collation information.
1266 *
1267 * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1268 * (or POSIX), so we can optimize a few code paths in various places.
1269 * For the built-in C and POSIX collations, we can know that without even
1270 * doing a cache lookup, but we want to support aliases for C/POSIX too.
1271 * For the "default" collation, there are separate static cache variables,
1272 * since consulting the pg_collation catalog doesn't tell us what we need.
1273 *
1274 * Also, if a pg_locale_t has been requested for a collation, we cache that
1275 * for the life of a backend.
1276 *
1277 * Note that some code relies on the flags not reporting false negatives
1278 * (that is, saying it's not C when it is). For example, char2wchar()
1279 * could fail if the locale is C, so str_tolower() shouldn't call it
1280 * in that case.
1281 *
1282 * Note that we currently lack any way to flush the cache. Since we don't
1283 * support ALTER COLLATION, this is OK. The worst case is that someone
1284 * drops a collation, and a useless cache entry hangs around in existing
1285 * backends.
1286 */
1287
1288 static collation_cache_entry *
lookup_collation_cache(Oid collation,bool set_flags)1289 lookup_collation_cache(Oid collation, bool set_flags)
1290 {
1291 collation_cache_entry *cache_entry;
1292 bool found;
1293
1294 Assert(OidIsValid(collation));
1295 Assert(collation != DEFAULT_COLLATION_OID);
1296
1297 if (collation_cache == NULL)
1298 {
1299 /* First time through, initialize the hash table */
1300 HASHCTL ctl;
1301
1302 memset(&ctl, 0, sizeof(ctl));
1303 ctl.keysize = sizeof(Oid);
1304 ctl.entrysize = sizeof(collation_cache_entry);
1305 collation_cache = hash_create("Collation cache", 100, &ctl,
1306 HASH_ELEM | HASH_BLOBS);
1307 }
1308
1309 cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1310 if (!found)
1311 {
1312 /*
1313 * Make sure cache entry is marked invalid, in case we fail before
1314 * setting things.
1315 */
1316 cache_entry->flags_valid = false;
1317 cache_entry->locale = 0;
1318 }
1319
1320 if (set_flags && !cache_entry->flags_valid)
1321 {
1322 /* Attempt to set the flags */
1323 HeapTuple tp;
1324 Form_pg_collation collform;
1325 const char *collcollate;
1326 const char *collctype;
1327
1328 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1329 if (!HeapTupleIsValid(tp))
1330 elog(ERROR, "cache lookup failed for collation %u", collation);
1331 collform = (Form_pg_collation) GETSTRUCT(tp);
1332
1333 collcollate = NameStr(collform->collcollate);
1334 collctype = NameStr(collform->collctype);
1335
1336 cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1337 (strcmp(collcollate, "POSIX") == 0));
1338 cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1339 (strcmp(collctype, "POSIX") == 0));
1340
1341 cache_entry->flags_valid = true;
1342
1343 ReleaseSysCache(tp);
1344 }
1345
1346 return cache_entry;
1347 }
1348
1349
1350 /*
1351 * Detect whether collation's LC_COLLATE property is C
1352 */
1353 bool
lc_collate_is_c(Oid collation)1354 lc_collate_is_c(Oid collation)
1355 {
1356 /*
1357 * If we're asked about "collation 0", return false, so that the code will
1358 * go into the non-C path and report that the collation is bogus.
1359 */
1360 if (!OidIsValid(collation))
1361 return false;
1362
1363 /*
1364 * If we're asked about the default collation, we have to inquire of the C
1365 * library. Cache the result so we only have to compute it once.
1366 */
1367 if (collation == DEFAULT_COLLATION_OID)
1368 {
1369 static int result = -1;
1370 char *localeptr;
1371
1372 if (result >= 0)
1373 return (bool) result;
1374 localeptr = setlocale(LC_COLLATE, NULL);
1375 if (!localeptr)
1376 elog(ERROR, "invalid LC_COLLATE setting");
1377
1378 if (strcmp(localeptr, "C") == 0)
1379 result = true;
1380 else if (strcmp(localeptr, "POSIX") == 0)
1381 result = true;
1382 else
1383 result = false;
1384 return (bool) result;
1385 }
1386
1387 /*
1388 * If we're asked about the built-in C/POSIX collations, we know that.
1389 */
1390 if (collation == C_COLLATION_OID ||
1391 collation == POSIX_COLLATION_OID)
1392 return true;
1393
1394 /*
1395 * Otherwise, we have to consult pg_collation, but we cache that.
1396 */
1397 return (lookup_collation_cache(collation, true))->collate_is_c;
1398 }
1399
1400 /*
1401 * Detect whether collation's LC_CTYPE property is C
1402 */
1403 bool
lc_ctype_is_c(Oid collation)1404 lc_ctype_is_c(Oid collation)
1405 {
1406 /*
1407 * If we're asked about "collation 0", return false, so that the code will
1408 * go into the non-C path and report that the collation is bogus.
1409 */
1410 if (!OidIsValid(collation))
1411 return false;
1412
1413 /*
1414 * If we're asked about the default collation, we have to inquire of the C
1415 * library. Cache the result so we only have to compute it once.
1416 */
1417 if (collation == DEFAULT_COLLATION_OID)
1418 {
1419 static int result = -1;
1420 char *localeptr;
1421
1422 if (result >= 0)
1423 return (bool) result;
1424 localeptr = setlocale(LC_CTYPE, NULL);
1425 if (!localeptr)
1426 elog(ERROR, "invalid LC_CTYPE setting");
1427
1428 if (strcmp(localeptr, "C") == 0)
1429 result = true;
1430 else if (strcmp(localeptr, "POSIX") == 0)
1431 result = true;
1432 else
1433 result = false;
1434 return (bool) result;
1435 }
1436
1437 /*
1438 * If we're asked about the built-in C/POSIX collations, we know that.
1439 */
1440 if (collation == C_COLLATION_OID ||
1441 collation == POSIX_COLLATION_OID)
1442 return true;
1443
1444 /*
1445 * Otherwise, we have to consult pg_collation, but we cache that.
1446 */
1447 return (lookup_collation_cache(collation, true))->ctype_is_c;
1448 }
1449
1450
1451 /* simple subroutine for reporting errors from newlocale() */
1452 #ifdef HAVE_LOCALE_T
1453 static void
report_newlocale_failure(const char * localename)1454 report_newlocale_failure(const char *localename)
1455 {
1456 int save_errno;
1457
1458 /*
1459 * Windows doesn't provide any useful error indication from
1460 * _create_locale(), and BSD-derived platforms don't seem to feel they
1461 * need to set errno either (even though POSIX is pretty clear that
1462 * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1463 * is what to report.
1464 */
1465 if (errno == 0)
1466 errno = ENOENT;
1467
1468 /*
1469 * ENOENT means "no such locale", not "no such file", so clarify that
1470 * errno with an errdetail message.
1471 */
1472 save_errno = errno; /* auxiliary funcs might change errno */
1473 ereport(ERROR,
1474 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1475 errmsg("could not create locale \"%s\": %m",
1476 localename),
1477 (save_errno == ENOENT ?
1478 errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1479 localename) : 0)));
1480 }
1481 #endif /* HAVE_LOCALE_T */
1482
1483
1484 /*
1485 * Create a locale_t from a collation OID. Results are cached for the
1486 * lifetime of the backend. Thus, do not free the result with freelocale().
1487 *
1488 * As a special optimization, the default/database collation returns 0.
1489 * Callers should then revert to the non-locale_t-enabled code path.
1490 * In fact, they shouldn't call this function at all when they are dealing
1491 * with the default locale. That can save quite a bit in hotspots.
1492 * Also, callers should avoid calling this before going down a C/POSIX
1493 * fastpath, because such a fastpath should work even on platforms without
1494 * locale_t support in the C library.
1495 *
1496 * For simplicity, we always generate COLLATE + CTYPE even though we
1497 * might only need one of them. Since this is called only once per session,
1498 * it shouldn't cost much.
1499 */
1500 pg_locale_t
pg_newlocale_from_collation(Oid collid)1501 pg_newlocale_from_collation(Oid collid)
1502 {
1503 collation_cache_entry *cache_entry;
1504
1505 /* Callers must pass a valid OID */
1506 Assert(OidIsValid(collid));
1507
1508 /* Return 0 for "default" collation, just in case caller forgets */
1509 if (collid == DEFAULT_COLLATION_OID)
1510 return (pg_locale_t) 0;
1511
1512 cache_entry = lookup_collation_cache(collid, false);
1513
1514 if (cache_entry->locale == 0)
1515 {
1516 /* We haven't computed this yet in this session, so do it */
1517 HeapTuple tp;
1518 Form_pg_collation collform;
1519 const char *collcollate;
1520 const char *collctype pg_attribute_unused();
1521 struct pg_locale_struct result;
1522 pg_locale_t resultp;
1523 Datum collversion;
1524 bool isnull;
1525
1526 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1527 if (!HeapTupleIsValid(tp))
1528 elog(ERROR, "cache lookup failed for collation %u", collid);
1529 collform = (Form_pg_collation) GETSTRUCT(tp);
1530
1531 collcollate = NameStr(collform->collcollate);
1532 collctype = NameStr(collform->collctype);
1533
1534 /* We'll fill in the result struct locally before allocating memory */
1535 memset(&result, 0, sizeof(result));
1536 result.provider = collform->collprovider;
1537
1538 if (collform->collprovider == COLLPROVIDER_LIBC)
1539 {
1540 #ifdef HAVE_LOCALE_T
1541 locale_t loc;
1542
1543 if (strcmp(collcollate, collctype) == 0)
1544 {
1545 /* Normal case where they're the same */
1546 errno = 0;
1547 #ifndef WIN32
1548 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1549 NULL);
1550 #else
1551 loc = _create_locale(LC_ALL, collcollate);
1552 #endif
1553 if (!loc)
1554 report_newlocale_failure(collcollate);
1555 }
1556 else
1557 {
1558 #ifndef WIN32
1559 /* We need two newlocale() steps */
1560 locale_t loc1;
1561
1562 errno = 0;
1563 loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1564 if (!loc1)
1565 report_newlocale_failure(collcollate);
1566 errno = 0;
1567 loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1568 if (!loc)
1569 report_newlocale_failure(collctype);
1570 #else
1571
1572 /*
1573 * XXX The _create_locale() API doesn't appear to support
1574 * this. Could perhaps be worked around by changing
1575 * pg_locale_t to contain two separate fields.
1576 */
1577 ereport(ERROR,
1578 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1579 errmsg("collations with different collate and ctype values are not supported on this platform")));
1580 #endif
1581 }
1582
1583 result.info.lt = loc;
1584 #else /* not HAVE_LOCALE_T */
1585 /* platform that doesn't support locale_t */
1586 ereport(ERROR,
1587 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1588 errmsg("collation provider LIBC is not supported on this platform")));
1589 #endif /* not HAVE_LOCALE_T */
1590 }
1591 else if (collform->collprovider == COLLPROVIDER_ICU)
1592 {
1593 #ifdef USE_ICU
1594 UCollator *collator;
1595 UErrorCode status;
1596
1597 if (strcmp(collcollate, collctype) != 0)
1598 ereport(ERROR,
1599 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1600 errmsg("collations with different collate and ctype values are not supported by ICU")));
1601
1602 status = U_ZERO_ERROR;
1603 collator = ucol_open(collcollate, &status);
1604 if (U_FAILURE(status))
1605 ereport(ERROR,
1606 (errmsg("could not open collator for locale \"%s\": %s",
1607 collcollate, u_errorName(status))));
1608
1609 /* We will leak this string if we get an error below :-( */
1610 result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1611 collcollate);
1612 result.info.icu.ucol = collator;
1613 #else /* not USE_ICU */
1614 /* could get here if a collation was created by a build with ICU */
1615 ereport(ERROR,
1616 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1617 errmsg("ICU is not supported in this build"), \
1618 errhint("You need to rebuild PostgreSQL using --with-icu.")));
1619 #endif /* not USE_ICU */
1620 }
1621
1622 collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1623 &isnull);
1624 if (!isnull)
1625 {
1626 char *actual_versionstr;
1627 char *collversionstr;
1628
1629 actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1630 if (!actual_versionstr)
1631 {
1632 /*
1633 * This could happen when specifying a version in CREATE
1634 * COLLATION for a libc locale, or manually creating a mess in
1635 * the catalogs.
1636 */
1637 ereport(ERROR,
1638 (errmsg("collation \"%s\" has no actual version, but a version was specified",
1639 NameStr(collform->collname))));
1640 }
1641 collversionstr = TextDatumGetCString(collversion);
1642
1643 if (strcmp(actual_versionstr, collversionstr) != 0)
1644 ereport(WARNING,
1645 (errmsg("collation \"%s\" has version mismatch",
1646 NameStr(collform->collname)),
1647 errdetail("The collation in the database was created using version %s, "
1648 "but the operating system provides version %s.",
1649 collversionstr, actual_versionstr),
1650 errhint("Rebuild all objects affected by this collation and run "
1651 "ALTER COLLATION %s REFRESH VERSION, "
1652 "or build PostgreSQL with the right library version.",
1653 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1654 NameStr(collform->collname)))));
1655 }
1656
1657 ReleaseSysCache(tp);
1658
1659 /* We'll keep the pg_locale_t structures in TopMemoryContext */
1660 resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1661 *resultp = result;
1662
1663 cache_entry->locale = resultp;
1664 }
1665
1666 return cache_entry->locale;
1667 }
1668
1669 /*
1670 * Get provider-specific collation version string for the given collation from
1671 * the operating system/library.
1672 *
1673 * A particular provider must always either return a non-NULL string or return
1674 * NULL (if it doesn't support versions). It must not return NULL for some
1675 * collcollate and not NULL for others.
1676 */
char *
get_collation_actual_version(char collprovider, const char *collcollate)
{
	/* Providers without version support report NULL */
	char	   *version = NULL;

#ifdef USE_ICU
	if (collprovider == COLLPROVIDER_ICU)
	{
		UVersionInfo versioninfo;
		char		buf[U_MAX_VERSION_STRING_LENGTH];
		UErrorCode	status = U_ZERO_ERROR;
		UCollator  *collator;

		/* Open the collator just long enough to read its version */
		collator = ucol_open(collcollate, &status);
		if (U_FAILURE(status))
			ereport(ERROR,
					(errmsg("could not open collator for locale \"%s\": %s",
							collcollate, u_errorName(status))));
		ucol_getVersion(collator, versioninfo);
		ucol_close(collator);

		/* Hand back a palloc'd copy of the printable version */
		u_versionToString(versioninfo, buf);
		version = pstrdup(buf);
	}
#endif

	return version;
}
1708
1709
1710 #ifdef USE_ICU
1711 /*
1712 * Converter object for converting between ICU's UChar strings and C strings
1713 * in database encoding. Since the database encoding doesn't change, we only
1714 * need one of these per session.
1715 */
1716 static UConverter *icu_converter = NULL;
1717
1718 static void
init_icu_converter(void)1719 init_icu_converter(void)
1720 {
1721 const char *icu_encoding_name;
1722 UErrorCode status;
1723 UConverter *conv;
1724
1725 if (icu_converter)
1726 return;
1727
1728 icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1729
1730 status = U_ZERO_ERROR;
1731 conv = ucnv_open(icu_encoding_name, &status);
1732 if (U_FAILURE(status))
1733 ereport(ERROR,
1734 (errmsg("could not open ICU converter for encoding \"%s\": %s",
1735 icu_encoding_name, u_errorName(status))));
1736
1737 icu_converter = conv;
1738 }
1739
1740 /*
1741 * Convert a string in the database encoding into a string of UChars.
1742 *
1743 * The source string at buff is of length nbytes
1744 * (it needn't be nul-terminated)
1745 *
1746 * *buff_uchar receives a pointer to the palloc'd result string, and
1747 * the function's result is the number of UChars generated.
1748 *
1749 * The result string is nul-terminated, though most callers rely on the
1750 * result length instead.
1751 */
1752 int32_t
icu_to_uchar(UChar ** buff_uchar,const char * buff,size_t nbytes)1753 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1754 {
1755 UErrorCode status;
1756 int32_t len_uchar;
1757
1758 init_icu_converter();
1759
1760 status = U_ZERO_ERROR;
1761 len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1762 buff, nbytes, &status);
1763 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1764 ereport(ERROR,
1765 (errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1766
1767 *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1768
1769 status = U_ZERO_ERROR;
1770 len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1771 buff, nbytes, &status);
1772 if (U_FAILURE(status))
1773 ereport(ERROR,
1774 (errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1775
1776 return len_uchar;
1777 }
1778
1779 /*
1780 * Convert a string of UChars into the database encoding.
1781 *
1782 * The source string at buff_uchar is of length len_uchar
1783 * (it needn't be nul-terminated)
1784 *
1785 * *result receives a pointer to the palloc'd result string, and the
1786 * function's result is the number of bytes generated (not counting nul).
1787 *
1788 * The result string is nul-terminated.
1789 */
1790 int32_t
icu_from_uchar(char ** result,const UChar * buff_uchar,int32_t len_uchar)1791 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1792 {
1793 UErrorCode status;
1794 int32_t len_result;
1795
1796 init_icu_converter();
1797
1798 status = U_ZERO_ERROR;
1799 len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1800 buff_uchar, len_uchar, &status);
1801 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1802 ereport(ERROR,
1803 (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1804
1805 *result = palloc(len_result + 1);
1806
1807 status = U_ZERO_ERROR;
1808 len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1809 buff_uchar, len_uchar, &status);
1810 if (U_FAILURE(status))
1811 ereport(ERROR,
1812 (errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1813
1814 return len_result;
1815 }
1816
1817 #endif /* USE_ICU */
1818
1819 /*
1820 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1821 * Therefore we keep them here rather than with the mbutils code.
1822 */
1823
1824 /*
1825 * wchar2char --- convert wide characters to multibyte format
1826 *
1827 * This has the same API as the standard wcstombs_l() function; in particular,
1828 * tolen is the maximum number of bytes to store at *to, and *from must be
1829 * zero-terminated. The output will be zero-terminated iff there is room.
1830 */
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
{
	size_t		result;

	/* Only the default locale (0) or a libc-provider locale is accepted */
	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);

	/* With no room in the output buffer there is nothing to convert */
	if (tolen == 0)
		return 0;

#ifdef WIN32

	/*
	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
	 * for some reason mbstowcs and wcstombs won't do this for us, so we use
	 * WideCharToMultiByte() here.
	 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		/* Passing -1 as the source length means "from" is zero-terminated */
		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
									 NULL, NULL);
		/* A zero return is failure */
		/* (result is an unsigned size_t, so "<= 0" can only match zero) */
		if (result <= 0)
			result = -1;
		else
		{
			Assert(result <= tolen);
			/* Microsoft counts the zero terminator in the result */
			result--;
		}
	}
	else
#endif							/* WIN32 */
	if (locale == (pg_locale_t) 0)
	{
		/* Use wcstombs directly for the default locale */
		result = wcstombs(to, from, tolen);
	}
	else
	{
#ifdef HAVE_LOCALE_T
#ifdef HAVE_WCSTOMBS_L
		/* Use wcstombs_l for nondefault locales */
		result = wcstombs_l(to, from, tolen, locale->info.lt);
#else							/* !HAVE_WCSTOMBS_L */
		/* We have to temporarily set the locale as current ... ugh */
		locale_t	save_locale = uselocale(locale->info.lt);

		result = wcstombs(to, from, tolen);

		/* Put back whatever locale was in effect before */
		uselocale(save_locale);
#endif							/* HAVE_WCSTOMBS_L */
#else							/* !HAVE_LOCALE_T */
		/* Can't have locale != 0 without HAVE_LOCALE_T */
		elog(ERROR, "wcstombs_l is not available");
		result = 0;				/* keep compiler quiet */
#endif							/* HAVE_LOCALE_T */
	}

	/* On the Windows UTF8 path, failure is reported as (size_t) -1 */
	return result;
}
1892
1893 /*
1894 * char2wchar --- convert multibyte characters to wide characters
1895 *
1896 * This has almost the API of mbstowcs_l(), except that *from need not be
1897 * null-terminated; instead, the number of input bytes is specified as
1898 * fromlen. Also, we ereport() rather than returning -1 for invalid
1899 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1900 * The output will be zero-terminated iff there is room.
1901 */
1902 size_t
char2wchar(wchar_t * to,size_t tolen,const char * from,size_t fromlen,pg_locale_t locale)1903 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1904 pg_locale_t locale)
1905 {
1906 size_t result;
1907
1908 Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1909
1910 if (tolen == 0)
1911 return 0;
1912
1913 #ifdef WIN32
1914 /* See WIN32 "Unicode" comment above */
1915 if (GetDatabaseEncoding() == PG_UTF8)
1916 {
1917 /* Win32 API does not work for zero-length input */
1918 if (fromlen == 0)
1919 result = 0;
1920 else
1921 {
1922 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1923 /* A zero return is failure */
1924 if (result == 0)
1925 result = -1;
1926 }
1927
1928 if (result != -1)
1929 {
1930 Assert(result < tolen);
1931 /* Append trailing null wchar (MultiByteToWideChar() does not) */
1932 to[result] = 0;
1933 }
1934 }
1935 else
1936 #endif /* WIN32 */
1937 {
1938 /* mbstowcs requires ending '\0' */
1939 char *str = pnstrdup(from, fromlen);
1940
1941 if (locale == (pg_locale_t) 0)
1942 {
1943 /* Use mbstowcs directly for the default locale */
1944 result = mbstowcs(to, str, tolen);
1945 }
1946 else
1947 {
1948 #ifdef HAVE_LOCALE_T
1949 #ifdef HAVE_MBSTOWCS_L
1950 /* Use mbstowcs_l for nondefault locales */
1951 result = mbstowcs_l(to, str, tolen, locale->info.lt);
1952 #else /* !HAVE_MBSTOWCS_L */
1953 /* We have to temporarily set the locale as current ... ugh */
1954 locale_t save_locale = uselocale(locale->info.lt);
1955
1956 result = mbstowcs(to, str, tolen);
1957
1958 uselocale(save_locale);
1959 #endif /* HAVE_MBSTOWCS_L */
1960 #else /* !HAVE_LOCALE_T */
1961 /* Can't have locale != 0 without HAVE_LOCALE_T */
1962 elog(ERROR, "mbstowcs_l is not available");
1963 result = 0; /* keep compiler quiet */
1964 #endif /* HAVE_LOCALE_T */
1965 }
1966
1967 pfree(str);
1968 }
1969
1970 if (result == -1)
1971 {
1972 /*
1973 * Invalid multibyte character encountered. We try to give a useful
1974 * error message by letting pg_verifymbstr check the string. But it's
1975 * possible that the string is OK to us, and not OK to mbstowcs ---
1976 * this suggests that the LC_CTYPE locale is different from the
1977 * database encoding. Give a generic error message if verifymbstr
1978 * can't find anything wrong.
1979 */
1980 pg_verifymbstr(from, fromlen, false); /* might not return */
1981 /* but if it does ... */
1982 ereport(ERROR,
1983 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1984 errmsg("invalid multibyte character for locale"),
1985 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1986 }
1987
1988 return result;
1989 }
1990