1 /*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities
4 *
5 * Portions Copyright (c) 2002-2021, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12 /*----------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
17 *
18 * LC_MESSAGES is settable at run time and will take effect
19 * immediately.
20 *
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 * settable at run-time. However, we don't actually set those locale
23 * categories permanently. This would have bizarre effects like no
24 * longer accepting standard floating-point literals in some locales.
25 * Instead, we only set these locale categories briefly when needed,
26 * cache the required information obtained from localeconv() or
27 * strftime(), and then set the locale categories back to "C".
28 * The cached information is only used by the formatting functions
29 * (to_char, etc.) and the money type. For the user, this should all be
30 * transparent.
31 *
32 * !!! NOW HEAR THIS !!!
33 *
34 * We've been bitten repeatedly by this bug, so let's try to keep it in
35 * mind in future: on some platforms, the locale functions return pointers
36 * to static data that will be overwritten by any later locale function.
37 * Thus, for example, the obvious-looking sequence
38 * save = setlocale(category, NULL);
39 * if (!setlocale(category, value))
40 * fail = true;
41 * setlocale(category, save);
42 * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43 * will change the memory save is pointing at. To do this sort of thing
44 * safely, you *must* pstrdup what setlocale returns the first time.
45 *
46 * The POSIX locale standard is available here:
47 *
48 * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49 *----------
50 */
51
52
53 #include "postgres.h"
54
55 #include <time.h>
56
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "catalog/pg_control.h"
60 #include "mb/pg_wchar.h"
61 #include "utils/builtins.h"
62 #include "utils/formatting.h"
63 #include "utils/hsearch.h"
64 #include "utils/lsyscache.h"
65 #include "utils/memutils.h"
66 #include "utils/pg_locale.h"
67 #include "utils/syscache.h"
68
69 #ifdef USE_ICU
70 #include <unicode/ucnv.h>
71 #endif
72
73 #ifdef __GLIBC__
74 #include <gnu/libc-version.h>
75 #endif
76
77 #ifdef WIN32
78 #include <shlwapi.h>
79 #endif
80
/*
 * Capacity of each buffer slot that receives one localized strftime() (or,
 * on Windows, wcsftime()) result in this file.
 */
#define MAX_L10N_DATA 80
82
83
84 /* GUC settings */
85 char *locale_messages;
86 char *locale_monetary;
87 char *locale_numeric;
88 char *locale_time;
89
90 /*
91 * lc_time localization cache.
92 *
93 * We use only the first 7 or 12 entries of these arrays. The last array
94 * element is left as NULL for the convenience of outside code that wants
95 * to sequentially scan these arrays.
96 */
97 char *localized_abbrev_days[7 + 1];
98 char *localized_full_days[7 + 1];
99 char *localized_abbrev_months[12 + 1];
100 char *localized_full_months[12 + 1];
101
102 /* indicates whether locale information cache is valid */
103 static bool CurrentLocaleConvValid = false;
104 static bool CurrentLCTimeValid = false;
105
106 /* Cache for collation-related knowledge */
107
/*
 * One cached record of per-collation facts, keyed by collation OID.
 * NOTE(review): presumably the entry type stored in collation_cache below;
 * confirm against the code that creates the hash table.
 */
typedef struct
{
	Oid			collid;			/* hash key: pg_collation OID */
	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
	bool		flags_valid;	/* true if above flags are valid */
	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
} collation_cache_entry;
116
117 static HTAB *collation_cache = NULL;
118
119
120 #if defined(WIN32) && defined(LC_MESSAGES)
121 static char *IsoLocaleName(const char *); /* MSVC specific */
122 #endif
123
124 #ifdef USE_ICU
125 static void icu_set_collation_attributes(UCollator *collator, const char *loc);
126 #endif
127
/*
 * pg_perm_setlocale
 *
 * This wraps the libc function setlocale(), with two additions.  First, when
 * changing LC_CTYPE, update gettext's encoding for the current message
 * domain.  GNU gettext automatically tracks LC_CTYPE on most platforms, but
 * not on Windows.  Second, if the operation is successful, the corresponding
 * LC_XXX environment variable is set to match.  By setting the environment
 * variable, we ensure that any subsequent use of setlocale(..., "") will
 * preserve the settings made through this routine.  Of course, LC_ALL must
 * also be unset to fully ensure that, but that has to be done elsewhere after
 * all the individual LC_XXX variables have been set correctly.  (Thank you
 * Perl for making this kluge necessary.)
 *
 * category: LC_COLLATE, LC_CTYPE, LC_MESSAGES (where defined), LC_MONETARY,
 *		LC_NUMERIC or LC_TIME.  Any other value is reported with elog(FATAL).
 * locale: the locale name to install.
 *
 * Returns NULL if setlocale() or setenv() fails.  On success, returns the
 * string that was exported to the environment: setlocale()'s result (copied
 * into a function-static buffer for LC_CTYPE), or on Windows for LC_MESSAGES
 * the IsoLocaleName() translation of "locale" (or "locale" itself if that
 * translation fails).  On Windows, an LC_MESSAGES request with a NULL or
 * empty locale is returned unchanged without touching the environment.
 */
char *
pg_perm_setlocale(int category, const char *locale)
{
	char	   *result;
	const char *envvar;

#ifndef WIN32
	result = setlocale(category, locale);
#else

	/*
	 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
	 * the given value is good and set it in the environment variables.  We
	 * must ignore attempts to set to "", which means "keep using the old
	 * environment value".
	 */
#ifdef LC_MESSAGES
	if (category == LC_MESSAGES)
	{
		result = (char *) locale;
		if (locale == NULL || locale[0] == '\0')
			return result;
	}
	else
#endif
		/* note: this statement is the "else" arm when LC_MESSAGES is defined */
		result = setlocale(category, locale);
#endif							/* WIN32 */

	if (result == NULL)
		return result;			/* fall out immediately on failure */

	/*
	 * Use the right encoding in translated messages.  Under ENABLE_NLS, let
	 * pg_bind_textdomain_codeset() figure it out.  Under !ENABLE_NLS, message
	 * format strings are ASCII, but database-encoding strings may enter the
	 * message via %s.  This makes the overall message encoding equal to the
	 * database encoding.
	 */
	if (category == LC_CTYPE)
	{
		/* static so that the pointer we return stays valid after we exit */
		static char save_lc_ctype[NAMEDATALEN + 20];

		/* copy setlocale() return value before callee invokes it again */
		strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
		result = save_lc_ctype;

#ifdef ENABLE_NLS
		SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
#else
		SetMessageEncoding(GetDatabaseEncoding());
#endif
	}

	/* Map the category onto the name of its environment variable */
	switch (category)
	{
		case LC_COLLATE:
			envvar = "LC_COLLATE";
			break;
		case LC_CTYPE:
			envvar = "LC_CTYPE";
			break;
#ifdef LC_MESSAGES
		case LC_MESSAGES:
			envvar = "LC_MESSAGES";
#ifdef WIN32
			/* export the Unix-style name if we can derive one */
			result = IsoLocaleName(locale);
			if (result == NULL)
				result = (char *) locale;
			elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
#endif							/* WIN32 */
			break;
#endif							/* LC_MESSAGES */
		case LC_MONETARY:
			envvar = "LC_MONETARY";
			break;
		case LC_NUMERIC:
			envvar = "LC_NUMERIC";
			break;
		case LC_TIME:
			envvar = "LC_TIME";
			break;
		default:
			elog(FATAL, "unrecognized LC category: %d", category);
			return NULL;		/* keep compiler quiet */
	}

	/* Export the setting so that a later setlocale(..., "") preserves it */
	if (setenv(envvar, result, 1) != 0)
		return NULL;

	return result;
}
233
234
235 /*
236 * Is the locale name valid for the locale category?
237 *
238 * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
239 * canonical name is stored there. This is especially useful for figuring out
240 * what locale name "" means (ie, the server environment value). (Actually,
241 * it seems that on most implementations that's the only thing it's good for;
242 * we could wish that setlocale gave back a canonically spelled version of
243 * the locale name, but typically it doesn't.)
244 */
245 bool
check_locale(int category,const char * locale,char ** canonname)246 check_locale(int category, const char *locale, char **canonname)
247 {
248 char *save;
249 char *res;
250
251 if (canonname)
252 *canonname = NULL; /* in case of failure */
253
254 save = setlocale(category, NULL);
255 if (!save)
256 return false; /* won't happen, we hope */
257
258 /* save may be pointing at a modifiable scratch variable, see above. */
259 save = pstrdup(save);
260
261 /* set the locale with setlocale, to see if it accepts it. */
262 res = setlocale(category, locale);
263
264 /* save canonical name if requested. */
265 if (res && canonname)
266 *canonname = pstrdup(res);
267
268 /* restore old value. */
269 if (!setlocale(category, save))
270 elog(WARNING, "failed to restore old locale \"%s\"", save);
271 pfree(save);
272
273 return (res != NULL);
274 }
275
276
277 /*
278 * GUC check/assign hooks
279 *
280 * For most locale categories, the assign hook doesn't actually set the locale
281 * permanently, just reset flags so that the next use will cache the
282 * appropriate values. (See explanation at the top of this file.)
283 *
284 * Note: we accept value = "" as selecting the postmaster's environment
285 * value, whatever it was (so long as the environment setting is legal).
286 * This will have been locked down by an earlier call to pg_perm_setlocale.
287 */
288 bool
check_locale_monetary(char ** newval,void ** extra,GucSource source)289 check_locale_monetary(char **newval, void **extra, GucSource source)
290 {
291 return check_locale(LC_MONETARY, *newval, NULL);
292 }
293
/*
 * GUC assign hook for the monetary locale: nothing is installed here; we only
 * mark the cached localeconv() data stale so that PGLC_localeconv() rebuilds
 * it on next use.
 */
void
assign_locale_monetary(const char *newval, void *extra)
{
	CurrentLocaleConvValid = false;
}
299
300 bool
check_locale_numeric(char ** newval,void ** extra,GucSource source)301 check_locale_numeric(char **newval, void **extra, GucSource source)
302 {
303 return check_locale(LC_NUMERIC, *newval, NULL);
304 }
305
/*
 * GUC assign hook for the numeric locale: nothing is installed here; we only
 * mark the cached localeconv() data stale so that PGLC_localeconv() rebuilds
 * it on next use.
 */
void
assign_locale_numeric(const char *newval, void *extra)
{
	CurrentLocaleConvValid = false;
}
311
312 bool
check_locale_time(char ** newval,void ** extra,GucSource source)313 check_locale_time(char **newval, void **extra, GucSource source)
314 {
315 return check_locale(LC_TIME, *newval, NULL);
316 }
317
/*
 * GUC assign hook for the time locale: nothing is installed here; we only
 * mark the cached day/month names stale so that cache_locale_time() rebuilds
 * them on next use.
 */
void
assign_locale_time(const char *newval, void *extra)
{
	CurrentLCTimeValid = false;
}
323
324 /*
325 * We allow LC_MESSAGES to actually be set globally.
326 *
327 * Note: we normally disallow value = "" because it wouldn't have consistent
328 * semantics (it'd effectively just use the previous value). However, this
329 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
330 * not even if the attempted setting fails due to invalid environment value.
331 * The idea there is just to accept the environment setting *if possible*
332 * during startup, until we can read the proper value from postgresql.conf.
333 */
334 bool
check_locale_messages(char ** newval,void ** extra,GucSource source)335 check_locale_messages(char **newval, void **extra, GucSource source)
336 {
337 if (**newval == '\0')
338 {
339 if (source == PGC_S_DEFAULT)
340 return true;
341 else
342 return false;
343 }
344
345 /*
346 * LC_MESSAGES category does not exist everywhere, but accept it anyway
347 *
348 * On Windows, we can't even check the value, so accept blindly
349 */
350 #if defined(LC_MESSAGES) && !defined(WIN32)
351 return check_locale(LC_MESSAGES, *newval, NULL);
352 #else
353 return true;
354 #endif
355 }
356
/*
 * GUC assign hook for the messages locale: install the new value
 * process-wide through pg_perm_setlocale().  "extra" is unused.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * LC_MESSAGES category does not exist everywhere, but accept it anyway.
	 * We ignore failure (hence the void cast): the check hook deliberately
	 * lets through a default value that may turn out to be unusable.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
368
369
/*
 * Release the malloc'd strings hanging off a struct lconv, leaving the
 * struct itself (and its scalar members) alone.  Members that are NULL are
 * simply skipped, since free(NULL) is defined to do nothing.
 *
 * This must never throw elog(ERROR): it is used for error cleanup.
 * The set of members handled here must match struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* LC_NUMERIC-derived members */
	free(s->decimal_point);
	free(s->thousands_sep);
	free(s->grouping);

	/* LC_MONETARY-derived members */
	free(s->int_curr_symbol);
	free(s->currency_symbol);
	free(s->mon_decimal_point);
	free(s->mon_thousands_sep);
	free(s->mon_grouping);
	free(s->positive_sign);
	free(s->negative_sign);
}
398
/*
 * Report whether every string member of a struct lconv that we care about
 * has been filled in (is non-NULL).  Returns false as soon as one is found
 * missing.  The member list must be kept in sync with free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
428
429
430 /*
431 * Convert the strdup'd string at *str from the specified encoding to the
432 * database encoding.
433 */
434 static void
db_encoding_convert(int encoding,char ** str)435 db_encoding_convert(int encoding, char **str)
436 {
437 char *pstr;
438 char *mstr;
439
440 /* convert the string to the database encoding */
441 pstr = pg_any_to_server(*str, strlen(*str), encoding);
442 if (pstr == *str)
443 return; /* no conversion happened */
444
445 /* need it malloc'd not palloc'd */
446 mstr = strdup(pstr);
447 if (mstr == NULL)
448 ereport(ERROR,
449 (errcode(ERRCODE_OUT_OF_MEMORY),
450 errmsg("out of memory")));
451
452 /* replace old string */
453 free(*str);
454 *str = mstr;
455
456 pfree(pstr);
457 }
458
459
/*
 * Return the POSIX lconv struct (contains number/money formatting
 * information) with locale information for all categories.
 *
 * The result points at a function-static struct.  It is built on first use
 * and then returned as-is until CurrentLocaleConvValid is cleared (which the
 * monetary and numeric assign hooks do), at which point the old strings are
 * freed and the struct is rebuilt.
 *
 * The numeric members are read with LC_NUMERIC set to locale_numeric and the
 * monetary members with LC_MONETARY set to locale_monetary; all string
 * members are strdup'd copies, converted to the database encoding except for
 * grouping and mon_grouping, which are not text.
 *
 * Errors: ERROR if the current locale settings cannot be read, if memory
 * runs out, or if encoding conversion fails; FATAL if the prevailing locale
 * settings cannot be restored.
 */
struct lconv *
PGLC_localeconv(void)
{
	static struct lconv CurrentLocaleConv;
	static bool CurrentLocaleConvAllocated = false;
	struct lconv *extlconv;
	struct lconv worklconv;
	char	   *save_lc_monetary;
	char	   *save_lc_numeric;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* Did we do it already? */
	if (CurrentLocaleConvValid)
		return &CurrentLocaleConv;

	/* Free any already-allocated storage */
	if (CurrentLocaleConvAllocated)
	{
		free_struct_lconv(&CurrentLocaleConv);
		CurrentLocaleConvAllocated = false;
	}

	/*
	 * This is tricky because we really don't want to risk throwing error
	 * while the locale is set to other than our usual settings.  Therefore,
	 * the process is: collect the usual settings, set locale to special
	 * setting, copy relevant data into worklconv using strdup(), restore
	 * normal settings, convert data to desired encoding, and finally stash
	 * the collected data in CurrentLocaleConv.  This makes it safe if we
	 * throw an error during encoding conversion or run out of memory anywhere
	 * in the process.  All data pointed to by struct lconv members is
	 * allocated with strdup, to avoid premature elog(ERROR) and to allow
	 * using a single cleanup routine.
	 */
	memset(&worklconv, 0, sizeof(worklconv));

	/* Save prevailing values of monetary and numeric locales */
	save_lc_monetary = setlocale(LC_MONETARY, NULL);
	if (!save_lc_monetary)
		elog(ERROR, "setlocale(NULL) failed");
	/* copy, because a later setlocale() may overwrite the returned string */
	save_lc_monetary = pstrdup(save_lc_monetary);

	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
	if (!save_lc_numeric)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_numeric = pstrdup(save_lc_numeric);

#ifdef WIN32

	/*
	 * The POSIX standard explicitly says that it is undefined what happens if
	 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
	 * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
	 * believe that localeconv() should return strings that are encoded in the
	 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
	 * once we have successfully collected the localeconv() results, we will
	 * convert them from that codeset to the desired server encoding.
	 *
	 * Windows, of course, resolutely does things its own way; on that
	 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
	 * results.  Hence, we must temporarily set that category as well.
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* Here begins the critical section where we must not throw error */

	/* use numeric to set the ctype */
	setlocale(LC_CTYPE, locale_numeric);
#endif

	/* Get formatting information for numeric */
	setlocale(LC_NUMERIC, locale_numeric);
	extlconv = localeconv();

	/*
	 * Must copy data now in case setlocale() overwrites it.  strdup()
	 * failures are not checked here; they are detected after the locale has
	 * been restored, via struct_lconv_is_valid().
	 */
	worklconv.decimal_point = strdup(extlconv->decimal_point);
	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
	worklconv.grouping = strdup(extlconv->grouping);

#ifdef WIN32
	/* use monetary to set the ctype */
	setlocale(LC_CTYPE, locale_monetary);
#endif

	/* Get formatting information for monetary */
	setlocale(LC_MONETARY, locale_monetary);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
	worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
	worklconv.mon_grouping = strdup(extlconv->mon_grouping);
	worklconv.positive_sign = strdup(extlconv->positive_sign);
	worklconv.negative_sign = strdup(extlconv->negative_sign);
	/* Copy scalar fields as well */
	worklconv.int_frac_digits = extlconv->int_frac_digits;
	worklconv.frac_digits = extlconv->frac_digits;
	worklconv.p_cs_precedes = extlconv->p_cs_precedes;
	worklconv.p_sep_by_space = extlconv->p_sep_by_space;
	worklconv.n_cs_precedes = extlconv->n_cs_precedes;
	worklconv.n_sep_by_space = extlconv->n_sep_by_space;
	worklconv.p_sign_posn = extlconv->p_sign_posn;
	worklconv.n_sign_posn = extlconv->n_sign_posn;

	/*
	 * Restore the prevailing locale settings; failure to do so is fatal.
	 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
	 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
	 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
	 * are almost certainly "C", there's really no reason that restoring those
	 * should fail.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_MONETARY, save_lc_monetary))
		elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
	if (!setlocale(LC_NUMERIC, save_lc_numeric))
		elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);

	/*
	 * At this point we've done our best to clean up, and can call functions
	 * that might possibly throw errors with a clean conscience.  But let's
	 * make sure we don't leak any already-strdup'd fields in worklconv.
	 */
	PG_TRY();
	{
		int			encoding;

		/* Release the pstrdup'd locale names */
		pfree(save_lc_monetary);
		pfree(save_lc_numeric);
#ifdef WIN32
		pfree(save_lc_ctype);
#endif

		/* If any of the preceding strdup calls failed, complain now. */
		if (!struct_lconv_is_valid(&worklconv))
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));

		/*
		 * Now we must perform encoding conversion from whatever's associated
		 * with the locales into the database encoding.  If we can't identify
		 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
		 * use PG_SQL_ASCII, which will result in just validating that the
		 * strings are OK in the database encoding.
		 */
		encoding = pg_get_encoding_from_locale(locale_numeric, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.decimal_point);
		db_encoding_convert(encoding, &worklconv.thousands_sep);
		/* grouping is not text and does not require conversion */

		encoding = pg_get_encoding_from_locale(locale_monetary, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.int_curr_symbol);
		db_encoding_convert(encoding, &worklconv.currency_symbol);
		db_encoding_convert(encoding, &worklconv.mon_decimal_point);
		db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
		/* mon_grouping is not text and does not require conversion */
		db_encoding_convert(encoding, &worklconv.positive_sign);
		db_encoding_convert(encoding, &worklconv.negative_sign);
	}
	PG_CATCH();
	{
		/* worklconv was never published, so its strings are ours to free */
		free_struct_lconv(&worklconv);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/*
	 * Everything is good, so save the results.  The struct assignment hands
	 * ownership of the strdup'd strings over to CurrentLocaleConv.
	 */
	CurrentLocaleConv = worklconv;
	CurrentLocaleConvAllocated = true;
	CurrentLocaleConvValid = true;
	return &CurrentLocaleConv;
}
658
659 #ifdef WIN32
/*
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * dst/dstlen: output buffer and its size in bytes; on success the result is
 *		NUL-terminated UTF-8.
 * format: strftime-style format string; must fit in wformat[] below.
 * tm: broken-down time to format.
 *
 * Returns the number of bytes stored, not counting the terminator, or 0 if
 * wcsftime() produced nothing (dst contents are then unspecified).  A
 * failure of either character-set conversion is reported with elog(ERROR).
 *
 * NOTE(review): cache_locale_time() calls this (through the strftime macro
 * defined right after this function) while the process locale is
 * temporarily switched, a window in which it says errors must not be
 * thrown; the elog(ERROR) calls here do not honor that.  Confirm whether
 * these failures are reachable for the formats actually used.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		len;
	wchar_t		wformat[8];		/* formats used below need 3 chars */
	wchar_t		wbuf[MAX_L10N_DATA];

	/*
	 * Get a wchar_t version of the format string.  We only actually use
	 * plain-ASCII formats in this file, so we can say that they're UTF8.
	 */
	len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
							  wformat, lengthof(wformat));
	if (len == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
	if (len == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	/* convert only "len" wide chars; dstlen - 1 leaves room for the NUL */
	len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
							  NULL, NULL);
	if (len == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	/* the conversion above did not include a terminator, so add one */
	dst[len] = '\0';

	return len;
}
713
714 /* redefine strftime() */
715 #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
716 #endif /* WIN32 */
717
718 /*
719 * Subroutine for cache_locale_time().
720 * Convert the given string from encoding "encoding" to the database
721 * encoding, and store the result at *dst, replacing any previous value.
722 */
723 static void
cache_single_string(char ** dst,const char * src,int encoding)724 cache_single_string(char **dst, const char *src, int encoding)
725 {
726 char *ptr;
727 char *olddst;
728
729 /* Convert the string to the database encoding, or validate it's OK */
730 ptr = pg_any_to_server(src, strlen(src), encoding);
731
732 /* Store the string in long-lived storage, replacing any previous value */
733 olddst = *dst;
734 *dst = MemoryContextStrdup(TopMemoryContext, ptr);
735 if (olddst)
736 pfree(olddst);
737
738 /* Might as well clean up any palloc'd conversion result, too */
739 if (ptr != src)
740 pfree(ptr);
741 }
742
743 /*
744 * Update the lc_time localization cache variables if needed.
745 */
746 void
cache_locale_time(void)747 cache_locale_time(void)
748 {
749 char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
750 char *bufptr;
751 time_t timenow;
752 struct tm *timeinfo;
753 bool strftimefail = false;
754 int encoding;
755 int i;
756 char *save_lc_time;
757 #ifdef WIN32
758 char *save_lc_ctype;
759 #endif
760
761 /* did we do this already? */
762 if (CurrentLCTimeValid)
763 return;
764
765 elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
766
767 /*
768 * As in PGLC_localeconv(), it's critical that we not throw error while
769 * libc's locale settings have nondefault values. Hence, we just call
770 * strftime() within the critical section, and then convert and save its
771 * results afterwards.
772 */
773
774 /* Save prevailing value of time locale */
775 save_lc_time = setlocale(LC_TIME, NULL);
776 if (!save_lc_time)
777 elog(ERROR, "setlocale(NULL) failed");
778 save_lc_time = pstrdup(save_lc_time);
779
780 #ifdef WIN32
781
782 /*
783 * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
784 * must set it here. This code looks the same as what PGLC_localeconv()
785 * does, but the underlying reason is different: this does NOT determine
786 * the encoding we'll get back from strftime_win32().
787 */
788
789 /* Save prevailing value of ctype locale */
790 save_lc_ctype = setlocale(LC_CTYPE, NULL);
791 if (!save_lc_ctype)
792 elog(ERROR, "setlocale(NULL) failed");
793 save_lc_ctype = pstrdup(save_lc_ctype);
794
795 /* use lc_time to set the ctype */
796 setlocale(LC_CTYPE, locale_time);
797 #endif
798
799 setlocale(LC_TIME, locale_time);
800
801 /* We use times close to current time as data for strftime(). */
802 timenow = time(NULL);
803 timeinfo = localtime(&timenow);
804
805 /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
806 bufptr = buf;
807
808 /*
809 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
810 * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
811 * error.) An implementation might report errors (e.g. ENOMEM) by
812 * returning 0 (or, less plausibly, a negative value) and setting errno.
813 * Report errno just in case the implementation did that, but clear it in
814 * advance of the calls so we don't emit a stale, unrelated errno.
815 */
816 errno = 0;
817
818 /* localized days */
819 for (i = 0; i < 7; i++)
820 {
821 timeinfo->tm_wday = i;
822 if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
823 strftimefail = true;
824 bufptr += MAX_L10N_DATA;
825 if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
826 strftimefail = true;
827 bufptr += MAX_L10N_DATA;
828 }
829
830 /* localized months */
831 for (i = 0; i < 12; i++)
832 {
833 timeinfo->tm_mon = i;
834 timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
835 if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
836 strftimefail = true;
837 bufptr += MAX_L10N_DATA;
838 if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
839 strftimefail = true;
840 bufptr += MAX_L10N_DATA;
841 }
842
843 /*
844 * Restore the prevailing locale settings; as in PGLC_localeconv(),
845 * failure to do so is fatal.
846 */
847 #ifdef WIN32
848 if (!setlocale(LC_CTYPE, save_lc_ctype))
849 elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
850 #endif
851 if (!setlocale(LC_TIME, save_lc_time))
852 elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
853
854 /*
855 * At this point we've done our best to clean up, and can throw errors, or
856 * call functions that might throw errors, with a clean conscience.
857 */
858 if (strftimefail)
859 elog(ERROR, "strftime() failed: %m");
860
861 /* Release the pstrdup'd locale names */
862 pfree(save_lc_time);
863 #ifdef WIN32
864 pfree(save_lc_ctype);
865 #endif
866
867 #ifndef WIN32
868
869 /*
870 * As in PGLC_localeconv(), we must convert strftime()'s output from the
871 * encoding implied by LC_TIME to the database encoding. If we can't
872 * identify the LC_TIME encoding, just perform encoding validation.
873 */
874 encoding = pg_get_encoding_from_locale(locale_time, true);
875 if (encoding < 0)
876 encoding = PG_SQL_ASCII;
877
878 #else
879
880 /*
881 * On Windows, strftime_win32() always returns UTF8 data, so convert from
882 * that if necessary.
883 */
884 encoding = PG_UTF8;
885
886 #endif /* WIN32 */
887
888 bufptr = buf;
889
890 /* localized days */
891 for (i = 0; i < 7; i++)
892 {
893 cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
894 bufptr += MAX_L10N_DATA;
895 cache_single_string(&localized_full_days[i], bufptr, encoding);
896 bufptr += MAX_L10N_DATA;
897 }
898 localized_abbrev_days[7] = NULL;
899 localized_full_days[7] = NULL;
900
901 /* localized months */
902 for (i = 0; i < 12; i++)
903 {
904 cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
905 bufptr += MAX_L10N_DATA;
906 cache_single_string(&localized_full_months[i], bufptr, encoding);
907 bufptr += MAX_L10N_DATA;
908 }
909 localized_abbrev_months[12] = NULL;
910 localized_full_months[12] = NULL;
911
912 CurrentLCTimeValid = true;
913 }
914
915
916 #if defined(WIN32) && defined(LC_MESSAGES)
917 /*
918 * Convert a Windows setlocale() argument to a Unix-style one.
919 *
920 * Regardless of platform, we install message catalogs under a Unix-style
921 * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
922 * following that style will elicit localized interface strings.
923 *
924 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
925 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
926 * case-insensitive. setlocale() returns the fully-qualified form; for
927 * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
928 * setlocale() and _create_locale() select a "locale identifier"[1] and store
929 * it in an undocumented _locale_t field. From that LCID, we can retrieve the
930 * ISO 639 language and the ISO 3166 country. Character encoding does not
931 * matter, because the server and client encodings govern that.
932 *
933 * Windows Vista introduced the "locale name" concept[2], closely following
934 * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
935 * Studio 2012, setlocale() accepts locale names in addition to the strings it
936 * accepted historically. It does not standardize them; setlocale("Th-tH")
937 * returns "Th-tH". setlocale(category, "") still returns a traditional
938 * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
939 * content to carry locale names instead of locale identifiers.
940 *
 * Visual Studio 2015 should still be able to do the same as Visual Studio
 * 2012, but the declaration of locale_name is missing from _locale_t, which
 * makes this code fail to compile.  Hence we fall back to enumerating all
 * system locales with EnumSystemLocalesEx() to find the required locale
 * name.  If the input argument is already Unix-style, we can get the ISO
 * locale name directly by calling GetLocaleInfoEx() with LCType set to
 * LOCALE_SNAME.
948 *
949 * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
950 * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
951 * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
952 * localized messages. In particular, every lc_messages setting that initdb
953 * can select automatically will yield only C-locale messages. XXX This could
954 * be fixed by running the fully-qualified locale name through a lookup table.
955 *
956 * This function returns a pointer to a static buffer bearing the converted
957 * name or NULL if conversion fails.
958 *
959 * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
960 * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
961 */
962
963 #if _MSC_VER >= 1900
964 /*
965 * Callback function for EnumSystemLocalesEx() in get_iso_localename().
966 *
967 * This function enumerates all system locales, searching for one that matches
968 * an input with the format: <Language>[_<Country>], e.g.
969 * English[_United States]
970 *
971 * The input is a three wchar_t array as an LPARAM. The first element is the
972 * locale_name we want to match, the second element is an allocated buffer
973 * where the Unix-style locale is copied if a match is found, and the third
974 * element is the search status, 1 if a match was found, 0 otherwise.
975 */
976 static BOOL CALLBACK
search_locale_enum(LPWSTR pStr,DWORD dwFlags,LPARAM lparam)977 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
978 {
979 wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
980 wchar_t **argv;
981
982 (void) (dwFlags);
983
984 argv = (wchar_t **) lparam;
985 *argv[2] = (wchar_t) 0;
986
987 memset(test_locale, 0, sizeof(test_locale));
988
989 /* Get the name of the <Language> in English */
990 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
991 test_locale, LOCALE_NAME_MAX_LENGTH))
992 {
993 /*
994 * If the enumerated locale does not have a hyphen ("en") OR the
995 * lc_message input does not have an underscore ("English"), we only
996 * need to compare the <Language> tags.
997 */
998 if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
999 {
1000 if (_wcsicmp(argv[0], test_locale) == 0)
1001 {
1002 wcscpy(argv[1], pStr);
1003 *argv[2] = (wchar_t) 1;
1004 return FALSE;
1005 }
1006 }
1007
1008 /*
1009 * We have to compare a full <Language>_<Country> tag, so we append
1010 * the underscore and name of the country/region in English, e.g.
1011 * "English_United States".
1012 */
1013 else
1014 {
1015 size_t len;
1016
1017 wcscat(test_locale, L"_");
1018 len = wcslen(test_locale);
1019 if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1020 test_locale + len,
1021 LOCALE_NAME_MAX_LENGTH - len))
1022 {
1023 if (_wcsicmp(argv[0], test_locale) == 0)
1024 {
1025 wcscpy(argv[1], pStr);
1026 *argv[2] = (wchar_t) 1;
1027 return FALSE;
1028 }
1029 }
1030 }
1031 }
1032
1033 return TRUE;
1034 }
1035
1036 /*
1037 * This function converts a Windows locale name to an ISO formatted version
1038 * for Visual Studio 2015 or greater.
1039 *
1040 * Returns NULL, if no valid conversion was found.
1041 */
1042 static char *
get_iso_localename(const char * winlocname)1043 get_iso_localename(const char *winlocname)
1044 {
1045 wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1046 wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
1047 static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1048 char *period;
1049 int len;
1050 int ret_val;
1051
1052 /*
1053 * Valid locales have the following syntax:
1054 * <Language>[_<Country>[.<CodePage>]]
1055 *
1056 * GetLocaleInfoEx can only take locale name without code-page and for the
1057 * purpose of this API the code-page doesn't matter.
1058 */
1059 period = strchr(winlocname, '.');
1060 if (period != NULL)
1061 len = period - winlocname;
1062 else
1063 len = pg_mbstrlen(winlocname);
1064
1065 memset(wc_locale_name, 0, sizeof(wc_locale_name));
1066 memset(buffer, 0, sizeof(buffer));
1067 MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1068 LOCALE_NAME_MAX_LENGTH);
1069
1070 /*
1071 * If the lc_messages is already an Unix-style string, we have a direct
1072 * match with LOCALE_SNAME, e.g. en-US, en_US.
1073 */
1074 ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1075 LOCALE_NAME_MAX_LENGTH);
1076 if (!ret_val)
1077 {
1078 /*
1079 * Search for a locale in the system that matches language and country
1080 * name.
1081 */
1082 wchar_t *argv[3];
1083
1084 argv[0] = wc_locale_name;
1085 argv[1] = buffer;
1086 argv[2] = (wchar_t *) &ret_val;
1087 EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1088 NULL);
1089 }
1090
1091 if (ret_val)
1092 {
1093 size_t rc;
1094 char *hyphen;
1095
1096 /* Locale names use only ASCII, any conversion locale suffices. */
1097 rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1098 if (rc == -1 || rc == sizeof(iso_lc_messages))
1099 return NULL;
1100
1101 /*
1102 * Simply replace the hyphen with an underscore. See comments in
1103 * IsoLocaleName.
1104 */
1105 hyphen = strchr(iso_lc_messages, '-');
1106 if (hyphen)
1107 *hyphen = '_';
1108 return iso_lc_messages;
1109 }
1110
1111 return NULL;
1112 }
1113 #endif /* _MSC_VER >= 1900 */
1114
/*
 * Map a Windows setlocale() argument onto a Unix-style locale name.
 *
 * "C" and "POSIX" (in any letter case) map to "C".  Anything else is handed
 * to get_iso_localename() on Visual Studio 2015 or later, or resolved through
 * _create_locale() on older Visual Studio releases.
 *
 * Returns a pointer to a static buffer, or NULL if the conversion fails or
 * is not supported by this compiler.
 */
static char *
IsoLocaleName(const char *winlocname)
{
#if defined(_MSC_VER)
	static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
	bool		is_c_locale;

	is_c_locale = (pg_strcasecmp("c", winlocname) == 0 ||
				   pg_strcasecmp("posix", winlocname) == 0);
	if (is_c_locale)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

#if (_MSC_VER >= 1900)			/* Visual Studio 2015 or later */
	return get_iso_localename(winlocname);
#else
	{
		_locale_t	loct = _create_locale(LC_CTYPE, winlocname);

		if (loct != NULL)
		{
			size_t		rc;
			char	   *hyphen;

			/* Locale names use only ASCII, any conversion locale suffices. */
			rc = wchar2char(iso_lc_messages,
							loct->locinfo->locale_name[LC_CTYPE],
							sizeof(iso_lc_messages), NULL);
			_free_locale(loct);
			if (rc == -1 || rc == sizeof(iso_lc_messages))
				return NULL;

			/*
			 * The message catalogs live on a case-insensitive filesystem, so
			 * letter case need not be standardized here.  As long as we ship
			 * no message catalogs for which it would matter, the
			 * script/variant portion need not be translated either (e.g.
			 * uz-Cyrl-UZ to uz_UZ@cyrillic).  Just turn the hyphen into an
			 * underscore.
			 *
			 * Note that the locale name can be less specific than the value
			 * we would derive under earlier Visual Studio releases.  For
			 * example, French_France.1252 yields just "fr".  This does not
			 * affect any of the country-specific message catalogs available
			 * as of this writing (pt_BR, zh_CN, zh_TW).
			 */
			hyphen = strchr(iso_lc_messages, '-');
			if (hyphen)
				*hyphen = '_';
			return iso_lc_messages;
		}
	}
#endif							/* Visual Studio 2015 or later */
#endif							/* defined(_MSC_VER) */
	return NULL;				/* Not supported on this version of msvc/mingw */
}
1171 #endif /* WIN32 && LC_MESSAGES */
1172
1173
1174 /*
1175 * Detect aging strxfrm() implementations that, in a subset of locales, write
1176 * past the specified buffer length. Affected users must update OS packages
1177 * before using PostgreSQL 9.5 or later.
1178 *
1179 * Assume that the bug can come and go from one postmaster startup to another
1180 * due to physical replication among diverse machines. Assume that the bug's
1181 * presence will not change during the life of a particular postmaster. Given
1182 * those assumptions, call this no less than once per postmaster startup per
1183 * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
1184 * there is no need to consider pg_collation locales.
1185 */
1186 void
check_strxfrm_bug(void)1187 check_strxfrm_bug(void)
1188 {
1189 char buf[32];
1190 const int canary = 0x7F;
1191 bool ok = true;
1192
1193 /*
1194 * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1195 * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1196 * below that range.
1197 *
1198 * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1199 * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1200 * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1201 * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1202 */
1203 buf[7] = canary;
1204 (void) strxfrm(buf, "ab", 7);
1205 if (buf[7] != canary)
1206 ok = false;
1207
1208 /*
1209 * illumos bug #1594 was present in the source tree from 2010-10-11 to
1210 * 2012-02-01. Given an ASCII string of any length and length limit 1,
1211 * affected systems ignore the length limit and modify a number of bytes
1212 * one less than the return value. The problem inputs for this bug do not
1213 * overlap those for the Solaris bug, hence a distinct test.
1214 *
1215 * Affected systems include smartos-20110926T021612Z. Affected locales
1216 * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1217 */
1218 buf[1] = canary;
1219 (void) strxfrm(buf, "a", 1);
1220 if (buf[1] != canary)
1221 ok = false;
1222
1223 if (!ok)
1224 ereport(ERROR,
1225 (errcode(ERRCODE_SYSTEM_ERROR),
1226 errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1227 setlocale(LC_COLLATE, NULL)),
1228 errhint("Apply system library package updates.")));
1229 }
1230
1231
1232 /*
1233 * Cache mechanism for collation information.
1234 *
1235 * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1236 * (or POSIX), so we can optimize a few code paths in various places.
1237 * For the built-in C and POSIX collations, we can know that without even
1238 * doing a cache lookup, but we want to support aliases for C/POSIX too.
1239 * For the "default" collation, there are separate static cache variables,
1240 * since consulting the pg_collation catalog doesn't tell us what we need.
1241 *
1242 * Also, if a pg_locale_t has been requested for a collation, we cache that
1243 * for the life of a backend.
1244 *
1245 * Note that some code relies on the flags not reporting false negatives
1246 * (that is, saying it's not C when it is). For example, char2wchar()
1247 * could fail if the locale is C, so str_tolower() shouldn't call it
1248 * in that case.
1249 *
1250 * Note that we currently lack any way to flush the cache. Since we don't
1251 * support ALTER COLLATION, this is OK. The worst case is that someone
1252 * drops a collation, and a useless cache entry hangs around in existing
1253 * backends.
1254 */
1255
1256 static collation_cache_entry *
lookup_collation_cache(Oid collation,bool set_flags)1257 lookup_collation_cache(Oid collation, bool set_flags)
1258 {
1259 collation_cache_entry *cache_entry;
1260 bool found;
1261
1262 Assert(OidIsValid(collation));
1263 Assert(collation != DEFAULT_COLLATION_OID);
1264
1265 if (collation_cache == NULL)
1266 {
1267 /* First time through, initialize the hash table */
1268 HASHCTL ctl;
1269
1270 ctl.keysize = sizeof(Oid);
1271 ctl.entrysize = sizeof(collation_cache_entry);
1272 collation_cache = hash_create("Collation cache", 100, &ctl,
1273 HASH_ELEM | HASH_BLOBS);
1274 }
1275
1276 cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1277 if (!found)
1278 {
1279 /*
1280 * Make sure cache entry is marked invalid, in case we fail before
1281 * setting things.
1282 */
1283 cache_entry->flags_valid = false;
1284 cache_entry->locale = 0;
1285 }
1286
1287 if (set_flags && !cache_entry->flags_valid)
1288 {
1289 /* Attempt to set the flags */
1290 HeapTuple tp;
1291 Form_pg_collation collform;
1292 const char *collcollate;
1293 const char *collctype;
1294
1295 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1296 if (!HeapTupleIsValid(tp))
1297 elog(ERROR, "cache lookup failed for collation %u", collation);
1298 collform = (Form_pg_collation) GETSTRUCT(tp);
1299
1300 collcollate = NameStr(collform->collcollate);
1301 collctype = NameStr(collform->collctype);
1302
1303 cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1304 (strcmp(collcollate, "POSIX") == 0));
1305 cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1306 (strcmp(collctype, "POSIX") == 0));
1307
1308 cache_entry->flags_valid = true;
1309
1310 ReleaseSysCache(tp);
1311 }
1312
1313 return cache_entry;
1314 }
1315
1316
1317 /*
1318 * Detect whether collation's LC_COLLATE property is C
1319 */
1320 bool
lc_collate_is_c(Oid collation)1321 lc_collate_is_c(Oid collation)
1322 {
1323 /*
1324 * If we're asked about "collation 0", return false, so that the code will
1325 * go into the non-C path and report that the collation is bogus.
1326 */
1327 if (!OidIsValid(collation))
1328 return false;
1329
1330 /*
1331 * If we're asked about the default collation, we have to inquire of the C
1332 * library. Cache the result so we only have to compute it once.
1333 */
1334 if (collation == DEFAULT_COLLATION_OID)
1335 {
1336 static int result = -1;
1337 char *localeptr;
1338
1339 if (result >= 0)
1340 return (bool) result;
1341 localeptr = setlocale(LC_COLLATE, NULL);
1342 if (!localeptr)
1343 elog(ERROR, "invalid LC_COLLATE setting");
1344
1345 if (strcmp(localeptr, "C") == 0)
1346 result = true;
1347 else if (strcmp(localeptr, "POSIX") == 0)
1348 result = true;
1349 else
1350 result = false;
1351 return (bool) result;
1352 }
1353
1354 /*
1355 * If we're asked about the built-in C/POSIX collations, we know that.
1356 */
1357 if (collation == C_COLLATION_OID ||
1358 collation == POSIX_COLLATION_OID)
1359 return true;
1360
1361 /*
1362 * Otherwise, we have to consult pg_collation, but we cache that.
1363 */
1364 return (lookup_collation_cache(collation, true))->collate_is_c;
1365 }
1366
1367 /*
1368 * Detect whether collation's LC_CTYPE property is C
1369 */
1370 bool
lc_ctype_is_c(Oid collation)1371 lc_ctype_is_c(Oid collation)
1372 {
1373 /*
1374 * If we're asked about "collation 0", return false, so that the code will
1375 * go into the non-C path and report that the collation is bogus.
1376 */
1377 if (!OidIsValid(collation))
1378 return false;
1379
1380 /*
1381 * If we're asked about the default collation, we have to inquire of the C
1382 * library. Cache the result so we only have to compute it once.
1383 */
1384 if (collation == DEFAULT_COLLATION_OID)
1385 {
1386 static int result = -1;
1387 char *localeptr;
1388
1389 if (result >= 0)
1390 return (bool) result;
1391 localeptr = setlocale(LC_CTYPE, NULL);
1392 if (!localeptr)
1393 elog(ERROR, "invalid LC_CTYPE setting");
1394
1395 if (strcmp(localeptr, "C") == 0)
1396 result = true;
1397 else if (strcmp(localeptr, "POSIX") == 0)
1398 result = true;
1399 else
1400 result = false;
1401 return (bool) result;
1402 }
1403
1404 /*
1405 * If we're asked about the built-in C/POSIX collations, we know that.
1406 */
1407 if (collation == C_COLLATION_OID ||
1408 collation == POSIX_COLLATION_OID)
1409 return true;
1410
1411 /*
1412 * Otherwise, we have to consult pg_collation, but we cache that.
1413 */
1414 return (lookup_collation_cache(collation, true))->ctype_is_c;
1415 }
1416
1417
/*
 * Report a failure of newlocale() (or _create_locale()) for the given locale
 * name by raising an ERROR that includes the errno text.
 */
#ifdef HAVE_LOCALE_T
static void
report_newlocale_failure(const char *localename)
{
	int			saved_errno = errno;

	/*
	 * Windows doesn't provide any useful error indication from
	 * _create_locale(), and BSD-derived platforms don't seem to feel they
	 * need to set errno either (even though POSIX is pretty clear that
	 * newlocale should do so).  So an unset errno is reported as ENOENT.
	 */
	if (saved_errno == 0)
		saved_errno = ENOENT;

	/* %m below reads errno, so make sure it holds the value we settled on */
	errno = saved_errno;

	/*
	 * ENOENT here means "no such locale", not "no such file"; an errdetail
	 * spells that out.  The local copy is tested because auxiliary functions
	 * might change errno while the report is being built.
	 */
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("could not create locale \"%s\": %m",
					localename),
			 (saved_errno != ENOENT ? 0 :
			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
						localename))));
}
#endif							/* HAVE_LOCALE_T */
1449
1450
1451 /*
1452 * Create a locale_t from a collation OID. Results are cached for the
1453 * lifetime of the backend. Thus, do not free the result with freelocale().
1454 *
1455 * As a special optimization, the default/database collation returns 0.
1456 * Callers should then revert to the non-locale_t-enabled code path.
1457 * In fact, they shouldn't call this function at all when they are dealing
1458 * with the default locale. That can save quite a bit in hotspots.
1459 * Also, callers should avoid calling this before going down a C/POSIX
1460 * fastpath, because such a fastpath should work even on platforms without
1461 * locale_t support in the C library.
1462 *
1463 * For simplicity, we always generate COLLATE + CTYPE even though we
1464 * might only need one of them. Since this is called only once per session,
1465 * it shouldn't cost much.
1466 */
1467 pg_locale_t
pg_newlocale_from_collation(Oid collid)1468 pg_newlocale_from_collation(Oid collid)
1469 {
1470 collation_cache_entry *cache_entry;
1471
1472 /* Callers must pass a valid OID */
1473 Assert(OidIsValid(collid));
1474
1475 /* Return 0 for "default" collation, just in case caller forgets */
1476 if (collid == DEFAULT_COLLATION_OID)
1477 return (pg_locale_t) 0;
1478
1479 cache_entry = lookup_collation_cache(collid, false);
1480
1481 if (cache_entry->locale == 0)
1482 {
1483 /* We haven't computed this yet in this session, so do it */
1484 HeapTuple tp;
1485 Form_pg_collation collform;
1486 const char *collcollate;
1487 const char *collctype pg_attribute_unused();
1488 struct pg_locale_struct result;
1489 pg_locale_t resultp;
1490 Datum collversion;
1491 bool isnull;
1492
1493 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1494 if (!HeapTupleIsValid(tp))
1495 elog(ERROR, "cache lookup failed for collation %u", collid);
1496 collform = (Form_pg_collation) GETSTRUCT(tp);
1497
1498 collcollate = NameStr(collform->collcollate);
1499 collctype = NameStr(collform->collctype);
1500
1501 /* We'll fill in the result struct locally before allocating memory */
1502 memset(&result, 0, sizeof(result));
1503 result.provider = collform->collprovider;
1504 result.deterministic = collform->collisdeterministic;
1505
1506 if (collform->collprovider == COLLPROVIDER_LIBC)
1507 {
1508 #ifdef HAVE_LOCALE_T
1509 locale_t loc;
1510
1511 if (strcmp(collcollate, collctype) == 0)
1512 {
1513 /* Normal case where they're the same */
1514 errno = 0;
1515 #ifndef WIN32
1516 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1517 NULL);
1518 #else
1519 loc = _create_locale(LC_ALL, collcollate);
1520 #endif
1521 if (!loc)
1522 report_newlocale_failure(collcollate);
1523 }
1524 else
1525 {
1526 #ifndef WIN32
1527 /* We need two newlocale() steps */
1528 locale_t loc1;
1529
1530 errno = 0;
1531 loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1532 if (!loc1)
1533 report_newlocale_failure(collcollate);
1534 errno = 0;
1535 loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1536 if (!loc)
1537 report_newlocale_failure(collctype);
1538 #else
1539
1540 /*
1541 * XXX The _create_locale() API doesn't appear to support
1542 * this. Could perhaps be worked around by changing
1543 * pg_locale_t to contain two separate fields.
1544 */
1545 ereport(ERROR,
1546 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1547 errmsg("collations with different collate and ctype values are not supported on this platform")));
1548 #endif
1549 }
1550
1551 result.info.lt = loc;
1552 #else /* not HAVE_LOCALE_T */
1553 /* platform that doesn't support locale_t */
1554 ereport(ERROR,
1555 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1556 errmsg("collation provider LIBC is not supported on this platform")));
1557 #endif /* not HAVE_LOCALE_T */
1558 }
1559 else if (collform->collprovider == COLLPROVIDER_ICU)
1560 {
1561 #ifdef USE_ICU
1562 UCollator *collator;
1563 UErrorCode status;
1564
1565 if (strcmp(collcollate, collctype) != 0)
1566 ereport(ERROR,
1567 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1568 errmsg("collations with different collate and ctype values are not supported by ICU")));
1569
1570 status = U_ZERO_ERROR;
1571 collator = ucol_open(collcollate, &status);
1572 if (U_FAILURE(status))
1573 ereport(ERROR,
1574 (errmsg("could not open collator for locale \"%s\": %s",
1575 collcollate, u_errorName(status))));
1576
1577 if (U_ICU_VERSION_MAJOR_NUM < 54)
1578 icu_set_collation_attributes(collator, collcollate);
1579
1580 /* We will leak this string if we get an error below :-( */
1581 result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1582 collcollate);
1583 result.info.icu.ucol = collator;
1584 #else /* not USE_ICU */
1585 /* could get here if a collation was created by a build with ICU */
1586 ereport(ERROR,
1587 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1588 errmsg("ICU is not supported in this build"), \
1589 errhint("You need to rebuild PostgreSQL using %s.", "--with-icu")));
1590 #endif /* not USE_ICU */
1591 }
1592
1593 collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1594 &isnull);
1595 if (!isnull)
1596 {
1597 char *actual_versionstr;
1598 char *collversionstr;
1599
1600 actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1601 if (!actual_versionstr)
1602 {
1603 /*
1604 * This could happen when specifying a version in CREATE
1605 * COLLATION for a libc locale, or manually creating a mess in
1606 * the catalogs.
1607 */
1608 ereport(ERROR,
1609 (errmsg("collation \"%s\" has no actual version, but a version was specified",
1610 NameStr(collform->collname))));
1611 }
1612 collversionstr = TextDatumGetCString(collversion);
1613
1614 if (strcmp(actual_versionstr, collversionstr) != 0)
1615 ereport(WARNING,
1616 (errmsg("collation \"%s\" has version mismatch",
1617 NameStr(collform->collname)),
1618 errdetail("The collation in the database was created using version %s, "
1619 "but the operating system provides version %s.",
1620 collversionstr, actual_versionstr),
1621 errhint("Rebuild all objects affected by this collation and run "
1622 "ALTER COLLATION %s REFRESH VERSION, "
1623 "or build PostgreSQL with the right library version.",
1624 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1625 NameStr(collform->collname)))));
1626 }
1627
1628 ReleaseSysCache(tp);
1629
1630 /* We'll keep the pg_locale_t structures in TopMemoryContext */
1631 resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1632 *resultp = result;
1633
1634 cache_entry->locale = resultp;
1635 }
1636
1637 return cache_entry->locale;
1638 }
1639
1640 /*
1641 * Get provider-specific collation version string for the given collation from
1642 * the operating system/library.
1643 */
1644 char *
get_collation_actual_version(char collprovider,const char * collcollate)1645 get_collation_actual_version(char collprovider, const char *collcollate)
1646 {
1647 char *collversion = NULL;
1648
1649 #ifdef USE_ICU
1650 if (collprovider == COLLPROVIDER_ICU)
1651 {
1652 UCollator *collator;
1653 UErrorCode status;
1654 UVersionInfo versioninfo;
1655 char buf[U_MAX_VERSION_STRING_LENGTH];
1656
1657 status = U_ZERO_ERROR;
1658 collator = ucol_open(collcollate, &status);
1659 if (U_FAILURE(status))
1660 ereport(ERROR,
1661 (errmsg("could not open collator for locale \"%s\": %s",
1662 collcollate, u_errorName(status))));
1663 ucol_getVersion(collator, versioninfo);
1664 ucol_close(collator);
1665
1666 u_versionToString(versioninfo, buf);
1667 collversion = pstrdup(buf);
1668 }
1669 else
1670 #endif
1671 if (collprovider == COLLPROVIDER_LIBC &&
1672 pg_strcasecmp("C", collcollate) != 0 &&
1673 pg_strncasecmp("C.", collcollate, 2) != 0 &&
1674 pg_strcasecmp("POSIX", collcollate) != 0)
1675 {
1676 #if defined(__GLIBC__)
1677 /* Use the glibc version because we don't have anything better. */
1678 collversion = pstrdup(gnu_get_libc_version());
1679 #elif defined(LC_VERSION_MASK)
1680 locale_t loc;
1681
1682 /* Look up FreeBSD collation version. */
1683 loc = newlocale(LC_COLLATE, collcollate, NULL);
1684 if (loc)
1685 {
1686 collversion =
1687 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
1688 freelocale(loc);
1689 }
1690 else
1691 ereport(ERROR,
1692 (errmsg("could not load locale \"%s\"", collcollate)));
1693 #elif defined(WIN32) && _WIN32_WINNT >= 0x0600
1694 /*
1695 * If we are targeting Windows Vista and above, we can ask for a name
1696 * given a collation name (earlier versions required a location code
1697 * that we don't have).
1698 */
1699 NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
1700 WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH];
1701
1702 MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
1703 LOCALE_NAME_MAX_LENGTH);
1704 if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
1705 {
1706 /*
1707 * GetNLSVersionEx() wants a language tag such as "en-US", not a
1708 * locale name like "English_United States.1252". Until those
1709 * values can be prevented from entering the system, or 100%
1710 * reliably converted to the more useful tag format, tolerate the
1711 * resulting error and report that we have no version data.
1712 */
1713 if (GetLastError() == ERROR_INVALID_PARAMETER)
1714 return NULL;
1715
1716 ereport(ERROR,
1717 (errmsg("could not get collation version for locale \"%s\": error code %lu",
1718 collcollate,
1719 GetLastError())));
1720 }
1721 collversion = psprintf("%d.%d,%d.%d",
1722 (version.dwNLSVersion >> 8) & 0xFFFF,
1723 version.dwNLSVersion & 0xFF,
1724 (version.dwDefinedVersion >> 8) & 0xFFFF,
1725 version.dwDefinedVersion & 0xFF);
1726 #endif
1727 }
1728
1729 return collversion;
1730 }
1731
1732
1733 #ifdef USE_ICU
1734 /*
1735 * Converter object for converting between ICU's UChar strings and C strings
1736 * in database encoding. Since the database encoding doesn't change, we only
1737 * need one of these per session.
1738 */
1739 static UConverter *icu_converter = NULL;
1740
1741 static void
init_icu_converter(void)1742 init_icu_converter(void)
1743 {
1744 const char *icu_encoding_name;
1745 UErrorCode status;
1746 UConverter *conv;
1747
1748 if (icu_converter)
1749 return; /* already done */
1750
1751 icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1752 if (!icu_encoding_name)
1753 ereport(ERROR,
1754 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1755 errmsg("encoding \"%s\" not supported by ICU",
1756 pg_encoding_to_char(GetDatabaseEncoding()))));
1757
1758 status = U_ZERO_ERROR;
1759 conv = ucnv_open(icu_encoding_name, &status);
1760 if (U_FAILURE(status))
1761 ereport(ERROR,
1762 (errmsg("could not open ICU converter for encoding \"%s\": %s",
1763 icu_encoding_name, u_errorName(status))));
1764
1765 icu_converter = conv;
1766 }
1767
1768 /*
1769 * Convert a string in the database encoding into a string of UChars.
1770 *
1771 * The source string at buff is of length nbytes
1772 * (it needn't be nul-terminated)
1773 *
1774 * *buff_uchar receives a pointer to the palloc'd result string, and
1775 * the function's result is the number of UChars generated.
1776 *
1777 * The result string is nul-terminated, though most callers rely on the
1778 * result length instead.
1779 */
1780 int32_t
icu_to_uchar(UChar ** buff_uchar,const char * buff,size_t nbytes)1781 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1782 {
1783 UErrorCode status;
1784 int32_t len_uchar;
1785
1786 init_icu_converter();
1787
1788 status = U_ZERO_ERROR;
1789 len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1790 buff, nbytes, &status);
1791 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1792 ereport(ERROR,
1793 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1794
1795 *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1796
1797 status = U_ZERO_ERROR;
1798 len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1799 buff, nbytes, &status);
1800 if (U_FAILURE(status))
1801 ereport(ERROR,
1802 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1803
1804 return len_uchar;
1805 }
1806
1807 /*
1808 * Convert a string of UChars into the database encoding.
1809 *
1810 * The source string at buff_uchar is of length len_uchar
1811 * (it needn't be nul-terminated)
1812 *
1813 * *result receives a pointer to the palloc'd result string, and the
1814 * function's result is the number of bytes generated (not counting nul).
1815 *
1816 * The result string is nul-terminated.
1817 */
1818 int32_t
icu_from_uchar(char ** result,const UChar * buff_uchar,int32_t len_uchar)1819 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1820 {
1821 UErrorCode status;
1822 int32_t len_result;
1823
1824 init_icu_converter();
1825
1826 status = U_ZERO_ERROR;
1827 len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1828 buff_uchar, len_uchar, &status);
1829 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1830 ereport(ERROR,
1831 (errmsg("%s failed: %s", "ucnv_fromUChars",
1832 u_errorName(status))));
1833
1834 *result = palloc(len_result + 1);
1835
1836 status = U_ZERO_ERROR;
1837 len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1838 buff_uchar, len_uchar, &status);
1839 if (U_FAILURE(status))
1840 ereport(ERROR,
1841 (errmsg("%s failed: %s", "ucnv_fromUChars",
1842 u_errorName(status))));
1843
1844 return len_result;
1845 }
1846
1847 /*
1848 * Parse collation attributes and apply them to the open collator. This takes
1849 * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
1850 * applies the key-value arguments.
1851 *
1852 * Starting with ICU version 54, the attributes are processed automatically by
1853 * ucol_open(), so this is only necessary for emulating this behavior on older
1854 * versions.
1855 */
pg_attribute_unused()1856 pg_attribute_unused()
1857 static void
1858 icu_set_collation_attributes(UCollator *collator, const char *loc)
1859 {
1860 char *str = asc_tolower(loc, strlen(loc));
1861
1862 str = strchr(str, '@');
1863 if (!str)
1864 return;
1865 str++;
1866
1867 for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
1868 {
1869 char *e = strchr(token, '=');
1870
1871 if (e)
1872 {
1873 char *name;
1874 char *value;
1875 UColAttribute uattr;
1876 UColAttributeValue uvalue;
1877 UErrorCode status;
1878
1879 status = U_ZERO_ERROR;
1880
1881 *e = '\0';
1882 name = token;
1883 value = e + 1;
1884
1885 /*
1886 * See attribute name and value lists in ICU i18n/coll.cpp
1887 */
1888 if (strcmp(name, "colstrength") == 0)
1889 uattr = UCOL_STRENGTH;
1890 else if (strcmp(name, "colbackwards") == 0)
1891 uattr = UCOL_FRENCH_COLLATION;
1892 else if (strcmp(name, "colcaselevel") == 0)
1893 uattr = UCOL_CASE_LEVEL;
1894 else if (strcmp(name, "colcasefirst") == 0)
1895 uattr = UCOL_CASE_FIRST;
1896 else if (strcmp(name, "colalternate") == 0)
1897 uattr = UCOL_ALTERNATE_HANDLING;
1898 else if (strcmp(name, "colnormalization") == 0)
1899 uattr = UCOL_NORMALIZATION_MODE;
1900 else if (strcmp(name, "colnumeric") == 0)
1901 uattr = UCOL_NUMERIC_COLLATION;
1902 else
1903 /* ignore if unknown */
1904 continue;
1905
1906 if (strcmp(value, "primary") == 0)
1907 uvalue = UCOL_PRIMARY;
1908 else if (strcmp(value, "secondary") == 0)
1909 uvalue = UCOL_SECONDARY;
1910 else if (strcmp(value, "tertiary") == 0)
1911 uvalue = UCOL_TERTIARY;
1912 else if (strcmp(value, "quaternary") == 0)
1913 uvalue = UCOL_QUATERNARY;
1914 else if (strcmp(value, "identical") == 0)
1915 uvalue = UCOL_IDENTICAL;
1916 else if (strcmp(value, "no") == 0)
1917 uvalue = UCOL_OFF;
1918 else if (strcmp(value, "yes") == 0)
1919 uvalue = UCOL_ON;
1920 else if (strcmp(value, "shifted") == 0)
1921 uvalue = UCOL_SHIFTED;
1922 else if (strcmp(value, "non-ignorable") == 0)
1923 uvalue = UCOL_NON_IGNORABLE;
1924 else if (strcmp(value, "lower") == 0)
1925 uvalue = UCOL_LOWER_FIRST;
1926 else if (strcmp(value, "upper") == 0)
1927 uvalue = UCOL_UPPER_FIRST;
1928 else
1929 status = U_ILLEGAL_ARGUMENT_ERROR;
1930
1931 if (status == U_ZERO_ERROR)
1932 ucol_setAttribute(collator, uattr, uvalue, &status);
1933
1934 /*
1935 * Pretend the error came from ucol_open(), for consistent error
1936 * message across ICU versions.
1937 */
1938 if (U_FAILURE(status))
1939 ereport(ERROR,
1940 (errmsg("could not open collator for locale \"%s\": %s",
1941 loc, u_errorName(status))));
1942 }
1943 }
1944 }
1945
1946 #endif /* USE_ICU */
1947
1948 /*
1949 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1950 * Therefore we keep them here rather than with the mbutils code.
1951 */
1952
1953 /*
1954 * wchar2char --- convert wide characters to multibyte format
1955 *
1956 * This has the same API as the standard wcstombs_l() function; in particular,
1957 * tolen is the maximum number of bytes to store at *to, and *from must be
1958 * zero-terminated. The output will be zero-terminated iff there is room.
1959 */
1960 size_t
wchar2char(char * to,const wchar_t * from,size_t tolen,pg_locale_t locale)1961 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1962 {
1963 size_t result;
1964
1965 Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1966
1967 if (tolen == 0)
1968 return 0;
1969
1970 #ifdef WIN32
1971
1972 /*
1973 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1974 * for some reason mbstowcs and wcstombs won't do this for us, so we use
1975 * MultiByteToWideChar().
1976 */
1977 if (GetDatabaseEncoding() == PG_UTF8)
1978 {
1979 result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1980 NULL, NULL);
1981 /* A zero return is failure */
1982 if (result <= 0)
1983 result = -1;
1984 else
1985 {
1986 Assert(result <= tolen);
1987 /* Microsoft counts the zero terminator in the result */
1988 result--;
1989 }
1990 }
1991 else
1992 #endif /* WIN32 */
1993 if (locale == (pg_locale_t) 0)
1994 {
1995 /* Use wcstombs directly for the default locale */
1996 result = wcstombs(to, from, tolen);
1997 }
1998 else
1999 {
2000 #ifdef HAVE_LOCALE_T
2001 #ifdef HAVE_WCSTOMBS_L
2002 /* Use wcstombs_l for nondefault locales */
2003 result = wcstombs_l(to, from, tolen, locale->info.lt);
2004 #else /* !HAVE_WCSTOMBS_L */
2005 /* We have to temporarily set the locale as current ... ugh */
2006 locale_t save_locale = uselocale(locale->info.lt);
2007
2008 result = wcstombs(to, from, tolen);
2009
2010 uselocale(save_locale);
2011 #endif /* HAVE_WCSTOMBS_L */
2012 #else /* !HAVE_LOCALE_T */
2013 /* Can't have locale != 0 without HAVE_LOCALE_T */
2014 elog(ERROR, "wcstombs_l is not available");
2015 result = 0; /* keep compiler quiet */
2016 #endif /* HAVE_LOCALE_T */
2017 }
2018
2019 return result;
2020 }
2021
2022 /*
2023 * char2wchar --- convert multibyte characters to wide characters
2024 *
2025 * This has almost the API of mbstowcs_l(), except that *from need not be
2026 * null-terminated; instead, the number of input bytes is specified as
2027 * fromlen. Also, we ereport() rather than returning -1 for invalid
2028 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
2029 * The output will be zero-terminated iff there is room.
2030 */
2031 size_t
char2wchar(wchar_t * to,size_t tolen,const char * from,size_t fromlen,pg_locale_t locale)2032 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
2033 pg_locale_t locale)
2034 {
2035 size_t result;
2036
2037 Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
2038
2039 if (tolen == 0)
2040 return 0;
2041
2042 #ifdef WIN32
2043 /* See WIN32 "Unicode" comment above */
2044 if (GetDatabaseEncoding() == PG_UTF8)
2045 {
2046 /* Win32 API does not work for zero-length input */
2047 if (fromlen == 0)
2048 result = 0;
2049 else
2050 {
2051 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
2052 /* A zero return is failure */
2053 if (result == 0)
2054 result = -1;
2055 }
2056
2057 if (result != -1)
2058 {
2059 Assert(result < tolen);
2060 /* Append trailing null wchar (MultiByteToWideChar() does not) */
2061 to[result] = 0;
2062 }
2063 }
2064 else
2065 #endif /* WIN32 */
2066 {
2067 /* mbstowcs requires ending '\0' */
2068 char *str = pnstrdup(from, fromlen);
2069
2070 if (locale == (pg_locale_t) 0)
2071 {
2072 /* Use mbstowcs directly for the default locale */
2073 result = mbstowcs(to, str, tolen);
2074 }
2075 else
2076 {
2077 #ifdef HAVE_LOCALE_T
2078 #ifdef HAVE_MBSTOWCS_L
2079 /* Use mbstowcs_l for nondefault locales */
2080 result = mbstowcs_l(to, str, tolen, locale->info.lt);
2081 #else /* !HAVE_MBSTOWCS_L */
2082 /* We have to temporarily set the locale as current ... ugh */
2083 locale_t save_locale = uselocale(locale->info.lt);
2084
2085 result = mbstowcs(to, str, tolen);
2086
2087 uselocale(save_locale);
2088 #endif /* HAVE_MBSTOWCS_L */
2089 #else /* !HAVE_LOCALE_T */
2090 /* Can't have locale != 0 without HAVE_LOCALE_T */
2091 elog(ERROR, "mbstowcs_l is not available");
2092 result = 0; /* keep compiler quiet */
2093 #endif /* HAVE_LOCALE_T */
2094 }
2095
2096 pfree(str);
2097 }
2098
2099 if (result == -1)
2100 {
2101 /*
2102 * Invalid multibyte character encountered. We try to give a useful
2103 * error message by letting pg_verifymbstr check the string. But it's
2104 * possible that the string is OK to us, and not OK to mbstowcs ---
2105 * this suggests that the LC_CTYPE locale is different from the
2106 * database encoding. Give a generic error message if pg_verifymbstr
2107 * can't find anything wrong.
2108 */
2109 pg_verifymbstr(from, fromlen, false); /* might not return */
2110 /* but if it does ... */
2111 ereport(ERROR,
2112 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2113 errmsg("invalid multibyte character for locale"),
2114 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
2115 }
2116
2117 return result;
2118 }
2119