1 /*-----------------------------------------------------------------------
2  *
3  * PostgreSQL locale utilities
4  *
5  * Portions Copyright (c) 2002-2017, PostgreSQL Global Development Group
6  *
7  * src/backend/utils/adt/pg_locale.c
8  *
9  *-----------------------------------------------------------------------
10  */
11 
12 /*----------
13  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16  * toupper(), etc. are always in the same fixed locale.
17  *
18  * LC_MESSAGES is settable at run time and will take effect
19  * immediately.
20  *
21  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22  * settable at run-time.  However, we don't actually set those locale
23  * categories permanently.  This would have bizarre effects like no
24  * longer accepting standard floating-point literals in some locales.
25  * Instead, we only set these locale categories briefly when needed,
26  * cache the required information obtained from localeconv() or
27  * strftime(), and then set the locale categories back to "C".
28  * The cached information is only used by the formatting functions
29  * (to_char, etc.) and the money type.  For the user, this should all be
30  * transparent.
31  *
32  * !!! NOW HEAR THIS !!!
33  *
34  * We've been bitten repeatedly by this bug, so let's try to keep it in
35  * mind in future: on some platforms, the locale functions return pointers
36  * to static data that will be overwritten by any later locale function.
37  * Thus, for example, the obvious-looking sequence
38  *			save = setlocale(category, NULL);
39  *			if (!setlocale(category, value))
40  *				fail = true;
41  *			setlocale(category, save);
42  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43  * will change the memory save is pointing at.  To do this sort of thing
44  * safely, you *must* pstrdup what setlocale returns the first time.
45  *
46  * The POSIX locale standard is available here:
47  *
48  *	http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49  *----------
50  */
51 
52 
53 #include "postgres.h"
54 
55 #include <time.h>
56 
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "catalog/pg_control.h"
60 #include "mb/pg_wchar.h"
61 #include "utils/builtins.h"
62 #include "utils/hsearch.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_locale.h"
66 #include "utils/syscache.h"
67 
68 #ifdef USE_ICU
69 #include <unicode/ucnv.h>
70 /* ICU might have a different definition of "bool", don't buy it */
71 #ifdef bool
72 #undef bool
73 #endif
74 #endif
75 
#ifdef WIN32
/*
 * <shlwapi.h> defines its own StrNCpy macro.  We don't need either version
 * in this file, so undefine ours before including the header to keep the
 * compiler quiet, and undefine theirs again afterwards so that we can't
 * accidentally use it.
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
88 
/*
 * Size of the buffer reserved for each localized string.  cache_locale_time()
 * gives every strftime() result one MAX_L10N_DATA-byte slot, and
 * strftime_win32() uses a wide-character work buffer of the same length.
 */
#define		MAX_L10N_DATA		80


/*
 * GUC settings
 *
 * locale_numeric and locale_monetary are the locale names installed
 * temporarily by PGLC_localeconv(); locale_time is the one installed
 * temporarily by cache_locale_time().
 */
char	   *locale_messages;
char	   *locale_monetary;
char	   *locale_numeric;
char	   *locale_time;

/*
 * lc_time localization cache
 *
 * Filled in by cache_locale_time().  Each entry is a string in the database
 * encoding allocated in TopMemoryContext, or NULL if never populated.
 */
char	   *localized_abbrev_days[7];
char	   *localized_full_days[7];
char	   *localized_abbrev_months[12];
char	   *localized_full_months[12];

/*
 * indicates whether locale information cache is valid
 *
 * CurrentLocaleConvValid guards the result of PGLC_localeconv() and is
 * cleared by the LC_MONETARY and LC_NUMERIC assign hooks.
 * CurrentLCTimeValid guards the lc_time cache above and is cleared by the
 * LC_TIME assign hook.
 */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;

/*
 * Environment variable storage area
 *
 * pg_perm_setlocale() formats "LC_XXX=value" into these buffers and hands
 * them to putenv().  putenv() makes the passed string itself part of the
 * environment, so the buffers need static storage duration; there is one
 * per category so that settings don't overwrite each other.
 */

#define LC_ENV_BUFSIZE (NAMEDATALEN + 20)

static char lc_collate_envbuf[LC_ENV_BUFSIZE];
static char lc_ctype_envbuf[LC_ENV_BUFSIZE];

#ifdef LC_MESSAGES
static char lc_messages_envbuf[LC_ENV_BUFSIZE];
#endif
static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
static char lc_time_envbuf[LC_ENV_BUFSIZE];
121 
/*
 * Cache for collation-related knowledge
 *
 * One entry per collation, keyed by its pg_collation OID.  The two *_is_c
 * flags are meaningful only when flags_valid is set; locale is 0 until a
 * locale_t has been stored for the collation.
 */

typedef struct
{
	Oid			collid;			/* hash key: pg_collation OID */
	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
	bool		flags_valid;	/* true if above flags are valid */
	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
} collation_cache_entry;

/* Hash table of collation_cache_entry; starts out NULL (not yet created) */
static HTAB *collation_cache = NULL;


/*
 * On Windows builds that have LC_MESSAGES, pg_perm_setlocale() calls
 * IsoLocaleName() to translate the requested locale name before exporting
 * it in the LC_MESSAGES environment variable.
 */
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *);	/* MSVC specific */
#endif
139 
140 
141 /*
142  * pg_perm_setlocale
143  *
144  * This wraps the libc function setlocale(), with two additions.  First, when
145  * changing LC_CTYPE, update gettext's encoding for the current message
146  * domain.  GNU gettext automatically tracks LC_CTYPE on most platforms, but
147  * not on Windows.  Second, if the operation is successful, the corresponding
148  * LC_XXX environment variable is set to match.  By setting the environment
149  * variable, we ensure that any subsequent use of setlocale(..., "") will
150  * preserve the settings made through this routine.  Of course, LC_ALL must
151  * also be unset to fully ensure that, but that has to be done elsewhere after
152  * all the individual LC_XXX variables have been set correctly.  (Thank you
153  * Perl for making this kluge necessary.)
154  */
155 char *
pg_perm_setlocale(int category,const char * locale)156 pg_perm_setlocale(int category, const char *locale)
157 {
158 	char	   *result;
159 	const char *envvar;
160 	char	   *envbuf;
161 
162 #ifndef WIN32
163 	result = setlocale(category, locale);
164 #else
165 
166 	/*
167 	 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
168 	 * the given value is good and set it in the environment variables. We
169 	 * must ignore attempts to set to "", which means "keep using the old
170 	 * environment value".
171 	 */
172 #ifdef LC_MESSAGES
173 	if (category == LC_MESSAGES)
174 	{
175 		result = (char *) locale;
176 		if (locale == NULL || locale[0] == '\0')
177 			return result;
178 	}
179 	else
180 #endif
181 		result = setlocale(category, locale);
182 #endif							/* WIN32 */
183 
184 	if (result == NULL)
185 		return result;			/* fall out immediately on failure */
186 
187 	/*
188 	 * Use the right encoding in translated messages.  Under ENABLE_NLS, let
189 	 * pg_bind_textdomain_codeset() figure it out.  Under !ENABLE_NLS, message
190 	 * format strings are ASCII, but database-encoding strings may enter the
191 	 * message via %s.  This makes the overall message encoding equal to the
192 	 * database encoding.
193 	 */
194 	if (category == LC_CTYPE)
195 	{
196 		static char save_lc_ctype[LC_ENV_BUFSIZE];
197 
198 		/* copy setlocale() return value before callee invokes it again */
199 		strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
200 		result = save_lc_ctype;
201 
202 #ifdef ENABLE_NLS
203 		SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
204 #else
205 		SetMessageEncoding(GetDatabaseEncoding());
206 #endif
207 	}
208 
209 	switch (category)
210 	{
211 		case LC_COLLATE:
212 			envvar = "LC_COLLATE";
213 			envbuf = lc_collate_envbuf;
214 			break;
215 		case LC_CTYPE:
216 			envvar = "LC_CTYPE";
217 			envbuf = lc_ctype_envbuf;
218 			break;
219 #ifdef LC_MESSAGES
220 		case LC_MESSAGES:
221 			envvar = "LC_MESSAGES";
222 			envbuf = lc_messages_envbuf;
223 #ifdef WIN32
224 			result = IsoLocaleName(locale);
225 			if (result == NULL)
226 				result = (char *) locale;
227 			elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
228 #endif							/* WIN32 */
229 			break;
230 #endif							/* LC_MESSAGES */
231 		case LC_MONETARY:
232 			envvar = "LC_MONETARY";
233 			envbuf = lc_monetary_envbuf;
234 			break;
235 		case LC_NUMERIC:
236 			envvar = "LC_NUMERIC";
237 			envbuf = lc_numeric_envbuf;
238 			break;
239 		case LC_TIME:
240 			envvar = "LC_TIME";
241 			envbuf = lc_time_envbuf;
242 			break;
243 		default:
244 			elog(FATAL, "unrecognized LC category: %d", category);
245 			envvar = NULL;		/* keep compiler quiet */
246 			envbuf = NULL;
247 			return NULL;
248 	}
249 
250 	snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
251 
252 	if (putenv(envbuf))
253 		return NULL;
254 
255 	return result;
256 }
257 
258 
259 /*
260  * Is the locale name valid for the locale category?
261  *
262  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
263  * canonical name is stored there.  This is especially useful for figuring out
264  * what locale name "" means (ie, the server environment value).  (Actually,
265  * it seems that on most implementations that's the only thing it's good for;
266  * we could wish that setlocale gave back a canonically spelled version of
267  * the locale name, but typically it doesn't.)
268  */
269 bool
check_locale(int category,const char * locale,char ** canonname)270 check_locale(int category, const char *locale, char **canonname)
271 {
272 	char	   *save;
273 	char	   *res;
274 
275 	if (canonname)
276 		*canonname = NULL;		/* in case of failure */
277 
278 	save = setlocale(category, NULL);
279 	if (!save)
280 		return false;			/* won't happen, we hope */
281 
282 	/* save may be pointing at a modifiable scratch variable, see above. */
283 	save = pstrdup(save);
284 
285 	/* set the locale with setlocale, to see if it accepts it. */
286 	res = setlocale(category, locale);
287 
288 	/* save canonical name if requested. */
289 	if (res && canonname)
290 		*canonname = pstrdup(res);
291 
292 	/* restore old value. */
293 	if (!setlocale(category, save))
294 		elog(WARNING, "failed to restore old locale \"%s\"", save);
295 	pfree(save);
296 
297 	return (res != NULL);
298 }
299 
300 
301 /*
302  * GUC check/assign hooks
303  *
304  * For most locale categories, the assign hook doesn't actually set the locale
305  * permanently, just reset flags so that the next use will cache the
306  * appropriate values.  (See explanation at the top of this file.)
307  *
308  * Note: we accept value = "" as selecting the postmaster's environment
309  * value, whatever it was (so long as the environment setting is legal).
310  * This will have been locked down by an earlier call to pg_perm_setlocale.
311  */
312 bool
check_locale_monetary(char ** newval,void ** extra,GucSource source)313 check_locale_monetary(char **newval, void **extra, GucSource source)
314 {
315 	return check_locale(LC_MONETARY, *newval, NULL);
316 }
317 
318 void
assign_locale_monetary(const char * newval,void * extra)319 assign_locale_monetary(const char *newval, void *extra)
320 {
321 	CurrentLocaleConvValid = false;
322 }
323 
324 bool
check_locale_numeric(char ** newval,void ** extra,GucSource source)325 check_locale_numeric(char **newval, void **extra, GucSource source)
326 {
327 	return check_locale(LC_NUMERIC, *newval, NULL);
328 }
329 
330 void
assign_locale_numeric(const char * newval,void * extra)331 assign_locale_numeric(const char *newval, void *extra)
332 {
333 	CurrentLocaleConvValid = false;
334 }
335 
336 bool
check_locale_time(char ** newval,void ** extra,GucSource source)337 check_locale_time(char **newval, void **extra, GucSource source)
338 {
339 	return check_locale(LC_TIME, *newval, NULL);
340 }
341 
342 void
assign_locale_time(const char * newval,void * extra)343 assign_locale_time(const char *newval, void *extra)
344 {
345 	CurrentLCTimeValid = false;
346 }
347 
348 /*
349  * We allow LC_MESSAGES to actually be set globally.
350  *
351  * Note: we normally disallow value = "" because it wouldn't have consistent
352  * semantics (it'd effectively just use the previous value).  However, this
353  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
354  * not even if the attempted setting fails due to invalid environment value.
355  * The idea there is just to accept the environment setting *if possible*
356  * during startup, until we can read the proper value from postgresql.conf.
357  */
358 bool
check_locale_messages(char ** newval,void ** extra,GucSource source)359 check_locale_messages(char **newval, void **extra, GucSource source)
360 {
361 	if (**newval == '\0')
362 	{
363 		if (source == PGC_S_DEFAULT)
364 			return true;
365 		else
366 			return false;
367 	}
368 
369 	/*
370 	 * LC_MESSAGES category does not exist everywhere, but accept it anyway
371 	 *
372 	 * On Windows, we can't even check the value, so accept blindly
373 	 */
374 #if defined(LC_MESSAGES) && !defined(WIN32)
375 	return check_locale(LC_MESSAGES, *newval, NULL);
376 #else
377 	return true;
378 #endif
379 }
380 
/*
 * Assign hook for the LC_MESSAGES setting: install the new value for real
 * via pg_perm_setlocale().
 *
 * The result is deliberately ignored.  The check hook lets the default ""
 * through even when the environment value it stands for is invalid, so a
 * failure here must not be treated as an error.  Where the platform has no
 * LC_MESSAGES category at all, this does nothing.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	(void) extra;				/* not needed */

#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#else
	(void) newval;				/* nothing to set on this platform */
#endif
}
392 
393 
/*
 * Release the malloc'd strings hanging off a struct lconv; the struct itself
 * is left alone (and its pointers are not reset).  Members that are NULL are
 * skipped harmlessly.
 *
 * This must never throw elog(ERROR), so it sticks to plain free().  The
 * member list has to stay in sync with struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	char	   *members[] = {
		s->decimal_point,
		s->thousands_sep,
		s->grouping,
		s->int_curr_symbol,
		s->currency_symbol,
		s->mon_decimal_point,
		s->mon_thousands_sep,
		s->mon_grouping,
		s->positive_sign,
		s->negative_sign
	};
	size_t		idx;

	for (idx = 0; idx < sizeof(members) / sizeof(members[0]); idx++)
		free(members[idx]);		/* free(NULL) is a no-op */
}
422 
/*
 * Report whether every string member of a struct lconv that we care about
 * has been filled in (is non-NULL).  The member list has to stay in sync
 * with free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
452 
453 
454 /*
455  * Convert the strdup'd string at *str from the specified encoding to the
456  * database encoding.
457  */
458 static void
db_encoding_convert(int encoding,char ** str)459 db_encoding_convert(int encoding, char **str)
460 {
461 	char	   *pstr;
462 	char	   *mstr;
463 
464 	/* convert the string to the database encoding */
465 	pstr = pg_any_to_server(*str, strlen(*str), encoding);
466 	if (pstr == *str)
467 		return;					/* no conversion happened */
468 
469 	/* need it malloc'd not palloc'd */
470 	mstr = strdup(pstr);
471 	if (mstr == NULL)
472 		ereport(ERROR,
473 				(errcode(ERRCODE_OUT_OF_MEMORY),
474 				 errmsg("out of memory")));
475 
476 	/* replace old string */
477 	free(*str);
478 	*str = mstr;
479 
480 	pfree(pstr);
481 }
482 
483 
/*
 * Return the POSIX lconv struct (contains number/money formatting
 * information) with locale information for all categories.
 *
 * The numeric string fields (decimal_point, thousands_sep, grouping) are
 * collected with LC_NUMERIC temporarily set to locale_numeric; the monetary
 * string fields and the scalar fields are collected with LC_MONETARY
 * temporarily set to locale_monetary.  Text fields are then converted to
 * the database encoding.
 *
 * The result points at a static struct owned by this function.  It is
 * cached until CurrentLocaleConvValid is cleared; the next call after that
 * frees the old strings and rebuilds everything.
 *
 * No ERROR is thrown between changing and restoring the locale settings;
 * failure to restore them is FATAL.  Out-of-memory and encoding conversion
 * problems are reported with ERROR only after restoration, and in that case
 * the cache stays invalid.
 */
struct lconv *
PGLC_localeconv(void)
{
	static struct lconv CurrentLocaleConv;
	static bool CurrentLocaleConvAllocated = false;
	struct lconv *extlconv;
	struct lconv worklconv;
	char	   *save_lc_monetary;
	char	   *save_lc_numeric;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* Did we do it already? */
	if (CurrentLocaleConvValid)
		return &CurrentLocaleConv;

	/* Free any already-allocated storage */
	if (CurrentLocaleConvAllocated)
	{
		free_struct_lconv(&CurrentLocaleConv);
		CurrentLocaleConvAllocated = false;
	}

	/*
	 * This is tricky because we really don't want to risk throwing error
	 * while the locale is set to other than our usual settings.  Therefore,
	 * the process is: collect the usual settings, set locale to special
	 * setting, copy relevant data into worklconv using strdup(), restore
	 * normal settings, convert data to desired encoding, and finally stash
	 * the collected data in CurrentLocaleConv.  This makes it safe if we
	 * throw an error during encoding conversion or run out of memory anywhere
	 * in the process.  All data pointed to by struct lconv members is
	 * allocated with strdup, to avoid premature elog(ERROR) and to allow
	 * using a single cleanup routine.
	 */
	/* Zeroed so that free_struct_lconv() is safe on a partly filled struct */
	memset(&worklconv, 0, sizeof(worklconv));

	/*
	 * Save prevailing values of monetary and numeric locales.  The values
	 * are pstrdup'd because setlocale() may overwrite its return buffer on
	 * the next call.
	 */
	save_lc_monetary = setlocale(LC_MONETARY, NULL);
	if (!save_lc_monetary)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_monetary = pstrdup(save_lc_monetary);

	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
	if (!save_lc_numeric)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_numeric = pstrdup(save_lc_numeric);

#ifdef WIN32

	/*
	 * The POSIX standard explicitly says that it is undefined what happens if
	 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
	 * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
	 * believe that localeconv() should return strings that are encoded in the
	 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
	 * once we have successfully collected the localeconv() results, we will
	 * convert them from that codeset to the desired server encoding.
	 *
	 * Windows, of course, resolutely does things its own way; on that
	 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
	 * results.  Hence, we must temporarily set that category as well.
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* Here begins the critical section where we must not throw error */

	/* use numeric to set the ctype */
	setlocale(LC_CTYPE, locale_numeric);
#endif

	/*
	 * From here until the settings are restored below, nothing may throw an
	 * error (on every platform, not just Windows); that is why strdup()
	 * results are not checked until afterwards.
	 */

	/* Get formatting information for numeric */
	setlocale(LC_NUMERIC, locale_numeric);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.decimal_point = strdup(extlconv->decimal_point);
	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
	worklconv.grouping = strdup(extlconv->grouping);

#ifdef WIN32
	/* use monetary to set the ctype */
	setlocale(LC_CTYPE, locale_monetary);
#endif

	/* Get formatting information for monetary */
	setlocale(LC_MONETARY, locale_monetary);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
	worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
	worklconv.mon_grouping = strdup(extlconv->mon_grouping);
	worklconv.positive_sign = strdup(extlconv->positive_sign);
	worklconv.negative_sign = strdup(extlconv->negative_sign);
	/* Copy scalar fields as well */
	worklconv.int_frac_digits = extlconv->int_frac_digits;
	worklconv.frac_digits = extlconv->frac_digits;
	worklconv.p_cs_precedes = extlconv->p_cs_precedes;
	worklconv.p_sep_by_space = extlconv->p_sep_by_space;
	worklconv.n_cs_precedes = extlconv->n_cs_precedes;
	worklconv.n_sep_by_space = extlconv->n_sep_by_space;
	worklconv.p_sign_posn = extlconv->p_sign_posn;
	worklconv.n_sign_posn = extlconv->n_sign_posn;

	/*
	 * Restore the prevailing locale settings; failure to do so is fatal.
	 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
	 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
	 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
	 * are almost certainly "C", there's really no reason that restoring those
	 * should fail.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_MONETARY, save_lc_monetary))
		elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
	if (!setlocale(LC_NUMERIC, save_lc_numeric))
		elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);

	/*
	 * At this point we've done our best to clean up, and can call functions
	 * that might possibly throw errors with a clean conscience.  But let's
	 * make sure we don't leak any already-strdup'd fields in worklconv.
	 */
	PG_TRY();
	{
		int			encoding;

		/* Release the pstrdup'd locale names */
		pfree(save_lc_monetary);
		pfree(save_lc_numeric);
#ifdef WIN32
		pfree(save_lc_ctype);
#endif

		/* If any of the preceding strdup calls failed, complain now. */
		if (!struct_lconv_is_valid(&worklconv))
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));

		/*
		 * Now we must perform encoding conversion from whatever's associated
		 * with the locales into the database encoding.  If we can't identify
		 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
		 * use PG_SQL_ASCII, which will result in just validating that the
		 * strings are OK in the database encoding.
		 */
		encoding = pg_get_encoding_from_locale(locale_numeric, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.decimal_point);
		db_encoding_convert(encoding, &worklconv.thousands_sep);
		/* grouping is not text and does not require conversion */

		encoding = pg_get_encoding_from_locale(locale_monetary, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.int_curr_symbol);
		db_encoding_convert(encoding, &worklconv.currency_symbol);
		db_encoding_convert(encoding, &worklconv.mon_decimal_point);
		db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
		/* mon_grouping is not text and does not require conversion */
		db_encoding_convert(encoding, &worklconv.positive_sign);
		db_encoding_convert(encoding, &worklconv.negative_sign);
	}
	PG_CATCH();
	{
		/* worklconv never reached the cache, so its strings are ours to free */
		free_struct_lconv(&worklconv);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/*
	 * Everything is good, so save the results.  The struct assignment hands
	 * ownership of the strdup'd strings over to CurrentLocaleConv.
	 */
	CurrentLocaleConv = worklconv;
	CurrentLocaleConvAllocated = true;
	CurrentLocaleConvValid = true;
	return &CurrentLocaleConv;
}
682 
#ifdef WIN32
/*
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings. Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * dst/dstlen: output buffer and its size in bytes, including room for the
 *		terminating NUL.
 * format: strftime-style format string; must be plain ASCII and short
 *		enough to fit in the 8-element wide-character format buffer.
 * tm: broken-down time to format.
 *
 * Returns the number of bytes stored in dst, not counting the terminating
 * NUL, or 0 on failure with the contents of dst unspecified.
 *
 * This function must not throw an error: cache_locale_time() calls it while
 * LC_TIME and LC_CTYPE are temporarily set to nondefault values, and an
 * elog(ERROR) at that point would skip restoring them.  Conversion failures
 * are therefore reported by returning 0 with errno set, which the caller
 * turns into an error once the locale settings are back to normal.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	int			nconv;
	size_t		wlen;
	wchar_t		wformat[8];		/* formats used below need 3 chars */
	wchar_t		wbuf[MAX_L10N_DATA];

	/* Need room for at least the terminating NUL; avoids dstlen - 1 wrap */
	if (dstlen == 0)
	{
		errno = EINVAL;
		return 0;
	}

	/*
	 * Get a wchar_t version of the format string.  We only actually use
	 * plain-ASCII formats in this file, so we can say that they're UTF8.
	 */
	nconv = MultiByteToWideChar(CP_UTF8, 0, format, -1,
								wformat, lengthof(wformat));
	if (nconv == 0)
	{
		errno = EINVAL;
		return 0;
	}

	wlen = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
	if (wlen == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	nconv = WideCharToMultiByte(CP_UTF8, 0, wbuf, (int) wlen,
								dst, (int) (dstlen - 1),
								NULL, NULL);
	if (nconv == 0)
	{
		/* distinguish "doesn't fit" from "can't be converted" for %m */
		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
			errno = ERANGE;
		else
			errno = EILSEQ;
		return 0;
	}

	dst[nconv] = '\0';

	return (size_t) nconv;
}

/* redefine strftime() */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
741 
742 /*
743  * Subroutine for cache_locale_time().
744  * Convert the given string from encoding "encoding" to the database
745  * encoding, and store the result at *dst, replacing any previous value.
746  */
747 static void
cache_single_string(char ** dst,const char * src,int encoding)748 cache_single_string(char **dst, const char *src, int encoding)
749 {
750 	char	   *ptr;
751 	char	   *olddst;
752 
753 	/* Convert the string to the database encoding, or validate it's OK */
754 	ptr = pg_any_to_server(src, strlen(src), encoding);
755 
756 	/* Store the string in long-lived storage, replacing any previous value */
757 	olddst = *dst;
758 	*dst = MemoryContextStrdup(TopMemoryContext, ptr);
759 	if (olddst)
760 		pfree(olddst);
761 
762 	/* Might as well clean up any palloc'd conversion result, too */
763 	if (ptr != src)
764 		pfree(ptr);
765 }
766 
/*
 * Update the lc_time localization cache variables if needed.
 *
 * Does nothing if CurrentLCTimeValid is already set.  Otherwise, with
 * LC_TIME (and on Windows also LC_CTYPE) temporarily set to locale_time,
 * obtain the abbreviated and full names of the seven weekdays and twelve
 * months from strftime(), restore the previous locale settings, and then
 * store the names, converted to the database encoding, in
 * localized_abbrev_days[], localized_full_days[], localized_abbrev_months[]
 * and localized_full_months[].
 *
 * A strftime() failure or an encoding conversion failure is reported with
 * ERROR, but only after the locale settings have been restored; failure to
 * restore them is FATAL.  The cache is marked valid only if every string
 * was stored.
 */
void
cache_locale_time(void)
{
	/* one MAX_L10N_DATA-byte slot per string: 2 * 7 day + 2 * 12 month names */
	char		buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
	char	   *bufptr;
	time_t		timenow;
	struct tm  *timeinfo;
	bool		strftimefail = false;
	int			encoding;
	int			i;
	char	   *save_lc_time;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* did we do this already? */
	if (CurrentLCTimeValid)
		return;

	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

	/*
	 * As in PGLC_localeconv(), it's critical that we not throw error while
	 * libc's locale settings have nondefault values.  Hence, we just call
	 * strftime() within the critical section, and then convert and save its
	 * results afterwards.
	 */

	/*
	 * Save prevailing value of time locale.  It is pstrdup'd because
	 * setlocale() may overwrite its return buffer on the next call.
	 */
	save_lc_time = setlocale(LC_TIME, NULL);
	if (!save_lc_time)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_time = pstrdup(save_lc_time);

#ifdef WIN32

	/*
	 * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
	 * must set it here.  This code looks the same as what PGLC_localeconv()
	 * does, but the underlying reason is different: this does NOT determine
	 * the encoding we'll get back from strftime_win32().
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* use lc_time to set the ctype */
	setlocale(LC_CTYPE, locale_time);
#endif

	/* Critical section starts here: no errors until settings are restored */
	setlocale(LC_TIME, locale_time);

	/* We use times close to current time as data for strftime(). */
	timenow = time(NULL);
	timeinfo = localtime(&timenow);

	/* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
	bufptr = buf;

	/*
	 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
	 * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
	 * error.)  An implementation might report errors (e.g. ENOMEM) by
	 * returning 0 (or, less plausibly, a negative value) and setting errno.
	 * Report errno just in case the implementation did that, but clear it in
	 * advance of the calls so we don't emit a stale, unrelated errno.
	 */
	errno = 0;

	/*
	 * localized days: slots are filled in the order abbreviated, full for
	 * each day, which the read-back loop further down relies on.  Failures
	 * are only noted here, not reported, because of the critical section.
	 */
	for (i = 0; i < 7; i++)
	{
		timeinfo->tm_wday = i;
		if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/* localized months: same slot layout, abbreviated then full */
	for (i = 0; i < 12; i++)
	{
		timeinfo->tm_mon = i;
		timeinfo->tm_mday = 1;	/* make sure we don't have invalid date */
		if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/*
	 * Restore the prevailing locale settings; as in PGLC_localeconv(),
	 * failure to do so is fatal.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_TIME, save_lc_time))
		elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);

	/*
	 * At this point we've done our best to clean up, and can throw errors, or
	 * call functions that might throw errors, with a clean conscience.
	 */
	if (strftimefail)
		elog(ERROR, "strftime() failed: %m");

	/* Release the pstrdup'd locale names */
	pfree(save_lc_time);
#ifdef WIN32
	pfree(save_lc_ctype);
#endif

#ifndef WIN32

	/*
	 * As in PGLC_localeconv(), we must convert strftime()'s output from the
	 * encoding implied by LC_TIME to the database encoding.  If we can't
	 * identify the LC_TIME encoding, just perform encoding validation.
	 */
	encoding = pg_get_encoding_from_locale(locale_time, true);
	if (encoding < 0)
		encoding = PG_SQL_ASCII;

#else

	/*
	 * On Windows, strftime_win32() always returns UTF8 data, so convert from
	 * that if necessary.
	 */
	encoding = PG_UTF8;

#endif							/* WIN32 */

	/* Walk buf[] again in the same slot order used when filling it */
	bufptr = buf;

	/* localized days */
	for (i = 0; i < 7; i++)
	{
		cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	/* localized months */
	for (i = 0; i < 12; i++)
	{
		cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	/* Reached only if every string above was converted and stored */
	CurrentLCTimeValid = true;
}
934 
935 
936 #if defined(WIN32) && defined(LC_MESSAGES)
937 /*
938  * Convert a Windows setlocale() argument to a Unix-style one.
939  *
940  * Regardless of platform, we install message catalogs under a Unix-style
941  * LL[_CC][.ENCODING][@VARIANT] naming convention.  Only LC_MESSAGES settings
942  * following that style will elicit localized interface strings.
943  *
944  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
945  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
946  * case-insensitive.  setlocale() returns the fully-qualified form; for
947  * example, setlocale("thaI") returns "Thai_Thailand.874".  Internally,
948  * setlocale() and _create_locale() select a "locale identifier"[1] and store
949  * it in an undocumented _locale_t field.  From that LCID, we can retrieve the
950  * ISO 639 language and the ISO 3166 country.  Character encoding does not
951  * matter, because the server and client encodings govern that.
952  *
953  * Windows Vista introduced the "locale name" concept[2], closely following
954  * RFC 4646.  Locale identifiers are now deprecated.  Starting with Visual
955  * Studio 2012, setlocale() accepts locale names in addition to the strings it
956  * accepted historically.  It does not standardize them; setlocale("Th-tH")
957  * returns "Th-tH".  setlocale(category, "") still returns a traditional
958  * string.  Furthermore, msvcr110.dll changed the undocumented _locale_t
959  * content to carry locale names instead of locale identifiers.
960  *
 * Visual Studio 2015 should still be able to do the same as Visual Studio
 * 2012, but its _locale_t no longer declares locale_name, so code relying on
 * that field fails to compile.  For that compiler and later we instead
 * enumerate all system locales with EnumSystemLocalesEx() to find the
 * required locale name.  If the input argument is already in Unix style, we
 * can get the ISO locale name directly by calling GetLocaleInfoEx() with
 * LCType LOCALE_SNAME.
968  *
969  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
970  * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
971  * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
972  * localized messages. In particular, every lc_messages setting that initdb
973  * can select automatically will yield only C-locale messages. XXX This could
974  * be fixed by running the fully-qualified locale name through a lookup table.
975  *
976  * This function returns a pointer to a static buffer bearing the converted
977  * name or NULL if conversion fails.
978  *
979  * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
980  * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
981  */
982 
983 #if _MSC_VER >= 1900
984 /*
985  * Callback function for EnumSystemLocalesEx() in get_iso_localename().
986  *
987  * This function enumerates all system locales, searching for one that matches
988  * an input with the format: <Language>[_<Country>], e.g.
989  * English[_United States]
990  *
991  * The input is a three wchar_t array as an LPARAM. The first element is the
992  * locale_name we want to match, the second element is an allocated buffer
993  * where the Unix-style locale is copied if a match is found, and the third
994  * element is the search status, 1 if a match was found, 0 otherwise.
995  */
996 static BOOL CALLBACK
search_locale_enum(LPWSTR pStr,DWORD dwFlags,LPARAM lparam)997 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
998 {
999 	wchar_t		test_locale[LOCALE_NAME_MAX_LENGTH];
1000 	wchar_t   **argv;
1001 
1002 	(void) (dwFlags);
1003 
1004 	argv = (wchar_t **) lparam;
1005 	*argv[2] = (wchar_t) 0;
1006 
1007 	memset(test_locale, 0, sizeof(test_locale));
1008 
1009 	/* Get the name of the <Language> in English */
1010 	if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1011 						test_locale, LOCALE_NAME_MAX_LENGTH))
1012 	{
1013 		/*
1014 		 * If the enumerated locale does not have a hyphen ("en") OR  the
1015 		 * lc_message input does not have an underscore ("English"), we only
1016 		 * need to compare the <Language> tags.
1017 		 */
1018 		if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1019 		{
1020 			if (_wcsicmp(argv[0], test_locale) == 0)
1021 			{
1022 				wcscpy(argv[1], pStr);
1023 				*argv[2] = (wchar_t) 1;
1024 				return FALSE;
1025 			}
1026 		}
1027 
1028 		/*
1029 		 * We have to compare a full <Language>_<Country> tag, so we append
1030 		 * the underscore and name of the country/region in English, e.g.
1031 		 * "English_United States".
1032 		 */
1033 		else
1034 		{
1035 			size_t		len;
1036 
1037 			wcscat(test_locale, L"_");
1038 			len = wcslen(test_locale);
1039 			if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1040 								test_locale + len,
1041 								LOCALE_NAME_MAX_LENGTH - len))
1042 			{
1043 				if (_wcsicmp(argv[0], test_locale) == 0)
1044 				{
1045 					wcscpy(argv[1], pStr);
1046 					*argv[2] = (wchar_t) 1;
1047 					return FALSE;
1048 				}
1049 			}
1050 		}
1051 	}
1052 
1053 	return TRUE;
1054 }
1055 
1056 /*
1057  * This function converts a Windows locale name to an ISO formatted version
1058  * for Visual Studio 2015 or greater.
1059  *
1060  * Returns NULL, if no valid conversion was found.
1061  */
1062 static char *
get_iso_localename(const char * winlocname)1063 get_iso_localename(const char *winlocname)
1064 {
1065 	wchar_t		wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1066 	wchar_t		buffer[LOCALE_NAME_MAX_LENGTH];
1067 	static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1068 	char	   *period;
1069 	int			len;
1070 	int			ret_val;
1071 
1072 	/*
1073 	 * Valid locales have the following syntax:
1074 	 * <Language>[_<Country>[.<CodePage>]]
1075 	 *
1076 	 * GetLocaleInfoEx can only take locale name without code-page and for the
1077 	 * purpose of this API the code-page doesn't matter.
1078 	 */
1079 	period = strchr(winlocname, '.');
1080 	if (period != NULL)
1081 		len = period - winlocname;
1082 	else
1083 		len = pg_mbstrlen(winlocname);
1084 
1085 	memset(wc_locale_name, 0, sizeof(wc_locale_name));
1086 	memset(buffer, 0, sizeof(buffer));
1087 	MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1088 						LOCALE_NAME_MAX_LENGTH);
1089 
1090 	/*
1091 	 * If the lc_messages is already an Unix-style string, we have a direct
1092 	 * match with LOCALE_SNAME, e.g. en-US, en_US.
1093 	 */
1094 	ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1095 							  LOCALE_NAME_MAX_LENGTH);
1096 	if (!ret_val)
1097 	{
1098 		/*
1099 		 * Search for a locale in the system that matches language and country
1100 		 * name.
1101 		 */
1102 		wchar_t    *argv[3];
1103 
1104 		argv[0] = wc_locale_name;
1105 		argv[1] = buffer;
1106 		argv[2] = (wchar_t *) &ret_val;
1107 		EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1108 							NULL);
1109 	}
1110 
1111 	if (ret_val)
1112 	{
1113 		size_t		rc;
1114 		char	   *hyphen;
1115 
1116 		/* Locale names use only ASCII, any conversion locale suffices. */
1117 		rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1118 		if (rc == -1 || rc == sizeof(iso_lc_messages))
1119 			return NULL;
1120 
1121 		/*
1122 		 * Simply replace the hyphen with an underscore.  See comments in
1123 		 * IsoLocaleName.
1124 		 */
1125 		hyphen = strchr(iso_lc_messages, '-');
1126 		if (hyphen)
1127 			*hyphen = '_';
1128 
1129 		return iso_lc_messages;
1130 	}
1131 
1132 	return NULL;
1133 }
1134 #endif							/* _MSC_VER >= 1900 */
1135 
/*
 * Convert the Windows setlocale() argument winlocname to a Unix-style name.
 *
 * Returns a pointer to a static buffer holding the converted name, or NULL
 * if the conversion fails or is not supported by the compiler in use.  Which
 * conversion method is compiled in depends on _MSC_VER.
 */
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)			/* VC8.0 or later */
	static char iso_lc_messages[32];
	_locale_t	loct = NULL;

	/* "C" and "POSIX", in any letter case, are both reported as "C" */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

#if (_MSC_VER >= 1900)			/* Visual Studio 2015 or later */
	/* Delegate entirely to the GetLocaleInfoEx/EnumSystemLocalesEx method */
	return get_iso_localename(winlocname);
#else
	loct = _create_locale(LC_CTYPE, winlocname);
	if (loct != NULL)
	{
#if (_MSC_VER >= 1700)			/* Visual Studio 2012 or later */
		size_t		rc;
		char	   *hyphen;

		/* Locale names use only ASCII, any conversion locale suffices. */
		rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
						sizeof(iso_lc_messages), NULL);
		_free_locale(loct);
		/* Fail on conversion error or when the result filled the buffer */
		if (rc == -1 || rc == sizeof(iso_lc_messages))
			return NULL;

		/*
		 * Since the message catalogs sit on a case-insensitive filesystem, we
		 * need not standardize letter case here.  So long as we do not ship
		 * message catalogs for which it would matter, we also need not
		 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
		 * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
		 *
		 * Note that the locale name can be less-specific than the value we
		 * would derive under earlier Visual Studio releases.  For example,
		 * French_France.1252 yields just "fr".  This does not affect any of
		 * the country-specific message catalogs available as of this writing
		 * (pt_BR, zh_CN, zh_TW).
		 */
		hyphen = strchr(iso_lc_messages, '-');
		if (hyphen)
			*hyphen = '_';
#else
		char		isolang[32],
					isocrty[32];
		LCID		lcid;

		/* A zero locale identifier is treated as US English */
		lcid = loct->locinfo->lc_handle[LC_CTYPE];
		if (lcid == 0)
			lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
		_free_locale(loct);

		/* Build "<ISO 639 language>_<ISO 3166 country>" from the LCID */
		if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
			return NULL;
		if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
			return NULL;
		snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
#endif
		return iso_lc_messages;
	}
	/* _create_locale() did not recognize the name */
	return NULL;
#endif							/* Visual Studio 2015 or later */
#else
	return NULL;				/* Not supported on this version of msvc/mingw */
#endif							/* _MSC_VER >= 1400 */
}
1207 #endif							/* WIN32 && LC_MESSAGES */
1208 
1209 
1210 /*
1211  * Detect aging strxfrm() implementations that, in a subset of locales, write
1212  * past the specified buffer length.  Affected users must update OS packages
1213  * before using PostgreSQL 9.5 or later.
1214  *
1215  * Assume that the bug can come and go from one postmaster startup to another
1216  * due to physical replication among diverse machines.  Assume that the bug's
1217  * presence will not change during the life of a particular postmaster.  Given
1218  * those assumptions, call this no less than once per postmaster startup per
1219  * LC_COLLATE setting used.  No known-affected system offers strxfrm_l(), so
1220  * there is no need to consider pg_collation locales.
1221  */
1222 void
check_strxfrm_bug(void)1223 check_strxfrm_bug(void)
1224 {
1225 	char		buf[32];
1226 	const int	canary = 0x7F;
1227 	bool		ok = true;
1228 
1229 	/*
1230 	 * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1231 	 * 05/08 returns 18 and modifies 10 bytes.  It respects limits above or
1232 	 * below that range.
1233 	 *
1234 	 * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1235 	 * 01/13 and Solaris 11.2.  Affected locales include is_IS.ISO8859-1,
1236 	 * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R.  Unaffected locales
1237 	 * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1238 	 */
1239 	buf[7] = canary;
1240 	(void) strxfrm(buf, "ab", 7);
1241 	if (buf[7] != canary)
1242 		ok = false;
1243 
1244 	/*
1245 	 * illumos bug #1594 was present in the source tree from 2010-10-11 to
1246 	 * 2012-02-01.  Given an ASCII string of any length and length limit 1,
1247 	 * affected systems ignore the length limit and modify a number of bytes
1248 	 * one less than the return value.  The problem inputs for this bug do not
1249 	 * overlap those for the Solaris bug, hence a distinct test.
1250 	 *
1251 	 * Affected systems include smartos-20110926T021612Z.  Affected locales
1252 	 * include en_US.ISO8859-1 and en_US.UTF-8.  Unaffected locales include C.
1253 	 */
1254 	buf[1] = canary;
1255 	(void) strxfrm(buf, "a", 1);
1256 	if (buf[1] != canary)
1257 		ok = false;
1258 
1259 	if (!ok)
1260 		ereport(ERROR,
1261 				(errcode(ERRCODE_SYSTEM_ERROR),
1262 				 errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1263 								 setlocale(LC_COLLATE, NULL)),
1264 				 errhint("Apply system library package updates.")));
1265 }
1266 
1267 
1268 /*
1269  * Cache mechanism for collation information.
1270  *
1271  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1272  * (or POSIX), so we can optimize a few code paths in various places.
1273  * For the built-in C and POSIX collations, we can know that without even
1274  * doing a cache lookup, but we want to support aliases for C/POSIX too.
1275  * For the "default" collation, there are separate static cache variables,
1276  * since consulting the pg_collation catalog doesn't tell us what we need.
1277  *
1278  * Also, if a pg_locale_t has been requested for a collation, we cache that
1279  * for the life of a backend.
1280  *
1281  * Note that some code relies on the flags not reporting false negatives
1282  * (that is, saying it's not C when it is).  For example, char2wchar()
1283  * could fail if the locale is C, so str_tolower() shouldn't call it
1284  * in that case.
1285  *
1286  * Note that we currently lack any way to flush the cache.  Since we don't
1287  * support ALTER COLLATION, this is OK.  The worst case is that someone
1288  * drops a collation, and a useless cache entry hangs around in existing
1289  * backends.
1290  */
1291 
1292 static collation_cache_entry *
lookup_collation_cache(Oid collation,bool set_flags)1293 lookup_collation_cache(Oid collation, bool set_flags)
1294 {
1295 	collation_cache_entry *cache_entry;
1296 	bool		found;
1297 
1298 	Assert(OidIsValid(collation));
1299 	Assert(collation != DEFAULT_COLLATION_OID);
1300 
1301 	if (collation_cache == NULL)
1302 	{
1303 		/* First time through, initialize the hash table */
1304 		HASHCTL		ctl;
1305 
1306 		memset(&ctl, 0, sizeof(ctl));
1307 		ctl.keysize = sizeof(Oid);
1308 		ctl.entrysize = sizeof(collation_cache_entry);
1309 		collation_cache = hash_create("Collation cache", 100, &ctl,
1310 									  HASH_ELEM | HASH_BLOBS);
1311 	}
1312 
1313 	cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1314 	if (!found)
1315 	{
1316 		/*
1317 		 * Make sure cache entry is marked invalid, in case we fail before
1318 		 * setting things.
1319 		 */
1320 		cache_entry->flags_valid = false;
1321 		cache_entry->locale = 0;
1322 	}
1323 
1324 	if (set_flags && !cache_entry->flags_valid)
1325 	{
1326 		/* Attempt to set the flags */
1327 		HeapTuple	tp;
1328 		Form_pg_collation collform;
1329 		const char *collcollate;
1330 		const char *collctype;
1331 
1332 		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1333 		if (!HeapTupleIsValid(tp))
1334 			elog(ERROR, "cache lookup failed for collation %u", collation);
1335 		collform = (Form_pg_collation) GETSTRUCT(tp);
1336 
1337 		collcollate = NameStr(collform->collcollate);
1338 		collctype = NameStr(collform->collctype);
1339 
1340 		cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1341 									 (strcmp(collcollate, "POSIX") == 0));
1342 		cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1343 								   (strcmp(collctype, "POSIX") == 0));
1344 
1345 		cache_entry->flags_valid = true;
1346 
1347 		ReleaseSysCache(tp);
1348 	}
1349 
1350 	return cache_entry;
1351 }
1352 
1353 
1354 /*
1355  * Detect whether collation's LC_COLLATE property is C
1356  */
1357 bool
lc_collate_is_c(Oid collation)1358 lc_collate_is_c(Oid collation)
1359 {
1360 	/*
1361 	 * If we're asked about "collation 0", return false, so that the code will
1362 	 * go into the non-C path and report that the collation is bogus.
1363 	 */
1364 	if (!OidIsValid(collation))
1365 		return false;
1366 
1367 	/*
1368 	 * If we're asked about the default collation, we have to inquire of the C
1369 	 * library.  Cache the result so we only have to compute it once.
1370 	 */
1371 	if (collation == DEFAULT_COLLATION_OID)
1372 	{
1373 		static int	result = -1;
1374 		char	   *localeptr;
1375 
1376 		if (result >= 0)
1377 			return (bool) result;
1378 		localeptr = setlocale(LC_COLLATE, NULL);
1379 		if (!localeptr)
1380 			elog(ERROR, "invalid LC_COLLATE setting");
1381 
1382 		if (strcmp(localeptr, "C") == 0)
1383 			result = true;
1384 		else if (strcmp(localeptr, "POSIX") == 0)
1385 			result = true;
1386 		else
1387 			result = false;
1388 		return (bool) result;
1389 	}
1390 
1391 	/*
1392 	 * If we're asked about the built-in C/POSIX collations, we know that.
1393 	 */
1394 	if (collation == C_COLLATION_OID ||
1395 		collation == POSIX_COLLATION_OID)
1396 		return true;
1397 
1398 	/*
1399 	 * Otherwise, we have to consult pg_collation, but we cache that.
1400 	 */
1401 	return (lookup_collation_cache(collation, true))->collate_is_c;
1402 }
1403 
1404 /*
1405  * Detect whether collation's LC_CTYPE property is C
1406  */
1407 bool
lc_ctype_is_c(Oid collation)1408 lc_ctype_is_c(Oid collation)
1409 {
1410 	/*
1411 	 * If we're asked about "collation 0", return false, so that the code will
1412 	 * go into the non-C path and report that the collation is bogus.
1413 	 */
1414 	if (!OidIsValid(collation))
1415 		return false;
1416 
1417 	/*
1418 	 * If we're asked about the default collation, we have to inquire of the C
1419 	 * library.  Cache the result so we only have to compute it once.
1420 	 */
1421 	if (collation == DEFAULT_COLLATION_OID)
1422 	{
1423 		static int	result = -1;
1424 		char	   *localeptr;
1425 
1426 		if (result >= 0)
1427 			return (bool) result;
1428 		localeptr = setlocale(LC_CTYPE, NULL);
1429 		if (!localeptr)
1430 			elog(ERROR, "invalid LC_CTYPE setting");
1431 
1432 		if (strcmp(localeptr, "C") == 0)
1433 			result = true;
1434 		else if (strcmp(localeptr, "POSIX") == 0)
1435 			result = true;
1436 		else
1437 			result = false;
1438 		return (bool) result;
1439 	}
1440 
1441 	/*
1442 	 * If we're asked about the built-in C/POSIX collations, we know that.
1443 	 */
1444 	if (collation == C_COLLATION_OID ||
1445 		collation == POSIX_COLLATION_OID)
1446 		return true;
1447 
1448 	/*
1449 	 * Otherwise, we have to consult pg_collation, but we cache that.
1450 	 */
1451 	return (lookup_collation_cache(collation, true))->ctype_is_c;
1452 }
1453 
1454 
1455 /* simple subroutine for reporting errors from newlocale() */
1456 #ifdef HAVE_LOCALE_T
1457 static void
report_newlocale_failure(const char * localename)1458 report_newlocale_failure(const char *localename)
1459 {
1460 	int			save_errno;
1461 
1462 	/*
1463 	 * Windows doesn't provide any useful error indication from
1464 	 * _create_locale(), and BSD-derived platforms don't seem to feel they
1465 	 * need to set errno either (even though POSIX is pretty clear that
1466 	 * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
1467 	 * is what to report.
1468 	 */
1469 	if (errno == 0)
1470 		errno = ENOENT;
1471 
1472 	/*
1473 	 * ENOENT means "no such locale", not "no such file", so clarify that
1474 	 * errno with an errdetail message.
1475 	 */
1476 	save_errno = errno;			/* auxiliary funcs might change errno */
1477 	ereport(ERROR,
1478 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1479 			 errmsg("could not create locale \"%s\": %m",
1480 					localename),
1481 			 (save_errno == ENOENT ?
1482 			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1483 						localename) : 0)));
1484 }
1485 #endif							/* HAVE_LOCALE_T */
1486 
1487 
1488 /*
1489  * Create a locale_t from a collation OID.  Results are cached for the
1490  * lifetime of the backend.  Thus, do not free the result with freelocale().
1491  *
1492  * As a special optimization, the default/database collation returns 0.
1493  * Callers should then revert to the non-locale_t-enabled code path.
1494  * In fact, they shouldn't call this function at all when they are dealing
1495  * with the default locale.  That can save quite a bit in hotspots.
1496  * Also, callers should avoid calling this before going down a C/POSIX
1497  * fastpath, because such a fastpath should work even on platforms without
1498  * locale_t support in the C library.
1499  *
1500  * For simplicity, we always generate COLLATE + CTYPE even though we
1501  * might only need one of them.  Since this is called only once per session,
1502  * it shouldn't cost much.
1503  */
1504 pg_locale_t
pg_newlocale_from_collation(Oid collid)1505 pg_newlocale_from_collation(Oid collid)
1506 {
1507 	collation_cache_entry *cache_entry;
1508 
1509 	/* Callers must pass a valid OID */
1510 	Assert(OidIsValid(collid));
1511 
1512 	/* Return 0 for "default" collation, just in case caller forgets */
1513 	if (collid == DEFAULT_COLLATION_OID)
1514 		return (pg_locale_t) 0;
1515 
1516 	cache_entry = lookup_collation_cache(collid, false);
1517 
1518 	if (cache_entry->locale == 0)
1519 	{
1520 		/* We haven't computed this yet in this session, so do it */
1521 		HeapTuple	tp;
1522 		Form_pg_collation collform;
1523 		const char *collcollate;
1524 		const char *collctype pg_attribute_unused();
1525 		struct pg_locale_struct result;
1526 		pg_locale_t resultp;
1527 		Datum		collversion;
1528 		bool		isnull;
1529 
1530 		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1531 		if (!HeapTupleIsValid(tp))
1532 			elog(ERROR, "cache lookup failed for collation %u", collid);
1533 		collform = (Form_pg_collation) GETSTRUCT(tp);
1534 
1535 		collcollate = NameStr(collform->collcollate);
1536 		collctype = NameStr(collform->collctype);
1537 
1538 		/* We'll fill in the result struct locally before allocating memory */
1539 		memset(&result, 0, sizeof(result));
1540 		result.provider = collform->collprovider;
1541 
1542 		if (collform->collprovider == COLLPROVIDER_LIBC)
1543 		{
1544 #ifdef HAVE_LOCALE_T
1545 			locale_t	loc;
1546 
1547 			if (strcmp(collcollate, collctype) == 0)
1548 			{
1549 				/* Normal case where they're the same */
1550 				errno = 0;
1551 #ifndef WIN32
1552 				loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1553 								NULL);
1554 #else
1555 				loc = _create_locale(LC_ALL, collcollate);
1556 #endif
1557 				if (!loc)
1558 					report_newlocale_failure(collcollate);
1559 			}
1560 			else
1561 			{
1562 #ifndef WIN32
1563 				/* We need two newlocale() steps */
1564 				locale_t	loc1;
1565 
1566 				errno = 0;
1567 				loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1568 				if (!loc1)
1569 					report_newlocale_failure(collcollate);
1570 				errno = 0;
1571 				loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1572 				if (!loc)
1573 					report_newlocale_failure(collctype);
1574 #else
1575 
1576 				/*
1577 				 * XXX The _create_locale() API doesn't appear to support
1578 				 * this. Could perhaps be worked around by changing
1579 				 * pg_locale_t to contain two separate fields.
1580 				 */
1581 				ereport(ERROR,
1582 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1583 						 errmsg("collations with different collate and ctype values are not supported on this platform")));
1584 #endif
1585 			}
1586 
1587 			result.info.lt = loc;
1588 #else							/* not HAVE_LOCALE_T */
1589 			/* platform that doesn't support locale_t */
1590 			ereport(ERROR,
1591 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1592 					 errmsg("collation provider LIBC is not supported on this platform")));
1593 #endif							/* not HAVE_LOCALE_T */
1594 		}
1595 		else if (collform->collprovider == COLLPROVIDER_ICU)
1596 		{
1597 #ifdef USE_ICU
1598 			UCollator  *collator;
1599 			UErrorCode	status;
1600 
1601 			if (strcmp(collcollate, collctype) != 0)
1602 				ereport(ERROR,
1603 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1604 						 errmsg("collations with different collate and ctype values are not supported by ICU")));
1605 
1606 			status = U_ZERO_ERROR;
1607 			collator = ucol_open(collcollate, &status);
1608 			if (U_FAILURE(status))
1609 				ereport(ERROR,
1610 						(errmsg("could not open collator for locale \"%s\": %s",
1611 								collcollate, u_errorName(status))));
1612 
1613 			/* We will leak this string if we get an error below :-( */
1614 			result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1615 														 collcollate);
1616 			result.info.icu.ucol = collator;
1617 #else							/* not USE_ICU */
1618 			/* could get here if a collation was created by a build with ICU */
1619 			ereport(ERROR,
1620 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1621 					 errmsg("ICU is not supported in this build"), \
1622 					 errhint("You need to rebuild PostgreSQL using --with-icu.")));
1623 #endif							/* not USE_ICU */
1624 		}
1625 
1626 		collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1627 									  &isnull);
1628 		if (!isnull)
1629 		{
1630 			char	   *actual_versionstr;
1631 			char	   *collversionstr;
1632 
1633 			actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1634 			if (!actual_versionstr)
1635 			{
1636 				/*
1637 				 * This could happen when specifying a version in CREATE
1638 				 * COLLATION for a libc locale, or manually creating a mess in
1639 				 * the catalogs.
1640 				 */
1641 				ereport(ERROR,
1642 						(errmsg("collation \"%s\" has no actual version, but a version was specified",
1643 								NameStr(collform->collname))));
1644 			}
1645 			collversionstr = TextDatumGetCString(collversion);
1646 
1647 			if (strcmp(actual_versionstr, collversionstr) != 0)
1648 				ereport(WARNING,
1649 						(errmsg("collation \"%s\" has version mismatch",
1650 								NameStr(collform->collname)),
1651 						 errdetail("The collation in the database was created using version %s, "
1652 								   "but the operating system provides version %s.",
1653 								   collversionstr, actual_versionstr),
1654 						 errhint("Rebuild all objects affected by this collation and run "
1655 								 "ALTER COLLATION %s REFRESH VERSION, "
1656 								 "or build PostgreSQL with the right library version.",
1657 								 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1658 															NameStr(collform->collname)))));
1659 		}
1660 
1661 		ReleaseSysCache(tp);
1662 
1663 		/* We'll keep the pg_locale_t structures in TopMemoryContext */
1664 		resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1665 		*resultp = result;
1666 
1667 		cache_entry->locale = resultp;
1668 	}
1669 
1670 	return cache_entry->locale;
1671 }
1672 
1673 /*
1674  * Get provider-specific collation version string for the given collation from
1675  * the operating system/library.
1676  *
1677  * A particular provider must always either return a non-NULL string or return
1678  * NULL (if it doesn't support versions).  It must not return NULL for some
1679  * collcollate and not NULL for others.
1680  */
char *
get_collation_actual_version(char collprovider, const char *collcollate)
{
#ifdef USE_ICU
	if (collprovider == COLLPROVIDER_ICU)
	{
		UCollator  *ucol;
		UErrorCode	err;
		UVersionInfo version;
		char		versionstr[U_MAX_VERSION_STRING_LENGTH];

		/* Open a collator just long enough to read its version */
		err = U_ZERO_ERROR;
		ucol = ucol_open(collcollate, &err);
		if (U_FAILURE(err))
			ereport(ERROR,
					(errmsg("could not open collator for locale \"%s\": %s",
							collcollate, u_errorName(err))));
		ucol_getVersion(ucol, version);
		ucol_close(ucol);

		/* Hand back a palloc'd copy of the textual form */
		u_versionToString(version, versionstr);
		return pstrdup(versionstr);
	}
#endif

	/* Any other provider has no version to report */
	return NULL;
}
1712 
1713 
1714 #ifdef USE_ICU
1715 /*
1716  * Converter object for converting between ICU's UChar strings and C strings
1717  * in database encoding.  Since the database encoding doesn't change, we only
1718  * need one of these per session.
1719  */
1720 static UConverter *icu_converter = NULL;
1721 
1722 static void
init_icu_converter(void)1723 init_icu_converter(void)
1724 {
1725 	const char *icu_encoding_name;
1726 	UErrorCode	status;
1727 	UConverter *conv;
1728 
1729 	if (icu_converter)
1730 		return;
1731 
1732 	icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1733 
1734 	status = U_ZERO_ERROR;
1735 	conv = ucnv_open(icu_encoding_name, &status);
1736 	if (U_FAILURE(status))
1737 		ereport(ERROR,
1738 				(errmsg("could not open ICU converter for encoding \"%s\": %s",
1739 						icu_encoding_name, u_errorName(status))));
1740 
1741 	icu_converter = conv;
1742 }
1743 
1744 /*
1745  * Convert a string in the database encoding into a string of UChars.
1746  *
1747  * The source string at buff is of length nbytes
1748  * (it needn't be nul-terminated)
1749  *
1750  * *buff_uchar receives a pointer to the palloc'd result string, and
1751  * the function's result is the number of UChars generated.
1752  *
1753  * The result string is nul-terminated, though most callers rely on the
1754  * result length instead.
1755  */
1756 int32_t
icu_to_uchar(UChar ** buff_uchar,const char * buff,size_t nbytes)1757 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1758 {
1759 	UErrorCode	status;
1760 	int32_t		len_uchar;
1761 
1762 	init_icu_converter();
1763 
1764 	status = U_ZERO_ERROR;
1765 	len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1766 							  buff, nbytes, &status);
1767 	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1768 		ereport(ERROR,
1769 				(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1770 
1771 	*buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1772 
1773 	status = U_ZERO_ERROR;
1774 	len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1775 							  buff, nbytes, &status);
1776 	if (U_FAILURE(status))
1777 		ereport(ERROR,
1778 				(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1779 
1780 	return len_uchar;
1781 }
1782 
1783 /*
1784  * Convert a string of UChars into the database encoding.
1785  *
1786  * The source string at buff_uchar is of length len_uchar
1787  * (it needn't be nul-terminated)
1788  *
1789  * *result receives a pointer to the palloc'd result string, and the
1790  * function's result is the number of bytes generated (not counting nul).
1791  *
1792  * The result string is nul-terminated.
1793  */
1794 int32_t
icu_from_uchar(char ** result,const UChar * buff_uchar,int32_t len_uchar)1795 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1796 {
1797 	UErrorCode	status;
1798 	int32_t		len_result;
1799 
1800 	init_icu_converter();
1801 
1802 	status = U_ZERO_ERROR;
1803 	len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1804 								 buff_uchar, len_uchar, &status);
1805 	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1806 		ereport(ERROR,
1807 				(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1808 
1809 	*result = palloc(len_result + 1);
1810 
1811 	status = U_ZERO_ERROR;
1812 	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1813 								 buff_uchar, len_uchar, &status);
1814 	if (U_FAILURE(status))
1815 		ereport(ERROR,
1816 				(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1817 
1818 	return len_result;
1819 }
1820 #endif							/* USE_ICU */
1821 
1822 /*
1823  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1824  * Therefore we keep them here rather than with the mbutils code.
1825  */
1826 
1827 #ifdef USE_WIDE_UPPER_LOWER
1828 
1829 /*
1830  * wchar2char --- convert wide characters to multibyte format
1831  *
1832  * This has the same API as the standard wcstombs_l() function; in particular,
1833  * tolen is the maximum number of bytes to store at *to, and *from must be
1834  * zero-terminated.  The output will be zero-terminated iff there is room.
1835  */
1836 size_t
wchar2char(char * to,const wchar_t * from,size_t tolen,pg_locale_t locale)1837 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1838 {
1839 	size_t		result;
1840 
1841 	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1842 
1843 	if (tolen == 0)
1844 		return 0;
1845 
1846 #ifdef WIN32
1847 
1848 	/*
1849 	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1850 	 * for some reason mbstowcs and wcstombs won't do this for us, so we use
1851 	 * MultiByteToWideChar().
1852 	 */
1853 	if (GetDatabaseEncoding() == PG_UTF8)
1854 	{
1855 		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1856 									 NULL, NULL);
1857 		/* A zero return is failure */
1858 		if (result <= 0)
1859 			result = -1;
1860 		else
1861 		{
1862 			Assert(result <= tolen);
1863 			/* Microsoft counts the zero terminator in the result */
1864 			result--;
1865 		}
1866 	}
1867 	else
1868 #endif							/* WIN32 */
1869 	if (locale == (pg_locale_t) 0)
1870 	{
1871 		/* Use wcstombs directly for the default locale */
1872 		result = wcstombs(to, from, tolen);
1873 	}
1874 	else
1875 	{
1876 #ifdef HAVE_LOCALE_T
1877 #ifdef HAVE_WCSTOMBS_L
1878 		/* Use wcstombs_l for nondefault locales */
1879 		result = wcstombs_l(to, from, tolen, locale->info.lt);
1880 #else							/* !HAVE_WCSTOMBS_L */
1881 		/* We have to temporarily set the locale as current ... ugh */
1882 		locale_t	save_locale = uselocale(locale->info.lt);
1883 
1884 		result = wcstombs(to, from, tolen);
1885 
1886 		uselocale(save_locale);
1887 #endif							/* HAVE_WCSTOMBS_L */
1888 #else							/* !HAVE_LOCALE_T */
1889 		/* Can't have locale != 0 without HAVE_LOCALE_T */
1890 		elog(ERROR, "wcstombs_l is not available");
1891 		result = 0;				/* keep compiler quiet */
1892 #endif							/* HAVE_LOCALE_T */
1893 	}
1894 
1895 	return result;
1896 }
1897 
1898 /*
1899  * char2wchar --- convert multibyte characters to wide characters
1900  *
1901  * This has almost the API of mbstowcs_l(), except that *from need not be
1902  * null-terminated; instead, the number of input bytes is specified as
1903  * fromlen.  Also, we ereport() rather than returning -1 for invalid
1904  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
1905  * The output will be zero-terminated iff there is room.
1906  */
1907 size_t
char2wchar(wchar_t * to,size_t tolen,const char * from,size_t fromlen,pg_locale_t locale)1908 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1909 		   pg_locale_t locale)
1910 {
1911 	size_t		result;
1912 
1913 	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1914 
1915 	if (tolen == 0)
1916 		return 0;
1917 
1918 #ifdef WIN32
1919 	/* See WIN32 "Unicode" comment above */
1920 	if (GetDatabaseEncoding() == PG_UTF8)
1921 	{
1922 		/* Win32 API does not work for zero-length input */
1923 		if (fromlen == 0)
1924 			result = 0;
1925 		else
1926 		{
1927 			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1928 			/* A zero return is failure */
1929 			if (result == 0)
1930 				result = -1;
1931 		}
1932 
1933 		if (result != -1)
1934 		{
1935 			Assert(result < tolen);
1936 			/* Append trailing null wchar (MultiByteToWideChar() does not) */
1937 			to[result] = 0;
1938 		}
1939 	}
1940 	else
1941 #endif							/* WIN32 */
1942 	{
1943 		/* mbstowcs requires ending '\0' */
1944 		char	   *str = pnstrdup(from, fromlen);
1945 
1946 		if (locale == (pg_locale_t) 0)
1947 		{
1948 			/* Use mbstowcs directly for the default locale */
1949 			result = mbstowcs(to, str, tolen);
1950 		}
1951 		else
1952 		{
1953 #ifdef HAVE_LOCALE_T
1954 #ifdef HAVE_MBSTOWCS_L
1955 			/* Use mbstowcs_l for nondefault locales */
1956 			result = mbstowcs_l(to, str, tolen, locale->info.lt);
1957 #else							/* !HAVE_MBSTOWCS_L */
1958 			/* We have to temporarily set the locale as current ... ugh */
1959 			locale_t	save_locale = uselocale(locale->info.lt);
1960 
1961 			result = mbstowcs(to, str, tolen);
1962 
1963 			uselocale(save_locale);
1964 #endif							/* HAVE_MBSTOWCS_L */
1965 #else							/* !HAVE_LOCALE_T */
1966 			/* Can't have locale != 0 without HAVE_LOCALE_T */
1967 			elog(ERROR, "mbstowcs_l is not available");
1968 			result = 0;			/* keep compiler quiet */
1969 #endif							/* HAVE_LOCALE_T */
1970 		}
1971 
1972 		pfree(str);
1973 	}
1974 
1975 	if (result == -1)
1976 	{
1977 		/*
1978 		 * Invalid multibyte character encountered.  We try to give a useful
1979 		 * error message by letting pg_verifymbstr check the string.  But it's
1980 		 * possible that the string is OK to us, and not OK to mbstowcs ---
1981 		 * this suggests that the LC_CTYPE locale is different from the
1982 		 * database encoding.  Give a generic error message if verifymbstr
1983 		 * can't find anything wrong.
1984 		 */
1985 		pg_verifymbstr(from, fromlen, false);	/* might not return */
1986 		/* but if it does ... */
1987 		ereport(ERROR,
1988 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1989 				 errmsg("invalid multibyte character for locale"),
1990 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1991 	}
1992 
1993 	return result;
1994 }
1995 
1996 #endif							/* USE_WIDE_UPPER_LOWER */
1997