1 /*-----------------------------------------------------------------------
2  *
3  * PostgreSQL locale utilities
4  *
5  * Portions Copyright (c) 2002-2018, PostgreSQL Global Development Group
6  *
7  * src/backend/utils/adt/pg_locale.c
8  *
9  *-----------------------------------------------------------------------
10  */
11 
12 /*----------
13  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16  * toupper(), etc. are always in the same fixed locale.
17  *
18  * LC_MESSAGES is settable at run time and will take effect
19  * immediately.
20  *
21  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22  * settable at run-time.  However, we don't actually set those locale
23  * categories permanently.  This would have bizarre effects like no
24  * longer accepting standard floating-point literals in some locales.
25  * Instead, we only set these locale categories briefly when needed,
26  * cache the required information obtained from localeconv() or
27  * strftime(), and then set the locale categories back to "C".
28  * The cached information is only used by the formatting functions
29  * (to_char, etc.) and the money type.  For the user, this should all be
30  * transparent.
31  *
32  * !!! NOW HEAR THIS !!!
33  *
34  * We've been bitten repeatedly by this bug, so let's try to keep it in
35  * mind in future: on some platforms, the locale functions return pointers
36  * to static data that will be overwritten by any later locale function.
37  * Thus, for example, the obvious-looking sequence
38  *			save = setlocale(category, NULL);
39  *			if (!setlocale(category, value))
40  *				fail = true;
41  *			setlocale(category, save);
42  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43  * will change the memory save is pointing at.  To do this sort of thing
44  * safely, you *must* pstrdup what setlocale returns the first time.
45  *
46  * The POSIX locale standard is available here:
47  *
48  *	http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49  *----------
50  */
51 
52 
53 #include "postgres.h"
54 
55 #include <time.h>
56 
57 #include "access/htup_details.h"
58 #include "catalog/pg_collation.h"
59 #include "catalog/pg_control.h"
60 #include "mb/pg_wchar.h"
61 #include "utils/builtins.h"
62 #include "utils/hsearch.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_locale.h"
66 #include "utils/syscache.h"
67 
68 #ifdef USE_ICU
69 #include <unicode/ucnv.h>
70 #endif
71 
#ifdef WIN32
/*
 * This Windows file defines StrNCpy.  We don't need it here, so we undefine
 * our own definition first to keep the compiler quiet about a redefinition,
 * and then undefine the Windows one again after the file is included, so we
 * don't accidentally use theirs.
 *
 * (The second #undef previously named "STrNCpy", which is a different
 * identifier, so the Windows macro was silently left in place.)
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
84 
/*
 * Buffer space reserved for each individual strftime() result (one localized
 * day or month name) collected by cache_locale_time().
 */
#define		MAX_L10N_DATA		80


/*
 * GUC settings: the locale names currently selected for the categories that
 * can be changed at run time.  locale_numeric and locale_monetary are read by
 * PGLC_localeconv(); locale_time is read by cache_locale_time().
 */
char	   *locale_messages;
char	   *locale_monetary;
char	   *locale_numeric;
char	   *locale_time;

/*
 * lc_time localization cache: day and month names, filled in by
 * cache_locale_time() (via cache_single_string(), which stores each string in
 * TopMemoryContext after converting it to the database encoding).
 */
char	   *localized_abbrev_days[7];
char	   *localized_full_days[7];
char	   *localized_abbrev_months[12];
char	   *localized_full_months[12];

/*
 * indicates whether locale information cache is valid
 *
 * CurrentLocaleConvValid covers the struct returned by PGLC_localeconv();
 * CurrentLCTimeValid covers the localized_* arrays above.  The GUC assign
 * hooks reset these to false to force a rebuild on next use.
 */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;

/*
 * Environment variable storage area
 *
 * pg_perm_setlocale() formats "LC_XXX=value" into the buffer belonging to
 * the category and hands that buffer to putenv(); one static buffer per
 * category keeps each string alive after the call returns.
 */

#define LC_ENV_BUFSIZE (NAMEDATALEN + 20)

static char lc_collate_envbuf[LC_ENV_BUFSIZE];
static char lc_ctype_envbuf[LC_ENV_BUFSIZE];

#ifdef LC_MESSAGES
static char lc_messages_envbuf[LC_ENV_BUFSIZE];
#endif
static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
static char lc_time_envbuf[LC_ENV_BUFSIZE];
117 
/* Cache for collation-related knowledge */

/*
 * One entry per collation, keyed by the collation's OID.  The two "is C"
 * flags are only meaningful once flags_valid is true.
 */
typedef struct
{
	Oid			collid;			/* hash key: pg_collation OID */
	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
	bool		flags_valid;	/* true if above flags are valid */
	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
} collation_cache_entry;

/*
 * Hash table of collation_cache_entry; starts out NULL (the code that
 * creates it is not in this part of the file).
 */
static HTAB *collation_cache = NULL;


/*
 * Forward declaration; only needed (and only declared) on Windows builds
 * that have LC_MESSAGES, where pg_perm_setlocale() calls it.
 */
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *);	/* MSVC specific */
#endif
135 
136 
137 /*
138  * pg_perm_setlocale
139  *
140  * This wraps the libc function setlocale(), with two additions.  First, when
141  * changing LC_CTYPE, update gettext's encoding for the current message
142  * domain.  GNU gettext automatically tracks LC_CTYPE on most platforms, but
143  * not on Windows.  Second, if the operation is successful, the corresponding
144  * LC_XXX environment variable is set to match.  By setting the environment
145  * variable, we ensure that any subsequent use of setlocale(..., "") will
146  * preserve the settings made through this routine.  Of course, LC_ALL must
147  * also be unset to fully ensure that, but that has to be done elsewhere after
148  * all the individual LC_XXX variables have been set correctly.  (Thank you
149  * Perl for making this kluge necessary.)
150  */
151 char *
pg_perm_setlocale(int category,const char * locale)152 pg_perm_setlocale(int category, const char *locale)
153 {
154 	char	   *result;
155 	const char *envvar;
156 	char	   *envbuf;
157 
158 #ifndef WIN32
159 	result = setlocale(category, locale);
160 #else
161 
162 	/*
163 	 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
164 	 * the given value is good and set it in the environment variables. We
165 	 * must ignore attempts to set to "", which means "keep using the old
166 	 * environment value".
167 	 */
168 #ifdef LC_MESSAGES
169 	if (category == LC_MESSAGES)
170 	{
171 		result = (char *) locale;
172 		if (locale == NULL || locale[0] == '\0')
173 			return result;
174 	}
175 	else
176 #endif
177 		result = setlocale(category, locale);
178 #endif							/* WIN32 */
179 
180 	if (result == NULL)
181 		return result;			/* fall out immediately on failure */
182 
183 	/*
184 	 * Use the right encoding in translated messages.  Under ENABLE_NLS, let
185 	 * pg_bind_textdomain_codeset() figure it out.  Under !ENABLE_NLS, message
186 	 * format strings are ASCII, but database-encoding strings may enter the
187 	 * message via %s.  This makes the overall message encoding equal to the
188 	 * database encoding.
189 	 */
190 	if (category == LC_CTYPE)
191 	{
192 		static char save_lc_ctype[LC_ENV_BUFSIZE];
193 
194 		/* copy setlocale() return value before callee invokes it again */
195 		strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
196 		result = save_lc_ctype;
197 
198 #ifdef ENABLE_NLS
199 		SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
200 #else
201 		SetMessageEncoding(GetDatabaseEncoding());
202 #endif
203 	}
204 
205 	switch (category)
206 	{
207 		case LC_COLLATE:
208 			envvar = "LC_COLLATE";
209 			envbuf = lc_collate_envbuf;
210 			break;
211 		case LC_CTYPE:
212 			envvar = "LC_CTYPE";
213 			envbuf = lc_ctype_envbuf;
214 			break;
215 #ifdef LC_MESSAGES
216 		case LC_MESSAGES:
217 			envvar = "LC_MESSAGES";
218 			envbuf = lc_messages_envbuf;
219 #ifdef WIN32
220 			result = IsoLocaleName(locale);
221 			if (result == NULL)
222 				result = (char *) locale;
223 			elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
224 #endif							/* WIN32 */
225 			break;
226 #endif							/* LC_MESSAGES */
227 		case LC_MONETARY:
228 			envvar = "LC_MONETARY";
229 			envbuf = lc_monetary_envbuf;
230 			break;
231 		case LC_NUMERIC:
232 			envvar = "LC_NUMERIC";
233 			envbuf = lc_numeric_envbuf;
234 			break;
235 		case LC_TIME:
236 			envvar = "LC_TIME";
237 			envbuf = lc_time_envbuf;
238 			break;
239 		default:
240 			elog(FATAL, "unrecognized LC category: %d", category);
241 			envvar = NULL;		/* keep compiler quiet */
242 			envbuf = NULL;
243 			return NULL;
244 	}
245 
246 	snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
247 
248 	if (putenv(envbuf))
249 		return NULL;
250 
251 	return result;
252 }
253 
254 
255 /*
256  * Is the locale name valid for the locale category?
257  *
258  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
259  * canonical name is stored there.  This is especially useful for figuring out
260  * what locale name "" means (ie, the server environment value).  (Actually,
261  * it seems that on most implementations that's the only thing it's good for;
262  * we could wish that setlocale gave back a canonically spelled version of
263  * the locale name, but typically it doesn't.)
264  */
265 bool
check_locale(int category,const char * locale,char ** canonname)266 check_locale(int category, const char *locale, char **canonname)
267 {
268 	char	   *save;
269 	char	   *res;
270 
271 	if (canonname)
272 		*canonname = NULL;		/* in case of failure */
273 
274 	save = setlocale(category, NULL);
275 	if (!save)
276 		return false;			/* won't happen, we hope */
277 
278 	/* save may be pointing at a modifiable scratch variable, see above. */
279 	save = pstrdup(save);
280 
281 	/* set the locale with setlocale, to see if it accepts it. */
282 	res = setlocale(category, locale);
283 
284 	/* save canonical name if requested. */
285 	if (res && canonname)
286 		*canonname = pstrdup(res);
287 
288 	/* restore old value. */
289 	if (!setlocale(category, save))
290 		elog(WARNING, "failed to restore old locale \"%s\"", save);
291 	pfree(save);
292 
293 	return (res != NULL);
294 }
295 
296 
297 /*
298  * GUC check/assign hooks
299  *
300  * For most locale categories, the assign hook doesn't actually set the locale
301  * permanently, just reset flags so that the next use will cache the
302  * appropriate values.  (See explanation at the top of this file.)
303  *
304  * Note: we accept value = "" as selecting the postmaster's environment
305  * value, whatever it was (so long as the environment setting is legal).
306  * This will have been locked down by an earlier call to pg_perm_setlocale.
307  */
308 bool
check_locale_monetary(char ** newval,void ** extra,GucSource source)309 check_locale_monetary(char **newval, void **extra, GucSource source)
310 {
311 	return check_locale(LC_MONETARY, *newval, NULL);
312 }
313 
314 void
assign_locale_monetary(const char * newval,void * extra)315 assign_locale_monetary(const char *newval, void *extra)
316 {
317 	CurrentLocaleConvValid = false;
318 }
319 
320 bool
check_locale_numeric(char ** newval,void ** extra,GucSource source)321 check_locale_numeric(char **newval, void **extra, GucSource source)
322 {
323 	return check_locale(LC_NUMERIC, *newval, NULL);
324 }
325 
326 void
assign_locale_numeric(const char * newval,void * extra)327 assign_locale_numeric(const char *newval, void *extra)
328 {
329 	CurrentLocaleConvValid = false;
330 }
331 
332 bool
check_locale_time(char ** newval,void ** extra,GucSource source)333 check_locale_time(char **newval, void **extra, GucSource source)
334 {
335 	return check_locale(LC_TIME, *newval, NULL);
336 }
337 
338 void
assign_locale_time(const char * newval,void * extra)339 assign_locale_time(const char *newval, void *extra)
340 {
341 	CurrentLCTimeValid = false;
342 }
343 
344 /*
345  * We allow LC_MESSAGES to actually be set globally.
346  *
347  * Note: we normally disallow value = "" because it wouldn't have consistent
348  * semantics (it'd effectively just use the previous value).  However, this
349  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
350  * not even if the attempted setting fails due to invalid environment value.
351  * The idea there is just to accept the environment setting *if possible*
352  * during startup, until we can read the proper value from postgresql.conf.
353  */
354 bool
check_locale_messages(char ** newval,void ** extra,GucSource source)355 check_locale_messages(char **newval, void **extra, GucSource source)
356 {
357 	if (**newval == '\0')
358 	{
359 		if (source == PGC_S_DEFAULT)
360 			return true;
361 		else
362 			return false;
363 	}
364 
365 	/*
366 	 * LC_MESSAGES category does not exist everywhere, but accept it anyway
367 	 *
368 	 * On Windows, we can't even check the value, so accept blindly
369 	 */
370 #if defined(LC_MESSAGES) && !defined(WIN32)
371 	return check_locale(LC_MESSAGES, *newval, NULL);
372 #else
373 	return true;
374 #endif
375 }
376 
/*
 * Assign hook for LC_MESSAGES: apply the new value process-wide.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
#if defined(LC_MESSAGES)

	/*
	 * The result is deliberately ignored: the default (empty) value is
	 * accepted by the check hook even when the environment's setting is
	 * unusable, so failure here must not be treated as an error.
	 */
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#else
	/* No LC_MESSAGES category on this platform; accept and do nothing */
	(void) newval;
#endif
	(void) extra;
}
388 
389 
/*
 * Frees the malloced content of a struct lconv.  (But not the struct
 * itself.)  It's important that this not throw elog(ERROR).
 *
 * Every string field we manage is released and then reset to NULL, so the
 * struct never holds dangling pointers and calling this twice on the same
 * struct is harmless.  Fields that are already NULL are fine: free(NULL) is
 * a no-op.  The field list must match struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
#define RELEASE_LCONV_FIELD(fld) \
	do { \
		free(s->fld); \
		s->fld = NULL; \
	} while (0)

	/* numeric formatting */
	RELEASE_LCONV_FIELD(decimal_point);
	RELEASE_LCONV_FIELD(thousands_sep);
	RELEASE_LCONV_FIELD(grouping);

	/* monetary formatting */
	RELEASE_LCONV_FIELD(int_curr_symbol);
	RELEASE_LCONV_FIELD(currency_symbol);
	RELEASE_LCONV_FIELD(mon_decimal_point);
	RELEASE_LCONV_FIELD(mon_thousands_sep);
	RELEASE_LCONV_FIELD(mon_grouping);
	RELEASE_LCONV_FIELD(positive_sign);
	RELEASE_LCONV_FIELD(negative_sign);

#undef RELEASE_LCONV_FIELD
}
418 
/*
 * Report whether every string field of a struct lconv that we care about is
 * set (non-NULL).  Returns false as soon as one of them is missing.  The
 * set of fields examined must match free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
448 
449 
450 /*
451  * Convert the strdup'd string at *str from the specified encoding to the
452  * database encoding.
453  */
454 static void
db_encoding_convert(int encoding,char ** str)455 db_encoding_convert(int encoding, char **str)
456 {
457 	char	   *pstr;
458 	char	   *mstr;
459 
460 	/* convert the string to the database encoding */
461 	pstr = pg_any_to_server(*str, strlen(*str), encoding);
462 	if (pstr == *str)
463 		return;					/* no conversion happened */
464 
465 	/* need it malloc'd not palloc'd */
466 	mstr = strdup(pstr);
467 	if (mstr == NULL)
468 		ereport(ERROR,
469 				(errcode(ERRCODE_OUT_OF_MEMORY),
470 				 errmsg("out of memory")));
471 
472 	/* replace old string */
473 	free(*str);
474 	*str = mstr;
475 
476 	pfree(pstr);
477 }
478 
479 
/*
 * Return the POSIX lconv struct (contains number/money formatting
 * information) with locale information for all categories.
 *
 * The numeric fields are taken from the locale named by locale_numeric and
 * the monetary fields from the one named by locale_monetary; all text fields
 * are converted to the database encoding.
 *
 * The result points at a static struct that is cached until
 * CurrentLocaleConvValid is reset (the monetary/numeric assign hooks do
 * that).  The caller must not free or modify it.
 *
 * Raises ERROR if the current locale settings cannot be read, on out of
 * memory, or if encoding conversion fails; raises FATAL if the prevailing
 * locale settings cannot be restored.
 */
struct lconv *
PGLC_localeconv(void)
{
	static struct lconv CurrentLocaleConv;
	static bool CurrentLocaleConvAllocated = false;
	struct lconv *extlconv;
	struct lconv worklconv;
	char	   *save_lc_monetary;
	char	   *save_lc_numeric;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* Did we do it already? */
	if (CurrentLocaleConvValid)
		return &CurrentLocaleConv;

	/* Free any already-allocated storage */
	if (CurrentLocaleConvAllocated)
	{
		free_struct_lconv(&CurrentLocaleConv);
		/* clear the flag now, so an error below can't lead to a double free */
		CurrentLocaleConvAllocated = false;
	}

	/*
	 * This is tricky because we really don't want to risk throwing error
	 * while the locale is set to other than our usual settings.  Therefore,
	 * the process is: collect the usual settings, set locale to special
	 * setting, copy relevant data into worklconv using strdup(), restore
	 * normal settings, convert data to desired encoding, and finally stash
	 * the collected data in CurrentLocaleConv.  This makes it safe if we
	 * throw an error during encoding conversion or run out of memory anywhere
	 * in the process.  All data pointed to by struct lconv members is
	 * allocated with strdup, to avoid premature elog(ERROR) and to allow
	 * using a single cleanup routine.
	 */
	/* zero-fill so that any field not yet copied is NULL for cleanup/checks */
	memset(&worklconv, 0, sizeof(worklconv));

	/* Save prevailing values of monetary and numeric locales */
	save_lc_monetary = setlocale(LC_MONETARY, NULL);
	if (!save_lc_monetary)
		elog(ERROR, "setlocale(NULL) failed");
	/* copy it: a later setlocale() call may overwrite the returned string */
	save_lc_monetary = pstrdup(save_lc_monetary);

	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
	if (!save_lc_numeric)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_numeric = pstrdup(save_lc_numeric);

#ifdef WIN32

	/*
	 * The POSIX standard explicitly says that it is undefined what happens if
	 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
	 * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
	 * believe that localeconv() should return strings that are encoded in the
	 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
	 * once we have successfully collected the localeconv() results, we will
	 * convert them from that codeset to the desired server encoding.
	 *
	 * Windows, of course, resolutely does things its own way; on that
	 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
	 * results.  Hence, we must temporarily set that category as well.
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* Here begins the critical section where we must not throw error */

	/* use numeric to set the ctype */
	setlocale(LC_CTYPE, locale_numeric);
#endif

	/*
	 * From here until the locale settings are restored below, nothing may
	 * throw an error (on any platform): that is why plain strdup() is used
	 * and its results are only checked afterwards.
	 */

	/* Get formatting information for numeric */
	setlocale(LC_NUMERIC, locale_numeric);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.decimal_point = strdup(extlconv->decimal_point);
	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
	worklconv.grouping = strdup(extlconv->grouping);

#ifdef WIN32
	/* use monetary to set the ctype */
	setlocale(LC_CTYPE, locale_monetary);
#endif

	/* Get formatting information for monetary */
	setlocale(LC_MONETARY, locale_monetary);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
	worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
	worklconv.mon_grouping = strdup(extlconv->mon_grouping);
	worklconv.positive_sign = strdup(extlconv->positive_sign);
	worklconv.negative_sign = strdup(extlconv->negative_sign);
	/* Copy scalar fields as well */
	worklconv.int_frac_digits = extlconv->int_frac_digits;
	worklconv.frac_digits = extlconv->frac_digits;
	worklconv.p_cs_precedes = extlconv->p_cs_precedes;
	worklconv.p_sep_by_space = extlconv->p_sep_by_space;
	worklconv.n_cs_precedes = extlconv->n_cs_precedes;
	worklconv.n_sep_by_space = extlconv->n_sep_by_space;
	worklconv.p_sign_posn = extlconv->p_sign_posn;
	worklconv.n_sign_posn = extlconv->n_sign_posn;

	/*
	 * Restore the prevailing locale settings; failure to do so is fatal.
	 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
	 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
	 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
	 * are almost certainly "C", there's really no reason that restoring those
	 * should fail.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_MONETARY, save_lc_monetary))
		elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
	if (!setlocale(LC_NUMERIC, save_lc_numeric))
		elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);

	/*
	 * At this point we've done our best to clean up, and can call functions
	 * that might possibly throw errors with a clean conscience.  But let's
	 * make sure we don't leak any already-strdup'd fields in worklconv.
	 */
	PG_TRY();
	{
		int			encoding;

		/* Release the pstrdup'd locale names */
		pfree(save_lc_monetary);
		pfree(save_lc_numeric);
#ifdef WIN32
		pfree(save_lc_ctype);
#endif

		/* If any of the preceding strdup calls failed, complain now. */
		if (!struct_lconv_is_valid(&worklconv))
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));

		/*
		 * Now we must perform encoding conversion from whatever's associated
		 * with the locales into the database encoding.  If we can't identify
		 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
		 * use PG_SQL_ASCII, which will result in just validating that the
		 * strings are OK in the database encoding.
		 */
		encoding = pg_get_encoding_from_locale(locale_numeric, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.decimal_point);
		db_encoding_convert(encoding, &worklconv.thousands_sep);
		/* grouping is not text and does not require conversion */

		/* the monetary strings may be in a different encoding than numeric */
		encoding = pg_get_encoding_from_locale(locale_monetary, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.int_curr_symbol);
		db_encoding_convert(encoding, &worklconv.currency_symbol);
		db_encoding_convert(encoding, &worklconv.mon_decimal_point);
		db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
		/* mon_grouping is not text and does not require conversion */
		db_encoding_convert(encoding, &worklconv.positive_sign);
		db_encoding_convert(encoding, &worklconv.negative_sign);
	}
	PG_CATCH();
	{
		/* give back whatever was strdup'd, then let the error propagate */
		free_struct_lconv(&worklconv);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/*
	 * Everything is good, so save the results.
	 */
	/* struct copy: the static struct takes over ownership of the strings */
	CurrentLocaleConv = worklconv;
	CurrentLocaleConvAllocated = true;
	CurrentLocaleConvValid = true;
	return &CurrentLocaleConv;
}
678 
#ifdef WIN32
/*
 * Windows replacement for strftime(), producing UTF8 output.
 *
 * The native strftime() on Windows emits text in encoding CP_ACP (the
 * machine's default operating system codepage), which is likely different
 * from SERVER_ENCODING.  Japanese versions of Windows are the important
 * case: they use SJIS, which we don't support as a server encoding.
 *
 * We therefore call wcsftime() to get wide characters (internally UTF16)
 * and convert those to UTF8, which we know how to handle directly.
 *
 * Only the strftime() calls in this file are affected; they exist to fetch
 * locale-aware strings.  The rest of the backend uses pg_strftime(), which
 * isn't locale-aware and does not need to be replaced.
 *
 * Returns the number of bytes stored in dst, not counting the terminating
 * NUL, or 0 if wcsftime() failed (dst contents are then unspecified).
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		nconv;
	wchar_t		wide_format[8]; /* formats used below need 3 chars */
	wchar_t		wide_result[MAX_L10N_DATA];

	/*
	 * Widen the format string.  Every format used in this file is plain
	 * ASCII, so it is safe to declare it to be UTF8.
	 */
	nconv = MultiByteToWideChar(CP_UTF8, 0, format, -1,
								wide_format, lengthof(wide_format));
	if (nconv == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	/*
	 * Format into wide characters.  A zero result means wcsftime failed,
	 * possibly because the output would not fit in MAX_L10N_DATA; pass that
	 * on to the caller as-is.
	 */
	nconv = wcsftime(wide_result, MAX_L10N_DATA, wide_format, tm);
	if (nconv == 0)
		return 0;

	/* Narrow the result to UTF8, keeping one byte back for the terminator */
	nconv = WideCharToMultiByte(CP_UTF8, 0, wide_result, nconv, dst, dstlen - 1,
								NULL, NULL);
	if (nconv == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	dst[nconv] = '\0';

	return nconv;
}

/* From here on, strftime() in this file means the wrapper above */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
737 
738 /*
739  * Subroutine for cache_locale_time().
740  * Convert the given string from encoding "encoding" to the database
741  * encoding, and store the result at *dst, replacing any previous value.
742  */
743 static void
cache_single_string(char ** dst,const char * src,int encoding)744 cache_single_string(char **dst, const char *src, int encoding)
745 {
746 	char	   *ptr;
747 	char	   *olddst;
748 
749 	/* Convert the string to the database encoding, or validate it's OK */
750 	ptr = pg_any_to_server(src, strlen(src), encoding);
751 
752 	/* Store the string in long-lived storage, replacing any previous value */
753 	olddst = *dst;
754 	*dst = MemoryContextStrdup(TopMemoryContext, ptr);
755 	if (olddst)
756 		pfree(olddst);
757 
758 	/* Might as well clean up any palloc'd conversion result, too */
759 	if (ptr != src)
760 		pfree(ptr);
761 }
762 
/*
 * Update the lc_time localization cache variables if needed.
 *
 * Fills localized_abbrev_days[], localized_full_days[],
 * localized_abbrev_months[] and localized_full_months[] with the names
 * strftime() produces under the locale named by locale_time, converted to
 * the database encoding.  Does nothing if CurrentLCTimeValid is already set.
 *
 * Raises ERROR if the current locale cannot be read, if strftime() fails, or
 * if encoding conversion fails; raises FATAL if the prevailing locale
 * settings cannot be restored.
 */
void
cache_locale_time(void)
{
	/* one MAX_L10N_DATA-sized slot per name: 2 * 7 day + 2 * 12 month names */
	char		buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
	char	   *bufptr;
	time_t		timenow;
	struct tm  *timeinfo;
	bool		strftimefail = false;
	int			encoding;
	int			i;
	char	   *save_lc_time;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* did we do this already? */
	if (CurrentLCTimeValid)
		return;

	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

	/*
	 * As in PGLC_localeconv(), it's critical that we not throw error while
	 * libc's locale settings have nondefault values.  Hence, we just call
	 * strftime() within the critical section, and then convert and save its
	 * results afterwards.
	 */

	/* Save prevailing value of time locale */
	save_lc_time = setlocale(LC_TIME, NULL);
	if (!save_lc_time)
		elog(ERROR, "setlocale(NULL) failed");
	/* copy it: a later setlocale() call may overwrite the returned string */
	save_lc_time = pstrdup(save_lc_time);

#ifdef WIN32

	/*
	 * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
	 * must set it here.  This code looks the same as what PGLC_localeconv()
	 * does, but the underlying reason is different: this does NOT determine
	 * the encoding we'll get back from strftime_win32().
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* use lc_time to set the ctype */
	setlocale(LC_CTYPE, locale_time);
#endif

	/* critical section starts here: no errors until locales are restored */
	setlocale(LC_TIME, locale_time);

	/* We use times close to current time as data for strftime(). */
	timenow = time(NULL);
	timeinfo = localtime(&timenow);

	/* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
	bufptr = buf;

	/*
	 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
	 * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
	 * error.)  An implementation might report errors (e.g. ENOMEM) by
	 * returning 0 (or, less plausibly, a negative value) and setting errno.
	 * Report errno just in case the implementation did that, but clear it in
	 * advance of the calls so we don't emit a stale, unrelated errno.
	 */
	errno = 0;

	/*
	 * Failures are only remembered in strftimefail here; the error is raised
	 * after the locale has been restored.  Slots are written in the order
	 * abbreviated name, full name, which the second pair of loops below
	 * relies on.
	 */

	/* localized days */
	for (i = 0; i < 7; i++)
	{
		timeinfo->tm_wday = i;
		if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/* localized months */
	for (i = 0; i < 12; i++)
	{
		timeinfo->tm_mon = i;
		timeinfo->tm_mday = 1;	/* make sure we don't have invalid date */
		if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/*
	 * Restore the prevailing locale settings; as in PGLC_localeconv(),
	 * failure to do so is fatal.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_TIME, save_lc_time))
		elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);

	/*
	 * At this point we've done our best to clean up, and can throw errors, or
	 * call functions that might throw errors, with a clean conscience.
	 */
	if (strftimefail)
		elog(ERROR, "strftime() failed: %m");

	/* Release the pstrdup'd locale names */
	pfree(save_lc_time);
#ifdef WIN32
	pfree(save_lc_ctype);
#endif

#ifndef WIN32

	/*
	 * As in PGLC_localeconv(), we must convert strftime()'s output from the
	 * encoding implied by LC_TIME to the database encoding.  If we can't
	 * identify the LC_TIME encoding, just perform encoding validation.
	 */
	encoding = pg_get_encoding_from_locale(locale_time, true);
	if (encoding < 0)
		encoding = PG_SQL_ASCII;

#else

	/*
	 * On Windows, strftime_win32() always returns UTF8 data, so convert from
	 * that if necessary.
	 */
	encoding = PG_UTF8;

#endif							/* WIN32 */

	/* walk buf[] again, in the same slot order it was filled above */
	bufptr = buf;

	/* localized days */
	for (i = 0; i < 7; i++)
	{
		cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	/* localized months */
	for (i = 0; i < 12; i++)
	{
		cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	/* only now is the cache complete and safe to reuse */
	CurrentLCTimeValid = true;
}
930 
931 
932 #if defined(WIN32) && defined(LC_MESSAGES)
933 /*
934  * Convert a Windows setlocale() argument to a Unix-style one.
935  *
936  * Regardless of platform, we install message catalogs under a Unix-style
937  * LL[_CC][.ENCODING][@VARIANT] naming convention.  Only LC_MESSAGES settings
938  * following that style will elicit localized interface strings.
939  *
940  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
941  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
942  * case-insensitive.  setlocale() returns the fully-qualified form; for
943  * example, setlocale("thaI") returns "Thai_Thailand.874".  Internally,
944  * setlocale() and _create_locale() select a "locale identifier"[1] and store
945  * it in an undocumented _locale_t field.  From that LCID, we can retrieve the
946  * ISO 639 language and the ISO 3166 country.  Character encoding does not
947  * matter, because the server and client encodings govern that.
948  *
949  * Windows Vista introduced the "locale name" concept[2], closely following
950  * RFC 4646.  Locale identifiers are now deprecated.  Starting with Visual
951  * Studio 2012, setlocale() accepts locale names in addition to the strings it
952  * accepted historically.  It does not standardize them; setlocale("Th-tH")
953  * returns "Th-tH".  setlocale(category, "") still returns a traditional
954  * string.  Furthermore, msvcr110.dll changed the undocumented _locale_t
955  * content to carry locale names instead of locale identifiers.
956  *
 * Visual Studio 2015 should still be able to do the same as Visual Studio
 * 2012, but the declaration of locale_name is missing from _locale_t, which
 * makes that code fail to compile.  We therefore fall back to enumerating
 * all system locales with EnumSystemLocalesEx() to find the required locale
 * name.  If the input argument is already in Unix style, we can get the ISO
 * locale name directly by calling GetLocaleInfoEx() with LCType
 * LOCALE_SNAME.
964  *
965  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
966  * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
967  * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
968  * localized messages. In particular, every lc_messages setting that initdb
969  * can select automatically will yield only C-locale messages. XXX This could
970  * be fixed by running the fully-qualified locale name through a lookup table.
971  *
972  * This function returns a pointer to a static buffer bearing the converted
973  * name or NULL if conversion fails.
974  *
975  * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
976  * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
977  */
978 
979 #if _MSC_VER >= 1900
980 /*
981  * Callback function for EnumSystemLocalesEx() in get_iso_localename().
982  *
983  * This function enumerates all system locales, searching for one that matches
984  * an input with the format: <Language>[_<Country>], e.g.
985  * English[_United States]
986  *
987  * The input is a three wchar_t array as an LPARAM. The first element is the
988  * locale_name we want to match, the second element is an allocated buffer
989  * where the Unix-style locale is copied if a match is found, and the third
990  * element is the search status, 1 if a match was found, 0 otherwise.
991  */
992 static BOOL CALLBACK
search_locale_enum(LPWSTR pStr,DWORD dwFlags,LPARAM lparam)993 search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
994 {
995 	wchar_t		test_locale[LOCALE_NAME_MAX_LENGTH];
996 	wchar_t   **argv;
997 
998 	(void) (dwFlags);
999 
1000 	argv = (wchar_t **) lparam;
1001 	*argv[2] = (wchar_t) 0;
1002 
1003 	memset(test_locale, 0, sizeof(test_locale));
1004 
1005 	/* Get the name of the <Language> in English */
1006 	if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
1007 						test_locale, LOCALE_NAME_MAX_LENGTH))
1008 	{
1009 		/*
1010 		 * If the enumerated locale does not have a hyphen ("en") OR  the
1011 		 * lc_message input does not have an underscore ("English"), we only
1012 		 * need to compare the <Language> tags.
1013 		 */
1014 		if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
1015 		{
1016 			if (_wcsicmp(argv[0], test_locale) == 0)
1017 			{
1018 				wcscpy(argv[1], pStr);
1019 				*argv[2] = (wchar_t) 1;
1020 				return FALSE;
1021 			}
1022 		}
1023 
1024 		/*
1025 		 * We have to compare a full <Language>_<Country> tag, so we append
1026 		 * the underscore and name of the country/region in English, e.g.
1027 		 * "English_United States".
1028 		 */
1029 		else
1030 		{
1031 			size_t		len;
1032 
1033 			wcscat(test_locale, L"_");
1034 			len = wcslen(test_locale);
1035 			if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
1036 								test_locale + len,
1037 								LOCALE_NAME_MAX_LENGTH - len))
1038 			{
1039 				if (_wcsicmp(argv[0], test_locale) == 0)
1040 				{
1041 					wcscpy(argv[1], pStr);
1042 					*argv[2] = (wchar_t) 1;
1043 					return FALSE;
1044 				}
1045 			}
1046 		}
1047 	}
1048 
1049 	return TRUE;
1050 }
1051 
1052 /*
1053  * This function converts a Windows locale name to an ISO formatted version
1054  * for Visual Studio 2015 or greater.
1055  *
1056  * Returns NULL, if no valid conversion was found.
1057  */
1058 static char *
get_iso_localename(const char * winlocname)1059 get_iso_localename(const char *winlocname)
1060 {
1061 	wchar_t		wc_locale_name[LOCALE_NAME_MAX_LENGTH];
1062 	wchar_t		buffer[LOCALE_NAME_MAX_LENGTH];
1063 	static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
1064 	char	   *period;
1065 	int			len;
1066 	int			ret_val;
1067 
1068 	/*
1069 	 * Valid locales have the following syntax:
1070 	 * <Language>[_<Country>[.<CodePage>]]
1071 	 *
1072 	 * GetLocaleInfoEx can only take locale name without code-page and for the
1073 	 * purpose of this API the code-page doesn't matter.
1074 	 */
1075 	period = strchr(winlocname, '.');
1076 	if (period != NULL)
1077 		len = period - winlocname;
1078 	else
1079 		len = pg_mbstrlen(winlocname);
1080 
1081 	memset(wc_locale_name, 0, sizeof(wc_locale_name));
1082 	memset(buffer, 0, sizeof(buffer));
1083 	MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
1084 						LOCALE_NAME_MAX_LENGTH);
1085 
1086 	/*
1087 	 * If the lc_messages is already an Unix-style string, we have a direct
1088 	 * match with LOCALE_SNAME, e.g. en-US, en_US.
1089 	 */
1090 	ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
1091 							  LOCALE_NAME_MAX_LENGTH);
1092 	if (!ret_val)
1093 	{
1094 		/*
1095 		 * Search for a locale in the system that matches language and country
1096 		 * name.
1097 		 */
1098 		wchar_t    *argv[3];
1099 
1100 		argv[0] = wc_locale_name;
1101 		argv[1] = buffer;
1102 		argv[2] = (wchar_t *) &ret_val;
1103 		EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
1104 							NULL);
1105 	}
1106 
1107 	if (ret_val)
1108 	{
1109 		size_t		rc;
1110 		char	   *hyphen;
1111 
1112 		/* Locale names use only ASCII, any conversion locale suffices. */
1113 		rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
1114 		if (rc == -1 || rc == sizeof(iso_lc_messages))
1115 			return NULL;
1116 
1117 		/*
1118 		 * Simply replace the hyphen with an underscore.  See comments in
1119 		 * IsoLocaleName.
1120 		 */
1121 		hyphen = strchr(iso_lc_messages, '-');
1122 		if (hyphen)
1123 			*hyphen = '_';
1124 
1125 		return iso_lc_messages;
1126 	}
1127 
1128 	return NULL;
1129 }
1130 #endif							/* _MSC_VER >= 1900 */
1131 
/*
 * Convert a Windows locale name to a Unix-style name.
 *
 * "C" and "POSIX" (compared case-insensitively) always map to "C".  For any
 * other input the strategy depends on the compiler version this file was
 * built with:
 *	- Visual Studio 2015 or later: delegate to get_iso_localename();
 *	- Visual Studio 2012/2013: read the locale name stored in the _locale_t
 *	  created for winlocname and replace its first hyphen with an underscore;
 *	- older VC8.0+ compilers: read the LCID stored in the _locale_t and build
 *	  "<language>_<country>" from the ISO 639 / ISO 3166 names;
 *	- anything else: not supported.
 *
 * Returns a pointer to a static buffer, or NULL if the conversion fails or
 * is not supported by this build.
 */
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)			/* VC8.0 or later */
	static char iso_lc_messages[32];
	_locale_t	loct = NULL;

	/* The C/POSIX locale needs no lookup at all */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

#if (_MSC_VER >= 1900)			/* Visual Studio 2015 or later */
	return get_iso_localename(winlocname);
#else
	/* Let the C runtime resolve the name, then inspect what it selected */
	loct = _create_locale(LC_CTYPE, winlocname);
	if (loct != NULL)
	{
#if (_MSC_VER >= 1700)			/* Visual Studio 2012 or later */
		size_t		rc;
		char	   *hyphen;

		/* Locale names use only ASCII, any conversion locale suffices. */
		rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
						sizeof(iso_lc_messages), NULL);
		_free_locale(loct);
		/* Give up if the conversion failed or filled the whole buffer */
		if (rc == -1 || rc == sizeof(iso_lc_messages))
			return NULL;

		/*
		 * Since the message catalogs sit on a case-insensitive filesystem, we
		 * need not standardize letter case here.  So long as we do not ship
		 * message catalogs for which it would matter, we also need not
		 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
		 * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
		 *
		 * Note that the locale name can be less-specific than the value we
		 * would derive under earlier Visual Studio releases.  For example,
		 * French_France.1252 yields just "fr".  This does not affect any of
		 * the country-specific message catalogs available as of this writing
		 * (pt_BR, zh_CN, zh_TW).
		 */
		hyphen = strchr(iso_lc_messages, '-');
		if (hyphen)
			*hyphen = '_';
#else
		char		isolang[32],
					isocrty[32];
		LCID		lcid;

		/* A zero LCID is replaced by the US English locale identifier */
		lcid = loct->locinfo->lc_handle[LC_CTYPE];
		if (lcid == 0)
			lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
		_free_locale(loct);

		/* Fetch the ISO 639 language and ISO 3166 country for that LCID */
		if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
			return NULL;
		if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
			return NULL;
		snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
#endif
		return iso_lc_messages;
	}
	return NULL;
#endif							/* Visual Studio 2015 or later */
#else
	return NULL;				/* Not supported on this version of msvc/mingw */
#endif							/* _MSC_VER >= 1400 */
}
1203 #endif							/* WIN32 && LC_MESSAGES */
1204 
1205 
1206 /*
1207  * Detect aging strxfrm() implementations that, in a subset of locales, write
1208  * past the specified buffer length.  Affected users must update OS packages
1209  * before using PostgreSQL 9.5 or later.
1210  *
1211  * Assume that the bug can come and go from one postmaster startup to another
1212  * due to physical replication among diverse machines.  Assume that the bug's
1213  * presence will not change during the life of a particular postmaster.  Given
1214  * those assumptions, call this no less than once per postmaster startup per
1215  * LC_COLLATE setting used.  No known-affected system offers strxfrm_l(), so
1216  * there is no need to consider pg_collation locales.
1217  */
1218 void
check_strxfrm_bug(void)1219 check_strxfrm_bug(void)
1220 {
1221 	char		buf[32];
1222 	const int	canary = 0x7F;
1223 	bool		ok = true;
1224 
1225 	/*
1226 	 * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1227 	 * 05/08 returns 18 and modifies 10 bytes.  It respects limits above or
1228 	 * below that range.
1229 	 *
1230 	 * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1231 	 * 01/13 and Solaris 11.2.  Affected locales include is_IS.ISO8859-1,
1232 	 * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R.  Unaffected locales
1233 	 * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1234 	 */
1235 	buf[7] = canary;
1236 	(void) strxfrm(buf, "ab", 7);
1237 	if (buf[7] != canary)
1238 		ok = false;
1239 
1240 	/*
1241 	 * illumos bug #1594 was present in the source tree from 2010-10-11 to
1242 	 * 2012-02-01.  Given an ASCII string of any length and length limit 1,
1243 	 * affected systems ignore the length limit and modify a number of bytes
1244 	 * one less than the return value.  The problem inputs for this bug do not
1245 	 * overlap those for the Solaris bug, hence a distinct test.
1246 	 *
1247 	 * Affected systems include smartos-20110926T021612Z.  Affected locales
1248 	 * include en_US.ISO8859-1 and en_US.UTF-8.  Unaffected locales include C.
1249 	 */
1250 	buf[1] = canary;
1251 	(void) strxfrm(buf, "a", 1);
1252 	if (buf[1] != canary)
1253 		ok = false;
1254 
1255 	if (!ok)
1256 		ereport(ERROR,
1257 				(errcode(ERRCODE_SYSTEM_ERROR),
1258 				 errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1259 								 setlocale(LC_COLLATE, NULL)),
1260 				 errhint("Apply system library package updates.")));
1261 }
1262 
1263 
1264 /*
1265  * Cache mechanism for collation information.
1266  *
1267  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1268  * (or POSIX), so we can optimize a few code paths in various places.
1269  * For the built-in C and POSIX collations, we can know that without even
1270  * doing a cache lookup, but we want to support aliases for C/POSIX too.
1271  * For the "default" collation, there are separate static cache variables,
1272  * since consulting the pg_collation catalog doesn't tell us what we need.
1273  *
1274  * Also, if a pg_locale_t has been requested for a collation, we cache that
1275  * for the life of a backend.
1276  *
1277  * Note that some code relies on the flags not reporting false negatives
1278  * (that is, saying it's not C when it is).  For example, char2wchar()
1279  * could fail if the locale is C, so str_tolower() shouldn't call it
1280  * in that case.
1281  *
1282  * Note that we currently lack any way to flush the cache.  Since we don't
1283  * support ALTER COLLATION, this is OK.  The worst case is that someone
1284  * drops a collation, and a useless cache entry hangs around in existing
1285  * backends.
1286  */
1287 
1288 static collation_cache_entry *
lookup_collation_cache(Oid collation,bool set_flags)1289 lookup_collation_cache(Oid collation, bool set_flags)
1290 {
1291 	collation_cache_entry *cache_entry;
1292 	bool		found;
1293 
1294 	Assert(OidIsValid(collation));
1295 	Assert(collation != DEFAULT_COLLATION_OID);
1296 
1297 	if (collation_cache == NULL)
1298 	{
1299 		/* First time through, initialize the hash table */
1300 		HASHCTL		ctl;
1301 
1302 		memset(&ctl, 0, sizeof(ctl));
1303 		ctl.keysize = sizeof(Oid);
1304 		ctl.entrysize = sizeof(collation_cache_entry);
1305 		collation_cache = hash_create("Collation cache", 100, &ctl,
1306 									  HASH_ELEM | HASH_BLOBS);
1307 	}
1308 
1309 	cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1310 	if (!found)
1311 	{
1312 		/*
1313 		 * Make sure cache entry is marked invalid, in case we fail before
1314 		 * setting things.
1315 		 */
1316 		cache_entry->flags_valid = false;
1317 		cache_entry->locale = 0;
1318 	}
1319 
1320 	if (set_flags && !cache_entry->flags_valid)
1321 	{
1322 		/* Attempt to set the flags */
1323 		HeapTuple	tp;
1324 		Form_pg_collation collform;
1325 		const char *collcollate;
1326 		const char *collctype;
1327 
1328 		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1329 		if (!HeapTupleIsValid(tp))
1330 			elog(ERROR, "cache lookup failed for collation %u", collation);
1331 		collform = (Form_pg_collation) GETSTRUCT(tp);
1332 
1333 		collcollate = NameStr(collform->collcollate);
1334 		collctype = NameStr(collform->collctype);
1335 
1336 		cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1337 									 (strcmp(collcollate, "POSIX") == 0));
1338 		cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1339 								   (strcmp(collctype, "POSIX") == 0));
1340 
1341 		cache_entry->flags_valid = true;
1342 
1343 		ReleaseSysCache(tp);
1344 	}
1345 
1346 	return cache_entry;
1347 }
1348 
1349 
1350 /*
1351  * Detect whether collation's LC_COLLATE property is C
1352  */
1353 bool
lc_collate_is_c(Oid collation)1354 lc_collate_is_c(Oid collation)
1355 {
1356 	/*
1357 	 * If we're asked about "collation 0", return false, so that the code will
1358 	 * go into the non-C path and report that the collation is bogus.
1359 	 */
1360 	if (!OidIsValid(collation))
1361 		return false;
1362 
1363 	/*
1364 	 * If we're asked about the default collation, we have to inquire of the C
1365 	 * library.  Cache the result so we only have to compute it once.
1366 	 */
1367 	if (collation == DEFAULT_COLLATION_OID)
1368 	{
1369 		static int	result = -1;
1370 		char	   *localeptr;
1371 
1372 		if (result >= 0)
1373 			return (bool) result;
1374 		localeptr = setlocale(LC_COLLATE, NULL);
1375 		if (!localeptr)
1376 			elog(ERROR, "invalid LC_COLLATE setting");
1377 
1378 		if (strcmp(localeptr, "C") == 0)
1379 			result = true;
1380 		else if (strcmp(localeptr, "POSIX") == 0)
1381 			result = true;
1382 		else
1383 			result = false;
1384 		return (bool) result;
1385 	}
1386 
1387 	/*
1388 	 * If we're asked about the built-in C/POSIX collations, we know that.
1389 	 */
1390 	if (collation == C_COLLATION_OID ||
1391 		collation == POSIX_COLLATION_OID)
1392 		return true;
1393 
1394 	/*
1395 	 * Otherwise, we have to consult pg_collation, but we cache that.
1396 	 */
1397 	return (lookup_collation_cache(collation, true))->collate_is_c;
1398 }
1399 
1400 /*
1401  * Detect whether collation's LC_CTYPE property is C
1402  */
1403 bool
lc_ctype_is_c(Oid collation)1404 lc_ctype_is_c(Oid collation)
1405 {
1406 	/*
1407 	 * If we're asked about "collation 0", return false, so that the code will
1408 	 * go into the non-C path and report that the collation is bogus.
1409 	 */
1410 	if (!OidIsValid(collation))
1411 		return false;
1412 
1413 	/*
1414 	 * If we're asked about the default collation, we have to inquire of the C
1415 	 * library.  Cache the result so we only have to compute it once.
1416 	 */
1417 	if (collation == DEFAULT_COLLATION_OID)
1418 	{
1419 		static int	result = -1;
1420 		char	   *localeptr;
1421 
1422 		if (result >= 0)
1423 			return (bool) result;
1424 		localeptr = setlocale(LC_CTYPE, NULL);
1425 		if (!localeptr)
1426 			elog(ERROR, "invalid LC_CTYPE setting");
1427 
1428 		if (strcmp(localeptr, "C") == 0)
1429 			result = true;
1430 		else if (strcmp(localeptr, "POSIX") == 0)
1431 			result = true;
1432 		else
1433 			result = false;
1434 		return (bool) result;
1435 	}
1436 
1437 	/*
1438 	 * If we're asked about the built-in C/POSIX collations, we know that.
1439 	 */
1440 	if (collation == C_COLLATION_OID ||
1441 		collation == POSIX_COLLATION_OID)
1442 		return true;
1443 
1444 	/*
1445 	 * Otherwise, we have to consult pg_collation, but we cache that.
1446 	 */
1447 	return (lookup_collation_cache(collation, true))->ctype_is_c;
1448 }
1449 
1450 
1451 /* simple subroutine for reporting errors from newlocale() */
1452 #ifdef HAVE_LOCALE_T
1453 static void
report_newlocale_failure(const char * localename)1454 report_newlocale_failure(const char *localename)
1455 {
1456 	int			save_errno;
1457 
1458 	/*
1459 	 * Windows doesn't provide any useful error indication from
1460 	 * _create_locale(), and BSD-derived platforms don't seem to feel they
1461 	 * need to set errno either (even though POSIX is pretty clear that
1462 	 * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
1463 	 * is what to report.
1464 	 */
1465 	if (errno == 0)
1466 		errno = ENOENT;
1467 
1468 	/*
1469 	 * ENOENT means "no such locale", not "no such file", so clarify that
1470 	 * errno with an errdetail message.
1471 	 */
1472 	save_errno = errno;			/* auxiliary funcs might change errno */
1473 	ereport(ERROR,
1474 			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1475 			 errmsg("could not create locale \"%s\": %m",
1476 					localename),
1477 			 (save_errno == ENOENT ?
1478 			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1479 						localename) : 0)));
1480 }
1481 #endif							/* HAVE_LOCALE_T */
1482 
1483 
1484 /*
1485  * Create a locale_t from a collation OID.  Results are cached for the
1486  * lifetime of the backend.  Thus, do not free the result with freelocale().
1487  *
1488  * As a special optimization, the default/database collation returns 0.
1489  * Callers should then revert to the non-locale_t-enabled code path.
1490  * In fact, they shouldn't call this function at all when they are dealing
1491  * with the default locale.  That can save quite a bit in hotspots.
1492  * Also, callers should avoid calling this before going down a C/POSIX
1493  * fastpath, because such a fastpath should work even on platforms without
1494  * locale_t support in the C library.
1495  *
1496  * For simplicity, we always generate COLLATE + CTYPE even though we
1497  * might only need one of them.  Since this is called only once per session,
1498  * it shouldn't cost much.
1499  */
1500 pg_locale_t
pg_newlocale_from_collation(Oid collid)1501 pg_newlocale_from_collation(Oid collid)
1502 {
1503 	collation_cache_entry *cache_entry;
1504 
1505 	/* Callers must pass a valid OID */
1506 	Assert(OidIsValid(collid));
1507 
1508 	/* Return 0 for "default" collation, just in case caller forgets */
1509 	if (collid == DEFAULT_COLLATION_OID)
1510 		return (pg_locale_t) 0;
1511 
1512 	cache_entry = lookup_collation_cache(collid, false);
1513 
1514 	if (cache_entry->locale == 0)
1515 	{
1516 		/* We haven't computed this yet in this session, so do it */
1517 		HeapTuple	tp;
1518 		Form_pg_collation collform;
1519 		const char *collcollate;
1520 		const char *collctype pg_attribute_unused();
1521 		struct pg_locale_struct result;
1522 		pg_locale_t resultp;
1523 		Datum		collversion;
1524 		bool		isnull;
1525 
1526 		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1527 		if (!HeapTupleIsValid(tp))
1528 			elog(ERROR, "cache lookup failed for collation %u", collid);
1529 		collform = (Form_pg_collation) GETSTRUCT(tp);
1530 
1531 		collcollate = NameStr(collform->collcollate);
1532 		collctype = NameStr(collform->collctype);
1533 
1534 		/* We'll fill in the result struct locally before allocating memory */
1535 		memset(&result, 0, sizeof(result));
1536 		result.provider = collform->collprovider;
1537 
1538 		if (collform->collprovider == COLLPROVIDER_LIBC)
1539 		{
1540 #ifdef HAVE_LOCALE_T
1541 			locale_t	loc;
1542 
1543 			if (strcmp(collcollate, collctype) == 0)
1544 			{
1545 				/* Normal case where they're the same */
1546 				errno = 0;
1547 #ifndef WIN32
1548 				loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1549 								NULL);
1550 #else
1551 				loc = _create_locale(LC_ALL, collcollate);
1552 #endif
1553 				if (!loc)
1554 					report_newlocale_failure(collcollate);
1555 			}
1556 			else
1557 			{
1558 #ifndef WIN32
1559 				/* We need two newlocale() steps */
1560 				locale_t	loc1;
1561 
1562 				errno = 0;
1563 				loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1564 				if (!loc1)
1565 					report_newlocale_failure(collcollate);
1566 				errno = 0;
1567 				loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1568 				if (!loc)
1569 					report_newlocale_failure(collctype);
1570 #else
1571 
1572 				/*
1573 				 * XXX The _create_locale() API doesn't appear to support
1574 				 * this. Could perhaps be worked around by changing
1575 				 * pg_locale_t to contain two separate fields.
1576 				 */
1577 				ereport(ERROR,
1578 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1579 						 errmsg("collations with different collate and ctype values are not supported on this platform")));
1580 #endif
1581 			}
1582 
1583 			result.info.lt = loc;
1584 #else							/* not HAVE_LOCALE_T */
1585 			/* platform that doesn't support locale_t */
1586 			ereport(ERROR,
1587 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1588 					 errmsg("collation provider LIBC is not supported on this platform")));
1589 #endif							/* not HAVE_LOCALE_T */
1590 		}
1591 		else if (collform->collprovider == COLLPROVIDER_ICU)
1592 		{
1593 #ifdef USE_ICU
1594 			UCollator  *collator;
1595 			UErrorCode	status;
1596 
1597 			if (strcmp(collcollate, collctype) != 0)
1598 				ereport(ERROR,
1599 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1600 						 errmsg("collations with different collate and ctype values are not supported by ICU")));
1601 
1602 			status = U_ZERO_ERROR;
1603 			collator = ucol_open(collcollate, &status);
1604 			if (U_FAILURE(status))
1605 				ereport(ERROR,
1606 						(errmsg("could not open collator for locale \"%s\": %s",
1607 								collcollate, u_errorName(status))));
1608 
1609 			/* We will leak this string if we get an error below :-( */
1610 			result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1611 														 collcollate);
1612 			result.info.icu.ucol = collator;
1613 #else							/* not USE_ICU */
1614 			/* could get here if a collation was created by a build with ICU */
1615 			ereport(ERROR,
1616 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1617 					 errmsg("ICU is not supported in this build"), \
1618 					 errhint("You need to rebuild PostgreSQL using --with-icu.")));
1619 #endif							/* not USE_ICU */
1620 		}
1621 
1622 		collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1623 									  &isnull);
1624 		if (!isnull)
1625 		{
1626 			char	   *actual_versionstr;
1627 			char	   *collversionstr;
1628 
1629 			actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1630 			if (!actual_versionstr)
1631 			{
1632 				/*
1633 				 * This could happen when specifying a version in CREATE
1634 				 * COLLATION for a libc locale, or manually creating a mess in
1635 				 * the catalogs.
1636 				 */
1637 				ereport(ERROR,
1638 						(errmsg("collation \"%s\" has no actual version, but a version was specified",
1639 								NameStr(collform->collname))));
1640 			}
1641 			collversionstr = TextDatumGetCString(collversion);
1642 
1643 			if (strcmp(actual_versionstr, collversionstr) != 0)
1644 				ereport(WARNING,
1645 						(errmsg("collation \"%s\" has version mismatch",
1646 								NameStr(collform->collname)),
1647 						 errdetail("The collation in the database was created using version %s, "
1648 								   "but the operating system provides version %s.",
1649 								   collversionstr, actual_versionstr),
1650 						 errhint("Rebuild all objects affected by this collation and run "
1651 								 "ALTER COLLATION %s REFRESH VERSION, "
1652 								 "or build PostgreSQL with the right library version.",
1653 								 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1654 															NameStr(collform->collname)))));
1655 		}
1656 
1657 		ReleaseSysCache(tp);
1658 
1659 		/* We'll keep the pg_locale_t structures in TopMemoryContext */
1660 		resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1661 		*resultp = result;
1662 
1663 		cache_entry->locale = resultp;
1664 	}
1665 
1666 	return cache_entry->locale;
1667 }
1668 
1669 /*
1670  * Get provider-specific collation version string for the given collation from
1671  * the operating system/library.
1672  *
1673  * A particular provider must always either return a non-NULL string or return
1674  * NULL (if it doesn't support versions).  It must not return NULL for some
1675  * collcollate and not NULL for others.
1676  */
char *
get_collation_actual_version(char collprovider, const char *collcollate)
{
	/* Providers without version support report NULL */
	char	   *version = NULL;

#ifdef USE_ICU
	if (collprovider == COLLPROVIDER_ICU)
	{
		char		verbuf[U_MAX_VERSION_STRING_LENGTH];
		UVersionInfo verinfo;
		UErrorCode	err = U_ZERO_ERROR;
		UCollator  *ucol;

		/* Open the collator just long enough to read its version */
		ucol = ucol_open(collcollate, &err);
		if (U_FAILURE(err))
			ereport(ERROR,
					(errmsg("could not open collator for locale \"%s\": %s",
							collcollate, u_errorName(err))));
		ucol_getVersion(ucol, verinfo);
		ucol_close(ucol);

		/* Render the version as text and return a palloc'd copy */
		u_versionToString(verinfo, verbuf);
		version = pstrdup(verbuf);
	}
#endif

	return version;
}
1708 
1709 
1710 #ifdef USE_ICU
1711 /*
1712  * Converter object for converting between ICU's UChar strings and C strings
1713  * in database encoding.  Since the database encoding doesn't change, we only
1714  * need one of these per session.
1715  */
1716 static UConverter *icu_converter = NULL;
1717 
1718 static void
init_icu_converter(void)1719 init_icu_converter(void)
1720 {
1721 	const char *icu_encoding_name;
1722 	UErrorCode	status;
1723 	UConverter *conv;
1724 
1725 	if (icu_converter)
1726 		return;
1727 
1728 	icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1729 
1730 	status = U_ZERO_ERROR;
1731 	conv = ucnv_open(icu_encoding_name, &status);
1732 	if (U_FAILURE(status))
1733 		ereport(ERROR,
1734 				(errmsg("could not open ICU converter for encoding \"%s\": %s",
1735 						icu_encoding_name, u_errorName(status))));
1736 
1737 	icu_converter = conv;
1738 }
1739 
1740 /*
1741  * Convert a string in the database encoding into a string of UChars.
1742  *
1743  * The source string at buff is of length nbytes
1744  * (it needn't be nul-terminated)
1745  *
1746  * *buff_uchar receives a pointer to the palloc'd result string, and
1747  * the function's result is the number of UChars generated.
1748  *
1749  * The result string is nul-terminated, though most callers rely on the
1750  * result length instead.
1751  */
1752 int32_t
icu_to_uchar(UChar ** buff_uchar,const char * buff,size_t nbytes)1753 icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1754 {
1755 	UErrorCode	status;
1756 	int32_t		len_uchar;
1757 
1758 	init_icu_converter();
1759 
1760 	status = U_ZERO_ERROR;
1761 	len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1762 							  buff, nbytes, &status);
1763 	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1764 		ereport(ERROR,
1765 				(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1766 
1767 	*buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1768 
1769 	status = U_ZERO_ERROR;
1770 	len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1771 							  buff, nbytes, &status);
1772 	if (U_FAILURE(status))
1773 		ereport(ERROR,
1774 				(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
1775 
1776 	return len_uchar;
1777 }
1778 
1779 /*
1780  * Convert a string of UChars into the database encoding.
1781  *
1782  * The source string at buff_uchar is of length len_uchar
1783  * (it needn't be nul-terminated)
1784  *
1785  * *result receives a pointer to the palloc'd result string, and the
1786  * function's result is the number of bytes generated (not counting nul).
1787  *
1788  * The result string is nul-terminated.
1789  */
1790 int32_t
icu_from_uchar(char ** result,const UChar * buff_uchar,int32_t len_uchar)1791 icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1792 {
1793 	UErrorCode	status;
1794 	int32_t		len_result;
1795 
1796 	init_icu_converter();
1797 
1798 	status = U_ZERO_ERROR;
1799 	len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1800 								 buff_uchar, len_uchar, &status);
1801 	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1802 		ereport(ERROR,
1803 				(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1804 
1805 	*result = palloc(len_result + 1);
1806 
1807 	status = U_ZERO_ERROR;
1808 	len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1809 								 buff_uchar, len_uchar, &status);
1810 	if (U_FAILURE(status))
1811 		ereport(ERROR,
1812 				(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
1813 
1814 	return len_result;
1815 }
1816 
1817 #endif							/* USE_ICU */
1818 
1819 /*
1820  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1821  * Therefore we keep them here rather than with the mbutils code.
1822  */
1823 
1824 /*
1825  * wchar2char --- convert wide characters to multibyte format
1826  *
1827  * This has the same API as the standard wcstombs_l() function; in particular,
1828  * tolen is the maximum number of bytes to store at *to, and *from must be
1829  * zero-terminated.  The output will be zero-terminated iff there is room.
1830  */
1831 size_t
wchar2char(char * to,const wchar_t * from,size_t tolen,pg_locale_t locale)1832 wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1833 {
1834 	size_t		result;
1835 
1836 	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1837 
1838 	if (tolen == 0)
1839 		return 0;
1840 
1841 #ifdef WIN32
1842 
1843 	/*
1844 	 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1845 	 * for some reason mbstowcs and wcstombs won't do this for us, so we use
1846 	 * MultiByteToWideChar().
1847 	 */
1848 	if (GetDatabaseEncoding() == PG_UTF8)
1849 	{
1850 		result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1851 									 NULL, NULL);
1852 		/* A zero return is failure */
1853 		if (result <= 0)
1854 			result = -1;
1855 		else
1856 		{
1857 			Assert(result <= tolen);
1858 			/* Microsoft counts the zero terminator in the result */
1859 			result--;
1860 		}
1861 	}
1862 	else
1863 #endif							/* WIN32 */
1864 	if (locale == (pg_locale_t) 0)
1865 	{
1866 		/* Use wcstombs directly for the default locale */
1867 		result = wcstombs(to, from, tolen);
1868 	}
1869 	else
1870 	{
1871 #ifdef HAVE_LOCALE_T
1872 #ifdef HAVE_WCSTOMBS_L
1873 		/* Use wcstombs_l for nondefault locales */
1874 		result = wcstombs_l(to, from, tolen, locale->info.lt);
1875 #else							/* !HAVE_WCSTOMBS_L */
1876 		/* We have to temporarily set the locale as current ... ugh */
1877 		locale_t	save_locale = uselocale(locale->info.lt);
1878 
1879 		result = wcstombs(to, from, tolen);
1880 
1881 		uselocale(save_locale);
1882 #endif							/* HAVE_WCSTOMBS_L */
1883 #else							/* !HAVE_LOCALE_T */
1884 		/* Can't have locale != 0 without HAVE_LOCALE_T */
1885 		elog(ERROR, "wcstombs_l is not available");
1886 		result = 0;				/* keep compiler quiet */
1887 #endif							/* HAVE_LOCALE_T */
1888 	}
1889 
1890 	return result;
1891 }
1892 
1893 /*
1894  * char2wchar --- convert multibyte characters to wide characters
1895  *
1896  * This has almost the API of mbstowcs_l(), except that *from need not be
1897  * null-terminated; instead, the number of input bytes is specified as
1898  * fromlen.  Also, we ereport() rather than returning -1 for invalid
1899  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
1900  * The output will be zero-terminated iff there is room.
1901  */
1902 size_t
char2wchar(wchar_t * to,size_t tolen,const char * from,size_t fromlen,pg_locale_t locale)1903 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1904 		   pg_locale_t locale)
1905 {
1906 	size_t		result;
1907 
1908 	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1909 
1910 	if (tolen == 0)
1911 		return 0;
1912 
1913 #ifdef WIN32
1914 	/* See WIN32 "Unicode" comment above */
1915 	if (GetDatabaseEncoding() == PG_UTF8)
1916 	{
1917 		/* Win32 API does not work for zero-length input */
1918 		if (fromlen == 0)
1919 			result = 0;
1920 		else
1921 		{
1922 			result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1923 			/* A zero return is failure */
1924 			if (result == 0)
1925 				result = -1;
1926 		}
1927 
1928 		if (result != -1)
1929 		{
1930 			Assert(result < tolen);
1931 			/* Append trailing null wchar (MultiByteToWideChar() does not) */
1932 			to[result] = 0;
1933 		}
1934 	}
1935 	else
1936 #endif							/* WIN32 */
1937 	{
1938 		/* mbstowcs requires ending '\0' */
1939 		char	   *str = pnstrdup(from, fromlen);
1940 
1941 		if (locale == (pg_locale_t) 0)
1942 		{
1943 			/* Use mbstowcs directly for the default locale */
1944 			result = mbstowcs(to, str, tolen);
1945 		}
1946 		else
1947 		{
1948 #ifdef HAVE_LOCALE_T
1949 #ifdef HAVE_MBSTOWCS_L
1950 			/* Use mbstowcs_l for nondefault locales */
1951 			result = mbstowcs_l(to, str, tolen, locale->info.lt);
1952 #else							/* !HAVE_MBSTOWCS_L */
1953 			/* We have to temporarily set the locale as current ... ugh */
1954 			locale_t	save_locale = uselocale(locale->info.lt);
1955 
1956 			result = mbstowcs(to, str, tolen);
1957 
1958 			uselocale(save_locale);
1959 #endif							/* HAVE_MBSTOWCS_L */
1960 #else							/* !HAVE_LOCALE_T */
1961 			/* Can't have locale != 0 without HAVE_LOCALE_T */
1962 			elog(ERROR, "mbstowcs_l is not available");
1963 			result = 0;			/* keep compiler quiet */
1964 #endif							/* HAVE_LOCALE_T */
1965 		}
1966 
1967 		pfree(str);
1968 	}
1969 
1970 	if (result == -1)
1971 	{
1972 		/*
1973 		 * Invalid multibyte character encountered.  We try to give a useful
1974 		 * error message by letting pg_verifymbstr check the string.  But it's
1975 		 * possible that the string is OK to us, and not OK to mbstowcs ---
1976 		 * this suggests that the LC_CTYPE locale is different from the
1977 		 * database encoding.  Give a generic error message if verifymbstr
1978 		 * can't find anything wrong.
1979 		 */
1980 		pg_verifymbstr(from, fromlen, false);	/* might not return */
1981 		/* but if it does ... */
1982 		ereport(ERROR,
1983 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1984 				 errmsg("invalid multibyte character for locale"),
1985 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1986 	}
1987 
1988 	return result;
1989 }
1990