1 /*-------------------------------------------------------------------------
2  *
3  * win32setlocale.c
4  *		Wrapper to work around bugs in Windows setlocale() implementation
5  *
6  * Copyright (c) 2011-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/port/win32setlocale.c
10  *
11  *
12  * The setlocale() function in Windows is broken in two ways. First, it
13  * has a problem with locale names that have a dot in the country name. For
14  * example:
15  *
16  * "Chinese (Traditional)_Hong Kong S.A.R..950"
17  *
18  * For some reason, setlocale() doesn't accept that as argument, even though
19  * setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts
20  * various alternative names for such countries, so to work around the broken
21  * setlocale() function, we map the troublemaking locale names to accepted
22  * aliases, before calling setlocale().
23  *
24  * The second problem is that the locale name for "Norwegian (Bokmål)"
25  * contains a non-ASCII character. That's problematic, because it's not clear
26  * what encoding the locale name itself is supposed to be in, when you
27  * haven't yet set a locale. Also, it causes problems when the cluster
28  * contains databases with different encodings, as the locale name is stored
29  * in the pg_database system catalog. To work around that, when setlocale()
30  * returns that locale name, map it to a pure-ASCII alias for the same
31  * locale.
32  *-------------------------------------------------------------------------
33  */
34 
35 #include "c.h"
36 
37 #undef setlocale
38 
/*
 * One substitution rule for rewriting part of a locale name.
 *
 * A rule comes in two flavours.  With locale_name_end set to NULL, the rule
 * matches the literal text locale_name_start.  With both strings set, the
 * locale name must contain both of them, and the matched span runs from the
 * beginning of locale_name_start through the end of locale_name_end,
 * including whatever lies in between -- a poor-man's regexp equivalent to
 * "start.*end".  Either way the whole matched span is swapped for
 * 'replacement'.
 */
struct locale_map
{
	const char *locale_name_start;	/* text that opens the matched span */
	const char *locale_name_end;	/* text that closes the span, or NULL
									 * for a single-string match */
	const char *replacement;	/* text substituted for the matched span */
};
53 
54 /*
55  * Mappings applied before calling setlocale(), to the argument.
56  */
57 static const struct locale_map locale_map_argument[] = {
58 	/*
59 	 * "HKG" is listed here:
60 	 * http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx
61 	 * (Country/Region Strings).
62 	 *
63 	 * "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the
64 	 * above list, but seems to work anyway.
65 	 */
66 	{"Hong Kong S.A.R.", NULL, "HKG"},
67 	{"U.A.E.", NULL, "ARE"},
68 
69 	/*
70 	 * The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't
71 	 * seem to recognize that. And Macau isn't listed in the table of accepted
72 	 * abbreviations linked above. Fortunately, "ZHM" seems to be accepted as
73 	 * an alias for "Chinese (Traditional)_Macau S.A.R..950". I'm not sure
74 	 * where "ZHM" comes from, must be some legacy naming scheme. But hey, it
75 	 * works.
76 	 *
77 	 * Note that unlike HKG and ARE, ZHM is an alias for the *whole* locale
78 	 * name, not just the country part.
79 	 *
80 	 * Some versions of Windows spell it "Macau", others "Macao".
81 	 */
82 	{"Chinese (Traditional)_Macau S.A.R..950", NULL, "ZHM"},
83 	{"Chinese_Macau S.A.R..950", NULL, "ZHM"},
84 	{"Chinese (Traditional)_Macao S.A.R..950", NULL, "ZHM"},
85 	{"Chinese_Macao S.A.R..950", NULL, "ZHM"},
86 	{NULL, NULL, NULL}
87 };
88 
89 /*
90  * Mappings applied after calling setlocale(), to its return value.
91  */
92 static const struct locale_map locale_map_result[] = {
93 	/*
94 	 * "Norwegian (Bokmål)" locale name contains the a-ring character.
95 	 * Map it to a pure-ASCII alias.
96 	 *
97 	 * It's not clear what encoding setlocale() uses when it returns the
98 	 * locale name, so to play it safe, we search for "Norwegian (Bok*l)".
99 	 *
100 	 * Just to make life even more complicated, some versions of Windows spell
101 	 * the locale name without parentheses.  Translate that too.
102 	 */
103 	{"Norwegian (Bokm", "l)_Norway", "Norwegian_Norway"},
104 	{"Norwegian Bokm", "l_Norway", "Norwegian_Norway"},
105 	{NULL, NULL, NULL}
106 };
107 
108 #define MAX_LOCALE_NAME_LEN		100
109 
110 static const char *
map_locale(const struct locale_map * map,const char * locale)111 map_locale(const struct locale_map *map, const char *locale)
112 {
113 	static char aliasbuf[MAX_LOCALE_NAME_LEN];
114 	int			i;
115 
116 	/* Check if the locale name matches any of the problematic ones. */
117 	for (i = 0; map[i].locale_name_start != NULL; i++)
118 	{
119 		const char *needle_start = map[i].locale_name_start;
120 		const char *needle_end = map[i].locale_name_end;
121 		const char *replacement = map[i].replacement;
122 		char	   *match;
123 		char	   *match_start = NULL;
124 		char	   *match_end = NULL;
125 
126 		match = strstr(locale, needle_start);
127 		if (match)
128 		{
129 			/*
130 			 * Found a match for the first part. If this was a two-part
131 			 * replacement, find the second part.
132 			 */
133 			match_start = match;
134 			if (needle_end)
135 			{
136 				match = strstr(match_start + strlen(needle_start), needle_end);
137 				if (match)
138 					match_end = match + strlen(needle_end);
139 				else
140 					match_start = NULL;
141 			}
142 			else
143 				match_end = match_start + strlen(needle_start);
144 		}
145 
146 		if (match_start)
147 		{
148 			/* Found a match. Replace the matched string. */
149 			int			matchpos = match_start - locale;
150 			int			replacementlen = strlen(replacement);
151 			char	   *rest = match_end;
152 			int			restlen = strlen(rest);
153 
154 			/* check that the result fits in the static buffer */
155 			if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN)
156 				return NULL;
157 
158 			memcpy(&aliasbuf[0], &locale[0], matchpos);
159 			memcpy(&aliasbuf[matchpos], replacement, replacementlen);
160 			/* includes null terminator */
161 			memcpy(&aliasbuf[matchpos + replacementlen], rest, restlen + 1);
162 
163 			return aliasbuf;
164 		}
165 	}
166 
167 	/* no match, just return the original string */
168 	return locale;
169 }
170 
171 char *
pgwin32_setlocale(int category,const char * locale)172 pgwin32_setlocale(int category, const char *locale)
173 {
174 	const char *argument;
175 	char	   *result;
176 
177 	if (locale == NULL)
178 		argument = NULL;
179 	else
180 		argument = map_locale(locale_map_argument, locale);
181 
182 	/* Call the real setlocale() function */
183 	result = setlocale(category, argument);
184 
185 	/*
186 	 * setlocale() is specified to return a "char *" that the caller is
187 	 * forbidden to modify, so casting away the "const" is innocuous.
188 	 */
189 	if (result)
190 		result = unconstify(char *, map_locale(locale_map_result, result));
191 
192 	return result;
193 }
194