1 /*
2 * xsltlocale.c: locale handling
3 *
4 * Reference:
5 * RFC 3066: Tags for the Identification of Languages
6 * http://www.ietf.org/rfc/rfc3066.txt
7 * ISO 639-1, ISO 3166-1
8 *
9 * Author: Nick Wellnhofer
10 * winapi port: Roumen Petrov
11 */
12
13 #include "precomp.h"
14
15 #include "xsltlocale.h"
16
/*
 * ASCII-only character helpers. Every macro expands its argument exactly
 * once, so expressions with side effects such as *p++ may be passed, and
 * the argument is parenthesized so that any expression is treated as a unit.
 *
 * TOUPPER/TOLOWER just clear/set bit 0x20; the result is only meaningful
 * when the argument is an ASCII letter. ISALPHA is true for 'A'-'Z' and
 * 'a'-'z' only.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)
20
/*
 * Upper bounds, in characters and not counting the terminating null
 * character, for the language and country subtags of a tag.
 */
#define XSLTMAX_ISO639LANGLEN   8
#define XSLTMAX_ISO3166CNTRYLEN 8
/* Longest tag handled: "<lang>-<cntry>", i.e. both subtags plus separator */
#define XSLTMAX_LANGTAGLEN \
    (XSLTMAX_ISO639LANGLEN + 1 + XSLTMAX_ISO3166CNTRYLEN)
26
27 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
28
29 #ifdef XSLT_LOCALE_WINAPI
30 xmlRMutexPtr xsltLocaleMutex = NULL;
31
32 struct xsltRFC1766Info_s {
33 /*note typedef unsigned char xmlChar !*/
34 xmlChar tag[XSLTMAX_LANGTAGLEN+1];
35 /*note typedef LCID xsltLocale !*/
36 xsltLocale lcid;
37 };
38 typedef struct xsltRFC1766Info_s xsltRFC1766Info;
39
40 static int xsltLocaleListSize = 0;
41 static xsltRFC1766Info *xsltLocaleList = NULL;
42
43
44 static xsltLocale
xslt_locale_WINAPI(const xmlChar * languageTag)45 xslt_locale_WINAPI(const xmlChar *languageTag) {
46 int k;
47 xsltRFC1766Info *p = xsltLocaleList;
48
49 for (k=0; k<xsltLocaleListSize; k++, p++)
50 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
51 return((xsltLocale)0);
52 }
53
54 static void xsltEnumSupportedLocales(void);
55 #endif
56
/**
 * xsltFreeLocales:
 *
 * Cleanup function for the locale support on shutdown. Only the WINAPI
 * build keeps a table of locales; in other builds this does nothing.
 */
void
xsltFreeLocales(void) {
#if defined(XSLT_LOCALE_WINAPI)
    xsltRFC1766Info *stale;

    xmlRMutexLock(xsltLocaleMutex);
    /* detach the table first, then release it, all under the mutex */
    stale = xsltLocaleList;
    xsltLocaleList = NULL;
    xmlFree(stale);
    /* NOTE(review): xsltLocaleListSize keeps its old value here */
    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
71
72 /**
73 * xsltNewLocale:
74 * @languageTag: RFC 3066 language tag
75 *
76 * Creates a new locale of an opaque system dependent type based on the
77 * language tag.
78 *
79 * Returns the locale or NULL on error or if no matching locale was found
80 */
81 xsltLocale
xsltNewLocale(const xmlChar * languageTag)82 xsltNewLocale(const xmlChar *languageTag) {
83 #ifdef XSLT_LOCALE_POSIX
84 xsltLocale locale;
85 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
86 const xmlChar *p = languageTag;
87 const char *region = NULL;
88 char *q = localeName;
89 int i, llen;
90
91 /* Convert something like "pt-br" to "pt_BR.utf8" */
92
93 if (languageTag == NULL)
94 return(NULL);
95
96 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
97 *q++ = TOLOWER(*p++);
98
99 if (i == 0)
100 return(NULL);
101
102 llen = i;
103
104 if (*p) {
105 if (*p++ != '-')
106 return(NULL);
107 *q++ = '_';
108
109 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
110 *q++ = TOUPPER(*p++);
111
112 if (i == 0 || *p)
113 return(NULL);
114
115 memcpy(q, ".utf8", 6);
116 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
117 if (locale != NULL)
118 return(locale);
119
120 /* Continue without using country code */
121
122 q = localeName + llen;
123 }
124
125 /* Try locale without territory, e.g. for Esperanto (eo) */
126
127 memcpy(q, ".utf8", 6);
128 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
129 if (locale != NULL)
130 return(locale);
131
132 /* Try to find most common country for language */
133
134 if (llen != 2)
135 return(NULL);
136
137 region = (char *)xsltDefaultRegion((xmlChar *)localeName);
138 if (region == NULL)
139 return(NULL);
140
141 q = localeName + llen;
142 *q++ = '_';
143 *q++ = region[0];
144 *q++ = region[1];
145 memcpy(q, ".utf8", 6);
146 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
147
148 return(locale);
149 #endif
150
151 #ifdef XSLT_LOCALE_WINAPI
152 {
153 xsltLocale locale = (xsltLocale)0;
154 xmlChar localeName[XSLTMAX_LANGTAGLEN+1];
155 xmlChar *q = localeName;
156 const xmlChar *p = languageTag;
157 int i, llen;
158 const xmlChar *region = NULL;
159
160 if (languageTag == NULL) goto end;
161
162 xsltEnumSupportedLocales();
163
164 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
165 *q++ = TOLOWER(*p++);
166 if (i == 0) goto end;
167
168 llen = i;
169 *q++ = '-';
170 if (*p) { /*if country tag is given*/
171 if (*p++ != '-') goto end;
172
173 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
174 *q++ = TOUPPER(*p++);
175 if (i == 0 || *p) goto end;
176
177 *q = '\0';
178 locale = xslt_locale_WINAPI(localeName);
179 if (locale != (xsltLocale)0) goto end;
180 }
181 /* Try to find most common country for language */
182 region = xsltDefaultRegion(localeName);
183 if (region == NULL) goto end;
184
185 strcpy((char *) localeName + llen + 1, (char *) region);
186 locale = xslt_locale_WINAPI(localeName);
187 end:
188 return(locale);
189 }
190 #endif
191
192 #ifdef XSLT_LOCALE_NONE
193 return(NULL);
194 #endif
195 }
196
/**
 * xsltDefaultRegion:
 * @localeName: string that starts with a lowercase two-letter language code
 *
 * Looks up the most common country for a language. Only the first two
 * characters of @localeName are examined; whatever follows is ignored.
 * The table is based on the locales from glibc 2.3.3.
 *
 * Returns a pointer to a static, null-terminated two-letter country code,
 * or NULL if @localeName is NULL or empty or the language is not listed
 */
static const xmlChar*
xsltDefaultRegion(const xmlChar *localeName) {
    /*
     * The strings are kept as plain char rather than xmlChar because
     * compilers warn when string literals initialize unsigned char data
     * through pointers; the result is cast on return.
     */
    static const struct {
        char lang[3];
        char region[3];
    } defaults[] = {
        { "aa", "ET" }, { "af", "ZA" }, { "am", "ET" }, { "an", "ES" },
        { "ar", "AE" }, { "az", "AZ" },
        { "be", "BY" }, { "bg", "BG" }, { "bn", "BD" }, { "br", "FR" },
        { "bs", "BA" },
        { "ca", "ES" }, { "cs", "CZ" }, { "cy", "GB" },
        { "da", "DK" }, { "de", "DE" },
        { "el", "GR" }, { "en", "US" }, { "eo", "US" }, { "es", "ES" },
        { "et", "EE" }, { "eu", "ES" },
        { "fa", "IR" }, { "fi", "FI" }, { "fo", "FO" }, { "fr", "FR" },
        { "ga", "IE" }, { "gl", "ES" }, { "gv", "GB" },
        /* "hr" is Croatian: the country code is HR (HT would be Haiti) */
        { "he", "IL" }, { "hi", "IN" }, { "hr", "HR" }, { "hu", "HU" },
        { "id", "ID" }, { "is", "IS" }, { "it", "IT" }, { "iw", "IL" },
        { "ja", "JP" },
        { "kl", "GL" }, { "ko", "KR" }, { "kw", "GB" },
        { "lt", "LT" }, { "lv", "LV" },
        { "mk", "MK" }, { "ml", "IN" }, { "mn", "MN" }, { "mr", "IN" },
        { "ms", "MY" }, { "mt", "MT" },
        { "nb", "NO" }, { "ne", "NP" }, { "nl", "NL" }, { "nn", "NO" },
        { "no", "NO" },
        { "om", "ET" },
        { "pa", "IN" }, { "pl", "PL" }, { "pt", "PT" },
        { "ro", "RO" }, { "ru", "RU" },
        { "se", "NO" }, { "sh", "YU" }, { "sk", "SK" }, { "sl", "SI" },
        { "so", "ET" }, { "sq", "AL" }, { "st", "ZA" }, { "sv", "SE" },
        { "ta", "IN" }, { "te", "IN" }, { "th", "TH" }, { "ti", "ER" },
        { "tr", "TR" }, { "tt", "RU" },
        { "uk", "UA" }, { "ur", "PK" },
        { "vi", "VN" },
        { "wa", "BE" },
        { "xh", "ZA" },
        { "zh", "CN" }, { "zu", "ZA" }
    };
    size_t i;

    /* never read localeName[1] past the end of an empty string */
    if (localeName == NULL || localeName[0] == '\0')
        return(NULL);

    for (i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++) {
        if ((xmlChar) defaults[i].lang[0] == localeName[0] &&
            (xmlChar) defaults[i].lang[1] == localeName[1])
            return((const xmlChar *) defaults[i].region);
    }
    return(NULL);
}
334
335 /**
336 * xsltFreeLocale:
337 * @locale: the locale to free
338 *
339 * Frees a locale created with xsltNewLocale
340 */
341 void
xsltFreeLocale(xsltLocale locale)342 xsltFreeLocale(xsltLocale locale) {
343 #ifdef XSLT_LOCALE_POSIX
344 if (locale != NULL)
345 freelocale(locale);
346 #endif
347 }
348
349 /**
350 * xsltStrxfrm:
351 * @locale: locale created with xsltNewLocale
352 * @string: UTF-8 string to transform
353 *
354 * Transforms a string according to locale. The transformed string must then be
355 * compared with xsltLocaleStrcmp and freed with xmlFree.
356 *
357 * Returns the transformed string or NULL on error
358 */
359 xsltLocaleChar *
xsltStrxfrm(xsltLocale locale,const xmlChar * string)360 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
361 {
362 #ifdef XSLT_LOCALE_NONE
363 return(NULL);
364 #else
365 size_t xstrlen, r;
366 xsltLocaleChar *xstr;
367
368 #ifdef XSLT_LOCALE_POSIX
369 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
370 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
371 if (xstr == NULL) {
372 xsltTransformError(NULL, NULL, NULL,
373 "xsltStrxfrm : out of memory error\n");
374 return(NULL);
375 }
376
377 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
378 #endif
379
380 #ifdef XSLT_LOCALE_WINAPI
381 xstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0);
382 if (xstrlen == 0) {
383 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
384 return(NULL);
385 }
386 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
387 if (xstr == NULL) {
388 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
389 return(NULL);
390 }
391 r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, xstr, xstrlen);
392 if (r == 0) {
393 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
394 xmlFree(xstr);
395 return(NULL);
396 }
397 return(xstr);
398 #endif /* XSLT_LOCALE_WINAPI */
399
400 if (r >= xstrlen) {
401 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
402 xmlFree(xstr);
403 return(NULL);
404 }
405
406 return(xstr);
407 #endif /* XSLT_LOCALE_NONE */
408 }
409
410 /**
411 * xsltLocaleStrcmp:
412 * @locale: a locale identifier
413 * @str1: a string transformed with xsltStrxfrm
414 * @str2: a string transformed with xsltStrxfrm
415 *
416 * Compares two strings transformed with xsltStrxfrm
417 *
418 * Returns a value < 0 if str1 sorts before str2,
419 * a value > 0 if str1 sorts after str2,
420 * 0 if str1 and str2 are equal wrt sorting
421 */
422 int
xsltLocaleStrcmp(xsltLocale locale,const xsltLocaleChar * str1,const xsltLocaleChar * str2)423 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
424 (void)locale;
425 #ifdef XSLT_LOCALE_WINAPI
426 {
427 int ret;
428 if (str1 == str2) return(0);
429 if (str1 == NULL) return(-1);
430 if (str2 == NULL) return(1);
431 ret = CompareStringW(locale, 0, str1, -1, str2, -1);
432 if (ret == 0) {
433 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
434 return(0);
435 }
436 return(ret - 2);
437 }
438 #else
439 return(xmlStrcmp(str1, str2));
440 #endif
441 }
442
443 #ifdef XSLT_LOCALE_WINAPI
444 /**
445 * xsltCountSupportedLocales:
446 * @lcid: not used
447 *
448 * callback used to count locales
449 *
450 * Returns TRUE
451 */
452 BOOL CALLBACK
xsltCountSupportedLocales(LPSTR lcid)453 xsltCountSupportedLocales(LPSTR lcid) {
454 (void) lcid;
455 ++xsltLocaleListSize;
456 return(TRUE);
457 }
458
459 /**
460 * xsltIterateSupportedLocales:
461 * @lcid: not used
462 *
463 * callback used to track locales
464 *
465 * Returns TRUE if not at the end of the array
466 */
467 BOOL CALLBACK
xsltIterateSupportedLocales(LPSTR lcid)468 xsltIterateSupportedLocales(LPSTR lcid) {
469 static int count = 0;
470 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1];
471 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
472 int k, l;
473 xsltRFC1766Info *p = xsltLocaleList + count;
474
475 k = sscanf(lcid, "%lx", (long*)&p->lcid);
476 if (k < 1) goto end;
477 /*don't count terminating null character*/
478 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME,
479 (char *) iso639lang, sizeof(iso639lang));
480 if (--k < 1) goto end;
481 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME,
482 (char *) iso3136ctry, sizeof(iso3136ctry));
483 if (--l < 1) goto end;
484
485 { /*fill results*/
486 xmlChar *q = p->tag;
487 memcpy(q, iso639lang, k);
488 q += k;
489 *q++ = '-';
490 memcpy(q, iso3136ctry, l);
491 q += l;
492 *q = '\0';
493 }
494 ++count;
495 end:
496 return((count < xsltLocaleListSize) ? TRUE : FALSE);
497 }
498
499
500 static void
xsltEnumSupportedLocales(void)501 xsltEnumSupportedLocales(void) {
502 xmlRMutexLock(xsltLocaleMutex);
503 if (xsltLocaleListSize <= 0) {
504 size_t len;
505
506 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
507
508 len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
509 xsltLocaleList = xmlMalloc(len);
510 memset(xsltLocaleList, 0, len);
511 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
512 }
513 xmlRMutexUnlock(xsltLocaleMutex);
514 }
515
516 #endif /*def XSLT_LOCALE_WINAPI*/
517