xref: /reactos/dll/3rdparty/libxslt/xsltlocale.c (revision 02e84521)
1 /*
2  * xsltlocale.c: locale handling
3  *
4  * Reference:
5  * RFC 3066: Tags for the Identification of Languages
6  * http://www.ietf.org/rfc/rfc3066.txt
7  * ISO 639-1, ISO 3166-1
8  *
9  * Author: Nick Wellnhofer
10  * winapi port: Roumen Petrov
11  */
12 
13 #include "precomp.h"
14 
15 #include "xsltlocale.h"
16 
/*
 * ASCII-only character helpers.
 *
 * TOUPPER clears and TOLOWER sets bit 0x20, which is only meaningful for
 * ASCII letters. ISALPHA folds the case first and then does a single
 * unsigned range check against 'A'..'Z'.
 *
 * Every macro expands its argument exactly once, so callers may pass
 * expressions with side effects such as *p++. The argument is parenthesized
 * so that compound expressions are handled correctly.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)

/* Maximum component lengths, without the terminating null character */
#define XSLTMAX_ISO639LANGLEN		8
#define XSLTMAX_ISO3166CNTRYLEN		8
					/* <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN		(XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
26 
27 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
28 
29 #ifdef XSLT_LOCALE_WINAPI
30 xmlRMutexPtr xsltLocaleMutex = NULL;
31 
32 struct xsltRFC1766Info_s {
33       /*note typedef unsigned char xmlChar !*/
34     xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
35       /*note typedef LCID xsltLocale !*/
36     xsltLocale lcid;
37 };
38 typedef struct xsltRFC1766Info_s xsltRFC1766Info;
39 
40 static int xsltLocaleListSize = 0;
41 static xsltRFC1766Info *xsltLocaleList = NULL;
42 
43 
44 static xsltLocale
45 xslt_locale_WINAPI(const xmlChar *languageTag) {
46     int k;
47     xsltRFC1766Info *p = xsltLocaleList;
48 
49     for (k=0; k<xsltLocaleListSize; k++, p++)
50 	if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
51     return((xsltLocale)0);
52 }
53 
54 static void xsltEnumSupportedLocales(void);
55 #endif
56 
/**
 * xsltFreeLocales:
 *
 * Shutdown hook for the locale support. Only the WINAPI configuration keeps
 * global state (the table of supported locales); it is released here under
 * the locale mutex. In every other configuration this function does nothing.
 *
 * NOTE(review): xsltLocaleListSize is left untouched, so the table is not
 * rebuilt by a later lookup - confirm callers never use locales after this.
 */
void
xsltFreeLocales(void)
{
#if defined(XSLT_LOCALE_WINAPI)
    xmlRMutexLock(xsltLocaleMutex);

    /* release the table and forget the stale pointer */
    xmlFree(xsltLocaleList);
    xsltLocaleList = NULL;

    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
71 
/**
 * xsltNewLocale:
 * @languageTag: RFC 3066 language tag
 *
 * Creates a new locale of an opaque system dependent type based on the
 * language tag.
 *
 * The tag is parsed as <lang>[-<country>], where each part is a run of
 * ASCII letters of at most XSLTMAX_ISO639LANGLEN / XSLTMAX_ISO3166CNTRYLEN
 * characters; any other shape is rejected. The language is lower-cased and
 * the country upper-cased before the lookup.
 *
 * XSLT_LOCALE_POSIX: tries "<lang>_<CNTRY>.utf8" (when a country is given),
 * then "<lang>.utf8", then, for two-letter languages only,
 * "<lang>_<default region>.utf8".
 * XSLT_LOCALE_WINAPI: looks up "<lang>-<CNTRY>" (when a country is given) in
 * the table of supported locales, then "<lang>-<default region>".
 * XSLT_LOCALE_NONE: always fails.
 *
 * Returns the locale or NULL on error or if no matching locale was found
 * (the WINAPI variant reports failure as a zero locale identifier)
 */
xsltLocale
xsltNewLocale(const xmlChar *languageTag) {
#ifdef XSLT_LOCALE_POSIX
    xsltLocale locale;
    char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
    const xmlChar *p = languageTag;
    const char *region = NULL;
    char *q = localeName;
    int i, llen;

    /* Convert something like "pt-br" to "pt_BR.utf8" */

    if (languageTag == NULL)
	return(NULL);

    /* copy the language part, lower-cased */
    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
	*q++ = TOLOWER(*p++);

    if (i == 0)
	return(NULL);

    /* remember where the language ends so the suffix can be rewritten */
    llen = i;

    if (*p) {
	if (*p++ != '-')
	    return(NULL);
        *q++ = '_';

	/* copy the country part, upper-cased */
	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
	    *q++ = TOUPPER(*p++);

	/* reject an empty country or trailing characters after it */
	if (i == 0 || *p)
	    return(NULL);

        memcpy(q, ".utf8", 6);
        locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
        if (locale != NULL)
            return(locale);

        /* Continue without using country code */

        q = localeName + llen;
    }

    /* Try locale without territory, e.g. for Esperanto (eo) */

    memcpy(q, ".utf8", 6);
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
    if (locale != NULL)
        return(locale);

    /* Try to find most common country for language */

    /* the default-region table is keyed on two-letter language codes */
    if (llen != 2)
        return(NULL);

    region = (char *)xsltDefaultRegion((xmlChar *)localeName);
    if (region == NULL)
        return(NULL);

    /* rebuild the name as "<lang>_<region>.utf8" */
    q = localeName + llen;
    *q++ = '_';
    *q++ = region[0];
    *q++ = region[1];
    memcpy(q, ".utf8", 6);
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);

    return(locale);
#endif

#ifdef XSLT_LOCALE_WINAPI
{
    xsltLocale    locale = (xsltLocale)0;
    xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
    xmlChar       *q = localeName;
    const xmlChar *p = languageTag;
    int           i, llen;
    const xmlChar *region = NULL;

    if (languageTag == NULL) goto end;

    /* make sure the table of supported locales has been built */
    xsltEnumSupportedLocales();

    /* copy the language part, lower-cased */
    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
	*q++ = TOLOWER(*p++);
    if (i == 0) goto end;

    llen = i;
    /*
     * The separator is written unconditionally: the region, explicit or
     * default, is appended right after it.
     */
    *q++ = '-';
    if (*p) { /*if country tag is given*/
	if (*p++ != '-') goto end;

	/* copy the country part, upper-cased */
	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
	    *q++ = TOUPPER(*p++);
	/* reject an empty country or trailing characters after it */
	if (i == 0 || *p) goto end;

	*q = '\0';
	locale = xslt_locale_WINAPI(localeName);
	if (locale != (xsltLocale)0) goto end;
    }
    /* Try to find most common country for language */
    /*
     * NOTE(review): unlike the POSIX branch there is no llen == 2 check
     * here, and xsltDefaultRegion only looks at the first two characters,
     * so a longer language code is matched on its two-letter prefix -
     * confirm this is intended.
     */
    region = xsltDefaultRegion(localeName);
    if (region == NULL) goto end;

    /* overwrite whatever follows the '-' with the default region */
    strcpy((char *) localeName + llen + 1, (char *) region);
    locale = xslt_locale_WINAPI(localeName);
end:
    return(locale);
}
#endif

#ifdef XSLT_LOCALE_NONE
    return(NULL);
#endif
}
196 
197 static const xmlChar*
198 xsltDefaultRegion(const xmlChar *localeName) {
199     xmlChar c;
200     /* region should be xmlChar, but gcc warns on all string assignments */
201     const char *region = NULL;
202 
203     c = localeName[1];
204     /* This is based on the locales from glibc 2.3.3 */
205 
206     switch (localeName[0]) {
207         case 'a':
208             if (c == 'a' || c == 'm') region = "ET";
209             else if (c == 'f') region = "ZA";
210             else if (c == 'n') region = "ES";
211             else if (c == 'r') region = "AE";
212             else if (c == 'z') region = "AZ";
213             break;
214         case 'b':
215             if (c == 'e') region = "BY";
216             else if (c == 'g') region = "BG";
217             else if (c == 'n') region = "BD";
218             else if (c == 'r') region = "FR";
219             else if (c == 's') region = "BA";
220             break;
221         case 'c':
222             if (c == 'a') region = "ES";
223             else if (c == 's') region = "CZ";
224             else if (c == 'y') region = "GB";
225             break;
226         case 'd':
227             if (c == 'a') region = "DK";
228             else if (c == 'e') region = "DE";
229             break;
230         case 'e':
231             if (c == 'l') region = "GR";
232             else if (c == 'n' || c == 'o') region = "US";
233             else if (c == 's' || c == 'u') region = "ES";
234             else if (c == 't') region = "EE";
235             break;
236         case 'f':
237             if (c == 'a') region = "IR";
238             else if (c == 'i') region = "FI";
239             else if (c == 'o') region = "FO";
240             else if (c == 'r') region = "FR";
241             break;
242         case 'g':
243             if (c == 'a') region = "IE";
244             else if (c == 'l') region = "ES";
245             else if (c == 'v') region = "GB";
246             break;
247         case 'h':
248             if (c == 'e') region = "IL";
249             else if (c == 'i') region = "IN";
250             else if (c == 'r') region = "HT";
251             else if (c == 'u') region = "HU";
252             break;
253         case 'i':
254             if (c == 'd') region = "ID";
255             else if (c == 's') region = "IS";
256             else if (c == 't') region = "IT";
257             else if (c == 'w') region = "IL";
258             break;
259         case 'j':
260             if (c == 'a') region = "JP";
261             break;
262         case 'k':
263             if (c == 'l') region = "GL";
264             else if (c == 'o') region = "KR";
265             else if (c == 'w') region = "GB";
266             break;
267         case 'l':
268             if (c == 't') region = "LT";
269             else if (c == 'v') region = "LV";
270             break;
271         case 'm':
272             if (c == 'k') region = "MK";
273             else if (c == 'l' || c == 'r') region = "IN";
274             else if (c == 'n') region = "MN";
275             else if (c == 's') region = "MY";
276             else if (c == 't') region = "MT";
277             break;
278         case 'n':
279             if (c == 'b' || c == 'n' || c == 'o') region = "NO";
280             else if (c == 'e') region = "NP";
281             else if (c == 'l') region = "NL";
282             break;
283         case 'o':
284             if (c == 'm') region = "ET";
285             break;
286         case 'p':
287             if (c == 'a') region = "IN";
288             else if (c == 'l') region = "PL";
289             else if (c == 't') region = "PT";
290             break;
291         case 'r':
292             if (c == 'o') region = "RO";
293             else if (c == 'u') region = "RU";
294             break;
295         case 's':
296             switch (c) {
297                 case 'e': region = "NO"; break;
298                 case 'h': region = "YU"; break;
299                 case 'k': region = "SK"; break;
300                 case 'l': region = "SI"; break;
301                 case 'o': region = "ET"; break;
302                 case 'q': region = "AL"; break;
303                 case 't': region = "ZA"; break;
304                 case 'v': region = "SE"; break;
305             }
306             break;
307         case 't':
308             if (c == 'a' || c == 'e') region = "IN";
309             else if (c == 'h') region = "TH";
310             else if (c == 'i') region = "ER";
311             else if (c == 'r') region = "TR";
312             else if (c == 't') region = "RU";
313             break;
314         case 'u':
315             if (c == 'k') region = "UA";
316             else if (c == 'r') region = "PK";
317             break;
318         case 'v':
319             if (c == 'i') region = "VN";
320             break;
321         case 'w':
322             if (c == 'a') region = "BE";
323             break;
324         case 'x':
325             if (c == 'h') region = "ZA";
326             break;
327         case 'z':
328             if (c == 'h') region = "CN";
329             else if (c == 'u') region = "ZA";
330             break;
331     }
332     return((xmlChar *)region);
333 }
334 
335 /**
336  * xsltFreeLocale:
337  * @locale: the locale to free
338  *
339  * Frees a locale created with xsltNewLocale
340  */
341 void
342 xsltFreeLocale(xsltLocale locale) {
343 #ifdef XSLT_LOCALE_POSIX
344     freelocale(locale);
345 #endif
346 }
347 
348 /**
349  * xsltStrxfrm:
350  * @locale: locale created with xsltNewLocale
351  * @string: UTF-8 string to transform
352  *
353  * Transforms a string according to locale. The transformed string must then be
354  * compared with xsltLocaleStrcmp and freed with xmlFree.
355  *
356  * Returns the transformed string or NULL on error
357  */
358 xsltLocaleChar *
359 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
360 {
361 #ifdef XSLT_LOCALE_NONE
362     return(NULL);
363 #else
364     size_t xstrlen, r;
365     xsltLocaleChar *xstr;
366 
367 #ifdef XSLT_LOCALE_POSIX
368     xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
369     xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
370     if (xstr == NULL) {
371 	xsltTransformError(NULL, NULL, NULL,
372 	    "xsltStrxfrm : out of memory error\n");
373 	return(NULL);
374     }
375 
376     r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
377 #endif
378 
379 #ifdef XSLT_LOCALE_WINAPI
380     xstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0);
381     if (xstrlen == 0) {
382         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
383         return(NULL);
384     }
385     xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
386     if (xstr == NULL) {
387         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
388         return(NULL);
389     }
390     r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, xstr, xstrlen);
391     if (r == 0) {
392         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
393         xmlFree(xstr);
394         return(NULL);
395     }
396     return(xstr);
397 #endif /* XSLT_LOCALE_WINAPI */
398 
399     if (r >= xstrlen) {
400 	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
401         xmlFree(xstr);
402         return(NULL);
403     }
404 
405     return(xstr);
406 #endif /* XSLT_LOCALE_NONE */
407 }
408 
409 /**
410  * xsltLocaleStrcmp:
411  * @locale: a locale identifier
412  * @str1: a string transformed with xsltStrxfrm
413  * @str2: a string transformed with xsltStrxfrm
414  *
415  * Compares two strings transformed with xsltStrxfrm
416  *
417  * Returns a value < 0 if str1 sorts before str2,
418  *         a value > 0 if str1 sorts after str2,
419  *         0 if str1 and str2 are equal wrt sorting
420  */
421 int
422 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
423     (void)locale;
424 #ifdef XSLT_LOCALE_WINAPI
425 {
426     int ret;
427     if (str1 == str2) return(0);
428     if (str1 == NULL) return(-1);
429     if (str2 == NULL) return(1);
430     ret = CompareStringW(locale, 0, str1, -1, str2, -1);
431     if (ret == 0) {
432         xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
433         return(0);
434     }
435     return(ret - 2);
436 }
437 #else
438     return(xmlStrcmp(str1, str2));
439 #endif
440 }
441 
442 #ifdef XSLT_LOCALE_WINAPI
443 /**
444  * xsltCountSupportedLocales:
445  * @lcid: not used
446  *
447  * callback used to count locales
448  *
449  * Returns TRUE
450  */
451 BOOL CALLBACK
452 xsltCountSupportedLocales(LPSTR lcid) {
453     (void) lcid;
454     ++xsltLocaleListSize;
455     return(TRUE);
456 }
457 
458 /**
459  * xsltIterateSupportedLocales:
460  * @lcid: not used
461  *
462  * callback used to track locales
463  *
464  * Returns TRUE if not at the end of the array
465  */
466 BOOL CALLBACK
467 xsltIterateSupportedLocales(LPSTR lcid) {
468     static int count = 0;
469     xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
470     xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
471     int        k, l;
472     xsltRFC1766Info *p = xsltLocaleList + count;
473 
474     k = sscanf(lcid, "%lx", (long*)&p->lcid);
475     if (k < 1) goto end;
476     /*don't count terminating null character*/
477     k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME,
478                        (char *) iso639lang, sizeof(iso639lang));
479     if (--k < 1) goto end;
480     l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME,
481                        (char *) iso3136ctry, sizeof(iso3136ctry));
482     if (--l < 1) goto end;
483 
484     {  /*fill results*/
485 	xmlChar    *q = p->tag;
486 	memcpy(q, iso639lang, k);
487 	q += k;
488 	*q++ = '-';
489 	memcpy(q, iso3136ctry, l);
490 	q += l;
491 	*q = '\0';
492     }
493     ++count;
494 end:
495     return((count < xsltLocaleListSize) ? TRUE : FALSE);
496 }
497 
498 
499 static void
500 xsltEnumSupportedLocales(void) {
501     xmlRMutexLock(xsltLocaleMutex);
502     if (xsltLocaleListSize <= 0) {
503 	size_t len;
504 
505 	EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
506 
507 	len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
508 	xsltLocaleList = xmlMalloc(len);
509 	memset(xsltLocaleList, 0, len);
510 	EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
511     }
512     xmlRMutexUnlock(xsltLocaleMutex);
513 }
514 
515 #endif /*def XSLT_LOCALE_WINAPI*/
516