xref: /reactos/dll/3rdparty/libxslt/xsltlocale.c (revision 09dde2cf)
1 /*
2  * xsltlocale.c: locale handling
3  *
4  * Reference:
5  * RFC 3066: Tags for the Identification of Languages
6  * http://www.ietf.org/rfc/rfc3066.txt
7  * ISO 639-1, ISO 3166-1
8  *
9  * Author: Nick Wellnhofer
10  * winapi port: Roumen Petrov
11  */
12 
13 #include "precomp.h"
14 
15 #include "xsltlocale.h"
16 
/*
 * ASCII-only character helpers. They work by clearing/setting bit 0x20 and
 * are only meaningful for letters; ISALPHA() is the guard that establishes
 * that. Each macro evaluates its argument exactly once, so callers may pass
 * expressions with side effects such as *p++. Arguments are parenthesized
 * so that low-precedence operators in the argument expand correctly.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)

/* Maximum component lengths, without terminating null character */
#define XSLTMAX_ISO639LANGLEN		8
#define XSLTMAX_ISO3166CNTRYLEN		8
					/* <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN		(XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
26 
27 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
28 
29 #ifdef XSLT_LOCALE_WINAPI
30 xmlRMutexPtr xsltLocaleMutex = NULL;
31 
32 struct xsltRFC1766Info_s {
33       /*note typedef unsigned char xmlChar !*/
34     xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
35       /*note typedef LCID xsltLocale !*/
36     xsltLocale lcid;
37 };
38 typedef struct xsltRFC1766Info_s xsltRFC1766Info;
39 
40 static int xsltLocaleListSize = 0;
41 static xsltRFC1766Info *xsltLocaleList = NULL;
42 
43 
44 static xsltLocale
45 xslt_locale_WINAPI(const xmlChar *languageTag) {
46     int k;
47     xsltRFC1766Info *p = xsltLocaleList;
48 
49     for (k=0; k<xsltLocaleListSize; k++, p++)
50 	if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
51     return((xsltLocale)0);
52 }
53 
54 static void xsltEnumSupportedLocales(void);
55 #endif
56 
/**
 * xsltFreeLocales:
 *
 * Shutdown hook of the locale support. With the Windows backend it releases
 * the list of supported locales while holding the locale mutex; with the
 * other backends it does nothing.
 *
 * NOTE(review): xsltLocaleListSize is left untouched here, so
 * xsltEnumSupportedLocales() will not rebuild the list after this call.
 */
void
xsltFreeLocales(void)
{
#ifdef XSLT_LOCALE_WINAPI
    xmlRMutexLock(xsltLocaleMutex);

    /* release the list and drop the dangling pointer in one critical section */
    xmlFree(xsltLocaleList);
    xsltLocaleList = NULL;

    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}
71 
72 /**
73  * xsltNewLocale:
74  * @languageTag: RFC 3066 language tag
75  *
76  * Creates a new locale of an opaque system dependent type based on the
77  * language tag.
78  *
79  * Returns the locale or NULL on error or if no matching locale was found
80  */
81 xsltLocale
82 xsltNewLocale(const xmlChar *languageTag) {
83 #ifdef XSLT_LOCALE_POSIX
84     xsltLocale locale;
85     char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
86     const xmlChar *p = languageTag;
87     const char *region = NULL;
88     char *q = localeName;
89     int i, llen;
90 
91     /* Convert something like "pt-br" to "pt_BR.utf8" */
92 
93     if (languageTag == NULL)
94 	return(NULL);
95 
96     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
97 	*q++ = TOLOWER(*p++);
98 
99     if (i == 0)
100 	return(NULL);
101 
102     llen = i;
103 
104     if (*p) {
105 	if (*p++ != '-')
106 	    return(NULL);
107         *q++ = '_';
108 
109 	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
110 	    *q++ = TOUPPER(*p++);
111 
112 	if (i == 0 || *p)
113 	    return(NULL);
114 
115         memcpy(q, ".utf8", 6);
116         locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
117         if (locale != NULL)
118             return(locale);
119 
120         /* Continue without using country code */
121 
122         q = localeName + llen;
123     }
124 
125     /* Try locale without territory, e.g. for Esperanto (eo) */
126 
127     memcpy(q, ".utf8", 6);
128     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
129     if (locale != NULL)
130         return(locale);
131 
132     /* Try to find most common country for language */
133 
134     if (llen != 2)
135         return(NULL);
136 
137     region = (char *)xsltDefaultRegion((xmlChar *)localeName);
138     if (region == NULL)
139         return(NULL);
140 
141     q = localeName + llen;
142     *q++ = '_';
143     *q++ = region[0];
144     *q++ = region[1];
145     memcpy(q, ".utf8", 6);
146     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
147 
148     return(locale);
149 #endif
150 
151 #ifdef XSLT_LOCALE_WINAPI
152 {
153     xsltLocale    locale = (xsltLocale)0;
154     xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
155     xmlChar       *q = localeName;
156     const xmlChar *p = languageTag;
157     int           i, llen;
158     const xmlChar *region = NULL;
159 
160     if (languageTag == NULL) goto end;
161 
162     xsltEnumSupportedLocales();
163 
164     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
165 	*q++ = TOLOWER(*p++);
166     if (i == 0) goto end;
167 
168     llen = i;
169     *q++ = '-';
170     if (*p) { /*if country tag is given*/
171 	if (*p++ != '-') goto end;
172 
173 	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
174 	    *q++ = TOUPPER(*p++);
175 	if (i == 0 || *p) goto end;
176 
177 	*q = '\0';
178 	locale = xslt_locale_WINAPI(localeName);
179 	if (locale != (xsltLocale)0) goto end;
180     }
181     /* Try to find most common country for language */
182     region = xsltDefaultRegion(localeName);
183     if (region == NULL) goto end;
184 
185     strcpy((char *) localeName + llen + 1, (char *) region);
186     locale = xslt_locale_WINAPI(localeName);
187 end:
188     return(locale);
189 }
190 #endif
191 
192 #ifdef XSLT_LOCALE_NONE
193     return(NULL);
194 #endif
195 }
196 
197 static const xmlChar*
198 xsltDefaultRegion(const xmlChar *localeName) {
199     xmlChar c;
200     /* region should be xmlChar, but gcc warns on all string assignments */
201     const char *region = NULL;
202 
203     c = localeName[1];
204     /* This is based on the locales from glibc 2.3.3 */
205 
206     switch (localeName[0]) {
207         case 'a':
208             if (c == 'a' || c == 'm') region = "ET";
209             else if (c == 'f') region = "ZA";
210             else if (c == 'n') region = "ES";
211             else if (c == 'r') region = "AE";
212             else if (c == 'z') region = "AZ";
213             break;
214         case 'b':
215             if (c == 'e') region = "BY";
216             else if (c == 'g') region = "BG";
217             else if (c == 'n') region = "BD";
218             else if (c == 'r') region = "FR";
219             else if (c == 's') region = "BA";
220             break;
221         case 'c':
222             if (c == 'a') region = "ES";
223             else if (c == 's') region = "CZ";
224             else if (c == 'y') region = "GB";
225             break;
226         case 'd':
227             if (c == 'a') region = "DK";
228             else if (c == 'e') region = "DE";
229             break;
230         case 'e':
231             if (c == 'l') region = "GR";
232             else if (c == 'n' || c == 'o') region = "US";
233             else if (c == 's' || c == 'u') region = "ES";
234             else if (c == 't') region = "EE";
235             break;
236         case 'f':
237             if (c == 'a') region = "IR";
238             else if (c == 'i') region = "FI";
239             else if (c == 'o') region = "FO";
240             else if (c == 'r') region = "FR";
241             break;
242         case 'g':
243             if (c == 'a') region = "IE";
244             else if (c == 'l') region = "ES";
245             else if (c == 'v') region = "GB";
246             break;
247         case 'h':
248             if (c == 'e') region = "IL";
249             else if (c == 'i') region = "IN";
250             else if (c == 'r') region = "HT";
251             else if (c == 'u') region = "HU";
252             break;
253         case 'i':
254             if (c == 'd') region = "ID";
255             else if (c == 's') region = "IS";
256             else if (c == 't') region = "IT";
257             else if (c == 'w') region = "IL";
258             break;
259         case 'j':
260             if (c == 'a') region = "JP";
261             break;
262         case 'k':
263             if (c == 'l') region = "GL";
264             else if (c == 'o') region = "KR";
265             else if (c == 'w') region = "GB";
266             break;
267         case 'l':
268             if (c == 't') region = "LT";
269             else if (c == 'v') region = "LV";
270             break;
271         case 'm':
272             if (c == 'k') region = "MK";
273             else if (c == 'l' || c == 'r') region = "IN";
274             else if (c == 'n') region = "MN";
275             else if (c == 's') region = "MY";
276             else if (c == 't') region = "MT";
277             break;
278         case 'n':
279             if (c == 'b' || c == 'n' || c == 'o') region = "NO";
280             else if (c == 'e') region = "NP";
281             else if (c == 'l') region = "NL";
282             break;
283         case 'o':
284             if (c == 'm') region = "ET";
285             break;
286         case 'p':
287             if (c == 'a') region = "IN";
288             else if (c == 'l') region = "PL";
289             else if (c == 't') region = "PT";
290             break;
291         case 'r':
292             if (c == 'o') region = "RO";
293             else if (c == 'u') region = "RU";
294             break;
295         case 's':
296             switch (c) {
297                 case 'e': region = "NO"; break;
298                 case 'h': region = "YU"; break;
299                 case 'k': region = "SK"; break;
300                 case 'l': region = "SI"; break;
301                 case 'o': region = "ET"; break;
302                 case 'q': region = "AL"; break;
303                 case 't': region = "ZA"; break;
304                 case 'v': region = "SE"; break;
305             }
306             break;
307         case 't':
308             if (c == 'a' || c == 'e') region = "IN";
309             else if (c == 'h') region = "TH";
310             else if (c == 'i') region = "ER";
311             else if (c == 'r') region = "TR";
312             else if (c == 't') region = "RU";
313             break;
314         case 'u':
315             if (c == 'k') region = "UA";
316             else if (c == 'r') region = "PK";
317             break;
318         case 'v':
319             if (c == 'i') region = "VN";
320             break;
321         case 'w':
322             if (c == 'a') region = "BE";
323             break;
324         case 'x':
325             if (c == 'h') region = "ZA";
326             break;
327         case 'z':
328             if (c == 'h') region = "CN";
329             else if (c == 'u') region = "ZA";
330             break;
331     }
332     return((xmlChar *)region);
333 }
334 
335 /**
336  * xsltFreeLocale:
337  * @locale: the locale to free
338  *
339  * Frees a locale created with xsltNewLocale
340  */
341 void
342 xsltFreeLocale(xsltLocale locale) {
343 #ifdef XSLT_LOCALE_POSIX
344     if (locale != NULL)
345         freelocale(locale);
346 #endif
347 }
348 
349 /**
350  * xsltStrxfrm:
351  * @locale: locale created with xsltNewLocale
352  * @string: UTF-8 string to transform
353  *
354  * Transforms a string according to locale. The transformed string must then be
355  * compared with xsltLocaleStrcmp and freed with xmlFree.
356  *
357  * Returns the transformed string or NULL on error
358  */
359 xsltLocaleChar *
360 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
361 {
362 #ifdef XSLT_LOCALE_NONE
363     return(NULL);
364 #else
365     size_t xstrlen, r;
366     xsltLocaleChar *xstr;
367 
368 #ifdef XSLT_LOCALE_POSIX
369     xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
370     xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
371     if (xstr == NULL) {
372 	xsltTransformError(NULL, NULL, NULL,
373 	    "xsltStrxfrm : out of memory error\n");
374 	return(NULL);
375     }
376 
377     r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
378 #endif
379 
380 #ifdef XSLT_LOCALE_WINAPI
381     xstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0);
382     if (xstrlen == 0) {
383         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
384         return(NULL);
385     }
386     xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
387     if (xstr == NULL) {
388         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
389         return(NULL);
390     }
391     r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, xstr, xstrlen);
392     if (r == 0) {
393         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
394         xmlFree(xstr);
395         return(NULL);
396     }
397     return(xstr);
398 #endif /* XSLT_LOCALE_WINAPI */
399 
400     if (r >= xstrlen) {
401 	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
402         xmlFree(xstr);
403         return(NULL);
404     }
405 
406     return(xstr);
407 #endif /* XSLT_LOCALE_NONE */
408 }
409 
410 /**
411  * xsltLocaleStrcmp:
412  * @locale: a locale identifier
413  * @str1: a string transformed with xsltStrxfrm
414  * @str2: a string transformed with xsltStrxfrm
415  *
416  * Compares two strings transformed with xsltStrxfrm
417  *
418  * Returns a value < 0 if str1 sorts before str2,
419  *         a value > 0 if str1 sorts after str2,
420  *         0 if str1 and str2 are equal wrt sorting
421  */
422 int
423 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
424     (void)locale;
425 #ifdef XSLT_LOCALE_WINAPI
426 {
427     int ret;
428     if (str1 == str2) return(0);
429     if (str1 == NULL) return(-1);
430     if (str2 == NULL) return(1);
431     ret = CompareStringW(locale, 0, str1, -1, str2, -1);
432     if (ret == 0) {
433         xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
434         return(0);
435     }
436     return(ret - 2);
437 }
438 #else
439     return(xmlStrcmp(str1, str2));
440 #endif
441 }
442 
443 #ifdef XSLT_LOCALE_WINAPI
444 /**
445  * xsltCountSupportedLocales:
446  * @lcid: not used
447  *
448  * callback used to count locales
449  *
450  * Returns TRUE
451  */
452 BOOL CALLBACK
453 xsltCountSupportedLocales(LPSTR lcid) {
454     (void) lcid;
455     ++xsltLocaleListSize;
456     return(TRUE);
457 }
458 
459 /**
460  * xsltIterateSupportedLocales:
461  * @lcid: not used
462  *
463  * callback used to track locales
464  *
465  * Returns TRUE if not at the end of the array
466  */
467 BOOL CALLBACK
468 xsltIterateSupportedLocales(LPSTR lcid) {
469     static int count = 0;
470     xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
471     xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
472     int        k, l;
473     xsltRFC1766Info *p = xsltLocaleList + count;
474 
475     k = sscanf(lcid, "%lx", (long*)&p->lcid);
476     if (k < 1) goto end;
477     /*don't count terminating null character*/
478     k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME,
479                        (char *) iso639lang, sizeof(iso639lang));
480     if (--k < 1) goto end;
481     l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME,
482                        (char *) iso3136ctry, sizeof(iso3136ctry));
483     if (--l < 1) goto end;
484 
485     {  /*fill results*/
486 	xmlChar    *q = p->tag;
487 	memcpy(q, iso639lang, k);
488 	q += k;
489 	*q++ = '-';
490 	memcpy(q, iso3136ctry, l);
491 	q += l;
492 	*q = '\0';
493     }
494     ++count;
495 end:
496     return((count < xsltLocaleListSize) ? TRUE : FALSE);
497 }
498 
499 
500 static void
501 xsltEnumSupportedLocales(void) {
502     xmlRMutexLock(xsltLocaleMutex);
503     if (xsltLocaleListSize <= 0) {
504 	size_t len;
505 
506 	EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
507 
508 	len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
509 	xsltLocaleList = xmlMalloc(len);
510 	memset(xsltLocaleList, 0, len);
511 	EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
512     }
513     xmlRMutexUnlock(xsltLocaleMutex);
514 }
515 
516 #endif /*def XSLT_LOCALE_WINAPI*/
517