1 /* 2 * xsltlocale.c: locale handling 3 * 4 * Reference: 5 * RFC 3066: Tags for the Identification of Languages 6 * http://www.ietf.org/rfc/rfc3066.txt 7 * ISO 639-1, ISO 3166-1 8 * 9 * Author: Nick Wellnhofer 10 * winapi port: Roumen Petrov 11 */ 12 13 #include "precomp.h" 14 15 #include "xsltlocale.h" 16 17 #define TOUPPER(c) (c & ~0x20) 18 #define TOLOWER(c) (c | 0x20) 19 #define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26) 20 21 /*without terminating null character*/ 22 #define XSLTMAX_ISO639LANGLEN 8 23 #define XSLTMAX_ISO3166CNTRYLEN 8 24 /* <lang>-<cntry> */ 25 #define XSLTMAX_LANGTAGLEN (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN) 26 27 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName); 28 29 #ifdef XSLT_LOCALE_WINAPI 30 xmlRMutexPtr xsltLocaleMutex = NULL; 31 32 struct xsltRFC1766Info_s { 33 /*note typedef unsigned char xmlChar !*/ 34 xmlChar tag[XSLTMAX_LANGTAGLEN+1]; 35 /*note typedef LCID xsltLocale !*/ 36 xsltLocale lcid; 37 }; 38 typedef struct xsltRFC1766Info_s xsltRFC1766Info; 39 40 static int xsltLocaleListSize = 0; 41 static xsltRFC1766Info *xsltLocaleList = NULL; 42 43 44 static xsltLocale 45 xslt_locale_WINAPI(const xmlChar *languageTag) { 46 int k; 47 xsltRFC1766Info *p = xsltLocaleList; 48 49 for (k=0; k<xsltLocaleListSize; k++, p++) 50 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid; 51 return((xsltLocale)0); 52 } 53 54 static void xsltEnumSupportedLocales(void); 55 #endif 56 57 /** 58 * xsltFreeLocales: 59 * 60 * Cleanup function for the locale support on shutdown 61 */ 62 void 63 xsltFreeLocales(void) { 64 #ifdef XSLT_LOCALE_WINAPI 65 xmlRMutexLock(xsltLocaleMutex); 66 xmlFree(xsltLocaleList); 67 xsltLocaleList = NULL; 68 xmlRMutexUnlock(xsltLocaleMutex); 69 #endif 70 } 71 72 /** 73 * xsltNewLocale: 74 * @languageTag: RFC 3066 language tag 75 * 76 * Creates a new locale of an opaque system dependent type based on the 77 * language tag. 78 * 79 * Returns the locale or NULL on error or if no matching locale was found 80 */ 81 xsltLocale 82 xsltNewLocale(const xmlChar *languageTag) { 83 #ifdef XSLT_LOCALE_POSIX 84 xsltLocale locale; 85 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */ 86 const xmlChar *p = languageTag; 87 const char *region = NULL; 88 char *q = localeName; 89 int i, llen; 90 91 /* Convert something like "pt-br" to "pt_BR.utf8" */ 92 93 if (languageTag == NULL) 94 return(NULL); 95 96 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 97 *q++ = TOLOWER(*p++); 98 99 if (i == 0) 100 return(NULL); 101 102 llen = i; 103 104 if (*p) { 105 if (*p++ != '-') 106 return(NULL); 107 *q++ = '_'; 108 109 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 110 *q++ = TOUPPER(*p++); 111 112 if (i == 0 || *p) 113 return(NULL); 114 115 memcpy(q, ".utf8", 6); 116 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 117 if (locale != NULL) 118 return(locale); 119 120 /* Continue without using country code */ 121 122 q = localeName + llen; 123 } 124 125 /* Try locale without territory, e.g. for Esperanto (eo) */ 126 127 memcpy(q, ".utf8", 6); 128 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 129 if (locale != NULL) 130 return(locale); 131 132 /* Try to find most common country for language */ 133 134 if (llen != 2) 135 return(NULL); 136 137 region = (char *)xsltDefaultRegion((xmlChar *)localeName); 138 if (region == NULL) 139 return(NULL); 140 141 q = localeName + llen; 142 *q++ = '_'; 143 *q++ = region[0]; 144 *q++ = region[1]; 145 memcpy(q, ".utf8", 6); 146 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 147 148 return(locale); 149 #endif 150 151 #ifdef XSLT_LOCALE_WINAPI 152 { 153 xsltLocale locale = (xsltLocale)0; 154 xmlChar localeName[XSLTMAX_LANGTAGLEN+1]; 155 xmlChar *q = localeName; 156 const xmlChar *p = languageTag; 157 int i, llen; 158 const xmlChar *region = NULL; 159 160 if (languageTag == NULL) goto end; 161 162 xsltEnumSupportedLocales(); 163 164 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 165 *q++ = TOLOWER(*p++); 166 if (i == 0) goto end; 167 168 llen = i; 169 *q++ = '-'; 170 if (*p) { /*if country tag is given*/ 171 if (*p++ != '-') goto end; 172 173 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 174 *q++ = TOUPPER(*p++); 175 if (i == 0 || *p) goto end; 176 177 *q = '\0'; 178 locale = xslt_locale_WINAPI(localeName); 179 if (locale != (xsltLocale)0) goto end; 180 } 181 /* Try to find most common country for language */ 182 region = xsltDefaultRegion(localeName); 183 if (region == NULL) goto end; 184 185 strcpy((char *) localeName + llen + 1, (char *) region); 186 locale = xslt_locale_WINAPI(localeName); 187 end: 188 return(locale); 189 } 190 #endif 191 192 #ifdef XSLT_LOCALE_NONE 193 return(NULL); 194 #endif 195 } 196 197 static const xmlChar* 198 xsltDefaultRegion(const xmlChar *localeName) { 199 xmlChar c; 200 /* region should be xmlChar, but gcc warns on all string assignments */ 201 const char *region = NULL; 202 203 c = localeName[1]; 204 /* This is based on the locales from glibc 2.3.3 */ 205 206 switch (localeName[0]) { 207 case 'a': 208 if (c == 'a' || c == 'm') region = "ET"; 209 else if (c == 'f') region = "ZA"; 210 else if (c == 'n') region = "ES"; 211 else if (c == 'r') region = "AE"; 212 else if (c == 'z') region = "AZ"; 213 break; 214 case 'b': 215 if (c == 'e') region = "BY"; 216 else if (c == 'g') region = "BG"; 217 else if (c == 'n') region = "BD"; 218 else if (c == 'r') region = "FR"; 219 else if (c == 's') region = "BA"; 220 break; 221 case 'c': 222 if (c == 'a') region = "ES"; 223 else if (c == 's') region = "CZ"; 224 else if (c == 'y') region = "GB"; 225 break; 226 case 'd': 227 if (c == 'a') region = "DK"; 228 else if (c == 'e') region = "DE"; 229 break; 230 case 'e': 231 if (c == 'l') region = "GR"; 232 else if (c == 'n' || c == 'o') region = "US"; 233 else if (c == 's' || c == 'u') region = "ES"; 234 else if (c == 't') region = "EE"; 235 break; 236 case 'f': 237 if (c == 'a') region = "IR"; 238 else if (c == 'i') region = "FI"; 239 else if (c == 'o') region = "FO"; 240 else if (c == 'r') region = "FR"; 241 break; 242 case 'g': 243 if (c == 'a') region = "IE"; 244 else if (c == 'l') region = "ES"; 245 else if (c == 'v') region = "GB"; 246 break; 247 case 'h': 248 if (c == 'e') region = "IL"; 249 else if (c == 'i') region = "IN"; 250 else if (c == 'r') region = "HT"; 251 else if (c == 'u') region = "HU"; 252 break; 253 case 'i': 254 if (c == 'd') region = "ID"; 255 else if (c == 's') region = "IS"; 256 else if (c == 't') region = "IT"; 257 else if (c == 'w') region = "IL"; 258 break; 259 case 'j': 260 if (c == 'a') region = "JP"; 261 break; 262 case 'k': 263 if (c == 'l') region = "GL"; 264 else if (c == 'o') region = "KR"; 265 else if (c == 'w') region = "GB"; 266 break; 267 case 'l': 268 if (c == 't') region = "LT"; 269 else if (c == 'v') region = "LV"; 270 break; 271 case 'm': 272 if (c == 'k') region = "MK"; 273 else if (c == 'l' || c == 'r') region = "IN"; 274 else if (c == 'n') region = "MN"; 275 else if (c == 's') region = "MY"; 276 else if (c == 't') region = "MT"; 277 break; 278 case 'n': 279 if (c == 'b' || c == 'n' || c == 'o') region = "NO"; 280 else if (c == 'e') region = "NP"; 281 else if (c == 'l') region = "NL"; 282 break; 283 case 'o': 284 if (c == 'm') region = "ET"; 285 break; 286 case 'p': 287 if (c == 'a') region = "IN"; 288 else if (c == 'l') region = "PL"; 289 else if (c == 't') region = "PT"; 290 break; 291 case 'r': 292 if (c == 'o') region = "RO"; 293 else if (c == 'u') region = "RU"; 294 break; 295 case 's': 296 switch (c) { 297 case 'e': region = "NO"; break; 298 case 'h': region = "YU"; break; 299 case 'k': region = "SK"; break; 300 case 'l': region = "SI"; break; 301 case 'o': region = "ET"; break; 302 case 'q': region = "AL"; break; 303 case 't': region = "ZA"; break; 304 case 'v': region = "SE"; break; 305 } 306 break; 307 case 't': 308 if (c == 'a' || c == 'e') region = "IN"; 309 else if (c == 'h') region = "TH"; 310 else if (c == 'i') region = "ER"; 311 else if (c == 'r') region = "TR"; 312 else if (c == 't') region = "RU"; 313 break; 314 case 'u': 315 if (c == 'k') region = "UA"; 316 else if (c == 'r') region = "PK"; 317 break; 318 case 'v': 319 if (c == 'i') region = "VN"; 320 break; 321 case 'w': 322 if (c == 'a') region = "BE"; 323 break; 324 case 'x': 325 if (c == 'h') region = "ZA"; 326 break; 327 case 'z': 328 if (c == 'h') region = "CN"; 329 else if (c == 'u') region = "ZA"; 330 break; 331 } 332 return((xmlChar *)region); 333 } 334 335 /** 336 * xsltFreeLocale: 337 * @locale: the locale to free 338 * 339 * Frees a locale created with xsltNewLocale 340 */ 341 void 342 xsltFreeLocale(xsltLocale locale) { 343 #ifdef XSLT_LOCALE_POSIX 344 if (locale != NULL) 345 freelocale(locale); 346 #endif 347 } 348 349 /** 350 * xsltStrxfrm: 351 * @locale: locale created with xsltNewLocale 352 * @string: UTF-8 string to transform 353 * 354 * Transforms a string according to locale. The transformed string must then be 355 * compared with xsltLocaleStrcmp and freed with xmlFree. 356 * 357 * Returns the transformed string or NULL on error 358 */ 359 xsltLocaleChar * 360 xsltStrxfrm(xsltLocale locale, const xmlChar *string) 361 { 362 #ifdef XSLT_LOCALE_NONE 363 return(NULL); 364 #else 365 size_t xstrlen, r; 366 xsltLocaleChar *xstr; 367 368 #ifdef XSLT_LOCALE_POSIX 369 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1; 370 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen); 371 if (xstr == NULL) { 372 xsltTransformError(NULL, NULL, NULL, 373 "xsltStrxfrm : out of memory error\n"); 374 return(NULL); 375 } 376 377 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale); 378 #endif 379 380 #ifdef XSLT_LOCALE_WINAPI 381 xstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0); 382 if (xstrlen == 0) { 383 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n"); 384 return(NULL); 385 } 386 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar)); 387 if (xstr == NULL) { 388 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n"); 389 return(NULL); 390 } 391 r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, xstr, xstrlen); 392 if (r == 0) { 393 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n"); 394 xmlFree(xstr); 395 return(NULL); 396 } 397 return(xstr); 398 #endif /* XSLT_LOCALE_WINAPI */ 399 400 if (r >= xstrlen) { 401 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n"); 402 xmlFree(xstr); 403 return(NULL); 404 } 405 406 return(xstr); 407 #endif /* XSLT_LOCALE_NONE */ 408 } 409 410 /** 411 * xsltLocaleStrcmp: 412 * @locale: a locale identifier 413 * @str1: a string transformed with xsltStrxfrm 414 * @str2: a string transformed with xsltStrxfrm 415 * 416 * Compares two strings transformed with xsltStrxfrm 417 * 418 * Returns a value < 0 if str1 sorts before str2, 419 * a value > 0 if str1 sorts after str2, 420 * 0 if str1 and str2 are equal wrt sorting 421 */ 422 int 423 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) { 424 (void)locale; 425 #ifdef XSLT_LOCALE_WINAPI 426 { 427 int ret; 428 if (str1 == str2) return(0); 429 if (str1 == NULL) return(-1); 430 if (str2 == NULL) return(1); 431 ret = CompareStringW(locale, 0, str1, -1, str2, -1); 432 if (ret == 0) { 433 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n"); 434 return(0); 435 } 436 return(ret - 2); 437 } 438 #else 439 return(xmlStrcmp(str1, str2)); 440 #endif 441 } 442 443 #ifdef XSLT_LOCALE_WINAPI 444 /** 445 * xsltCountSupportedLocales: 446 * @lcid: not used 447 * 448 * callback used to count locales 449 * 450 * Returns TRUE 451 */ 452 BOOL CALLBACK 453 xsltCountSupportedLocales(LPSTR lcid) { 454 (void) lcid; 455 ++xsltLocaleListSize; 456 return(TRUE); 457 } 458 459 /** 460 * xsltIterateSupportedLocales: 461 * @lcid: not used 462 * 463 * callback used to track locales 464 * 465 * Returns TRUE if not at the end of the array 466 */ 467 BOOL CALLBACK 468 xsltIterateSupportedLocales(LPSTR lcid) { 469 static int count = 0; 470 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1]; 471 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1]; 472 int k, l; 473 xsltRFC1766Info *p = xsltLocaleList + count; 474 475 k = sscanf(lcid, "%lx", (long*)&p->lcid); 476 if (k < 1) goto end; 477 /*don't count terminating null character*/ 478 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME, 479 (char *) iso639lang, sizeof(iso639lang)); 480 if (--k < 1) goto end; 481 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, 482 (char *) iso3136ctry, sizeof(iso3136ctry)); 483 if (--l < 1) goto end; 484 485 { /*fill results*/ 486 xmlChar *q = p->tag; 487 memcpy(q, iso639lang, k); 488 q += k; 489 *q++ = '-'; 490 memcpy(q, iso3136ctry, l); 491 q += l; 492 *q = '\0'; 493 } 494 ++count; 495 end: 496 return((count < xsltLocaleListSize) ? TRUE : FALSE); 497 } 498 499 500 static void 501 xsltEnumSupportedLocales(void) { 502 xmlRMutexLock(xsltLocaleMutex); 503 if (xsltLocaleListSize <= 0) { 504 size_t len; 505 506 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED); 507 508 len = xsltLocaleListSize * sizeof(xsltRFC1766Info); 509 xsltLocaleList = xmlMalloc(len); 510 memset(xsltLocaleList, 0, len); 511 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED); 512 } 513 xmlRMutexUnlock(xsltLocaleMutex); 514 } 515 516 #endif /*def XSLT_LOCALE_WINAPI*/ 517