1 /* 2 * xsltlocale.c: locale handling 3 * 4 * Reference: 5 * RFC 3066: Tags for the Identification of Languages 6 * http://www.ietf.org/rfc/rfc3066.txt 7 * ISO 639-1, ISO 3166-1 8 * 9 * Author: Nick Wellnhofer 10 * winapi port: Roumen Petrov 11 */ 12 13 #include "precomp.h" 14 15 #include "xsltlocale.h" 16 17 #define TOUPPER(c) (c & ~0x20) 18 #define TOLOWER(c) (c | 0x20) 19 #define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26) 20 21 /*without terminating null character*/ 22 #define XSLTMAX_ISO639LANGLEN 8 23 #define XSLTMAX_ISO3166CNTRYLEN 8 24 /* <lang>-<cntry> */ 25 #define XSLTMAX_LANGTAGLEN (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN) 26 27 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName); 28 29 #ifdef XSLT_LOCALE_WINAPI 30 xmlRMutexPtr xsltLocaleMutex = NULL; 31 32 struct xsltRFC1766Info_s { 33 /*note typedef unsigned char xmlChar !*/ 34 xmlChar tag[XSLTMAX_LANGTAGLEN+1]; 35 /*note typedef LCID xsltLocale !*/ 36 xsltLocale lcid; 37 }; 38 typedef struct xsltRFC1766Info_s xsltRFC1766Info; 39 40 static int xsltLocaleListSize = 0; 41 static xsltRFC1766Info *xsltLocaleList = NULL; 42 43 44 static xsltLocale 45 xslt_locale_WINAPI(const xmlChar *languageTag) { 46 int k; 47 xsltRFC1766Info *p = xsltLocaleList; 48 49 for (k=0; k<xsltLocaleListSize; k++, p++) 50 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid; 51 return((xsltLocale)0); 52 } 53 54 static void xsltEnumSupportedLocales(void); 55 #endif 56 57 /** 58 * xsltFreeLocales: 59 * 60 * Cleanup function for the locale support on shutdown 61 */ 62 void 63 xsltFreeLocales(void) { 64 #ifdef XSLT_LOCALE_WINAPI 65 xmlRMutexLock(xsltLocaleMutex); 66 xmlFree(xsltLocaleList); 67 xsltLocaleList = NULL; 68 xmlRMutexUnlock(xsltLocaleMutex); 69 #endif 70 } 71 72 /** 73 * xsltNewLocale: 74 * @languageTag: RFC 3066 language tag 75 * 76 * Creates a new locale of an opaque system dependent type based on the 77 * language tag. 78 * 79 * Returns the locale or NULL on error or if no matching locale was found 80 */ 81 xsltLocale 82 xsltNewLocale(const xmlChar *languageTag) { 83 #ifdef XSLT_LOCALE_POSIX 84 xsltLocale locale; 85 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */ 86 const xmlChar *p = languageTag; 87 const char *region = NULL; 88 char *q = localeName; 89 int i, llen; 90 91 /* Convert something like "pt-br" to "pt_BR.utf8" */ 92 93 if (languageTag == NULL) 94 return(NULL); 95 96 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 97 *q++ = TOLOWER(*p++); 98 99 if (i == 0) 100 return(NULL); 101 102 llen = i; 103 104 if (*p) { 105 if (*p++ != '-') 106 return(NULL); 107 *q++ = '_'; 108 109 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 110 *q++ = TOUPPER(*p++); 111 112 if (i == 0 || *p) 113 return(NULL); 114 115 memcpy(q, ".utf8", 6); 116 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 117 if (locale != NULL) 118 return(locale); 119 120 /* Continue without using country code */ 121 122 q = localeName + llen; 123 } 124 125 /* Try locale without territory, e.g. for Esperanto (eo) */ 126 127 memcpy(q, ".utf8", 6); 128 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 129 if (locale != NULL) 130 return(locale); 131 132 /* Try to find most common country for language */ 133 134 if (llen != 2) 135 return(NULL); 136 137 region = (char *)xsltDefaultRegion((xmlChar *)localeName); 138 if (region == NULL) 139 return(NULL); 140 141 q = localeName + llen; 142 *q++ = '_'; 143 *q++ = region[0]; 144 *q++ = region[1]; 145 memcpy(q, ".utf8", 6); 146 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); 147 148 return(locale); 149 #endif 150 151 #ifdef XSLT_LOCALE_WINAPI 152 { 153 xsltLocale locale = (xsltLocale)0; 154 xmlChar localeName[XSLTMAX_LANGTAGLEN+1]; 155 xmlChar *q = localeName; 156 const xmlChar *p = languageTag; 157 int i, llen; 158 const xmlChar *region = NULL; 159 160 if (languageTag == NULL) goto end; 161 162 xsltEnumSupportedLocales(); 163 164 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) 165 *q++ = TOLOWER(*p++); 166 if (i == 0) goto end; 167 168 llen = i; 169 *q++ = '-'; 170 if (*p) { /*if country tag is given*/ 171 if (*p++ != '-') goto end; 172 173 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) 174 *q++ = TOUPPER(*p++); 175 if (i == 0 || *p) goto end; 176 177 *q = '\0'; 178 locale = xslt_locale_WINAPI(localeName); 179 if (locale != (xsltLocale)0) goto end; 180 } 181 /* Try to find most common country for language */ 182 region = xsltDefaultRegion(localeName); 183 if (region == NULL) goto end; 184 185 strcpy((char *) localeName + llen + 1, (char *) region); 186 locale = xslt_locale_WINAPI(localeName); 187 end: 188 return(locale); 189 } 190 #endif 191 192 #ifdef XSLT_LOCALE_NONE 193 return(NULL); 194 #endif 195 } 196 197 static const xmlChar* 198 xsltDefaultRegion(const xmlChar *localeName) { 199 xmlChar c; 200 /* region should be xmlChar, but gcc warns on all string assignments */ 201 const char *region = NULL; 202 203 c = localeName[1]; 204 /* This is based on the locales from glibc 2.3.3 */ 205 206 switch (localeName[0]) { 207 case 'a': 208 if (c == 'a' || c == 'm') region = "ET"; 209 else if (c == 'f') region = "ZA"; 210 else if (c == 'n') region = "ES"; 211 else if (c == 'r') region = "AE"; 212 else if (c == 'z') region = "AZ"; 213 break; 214 case 'b': 215 if (c == 'e') region = "BY"; 216 else if (c == 'g') region = "BG"; 217 else if (c == 'n') region = "BD"; 218 else if (c == 'r') region = "FR"; 219 else if (c == 's') region = "BA"; 220 break; 221 case 'c': 222 if (c == 'a') region = "ES"; 223 else if (c == 's') region = "CZ"; 224 else if (c == 'y') region = "GB"; 225 break; 226 case 'd': 227 if (c == 'a') region = "DK"; 228 else if (c == 'e') region = "DE"; 229 break; 230 case 'e': 231 if (c == 'l') region = "GR"; 232 else if (c == 'n' || c == 'o') region = "US"; 233 else if (c == 's' || c == 'u') region = "ES"; 234 else if (c == 't') region = "EE"; 235 break; 236 case 'f': 237 if (c == 'a') region = "IR"; 238 else if (c == 'i') region = "FI"; 239 else if (c == 'o') region = "FO"; 240 else if (c == 'r') region = "FR"; 241 break; 242 case 'g': 243 if (c == 'a') region = "IE"; 244 else if (c == 'l') region = "ES"; 245 else if (c == 'v') region = "GB"; 246 break; 247 case 'h': 248 if (c == 'e') region = "IL"; 249 else if (c == 'i') region = "IN"; 250 else if (c == 'r') region = "HT"; 251 else if (c == 'u') region = "HU"; 252 break; 253 case 'i': 254 if (c == 'd') region = "ID"; 255 else if (c == 's') region = "IS"; 256 else if (c == 't') region = "IT"; 257 else if (c == 'w') region = "IL"; 258 break; 259 case 'j': 260 if (c == 'a') region = "JP"; 261 break; 262 case 'k': 263 if (c == 'l') region = "GL"; 264 else if (c == 'o') region = "KR"; 265 else if (c == 'w') region = "GB"; 266 break; 267 case 'l': 268 if (c == 't') region = "LT"; 269 else if (c == 'v') region = "LV"; 270 break; 271 case 'm': 272 if (c == 'k') region = "MK"; 273 else if (c == 'l' || c == 'r') region = "IN"; 274 else if (c == 'n') region = "MN"; 275 else if (c == 's') region = "MY"; 276 else if (c == 't') region = "MT"; 277 break; 278 case 'n': 279 if (c == 'b' || c == 'n' || c == 'o') region = "NO"; 280 else if (c == 'e') region = "NP"; 281 else if (c == 'l') region = "NL"; 282 break; 283 case 'o': 284 if (c == 'm') region = "ET"; 285 break; 286 case 'p': 287 if (c == 'a') region = "IN"; 288 else if (c == 'l') region = "PL"; 289 else if (c == 't') region = "PT"; 290 break; 291 case 'r': 292 if (c == 'o') region = "RO"; 293 else if (c == 'u') region = "RU"; 294 break; 295 case 's': 296 switch (c) { 297 case 'e': region = "NO"; break; 298 case 'h': region = "YU"; break; 299 case 'k': region = "SK"; break; 300 case 'l': region = "SI"; break; 301 case 'o': region = "ET"; break; 302 case 'q': region = "AL"; break; 303 case 't': region = "ZA"; break; 304 case 'v': region = "SE"; break; 305 } 306 break; 307 case 't': 308 if (c == 'a' || c == 'e') region = "IN"; 309 else if (c == 'h') region = "TH"; 310 else if (c == 'i') region = "ER"; 311 else if (c == 'r') region = "TR"; 312 else if (c == 't') region = "RU"; 313 break; 314 case 'u': 315 if (c == 'k') region = "UA"; 316 else if (c == 'r') region = "PK"; 317 break; 318 case 'v': 319 if (c == 'i') region = "VN"; 320 break; 321 case 'w': 322 if (c == 'a') region = "BE"; 323 break; 324 case 'x': 325 if (c == 'h') region = "ZA"; 326 break; 327 case 'z': 328 if (c == 'h') region = "CN"; 329 else if (c == 'u') region = "ZA"; 330 break; 331 } 332 return((xmlChar *)region); 333 } 334 335 /** 336 * xsltFreeLocale: 337 * @locale: the locale to free 338 * 339 * Frees a locale created with xsltNewLocale 340 */ 341 void 342 xsltFreeLocale(xsltLocale locale) { 343 #ifdef XSLT_LOCALE_POSIX 344 freelocale(locale); 345 #endif 346 } 347 348 /** 349 * xsltStrxfrm: 350 * @locale: locale created with xsltNewLocale 351 * @string: UTF-8 string to transform 352 * 353 * Transforms a string according to locale. The transformed string must then be 354 * compared with xsltLocaleStrcmp and freed with xmlFree. 355 * 356 * Returns the transformed string or NULL on error 357 */ 358 xsltLocaleChar * 359 xsltStrxfrm(xsltLocale locale, const xmlChar *string) 360 { 361 #ifdef XSLT_LOCALE_NONE 362 return(NULL); 363 #else 364 size_t xstrlen, r; 365 xsltLocaleChar *xstr; 366 367 #ifdef XSLT_LOCALE_POSIX 368 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1; 369 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen); 370 if (xstr == NULL) { 371 xsltTransformError(NULL, NULL, NULL, 372 "xsltStrxfrm : out of memory error\n"); 373 return(NULL); 374 } 375 376 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale); 377 #endif 378 379 #ifdef XSLT_LOCALE_WINAPI 380 xstrlen = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, NULL, 0); 381 if (xstrlen == 0) { 382 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n"); 383 return(NULL); 384 } 385 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar)); 386 if (xstr == NULL) { 387 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n"); 388 return(NULL); 389 } 390 r = MultiByteToWideChar(CP_UTF8, 0, (char *) string, -1, xstr, xstrlen); 391 if (r == 0) { 392 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n"); 393 xmlFree(xstr); 394 return(NULL); 395 } 396 return(xstr); 397 #endif /* XSLT_LOCALE_WINAPI */ 398 399 if (r >= xstrlen) { 400 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n"); 401 xmlFree(xstr); 402 return(NULL); 403 } 404 405 return(xstr); 406 #endif /* XSLT_LOCALE_NONE */ 407 } 408 409 /** 410 * xsltLocaleStrcmp: 411 * @locale: a locale identifier 412 * @str1: a string transformed with xsltStrxfrm 413 * @str2: a string transformed with xsltStrxfrm 414 * 415 * Compares two strings transformed with xsltStrxfrm 416 * 417 * Returns a value < 0 if str1 sorts before str2, 418 * a value > 0 if str1 sorts after str2, 419 * 0 if str1 and str2 are equal wrt sorting 420 */ 421 int 422 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) { 423 (void)locale; 424 #ifdef XSLT_LOCALE_WINAPI 425 { 426 int ret; 427 if (str1 == str2) return(0); 428 if (str1 == NULL) return(-1); 429 if (str2 == NULL) return(1); 430 ret = CompareStringW(locale, 0, str1, -1, str2, -1); 431 if (ret == 0) { 432 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n"); 433 return(0); 434 } 435 return(ret - 2); 436 } 437 #else 438 return(xmlStrcmp(str1, str2)); 439 #endif 440 } 441 442 #ifdef XSLT_LOCALE_WINAPI 443 /** 444 * xsltCountSupportedLocales: 445 * @lcid: not used 446 * 447 * callback used to count locales 448 * 449 * Returns TRUE 450 */ 451 BOOL CALLBACK 452 xsltCountSupportedLocales(LPSTR lcid) { 453 (void) lcid; 454 ++xsltLocaleListSize; 455 return(TRUE); 456 } 457 458 /** 459 * xsltIterateSupportedLocales: 460 * @lcid: not used 461 * 462 * callback used to track locales 463 * 464 * Returns TRUE if not at the end of the array 465 */ 466 BOOL CALLBACK 467 xsltIterateSupportedLocales(LPSTR lcid) { 468 static int count = 0; 469 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1]; 470 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1]; 471 int k, l; 472 xsltRFC1766Info *p = xsltLocaleList + count; 473 474 k = sscanf(lcid, "%lx", (long*)&p->lcid); 475 if (k < 1) goto end; 476 /*don't count terminating null character*/ 477 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME, 478 (char *) iso639lang, sizeof(iso639lang)); 479 if (--k < 1) goto end; 480 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, 481 (char *) iso3136ctry, sizeof(iso3136ctry)); 482 if (--l < 1) goto end; 483 484 { /*fill results*/ 485 xmlChar *q = p->tag; 486 memcpy(q, iso639lang, k); 487 q += k; 488 *q++ = '-'; 489 memcpy(q, iso3136ctry, l); 490 q += l; 491 *q = '\0'; 492 } 493 ++count; 494 end: 495 return((count < xsltLocaleListSize) ? TRUE : FALSE); 496 } 497 498 499 static void 500 xsltEnumSupportedLocales(void) { 501 xmlRMutexLock(xsltLocaleMutex); 502 if (xsltLocaleListSize <= 0) { 503 size_t len; 504 505 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED); 506 507 len = xsltLocaleListSize * sizeof(xsltRFC1766Info); 508 xsltLocaleList = xmlMalloc(len); 509 memset(xsltLocaleList, 0, len); 510 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED); 511 } 512 xmlRMutexUnlock(xsltLocaleMutex); 513 } 514 515 #endif /*def XSLT_LOCALE_WINAPI*/ 516