1 /* locale.c 2 * 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 4 * 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * A Elbereth Gilthoniel, 13 * silivren penna m�riel 14 * o menel aglar elenath! 15 * Na-chaered palan-d�riel 16 * o galadhremmin ennorath, 17 * Fanuilos, le linnathon 18 * nef aear, si nef aearon! 19 * 20 * [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"] 21 */ 22 23 /* utility functions for handling locale-specific stuff like what 24 * character represents the decimal point. 25 */ 26 27 #include "EXTERN.h" 28 #define PERL_IN_LOCALE_C 29 #include "perl.h" 30 31 #ifdef I_LOCALE 32 # include <locale.h> 33 #endif 34 35 #ifdef I_LANGINFO 36 # include <langinfo.h> 37 #endif 38 39 #include "reentr.h" 40 41 #if defined(USE_LOCALE_NUMERIC) || defined(USE_LOCALE_COLLATE) 42 /* 43 * Standardize the locale name from a string returned by 'setlocale'. 44 * 45 * The standard return value of setlocale() is either 46 * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL 47 * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL 48 * (the space-separated values represent the various sublocales, 49 * in some unspecificed order) 50 * 51 * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", 52 * which is harmful for further use of the string in setlocale(). 53 * 54 */ 55 STATIC char * 56 S_stdize_locale(pTHX_ char *locs) 57 { 58 const char * const s = strchr(locs, '='); 59 bool okay = TRUE; 60 61 PERL_ARGS_ASSERT_STDIZE_LOCALE; 62 63 if (s) { 64 const char * const t = strchr(s, '.'); 65 okay = FALSE; 66 if (t) { 67 const char * const u = strchr(t, '\n'); 68 if (u && (u[1] == 0)) { 69 const STRLEN len = u - s; 70 Move(s + 1, locs, len, char); 71 locs[len] = 0; 72 okay = TRUE; 73 } 74 } 75 } 76 77 if (!okay) 78 Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs); 79 80 return locs; 81 } 82 #endif 83 84 void 85 Perl_set_numeric_radix(pTHX) 86 { 87 #ifdef USE_LOCALE_NUMERIC 88 dVAR; 89 # ifdef HAS_LOCALECONV 90 const struct lconv* const lc = localeconv(); 91 92 if (lc && lc->decimal_point) { 93 if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { 94 SvREFCNT_dec(PL_numeric_radix_sv); 95 PL_numeric_radix_sv = NULL; 96 } 97 else { 98 if (PL_numeric_radix_sv) 99 sv_setpv(PL_numeric_radix_sv, lc->decimal_point); 100 else 101 PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); 102 } 103 } 104 else 105 PL_numeric_radix_sv = NULL; 106 # endif /* HAS_LOCALECONV */ 107 #endif /* USE_LOCALE_NUMERIC */ 108 } 109 110 /* 111 * Set up for a new numeric locale. 112 */ 113 void 114 Perl_new_numeric(pTHX_ const char *newnum) 115 { 116 #ifdef USE_LOCALE_NUMERIC 117 dVAR; 118 119 if (! newnum) { 120 Safefree(PL_numeric_name); 121 PL_numeric_name = NULL; 122 PL_numeric_standard = TRUE; 123 PL_numeric_local = TRUE; 124 return; 125 } 126 127 if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) { 128 Safefree(PL_numeric_name); 129 PL_numeric_name = stdize_locale(savepv(newnum)); 130 PL_numeric_standard = ((*newnum == 'C' && newnum[1] == '\0') 131 || strEQ(newnum, "POSIX")); 132 PL_numeric_local = TRUE; 133 set_numeric_radix(); 134 } 135 136 #endif /* USE_LOCALE_NUMERIC */ 137 } 138 139 void 140 Perl_set_numeric_standard(pTHX) 141 { 142 #ifdef USE_LOCALE_NUMERIC 143 dVAR; 144 145 if (! PL_numeric_standard) { 146 setlocale(LC_NUMERIC, "C"); 147 PL_numeric_standard = TRUE; 148 PL_numeric_local = FALSE; 149 set_numeric_radix(); 150 } 151 152 #endif /* USE_LOCALE_NUMERIC */ 153 } 154 155 void 156 Perl_set_numeric_local(pTHX) 157 { 158 #ifdef USE_LOCALE_NUMERIC 159 dVAR; 160 161 if (! PL_numeric_local) { 162 setlocale(LC_NUMERIC, PL_numeric_name); 163 PL_numeric_standard = FALSE; 164 PL_numeric_local = TRUE; 165 set_numeric_radix(); 166 } 167 168 #endif /* USE_LOCALE_NUMERIC */ 169 } 170 171 /* 172 * Set up for a new ctype locale. 173 */ 174 void 175 Perl_new_ctype(pTHX_ const char *newctype) 176 { 177 #ifdef USE_LOCALE_CTYPE 178 dVAR; 179 int i; 180 181 PERL_ARGS_ASSERT_NEW_CTYPE; 182 183 for (i = 0; i < 256; i++) { 184 if (isUPPER_LC(i)) 185 PL_fold_locale[i] = toLOWER_LC(i); 186 else if (isLOWER_LC(i)) 187 PL_fold_locale[i] = toUPPER_LC(i); 188 else 189 PL_fold_locale[i] = i; 190 } 191 192 #endif /* USE_LOCALE_CTYPE */ 193 PERL_ARGS_ASSERT_NEW_CTYPE; 194 PERL_UNUSED_ARG(newctype); 195 PERL_UNUSED_CONTEXT; 196 } 197 198 /* 199 * Set up for a new collation locale. 200 */ 201 void 202 Perl_new_collate(pTHX_ const char *newcoll) 203 { 204 #ifdef USE_LOCALE_COLLATE 205 dVAR; 206 207 if (! newcoll) { 208 if (PL_collation_name) { 209 ++PL_collation_ix; 210 Safefree(PL_collation_name); 211 PL_collation_name = NULL; 212 } 213 PL_collation_standard = TRUE; 214 PL_collxfrm_base = 0; 215 PL_collxfrm_mult = 2; 216 return; 217 } 218 219 if (! PL_collation_name || strNE(PL_collation_name, newcoll)) { 220 ++PL_collation_ix; 221 Safefree(PL_collation_name); 222 PL_collation_name = stdize_locale(savepv(newcoll)); 223 PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0') 224 || strEQ(newcoll, "POSIX")); 225 226 { 227 /* 2: at most so many chars ('a', 'b'). */ 228 /* 50: surely no system expands a char more. */ 229 #define XFRMBUFSIZE (2 * 50) 230 char xbuf[XFRMBUFSIZE]; 231 const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); 232 const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); 233 const SSize_t mult = fb - fa; 234 if (mult < 1) 235 Perl_croak(aTHX_ "strxfrm() gets absurd"); 236 PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; 237 PL_collxfrm_mult = mult; 238 } 239 } 240 241 #endif /* USE_LOCALE_COLLATE */ 242 } 243 244 /* 245 * Initialize locale awareness. 246 */ 247 int 248 Perl_init_i18nl10n(pTHX_ int printwarn) 249 { 250 int ok = 1; 251 /* returns 252 * 1 = set ok or not applicable, 253 * 0 = fallback to C locale, 254 * -1 = fallback to C locale failed 255 */ 256 257 #if defined(USE_LOCALE) 258 dVAR; 259 260 #ifdef USE_LOCALE_CTYPE 261 char *curctype = NULL; 262 #endif /* USE_LOCALE_CTYPE */ 263 #ifdef USE_LOCALE_COLLATE 264 char *curcoll = NULL; 265 #endif /* USE_LOCALE_COLLATE */ 266 #ifdef USE_LOCALE_NUMERIC 267 char *curnum = NULL; 268 #endif /* USE_LOCALE_NUMERIC */ 269 #ifdef __GLIBC__ 270 char * const language = PerlEnv_getenv("LANGUAGE"); 271 #endif 272 char * const lc_all = PerlEnv_getenv("LC_ALL"); 273 char * const lang = PerlEnv_getenv("LANG"); 274 bool setlocale_failure = FALSE; 275 276 #ifdef LOCALE_ENVIRON_REQUIRED 277 278 /* 279 * Ultrix setlocale(..., "") fails if there are no environment 280 * variables from which to get a locale name. 281 */ 282 283 bool done = FALSE; 284 285 #ifdef LC_ALL 286 if (lang) { 287 if (setlocale(LC_ALL, "")) 288 done = TRUE; 289 else 290 setlocale_failure = TRUE; 291 } 292 if (!setlocale_failure) { 293 #ifdef USE_LOCALE_CTYPE 294 Safefree(curctype); 295 if (! (curctype = 296 setlocale(LC_CTYPE, 297 (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) 298 ? "" : NULL))) 299 setlocale_failure = TRUE; 300 else 301 curctype = savepv(curctype); 302 #endif /* USE_LOCALE_CTYPE */ 303 #ifdef USE_LOCALE_COLLATE 304 Safefree(curcoll); 305 if (! (curcoll = 306 setlocale(LC_COLLATE, 307 (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) 308 ? "" : NULL))) 309 setlocale_failure = TRUE; 310 else 311 curcoll = savepv(curcoll); 312 #endif /* USE_LOCALE_COLLATE */ 313 #ifdef USE_LOCALE_NUMERIC 314 Safefree(curnum); 315 if (! (curnum = 316 setlocale(LC_NUMERIC, 317 (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) 318 ? "" : NULL))) 319 setlocale_failure = TRUE; 320 else 321 curnum = savepv(curnum); 322 #endif /* USE_LOCALE_NUMERIC */ 323 } 324 325 #endif /* LC_ALL */ 326 327 #endif /* !LOCALE_ENVIRON_REQUIRED */ 328 329 #ifdef LC_ALL 330 if (! setlocale(LC_ALL, "")) 331 setlocale_failure = TRUE; 332 #endif /* LC_ALL */ 333 334 if (!setlocale_failure) { 335 #ifdef USE_LOCALE_CTYPE 336 Safefree(curctype); 337 if (! (curctype = setlocale(LC_CTYPE, ""))) 338 setlocale_failure = TRUE; 339 else 340 curctype = savepv(curctype); 341 #endif /* USE_LOCALE_CTYPE */ 342 #ifdef USE_LOCALE_COLLATE 343 Safefree(curcoll); 344 if (! (curcoll = setlocale(LC_COLLATE, ""))) 345 setlocale_failure = TRUE; 346 else 347 curcoll = savepv(curcoll); 348 #endif /* USE_LOCALE_COLLATE */ 349 #ifdef USE_LOCALE_NUMERIC 350 Safefree(curnum); 351 if (! (curnum = setlocale(LC_NUMERIC, ""))) 352 setlocale_failure = TRUE; 353 else 354 curnum = savepv(curnum); 355 #endif /* USE_LOCALE_NUMERIC */ 356 } 357 358 if (setlocale_failure) { 359 char *p; 360 const bool locwarn = (printwarn > 1 || 361 (printwarn && 362 (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); 363 364 if (locwarn) { 365 #ifdef LC_ALL 366 367 PerlIO_printf(Perl_error_log, 368 "perl: warning: Setting locale failed.\n"); 369 370 #else /* !LC_ALL */ 371 372 PerlIO_printf(Perl_error_log, 373 "perl: warning: Setting locale failed for the categories:\n\t"); 374 #ifdef USE_LOCALE_CTYPE 375 if (! curctype) 376 PerlIO_printf(Perl_error_log, "LC_CTYPE "); 377 #endif /* USE_LOCALE_CTYPE */ 378 #ifdef USE_LOCALE_COLLATE 379 if (! curcoll) 380 PerlIO_printf(Perl_error_log, "LC_COLLATE "); 381 #endif /* USE_LOCALE_COLLATE */ 382 #ifdef USE_LOCALE_NUMERIC 383 if (! curnum) 384 PerlIO_printf(Perl_error_log, "LC_NUMERIC "); 385 #endif /* USE_LOCALE_NUMERIC */ 386 PerlIO_printf(Perl_error_log, "\n"); 387 388 #endif /* LC_ALL */ 389 390 PerlIO_printf(Perl_error_log, 391 "perl: warning: Please check that your locale settings:\n"); 392 393 #ifdef __GLIBC__ 394 PerlIO_printf(Perl_error_log, 395 "\tLANGUAGE = %c%s%c,\n", 396 language ? '"' : '(', 397 language ? language : "unset", 398 language ? '"' : ')'); 399 #endif 400 401 PerlIO_printf(Perl_error_log, 402 "\tLC_ALL = %c%s%c,\n", 403 lc_all ? '"' : '(', 404 lc_all ? lc_all : "unset", 405 lc_all ? '"' : ')'); 406 407 #if defined(USE_ENVIRON_ARRAY) 408 { 409 char **e; 410 for (e = environ; *e; e++) { 411 if (strnEQ(*e, "LC_", 3) 412 && strnNE(*e, "LC_ALL=", 7) 413 && (p = strchr(*e, '='))) 414 PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n", 415 (int)(p - *e), *e, p + 1); 416 } 417 } 418 #else 419 PerlIO_printf(Perl_error_log, 420 "\t(possibly more locale environment variables)\n"); 421 #endif 422 423 PerlIO_printf(Perl_error_log, 424 "\tLANG = %c%s%c\n", 425 lang ? '"' : '(', 426 lang ? lang : "unset", 427 lang ? '"' : ')'); 428 429 PerlIO_printf(Perl_error_log, 430 " are supported and installed on your system.\n"); 431 } 432 433 #ifdef LC_ALL 434 435 if (setlocale(LC_ALL, "C")) { 436 if (locwarn) 437 PerlIO_printf(Perl_error_log, 438 "perl: warning: Falling back to the standard locale (\"C\").\n"); 439 ok = 0; 440 } 441 else { 442 if (locwarn) 443 PerlIO_printf(Perl_error_log, 444 "perl: warning: Failed to fall back to the standard locale (\"C\").\n"); 445 ok = -1; 446 } 447 448 #else /* ! LC_ALL */ 449 450 if (0 451 #ifdef USE_LOCALE_CTYPE 452 || !(curctype || setlocale(LC_CTYPE, "C")) 453 #endif /* USE_LOCALE_CTYPE */ 454 #ifdef USE_LOCALE_COLLATE 455 || !(curcoll || setlocale(LC_COLLATE, "C")) 456 #endif /* USE_LOCALE_COLLATE */ 457 #ifdef USE_LOCALE_NUMERIC 458 || !(curnum || setlocale(LC_NUMERIC, "C")) 459 #endif /* USE_LOCALE_NUMERIC */ 460 ) 461 { 462 if (locwarn) 463 PerlIO_printf(Perl_error_log, 464 "perl: warning: Cannot fall back to the standard locale (\"C\").\n"); 465 ok = -1; 466 } 467 468 #endif /* ! LC_ALL */ 469 470 #ifdef USE_LOCALE_CTYPE 471 Safefree(curctype); 472 curctype = savepv(setlocale(LC_CTYPE, NULL)); 473 #endif /* USE_LOCALE_CTYPE */ 474 #ifdef USE_LOCALE_COLLATE 475 Safefree(curcoll); 476 curcoll = savepv(setlocale(LC_COLLATE, NULL)); 477 #endif /* USE_LOCALE_COLLATE */ 478 #ifdef USE_LOCALE_NUMERIC 479 Safefree(curnum); 480 curnum = savepv(setlocale(LC_NUMERIC, NULL)); 481 #endif /* USE_LOCALE_NUMERIC */ 482 } 483 else { 484 485 #ifdef USE_LOCALE_CTYPE 486 new_ctype(curctype); 487 #endif /* USE_LOCALE_CTYPE */ 488 489 #ifdef USE_LOCALE_COLLATE 490 new_collate(curcoll); 491 #endif /* USE_LOCALE_COLLATE */ 492 493 #ifdef USE_LOCALE_NUMERIC 494 new_numeric(curnum); 495 #endif /* USE_LOCALE_NUMERIC */ 496 497 } 498 499 #endif /* USE_LOCALE */ 500 501 #ifdef USE_PERLIO 502 { 503 /* Set PL_utf8locale to TRUE if using PerlIO _and_ 504 any of the following are true: 505 - nl_langinfo(CODESET) contains /^utf-?8/i 506 - $ENV{LC_ALL} contains /^utf-?8/i 507 - $ENV{LC_CTYPE} contains /^utf-?8/i 508 - $ENV{LANG} contains /^utf-?8/i 509 The LC_ALL, LC_CTYPE, LANG obey the usual override 510 hierarchy of locale environment variables. (LANGUAGE 511 affects only LC_MESSAGES only under glibc.) (If present, 512 it overrides LC_MESSAGES for GNU gettext, and it also 513 can have more than one locale, separated by spaces, 514 in case you need to know.) 515 If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) 516 are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer 517 on STDIN, STDOUT, STDERR, _and_ the default open discipline. 518 */ 519 bool utf8locale = FALSE; 520 char *codeset = NULL; 521 #if defined(HAS_NL_LANGINFO) && defined(CODESET) 522 codeset = nl_langinfo(CODESET); 523 #endif 524 if (codeset) 525 utf8locale = (ibcmp(codeset, STR_WITH_LEN("UTF-8")) == 0 || 526 ibcmp(codeset, STR_WITH_LEN("UTF8") ) == 0); 527 #if defined(USE_LOCALE) 528 else { /* nl_langinfo(CODESET) is supposed to correctly 529 * interpret the locale environment variables, 530 * but just in case it fails, let's do this manually. */ 531 if (lang) 532 utf8locale = (ibcmp(lang, STR_WITH_LEN("UTF-8")) == 0 || 533 ibcmp(lang, STR_WITH_LEN("UTF8") ) == 0); 534 #ifdef USE_LOCALE_CTYPE 535 if (curctype) 536 utf8locale = (ibcmp(curctype, STR_WITH_LEN("UTF-8")) == 0 || 537 ibcmp(curctype, STR_WITH_LEN("UTF8") ) == 0); 538 #endif 539 if (lc_all) 540 utf8locale = (ibcmp(lc_all, STR_WITH_LEN("UTF-8")) == 0 || 541 ibcmp(lc_all, STR_WITH_LEN("UTF8") ) == 0); 542 } 543 #endif /* USE_LOCALE */ 544 if (utf8locale) 545 PL_utf8locale = TRUE; 546 } 547 /* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. 548 This is an alternative to using the -C command line switch 549 (the -C if present will override this). */ 550 { 551 const char *p = PerlEnv_getenv("PERL_UNICODE"); 552 PL_unicode = p ? parse_unicode_opts(&p) : 0; 553 if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) 554 PL_utf8cache = -1; 555 } 556 #endif 557 558 #ifdef USE_LOCALE_CTYPE 559 Safefree(curctype); 560 #endif /* USE_LOCALE_CTYPE */ 561 #ifdef USE_LOCALE_COLLATE 562 Safefree(curcoll); 563 #endif /* USE_LOCALE_COLLATE */ 564 #ifdef USE_LOCALE_NUMERIC 565 Safefree(curnum); 566 #endif /* USE_LOCALE_NUMERIC */ 567 return ok; 568 } 569 570 #ifdef USE_LOCALE_COLLATE 571 572 /* 573 * mem_collxfrm() is a bit like strxfrm() but with two important 574 * differences. First, it handles embedded NULs. Second, it allocates 575 * a bit more memory than needed for the transformed data itself. 576 * The real transformed data begins at offset sizeof(collationix). 577 * Please see sv_collxfrm() to see how this is used. 578 */ 579 580 char * 581 Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) 582 { 583 dVAR; 584 char *xbuf; 585 STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ 586 587 PERL_ARGS_ASSERT_MEM_COLLXFRM; 588 589 /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ 590 /* the +1 is for the terminating NUL. */ 591 592 xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; 593 Newx(xbuf, xAlloc, char); 594 if (! xbuf) 595 goto bad; 596 597 *(U32*)xbuf = PL_collation_ix; 598 xout = sizeof(PL_collation_ix); 599 for (xin = 0; xin < len; ) { 600 Size_t xused; 601 602 for (;;) { 603 xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); 604 if (xused >= PERL_INT_MAX) 605 goto bad; 606 if ((STRLEN)xused < xAlloc - xout) 607 break; 608 xAlloc = (2 * xAlloc) + 1; 609 Renew(xbuf, xAlloc, char); 610 if (! xbuf) 611 goto bad; 612 } 613 614 xin += strlen(s + xin) + 1; 615 xout += xused; 616 617 /* Embedded NULs are understood but silently skipped 618 * because they make no sense in locale collation. */ 619 } 620 621 xbuf[xout] = '\0'; 622 *xlen = xout - sizeof(PL_collation_ix); 623 return xbuf; 624 625 bad: 626 Safefree(xbuf); 627 *xlen = 0; 628 return NULL; 629 } 630 631 #endif /* USE_LOCALE_COLLATE */ 632 633 /* 634 * Local variables: 635 * c-indentation-style: bsd 636 * c-basic-offset: 4 637 * indent-tabs-mode: t 638 * End: 639 * 640 * ex: set ts=8 sts=4 sw=4 noet: 641 */ 642