# $Id: encoding.pm,v 2.22 2018/02/11 05:32:03 dankogai Exp $
package encoding;
our $VERSION = sprintf "%d.%02d", q$Revision: 2.22 $ =~ /(\d+)/g;

use Encode;
use strict;
use warnings;
use Config;

use constant {
    DEBUG => !!$ENV{PERL_ENCODE_DEBUG},
    HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) },
    PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped
};

# _exception($name)
#
# Returns 1 when ${^ENCODING} must NOT be set for the encoding $name, 0
# otherwise.  The exception only applies to perls that are not newer than
# 5.008, to the UTF-family encodings listed below, and to builds whose
# %Config has no perl_patchlevel entry.
sub _exception {
    my $name = shift;
    $] > 5.008 and return 0;    # 5.8.1 or higher then no
    my %utfs = map { $_ => 1 }
      qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
      UTF-32 UTF-32BE UTF-32LE);
    $utfs{$name} or return 0;    # UTFs or no
    require Config;
    Config->import();
    our %Config;
    return $Config{perl_patchlevel} ? 0 : 1;    # maintperl then no
}

# in_locale()
#
# Returns the bitwise AND of the compile-time hints ($^H) and
# $locale::hint_bits (treated as 0 when unset or false).
sub in_locale { $^H & ( $locale::hint_bits || 0 ) }

# _get_locale_encoding()
#
# Works out an encoding name from the environment.  Returns the name, or
# undef when every probe failed.  Probes, in order:
#
#   1. On MSWin32 only: the console output code page, then the ANSI code
#      page.  Code page 65001 is reported as 'UTF-8', any other as "cp<N>".
#   2. I18N::Langinfo's CODESET, canonicalized through find_encoding().
#   3. The part after the "." of the current LC_CTYPE locale as reported by
#      POSIX::setlocale(), with a bare "euc" refined by country/language.
sub _get_locale_encoding {
    my $locale_encoding;

    if ( $^O eq 'MSWin32' ) {
        my @tries = (
            # First try to get the OutputCP. This will work only if we
            # are attached to a console
            'Win32.pm'         => 'Win32::GetConsoleOutputCP',
            'Win32/Console.pm' => 'Win32::Console::OutputCP',
            # If above failed, this means that we are a GUI app
            # Let's assume that the ANSI codepage is what matters
            'Win32.pm'         => 'Win32::GetACP',
        );
        while ( my ( $module, $function ) = splice( @tries, 0, 2 ) ) {
            # A module that fails to load or a call that dies simply
            # yields a false $cp, and the next candidate is tried.
            my $cp = eval {
                require $module;
                no strict 'refs';
                &{$function}();
            };
            next unless $cp;
            return $cp == 65001    # Code page for UTF-8
              ? 'UTF-8'
              : 'cp' . $cp;
        }
    }

    # I18N::Langinfo isn't available everywhere
    $locale_encoding = eval {
        require I18N::Langinfo;
        find_encoding(
            I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() )
        )->name;
    };
    return $locale_encoding if defined $locale_encoding;

    # NOTE: everything below runs inside an eval, so the croak() for an
    # ambiguous "euc" never reaches the caller; in that case the raw
    # encoding part of the locale name is returned unchanged.
    eval {
        require POSIX;
        # Get the current locale
        # Remember that MSVCRT impl is quite different from Unixes
        my $locale = POSIX::setlocale( POSIX::LC_CTYPE() );
        if ( $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ ) {
            my $country_language;
            ( $country_language, $locale_encoding ) = ( $1, $2 );

            # Could do more heuristics based on the country and language
            # since we have Locale::Country and Locale::Language available.
            # TODO: get a database of Language -> Encoding mappings
            # (the Estonian database at http://www.eki.ee/letter/
            # would be excellent!) --jhi
            if ( lc($locale_encoding) eq 'euc' ) {
                # The alternations are grouped so that ^ and $ anchor
                # every alternative, not just the first and the last.
                if ( $country_language =~ /^(?:ja_JP|japan(?:ese)?)$/i ) {
                    $locale_encoding = 'euc-jp';
                }
                elsif ( $country_language =~ /^(?:ko_KR|korean?)$/i ) {
                    $locale_encoding = 'euc-kr';
                }
                elsif ( $country_language =~ /^(?:zh_CN|chin(?:a|ese))$/i ) {
                    $locale_encoding = 'euc-cn';
                }
                elsif ( $country_language =~ /^(?:zh_TW|taiwan(?:ese)?)$/i ) {
                    $locale_encoding = 'euc-tw';
                }
                else {
                    require Carp;
                    Carp::croak(
                        "encoding: Locale encoding '$locale_encoding' too ambiguous"
                    );
                }
            }
        }
    };

    return $locale_encoding;
}

# import($class, $name, %arg)
#
# Implements "use encoding NAME, ...".
#
#   $name  encoding name, ':locale', or ':_get_locale_encoding' (which only
#          exports _get_locale_encoding() into the caller and returns).
#          When undefined, $ENV{PERL_ENCODING} is used instead.
#   %arg   Filter => true   decode the whole source through a source filter
#          STDIN / STDOUT   per-handle encoding; an existing but false value
#                           leaves that handle untouched
#
# Croaks on EBCDIC platforms, when no encoding can be determined, when an
# encoding is unknown to Encode, and, without Filter, on perl >= 5.25.3
# unless $Config{usecperl} is set.  Returns 1 otherwise (nothing for
# ':_get_locale_encoding').
sub import {

    if ( ord("A") == 193 ) {
        require Carp;
        Carp::croak("encoding: pragma does not support EBCDIC platforms");
    }

    my $deprecate =
        ( $] >= 5.017 and !$Config{usecperl} )
        ? "Use of the encoding pragma is deprecated" : 0;

    my $class = shift;
    my $name  = shift;

    # Consult the environment BEFORE deciding that nothing was specified;
    # checking first made this documented fallback unreachable.
    $name = $ENV{PERL_ENCODING} unless defined $name;
    if ( !$name ) {
        require Carp;
        Carp::croak("encoding: no encoding specified.");
    }
    if ( $name eq ':_get_locale_encoding' ) {    # used by lib/open.pm
        my $caller = caller();
        {
            no strict 'refs';
            *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
        }
        return;
    }
    $name = _get_locale_encoding() if $name eq ':locale';
    BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; }
    my %arg = @_;
    my $enc = find_encoding($name);
    unless ( defined $enc ) {
        require Carp;
        Carp::croak("encoding: Unknown encoding '$name'");
    }
    $name = $enc->name;    # canonize
    unless ( $arg{Filter} ) {
        if ( $] >= 5.025003 and !$Config{usecperl} ) {
            require Carp;
            Carp::croak("The encoding pragma is no longer supported. Check cperl");
        }
        warnings::warnif( "deprecated", $deprecate ) if $deprecate;

        DEBUG and warn "_exception($name) = ", _exception($name);
        if ( !_exception($name) ) {
            if ( !PERL_5_21_7 ) {
                ${^ENCODING} = $enc;
            }
            else {
                # Starting with 5.21.7, this pragma uses a shadow variable
                # designed explicitly for it, ${^E_NCODING}, to enforce
                # lexical scope; instead of ${^ENCODING}.
                $^H{'encoding'} = 1;
                ${^E_NCODING} = $enc;
            }
        }
        if ( !HAS_PERLIO ) {
            return 1;
        }
    }
    else {
        warnings::warnif( "deprecated", $deprecate ) if $deprecate;

        defined( ${^ENCODING} ) and undef ${^ENCODING};
        undef ${^E_NCODING} if PERL_5_21_7;

        # implicitly 'use utf8'
        require utf8;    # to fetch $utf8::hint_bits;
        $^H |= $utf8::hint_bits;

        require Filter::Util::Call;
        Filter::Util::Call->import;
        filter_add(
            sub {
                my $status = filter_read();
                if ( $status > 0 ) {
                    $_ = $enc->decode( $_, 1 );
                    DEBUG and warn $_;
                }
                $status;
            }
        );
    }

    # A non-zero ${^UNICODE} takes precedence: leave the handles alone.
    defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
    for my $h (qw(STDIN STDOUT)) {
        if ( $arg{$h} ) {
            unless ( defined find_encoding( $arg{$h} ) ) {
                require Carp;
                Carp::croak(
                    "encoding: Unknown encoding for $h, '$arg{$h}'");
            }
            binmode( $h, ":raw :encoding($arg{$h})" );
        }
        else {
            # Key absent: default to the script encoding.  Key present
            # but false (e.g. STDIN => undef): do nothing.
            unless ( exists $arg{$h} ) {
                no warnings 'uninitialized';
                binmode( $h, ":raw :encoding($name)" );
            }
        }
    }
    return 1;    # I doubt if we need it, though
}

# unimport()
#
# Implements "no encoding".  Clears ${^ENCODING} (and ${^E_NCODING} from
# 5.21.7 on), resets STDIN and STDOUT with binmode, and removes the source
# filter if Filter::Util::Call has been loaded.
sub unimport {
    no warnings;
    undef ${^ENCODING};
    undef ${^E_NCODING} if PERL_5_21_7;
    if (HAS_PERLIO) {
        binmode( STDIN,  ":raw" );
        binmode( STDOUT, ":raw" );
    }
    else {
        binmode(STDIN);
        binmode(STDOUT);
    }
    if ( $INC{"Filter/Util/Call.pm"} ) {
        eval { filter_del() };
    }
}

1;
__END__

=pod

=head1 NAME

encoding - allows you to write your script in non-ASCII and non-UTF-8

=head1 WARNING

This module has been deprecated since perl v5.18. See L</DESCRIPTION> and
L</BUGS>.

=head1 SYNOPSIS

    use encoding "greek";  # Perl like Greek to you?
    use encoding "euc-jp"; # Jperl!

    # or you can even do this if your shell supports your native encoding

    perl -Mencoding=latin2 -e'...' # Feeling centrally European?
    perl -Mencoding=euc-kr -e'...' # Or Korean?
253 254 # more control 255 256 # A simple euc-cn => utf-8 converter 257 use encoding "euc-cn", STDOUT => "utf8"; while(<>){print}; 258 259 # "no encoding;" supported 260 no encoding; 261 262 # an alternate way, Filter 263 use encoding "euc-jp", Filter=>1; 264 # now you can use kanji identifiers -- in euc-jp! 265 266 # encode based on the current locale - specialized purposes only; 267 # fraught with danger!! 268 use encoding ':locale'; 269 270=head1 DESCRIPTION 271 272This pragma is used to enable a Perl script to be written in encodings that 273aren't strictly ASCII nor UTF-8. It translates all or portions of the Perl 274program script from a given encoding into UTF-8, and changes the PerlIO layers 275of C<STDIN> and C<STDOUT> to the encoding specified. 276 277This pragma dates from the days when UTF-8-enabled editors were uncommon. But 278that was long ago, and the need for it is greatly diminished. That, coupled 279with the fact that it doesn't work with threads, along with other problems, 280(see L</BUGS>) have led to its being deprecated. It is planned to remove this 281pragma in a future Perl version. New code should be written in UTF-8, and the 282C<use utf8> pragma used instead (see L<perluniintro> and L<utf8> for details). 283Old code should be converted to UTF-8, via something like the recipe in the 284L</SYNOPSIS> (though this simple approach may require manual adjustments 285afterwards). 286 287If UTF-8 is not an option, it is recommended that one use a simple source 288filter, such as that provided by L<Filter::Encoding> on CPAN or this 289pragma's own C<Filter> option (see below). 290 291The only legitimate use of this pragma is almost certainly just one per file, 292near the top, with file scope, as the file is likely going to only be written 293in one encoding. Further restrictions apply in Perls before v5.22 (see 294L</Prior to Perl v5.22>). 

There are two basic modes of operation (plus turning it off):

=over 4

=item C<use encoding ['I<ENCNAME>'] ;>

Please note: This mode of operation is no longer supported as of Perl
v5.26.

This is the normal operation. It translates various literals encountered in
the Perl source file from the encoding I<ENCNAME> into UTF-8, and similarly
converts character code points. This is used when the script is a combination
of ASCII (for the variable names and punctuation, I<etc>), but the literal
data is in the specified encoding.

I<ENCNAME> is optional. If omitted, the encoding specified in the environment
variable L<C<PERL_ENCODING>|perlrun/PERL_ENCODING> is used. If this isn't
set, or the resolved-to encoding is not known to C<L<Encode>>, the error
C<Unknown encoding 'I<ENCNAME>'> will be thrown.

Starting in Perl v5.8.6 (C<Encode> version 2.0.1), I<ENCNAME> may be the
name C<:locale>. This is for very specialized applications, and is documented
in L</The C<:locale> sub-pragma> below.

The literals that are converted are C<q//, qq//, qr//, qw//, qx//>, and
starting in v5.8.1, C<tr///>. Operations that do conversions include C<chr>,
C<ord>, C<utf8::upgrade> (but not C<utf8::downgrade>), and C<chomp>.

Also starting in v5.8.1, the C<DATA> pseudo-filehandle is translated from the
encoding into UTF-8.

For example, you can write code in EUC-JP as follows:

    my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
                 #<-char-><-char->   # 4 octets
    s/\bCamel\b/$Rakuda/;

And with C<use encoding "euc-jp"> in effect, it is the same thing as
that code in UTF-8:

    my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters
    s/\bCamel\b/$Rakuda/;

See L</EXAMPLE - Greekperl> below for a more complete example.

Unless C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, the
PerlIO layers of C<STDIN> and C<STDOUT> are set to "C<:encoding(I<ENCNAME>)>".
Therefore,

    use encoding "euc-jp";
    my $message = "Camel is the symbol of perl.\n";
    my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
    $message =~ s/\bCamel\b/$Rakuda/;
    print $message;

will print

    "\xF1\xD1\xF1\xCC is the symbol of perl.\n"

not

    "\x{99F1}\x{99DD} is the symbol of perl.\n"

You can override this by giving extra arguments; see below.

Note that C<STDERR> WILL NOT be changed, regardless.

Also note that non-STD file handles remain unaffected. Use C<use open>
or C<binmode> to change the layers of those.

=item C<use encoding I<ENCNAME>, Filter=E<gt>1;>

This operates as above, but the C<Filter> argument with a non-zero
value causes the entire script, and not just literals, to be translated from
the encoding into UTF-8. This allows identifiers in the source to be in that
encoding as well. (Problems may occur if the encoding is not a superset of
ASCII; imagine all your semi-colons being translated into something
different.) One can use this form to make

    ${"\x{4eba}"}++

work. (This is equivalent to C<$I<human>++>, where I<human> is a single Han
ideograph).

This effectively means that your source code behaves as if it were written in
UTF-8 with C<use utf8> in effect. So even if your editor only supports
Shift_JIS, for example, you can still try examples in Chapter 15 of
C<Programming Perl, 3rd Ed.>.

This option is significantly slower than the other one.

=item C<no encoding;>

Unsets the script encoding. The layers of C<STDIN>, C<STDOUT> are
reset to "C<:raw>" (the default unprocessed raw stream of bytes).
391 392=back 393 394=head1 OPTIONS 395 396=head2 Setting C<STDIN> and/or C<STDOUT> individually 397 398The encodings of C<STDIN> and C<STDOUT> are individually settable by parameters to 399the pragma: 400 401 use encoding 'euc-tw', STDIN => 'greek' ...; 402 403In this case, you cannot omit the first I<ENCNAME>. C<< STDIN => undef >> 404turns the I/O transcoding completely off for that filehandle. 405 406When C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, 407these options will be completely ignored. See L<perlvar/C<${^UNICODE}>> and 408L<"C<-C>" in perlrun|perlrun/-C [numberE<sol>list]> for details. 409 410=head2 The C<:locale> sub-pragma 411 412Starting in v5.8.6, the encoding name may be C<:locale>. This means that the 413encoding is taken from the current locale, and not hard-coded by the pragma. 414Since a script really can only be encoded in exactly one encoding, this option 415is dangerous. It makes sense only if the script itself is written in ASCII, 416and all the possible locales that will be in use when the script is executed 417are supersets of ASCII. That means that the script itself doesn't get 418changed, but the I/O handles have the specified encoding added, and the 419operations like C<chr> and C<ord> use that encoding. 420 421The logic of finding which locale C<:locale> uses is as follows: 422 423=over 4 424 425=item 1. 426 427If the platform supports the C<langinfo(CODESET)> interface, the codeset 428returned is used as the default encoding for the open pragma. 429 430=item 2. 431 432If 1. didn't work but we are under the locale pragma, the environment 433variables C<LC_ALL> and C<LANG> (in that order) are matched for encodings 434(the part after "C<.>", if any), and if any found, that is used 435as the default encoding for the open pragma. 436 437=item 3. 438 439If 1. and 2. 
didn't work, the environment variables C<LC_ALL> and C<LANG> 440(in that order) are matched for anything looking like UTF-8, and if 441any found, C<:utf8> is used as the default encoding for the open 442pragma. 443 444=back 445 446If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>) 447contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), 448the default encoding of your C<STDIN>, C<STDOUT>, and C<STDERR>, and of 449B<any subsequent file open>, is UTF-8. 450 451=head1 CAVEATS 452 453=head2 SIDE EFFECTS 454 455=over 456 457=item * 458 459If the C<encoding> pragma is in scope then the lengths returned are 460calculated from the length of C<$/> in Unicode characters, which is not 461always the same as the length of C<$/> in the native encoding. 462 463=item * 464 465Without this pragma, if strings operating under byte semantics and strings 466with Unicode character data are concatenated, the new string will 467be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>. 468 469The B<encoding> pragma changes this to use the specified encoding 470instead. For example: 471 472 use encoding 'utf8'; 473 my $string = chr(20000); # a Unicode string 474 utf8::encode($string); # now it's a UTF-8 encoded byte string 475 # concatenate with another Unicode string 476 print length($string . chr(20000)); 477 478Will print C<2>, because C<$string> is upgraded as UTF-8. Without 479C<use encoding 'utf8';>, it will print C<4> instead, since C<$string> 480is three octets when interpreted as Latin-1. 481 482=back 483 484=head2 DO NOT MIX MULTIPLE ENCODINGS 485 486Notice that only literals (string or regular expression) having only 487legacy code points are affected: if you mix data like this 488 489 \x{100}\xDF 490 \xDF\x{100} 491 492the data is assumed to be in (Latin 1 and) Unicode, not in your native 493encoding. 
In other words, this will match in "greek":

    "\xDF" =~ /\x{3af}/

but this will not

    "\xDF\x{100}" =~ /\x{3af}\x{100}/

since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on
the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL
LETTER IOTA WITH TONOS) because of the C<\x{100}> in that same left-hand
string. You should not be mixing your legacy data and Unicode in the same
string.

This pragma also affects encoding of the 0x80..0xFF code point range:
normally characters in that range are left as eight-bit bytes (unless
they are combined with characters with code points 0x100 or larger,
in which case all characters need to become UTF-8 encoded), but if
the C<encoding> pragma is present, even the 0x80..0xFF range always
gets UTF-8 encoded.

After all, the best thing about this pragma is that you don't have to
resort to \x{....} just to spell your name in a native encoding.
So feel free to put your strings in your encoding in quotes and
regexes.

=head2 Prior to Perl v5.22

The pragma was a per script, not a per block lexical. Only the last
C<use encoding> or C<no encoding> mattered, and it affected
B<the whole script>. However, the C<no encoding> pragma was supported and
C<use encoding> could appear as many times as you want in a given script
(though only the last was effective).

Since the scope wasn't lexical, other modules' use of C<chr>, C<ord>, I<etc.>
were affected. This leads to spooky, incorrect action at a distance that is
hard to debug.

This means you would have to be very careful of the load order:

    # called module
    package Module_IN_BAR;
    use encoding "bar";
    # stuff in "bar" encoding here
    1;

    # caller script
    use encoding "foo";
    use Module_IN_BAR;
    # surprise! use encoding "bar" is in effect.

The best way to avoid this oddity is to use this pragma RIGHT AFTER
other modules are loaded.
i.e. 545 546 use Module_IN_BAR; 547 use encoding "foo"; 548 549=head2 Prior to Encode version 1.87 550 551=over 552 553=item * 554 555C<STDIN> and C<STDOUT> were not set under the filter option. 556And C<< STDIN=>I<ENCODING> >> and C<< STDOUT=>I<ENCODING> >> didn't work like 557non-filter version. 558 559=item * 560 561C<use utf8> wasn't implicitly declared so you have to C<use utf8> to do 562 563 ${"\x{4eba}"}++ 564 565=back 566 567=head2 Prior to Perl v5.8.1 568 569=over 570 571=item "NON-EUC" doublebyte encodings 572 573Because perl needs to parse the script before applying this pragma, such 574encodings as Shift_JIS and Big-5 that may contain C<'\'> (BACKSLASH; 575C<\x5c>) in the second byte fail because the second byte may 576accidentally escape the quoting character that follows. 577 578=item C<tr///> 579 580The B<encoding> pragma works by decoding string literals in 581C<q//,qq//,qr//,qw///, qx//> and so forth. In perl v5.8.0, this 582does not apply to C<tr///>. Therefore, 583 584 use encoding 'euc-jp'; 585 #.... 586 $kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/; 587 # -------- -------- -------- -------- 588 589Does not work as 590 591 $kana =~ tr/\x{3041}-\x{3093}/\x{30a1}-\x{30f3}/; 592 593=over 594 595=item Legend of characters above 596 597 utf8 euc-jp charnames::viacode() 598 ----------------------------------------- 599 \x{3041} \xA4\xA1 HIRAGANA LETTER SMALL A 600 \x{3093} \xA4\xF3 HIRAGANA LETTER N 601 \x{30a1} \xA5\xA1 KATAKANA LETTER SMALL A 602 \x{30f3} \xA5\xF3 KATAKANA LETTER N 603 604=back 605 606This counterintuitive behavior has been fixed in perl v5.8.1. 607 608In perl v5.8.0, you can work around this as follows; 609 610 use encoding 'euc-jp'; 611 # .... 612 eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ }; 613 614Note the C<tr//> expression is surrounded by C<qq{}>. The idea behind 615this is the same as the classic idiom that makes C<tr///> 'interpolate': 616 617 tr/$from/$to/; # wrong! 
    eval qq{ tr/$from/$to/ }; # workaround.

=back

=head1 EXAMPLE - Greekperl

    use encoding "iso 8859-7";

    # \xDF in ISO 8859-7 (Greek) is \x{3af} in Unicode.

    $a = "\xDF";
    $b = "\x{100}";

    printf "%#x\n", ord($a); # will print 0x3af, not 0xdf

    $c = $a . $b;

    # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".

    # chr() is affected, and ...

    print "mega\n" if ord(chr(0xdf)) == 0x3af;

    # ... ord() is affected by the encoding pragma ...

    print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;

    # ... as are eq and cmp ...

    print "peta\n" if "\x{3af}" eq pack("C", 0xdf);
    print "exa\n" if ("\x{3af}" cmp pack("C", 0xdf)) == 0;

    # ... but pack/unpack C are not affected, in case you still
    # want to go back to your native encoding

    print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;

=head1 BUGS

=over

=item Thread safety

C<use encoding ...> is not thread-safe (i.e., do not use in threaded
applications).

=item Can't be used by more than one module in a single program.

Only one encoding is allowed. If you combine modules in a program that have
different encodings, only one will be actually used.

=item Other modules using C<STDIN> and C<STDOUT> get the encoded stream

They may be expecting something completely different.

=item literals in regex that are longer than 127 bytes

For native multibyte encodings (either fixed or variable length),
the current implementation of the regular expressions may introduce
recoding errors for regular expression literals longer than 127 bytes.

=item EBCDIC

The encoding pragma is not supported on EBCDIC platforms.

=item C<format>

This pragma doesn't work well with C<format> because PerlIO does not
get along very well with it.
When C<format> contains non-ASCII 687characters it prints funny or gets "wide character warnings". 688To understand it, try the code below. 689 690 # Save this one in utf8 691 # replace *non-ascii* with a non-ascii string 692 my $camel; 693 format STDOUT = 694 *non-ascii*@>>>>>>> 695 $camel 696 . 697 $camel = "*non-ascii*"; 698 binmode(STDOUT=>':encoding(utf8)'); # bang! 699 write; # funny 700 print $camel, "\n"; # fine 701 702Without binmode this happens to work but without binmode, print() 703fails instead of write(). 704 705At any rate, the very use of C<format> is questionable when it comes to 706unicode characters since you have to consider such things as character 707width (i.e. double-width for ideographs) and directions (i.e. BIDI for 708Arabic and Hebrew). 709 710=item See also L</CAVEATS> 711 712=back 713 714=head1 HISTORY 715 716This pragma first appeared in Perl v5.8.0. It has been enhanced in later 717releases as specified above. 718 719=head1 SEE ALSO 720 721L<perlunicode>, L<Encode>, L<open>, L<Filter::Util::Call>, 722 723Ch. 15 of C<Programming Perl (3rd Edition)> 724by Larry Wall, Tom Christiansen, Jon Orwant; 725O'Reilly & Associates; ISBN 0-596-00027-8 726 727=cut 728