1use 5.10.0; 2use warnings; 3#no warnings 'experimental'; 4use strict 'vars'; 5#use Carp::Always; 6 7package Text::Bidi; 8# ABSTRACT: Unicode bidi algorithm using libfribidi 9$Text::Bidi::VERSION = '2.15'; 10use Exporter; 11use base qw(Exporter); 12use Carp; 13 14use Text::Bidi::private; 15use Text::Bidi::Array::Byte; 16use Text::Bidi::Array::Long; 17use Encode qw(encode decode); 18 19 20BEGIN { 21 our %EXPORT_TAGS = ( 22 'all' => [ qw( 23 log2vis 24 is_bidi 25 get_mirror_char 26 get_bidi_type_name 27 fribidi_version 28 fribidi_version_num 29 unicode_version 30 ) ], 31 ); 32 our @EXPORT_OK = ( @{$EXPORT_TAGS{'all'}} ); 33} 34 35 36# The following mechanism is used to provide both kinds of interface: Every 37# method starts with 'my $self = S(@_)' instead of 'my $self = shift'. S 38# shifts and returns the object if there is one, or returns a global object, 39# stored in $Global, if there is in @_. The first time $Global is needed, it 40# is created with type $GlobalClass. 41 42my $Global; 43our $GlobalClass = __PACKAGE__; 44 45 46sub S(\@) { 47 my $l = shift; 48 my $s = $l->[0]; 49 return shift @$l if eval { $s->isa('Text::Bidi') }; 50 $Global = new $GlobalClass unless $Global; 51 $Global 52} 53 54 55sub new { 56 my $class = shift; 57 my $self = { 58 tie_byte => 'Text::Bidi::Array::Byte', 59 tie_long => 'Text::Bidi::Array::Long', 60 @_ 61 }; 62 bless $self => $class 63} 64 65 66sub tie_byte { 67 my $self = shift; 68 return \undef unless defined $_[0]; 69 $self->{'tie_byte'}->new(@_) 70} 71 72sub tie_long { 73 my $self = shift; 74 return \undef unless defined $_[0]; 75 $self->{'tie_long'}->new(@_) 76} 77 78 79sub utf8_to_internal { 80 my $self = S(@_); 81 my $str = shift; 82 my ($i, $res) = 83 Text::Bidi::private::utf8_to_internal(encode('utf8', $str)); 84 $self->tie_long($res) 85} 86 87 88sub internal_to_utf8 { 89 my $self = S(@_); 90 my $u = shift; 91 $u = $self->tie_long($u) unless eval { defined $$u }; 92 my $r = Text::Bidi::private::internal_to_utf8($$u); 93 decode('utf8', $r) 94} 95 96 97sub get_bidi_types { 98 my $self = S(@_); 99 my $u = shift; 100 my $t = Text::Bidi::private::get_bidi_types($$u); 101 $self->tie_long($t) 102} 103 104 105sub get_bidi_type_name { 106 my $self = S(@_); 107 Text::Bidi::private::get_bidi_type_name(@_) 108} 109 110 111sub get_joining_types { 112 my $self = S(@_); 113 my $u = shift; 114 $self->tie_byte(Text::Bidi::private::get_joining_types($$u)) 115} 116 117 118sub get_joining_type_name { 119 my $self = S(@_); 120 Text::Bidi::private::get_joining_type_name(@_) 121} 122 123 124sub get_par_embedding_levels { 125 my $self = S(@_); 126 my $bt = shift; 127 my $p = shift // $Text::Bidi::private::FRIBIDI_PAR_ON; 128 my ($lev, $par, $out) = 129 Text::Bidi::private::get_par_embedding_levels($$bt, $p); 130 my $res = $self->tie_byte($out); 131 ($par, $res) 132} 133 134 135sub join_arabic { 136 my $self = S(@_); 137 my ($t, $l, $j) = @_; 138 $self->tie_byte(Text::Bidi::private::join_arabic($$t, $$l, $$j)); 139} 140 141 142sub shaped { 143 my $self = S(@_); 144 my ($flags, $el, $prop, $u) = @_; 145 return ($prop, $u) unless defined $flags; 146 $flags ||= $Text::Bidi::private::FRIBIDI_FLAGS_ARABIC; 147 my ($p, $v) =Text::Bidi::private::shape_arabic($flags, $$el, $$prop, 148 $$u); 149 ($self->tie_byte($p), $self->tie_long($v)) 150} 151 152 153 154sub mirrored { 155 my $self = S(@_); 156 my ($el, $u) = @_; 157 my $r =Text::Bidi::private::shape_mirroring($$el, $$u); 158 my $res = $self->tie_long($r) 159} 160 161 162sub hash2flags { 163 my ($self, $flags) = @_; 164 my $res = 0; 165 foreach ( keys %$flags ) { 166 next unless $flags->{$_}; 167 next unless $_ eq uc; 168 $res |= ${"Text::Bidi::private::FRIBIDI_FLAG_$_"}; 169 } 170 $res 171} 172 173 174sub reorder { 175 my $self = S(@_); 176 my ($str, $map, $off, $len) = @_; 177 $off //= 0; 178 $len //= @$str - $off; 179 join('', (@$str)[@$map[$off..$off+$len-1]]) 180} 181 182 183sub reorder_map { 184 my $self = S(@_); 185 my ($bt, $off, $len, $par, $map, $el, $flags) = @_; 186 unless ( defined $el ) { 187 (my $p, $el) = $self->get_par_embedding_levels($bt, $par); 188 $par //= $p; 189 } 190 if ( defined $flags ) { 191 $flags = $self->hash2flags($flags) if ref $flags; 192 } else { 193 $flags = $Text::Bidi::private::FRIBIDI_FLAGS_DEFAULT; 194 } 195 $map //= [0..$#$bt]; 196 197 $map = $self->tie_long($map) unless eval {defined $$map}; 198 199 200 my ($lev, $elout, $mout) = Text::Bidi::private::reorder_map( 201 $flags, $$bt, $off, $len, $par, $$el, $$map); 202 203 ($elout, $mout) 204} 205 206# TODO this doesn't work 207 208 209sub remove_bidi_marks { 210 my $self = S(@_); 211 my ($v, $to, $from, $levels) = @_; 212 $to = $self->tie_long($to) unless eval {defined $$to}; 213 if ( defined($from) ) { 214 $from = $self->tie_long($from) unless eval {defined $$from}; 215 } else { 216 $from = \undef; 217 } 218 $levels = $self->tie_byte($levels) unless eval {defined $$levels}; 219 no warnings 'uninitialized'; 220 my ($len, $vout, $toout, $fromout, $levelsout) = 221 Text::Bidi::private::remove_bidi_marks($v, $$to, $$from, $$levels); 222 ($vout, $toout, $fromout, $levelsout) 223} 224 225 226sub log2vis { 227 require Text::Bidi::Paragraph; 228 my ($log, $width, $dir, $flags) = @_; 229 my $p = new Text::Bidi::Paragraph $log, dir => $dir; 230 $width //= $p->len; 231 my $off = 0; 232 my @visual; 233 while ( $off < $p->len ) { 234 my $v = $p->visual($off, $width, $flags); 235 my $l = length($v); 236 $off += $l; 237 $v = (' ' x ($width - $l)) . $v if $p->is_rtl; 238 push @visual, $v; 239 } 240 ($p, join("\n", @visual)) 241} 242 243 244sub is_bidi { $_[0] =~ /\p{bc=R}|\p{bc=AL}/ } 245 246 247sub get_mirror_char { 248 my $self = S(@_); 249 my $u = shift; 250 $u = $self->utf8_to_internal($u) unless ref($u); 251 my $r = Text::Bidi::private::get_mirror_char($u->[0]); 252 my $res = $self->tie_long([$r]); 253 wantarray ? ($res) : $self->internal_to_utf8($res) 254} 255 256 257sub fribidi_version { 258 $Text::Bidi::private::version_info 259} 260 261 262sub fribidi_version_num { 263 fribidi_version =~ /\(GNU FriBidi\) ([0-9.]*)/ ? $1 : () 264} 265 266 267sub unicode_version { 268 $Text::Bidi::private::unicode_version 269} 270 271 2721; # End of Text::Bidi 273 274__END__ 275 276=pod 277 278=head1 NAME 279 280Text::Bidi - Unicode bidi algorithm using libfribidi 281 282=head1 VERSION 283 284version 2.15 285 286=head1 SYNOPSIS 287 288 # Each displayed line is a "paragraph" 289 use Text::Bidi qw(log2vis); 290 ($par, $map, $visual) = log2vis($logical); 291 # or just 292 $visual = log2vis(...); 293 294 # For real paragraphs, need to specify the display width 295 ($par, $map, $visual) = log2vis($logical, $width); 296 297 # object oriented approach allows one to display line by line 298 $p = new Text::Bidi::Paragraph $logical; 299 $visual = $p->visual($off, $len); 300 301=head1 EXPORT 302 303The following functions can be exported (nothing is exported by default): 304 305=over 306 307=item * 308 309L</log2vis> 310 311=item * 312 313L</is_bidi> 314 315=item * 316 317L</get_mirror_char> 318 319=item * 320 321L</get_bidi_type_name> 322 323=item * 324 325L</fribidi_version> 326 327=item * 328 329L</unicode_version> 330 331=item * 332 333L</fribidi_version_num> 334 335=back 336 337All of them can be exported together using the C<:all> tag. 338 339=head1 DESCRIPTION 340 341This module provides basic support for the Unicode bidirectional (Bidi) text 342algorithm, for displaying text consisting of both left-to-right and 343right-to-left written languages (such as Hebrew and Arabic.) It does so via 344a I<swig> interface file to the I<libfribidi> library. 345 346The fundamental purpose of the bidi algorithm is to reorder text given in 347logical order into text in visually correct order, suitable for display using 348standard printing commands. ``Logical order'' means that the characters are 349given in the order in which they would be read if printed correctly. The 350direction of the text is determined by properties of the Unicode characters, 351usually without additional hints. See 352L<http://www.unicode.org/unicode/reports/tr9/> for more details on the 353problem and the algorithm. 354 355=head2 Standard usage 356 357The bidi algorithm works in two stages. The first is on the level of a 358paragraph, where the direction of each character is computed. The second is 359on the level of the lines to be displayed. The main practical difference is 360that the first stage requires only the text of the paragraph, while the 361second requires knowledge of the width of the displayed lines. The module (or 362the library) does not determine how the text is broken into paragraphs. 363 364The full interface is provided by L<Text::Bidi::Paragraph>, see there for 365details. This module provides an abbreviation, L</log2vis>, which combines 366creating a paragraph object with calling L<Text::Bidi::Paragraph/visual> on 367it. It is particularly useful in the case that the whole paragraph should be 368displayed at once, and the display width is known: 369 370 $visual = log2vis($logical, $width); 371 372There are more options (see L</log2vis>), but this is essentially it. The 373rest of this documentation will probably be useful only to people who are 374familiar with I<libfribidi> and who wish to extend or modify the module. 375 376=head2 The object-oriented approach 377 378All functions here can be called using either a procedural or an object 379oriented approach. For example, you may do either 380 381 $visual = log2vis($logical); 382 383or 384 385 $bidi = new Text::Bidi; 386 $visual = $bidi->log2vis($logical); 387 388The advantages of the second form is that it is easier to move to a 389sub-class, and that two or more objects with different parameters can be used 390simultaneously. If you are interested in deriving from this class, please see 391L</SUBCLASSING>. 392 393=head1 FUNCTIONS 394 395=head2 get_bidi_type_name 396 397 say $tb->get_bidi_type_name($Text::Bidi::Type::LTR); # says 'LTR' 398 399Return the string representation of a Bidi character type, as in 400fribidi_get_bidi_type_name(3). Note that for the above example, one needs to 401use L<Text::Bidi::Constants>. 402 403=head2 log2vis 404 405 ($p, $visual) = log2vis($logical[,$width[,$dir[,$flags]]]); 406 407Convert the input paragraph B<$logical> to visual. This constructs a 408L<Text::Bidi::Paragraph> object, and calls L<Text::Bidi::Paragraph/visual> 409several times, as required. B<$width> is the maximum width of a line, 410defaulting to the whole length of the paragraph. B<$dir> is the base 411direction of the paragraph, determined automatically if not provided. 412B<$flags> is as in L<Text::Bidi::Paragraph/visual>. The paragraph will be 413justified to the right if it is RTL. 414 415The output consists of the L<Text::Bidi::Paragraph> object B<$p> and the 416visual string B<$visual>. 417 418=head2 is_bidi() 419 420 my $bidi = is_bidi($logical); 421 422Returns true if the input B<$logical> contains bidi characters. Otherwise, 423the output of the bidi algorithm will be identical to the input, hence this 424helps if we want to short-circuit. 425 426=head2 get_mirror_char() 427 428 my $mir = get_mirror_char('['); # $mir == ']' 429 430Return the mirror character of the input, possibly itself. 431 432=head2 fribidi_version 433 434 say fribidi_version(); 435 436Returns the version information for the fribidi library 437 438=head2 fribidi_version_num 439 440 say fribidi_version_num(); 441 442Returns the version number for the fribidi library 443 444=head2 unicode_version 445 446 say unicode_version(); 447 448Returns the Unicode version used by the fribidi library 449 450=head1 SUBCLASSING 451 452The rest of the documentation is only interesting if you would like to derive 453from this class. The methods listed under L</METHODS> are wrappers around the 454similarly named functions in libfribidi, and may be useful for this purpose. 455 456If you do sub-class this class, and would like the procedural interface to 457use your functions, put a line like 458 459 $Text::Bidi::GlobalClass = __PACKAGE__; 460 461in your module. 462 463=head1 METHODS 464 465=head2 new 466 467 $tb = new Text::Bidi [tie_byte => ..., tie_long => ...]; 468 469Create a new L<Text::Bidi> object. If the I<tie_byte> or I<tie_long> options 470are given, they should be the names (strings) of the classes used as dual 471life arrays, most probably derived class of L<Text::Bidi::Array::Byte> and 472L<Text::Bidi::Array::Long>, respectively. 473 474This method is probably of little interest for standard (procedural) use. 475 476=head2 utf8_to_internal 477 478 $la = $tb->utf8_to_internal($str); 479 480Convert the Perl string I<$str> into the representation used by libfribidi. 481The result will be a L<Text::Bidi::Array::Long>. 482 483=head2 internal_to_utf8 484 485 $str = $tb->internal_to_utf8($la); 486 487Convert the long array I<$la>, representing a string encoded in to format 488used by libfribidi, into a Perl string. The array I<$la> can be either a 489L<Text::Bidi::Array::Long>, or anything that can be used to construct it. 490 491=head2 get_bidi_types 492 493 $types = $tb->get_bidi_types($internal); 494 495Returns a L<Text::Bidi::Array::Long> with the list of Bidi types of the text 496given by $internal, a representation of the paragraph text, as returned by 497utf8_to_internal(). Wraps fribidi_get_bidi_types(3). 498 499=head2 get_joining_types 500 501 $types = $tb->get_joining_types($internal); 502 503Returns a L<Text::Bidi::Array::Byte> with the list of joining types of the 504text given by B<$internal>, a representation of the paragraph text, as returned 505by L</utf8_to_internal>. Wraps fribidi_get_joining_types(3). 506 507=head2 get_joining_type_name 508 509 say $tb->get_joining_type_name($Text::Bidi::Joining::U); # says 'U' 510 511Return the string representation of a joining character type, as in 512fribidi_get_joining_type_name(3). Note that for the above example, one needs 513to use L<Text::Bidi::Constants>. 514 515=head2 get_par_embedding_levels 516 517 ($odir, $lvl) = $tb->get_par_embedding_levels($types[, $dir]); 518 519Return the embedding levels of the characters, whose types are given by 520I<$types>. I<$types> is a L<Text::Bidi::Array::Long> of Bidi types, as 521returned by L</get_bidi_types>. I<$dir> is the base paragraph direction. If 522not given, it defaults to C<FRIBIDI_PAR_ON> (neutral). 523 524The output is the resolved paragraph direction I<$odir>, and the 525L<Text::Bidi::Array::Byte> array I<$lvl> of embedding levels. 526 527=head2 join_arabic 528 529 $props = $tb->join_arabic($bidi_types, $lvl, $join_types); 530 531Returns a L<Text::Bidi::Array::Byte> with B<$props>, as returned by 532fribidi_join_arabic(3). The inputs are B<$bidi_types>, as returned by 533L</get_bidi_types>, B<$lvl>, as returned by 534L</get_par_embedding_levels>, and B<$join_types> as returned by 535L</get_joining_types>. Wraps fribidi_join_arabic(3). 536 537=head2 shaped 538 539 ($newp, $shaped) = $tb->shaped($flags, $lvl, $prop, $internal); 540 541Returns the internal representation of the paragraph, with shaping applied. 542The internal representation of the original paragraph (as returned by 543L</utf8_to_internal>) should be passed in B<$internal>, while the embedding 544levels (as returned by L</get_par_embedding_levels>) should be in B<$lvl>. 545See the documentation of F<fribidi-arabic.h> for B<$flags>, but as a special 546case, a value of C<undef> here skips shaping (returning B<($prop, $internal)>), 547while any other false value becomes the default. B<$prop> is as 548returned by L</join_arabic>. This method wraps fribidi_shape_arabic(3). 549 550=head2 mirrored 551 552 $mirrored = $tb->mirrored($lvl, $internal); 553 554Returns the internal representation of the paragraph, with mirroring applied. 555The internal representation of the original paragraph (as returned by 556L</utf8_to_internal>) should be passed in B<$internal>, while the embedding 557levels (as returned by L</get_par_embedding_levels>) should be in B<$lvl>. 558This method wraps fribidi_shape_mirroring(3). 559 560=head2 reorder 561 562 $str = $tb->reorder($in, $map[, $offset[, $len]]); 563 say $tb->reorder([qw(A B C)], [2, 0, 1]); # says CAB 564 565View the array ref B<$map> as a permutation, and permute the list (of 566characters) B<$in> according to it. The result is joined, to obtain a string. 567If B<$offset> and B<$len> are given, returns only that part of the resulting 568string. 569 570=head2 reorder_map 571 572 ($elout, $mout) = $tb->reorder_map($types, $offset, $len, $par, 573 $map, $el, $flags); 574 575Compute the reordering map for bidi types given by B<$types>, for the 576interval starting with B<$offset> of length B<$len>. Note that this part of 577the algorithm depends on the interval in an essential way. B<$types> is an 578array of types, as computed by L</get_bidi_types>. The other arguments are 579optional: 580 581=over 582 583=item B<$par> 584 585The base paragraph direction. Computed via L</get_par_embedding_levels> if 586not defined. 587 588=item B<$map> 589 590An array ref (or a L<Text::Bidi::Array::Long>) from a previous call (with a 591different interval). The method is called repeatedly for the same paragraph, 592with different intervals, and the reordering map is updated for the given 593interval. If not defined, initialised to the identity map. 594 595=item B<$el> 596 597The embedding levels. If not given, computed by a call to 598L</get_par_embedding_levels>. 599 600=item B<$flags> 601 602A specification of flags, as described in fribidi_reorder_line(3). The flags 603can be given either as a number (using C<$Text::Bidi::Flags::..> from 604L<Text::Bidi::Constants>), or as a hashref of the form 605C<{REORDER_NSM =E<gt> 1}>. Defaults to C<FRIBIDI_FLAGS_DEFAULT>. 606 607=back 608 609The output consists of the modified map B<$mout> (a 610L<Text::Bidi::Array::Long>), and possibly modified embedding levels 611B<$elout>. 612 613=for Pod::Coverage S 614 615=for Pod::Coverage tie_byte tie_long 616 617=for Pod::Coverage hash2flags 618 619=begin comment 620 621 622 623 624=end comment 625 626method remove_bidi_marks 627 628 ($v, $to, $from, $levels) = 629 $tb->remove_bidi_marks($v[, $to[, $from[, $levels]]]) 630 631Remove the explicit bidi marks from C<$v>. The optional arguments, if given, 632are the map from the logical to the visual string, the inverse map, and 633embedding levels, respectively, as returned by L</reorder_map>. The inverse 634map C<$from> can be obtained from the direct one C<$to> by a command like: 635 636 @$from[@$map] = 0..$#$map 637 638Each of the arguments can be C<undef>, in which case it will be skipped. This 639implements step X9, see fribidi_remove_bidi_marks(3). 640 641=head1 BUGS 642 643There are no real tests for any of this. 644 645Shaping is not supported (probably), since I don't know what it is. Help 646welcome! 647 648=head1 SEE ALSO 649 650L<Text::Bidi::Paragraph> 651 652L<Text::Bidi::Constants> 653 654L<Encode> 655 656L<The fribidi library|http://fribidi.org/> 657 658L<Swig|http://www.swig.org> 659 660L<The unicode bidi algorithm|http://www.unicode.org/unicode/reports/tr9/> 661 662=head1 AUTHOR 663 664Moshe Kamensky <kamensky@cpan.org> 665 666=head1 COPYRIGHT AND LICENSE 667 668This software is copyright (c) 2015 by Moshe Kamensky. 669 670This is free software; you can redistribute it and/or modify it under 671the same terms as the Perl 5 programming language system itself. 672 673=cut 674