1use 5.10.0;
2use warnings;
3#no warnings 'experimental';
4use strict 'vars';
5#use Carp::Always;
6
7package Text::Bidi;
8# ABSTRACT: Unicode bidi algorithm using libfribidi
9$Text::Bidi::VERSION = '2.15';
10use Exporter;
11use base qw(Exporter);
12use Carp;
13
14use Text::Bidi::private;
15use Text::Bidi::Array::Byte;
16use Text::Bidi::Array::Long;
17use Encode qw(encode decode);
18
19
20BEGIN {
21    our %EXPORT_TAGS = (
22        'all' => [ qw(
23            log2vis
24            is_bidi
25            get_mirror_char
26            get_bidi_type_name
27            fribidi_version
28            fribidi_version_num
29            unicode_version
30        ) ],
31    );
32    our @EXPORT_OK = ( @{$EXPORT_TAGS{'all'}} );
33}
34
35
36# The following mechanism is used to provide both kinds of interface: Every
37# method starts with 'my $self = S(@_)' instead of 'my $self = shift'. S
38# shifts and returns the object if there is one, or returns a global object,
39# stored in $Global, if there is in @_. The first time $Global is needed, it
40# is created with type $GlobalClass.
41
42my $Global;
43our $GlobalClass = __PACKAGE__;
44
45
46sub S(\@) {
47    my $l = shift;
48    my $s = $l->[0];
49    return shift @$l if eval { $s->isa('Text::Bidi') };
50    $Global = new $GlobalClass unless $Global;
51    $Global
52}
53
54
55sub new {
56    my $class = shift;
57    my $self = {
58        tie_byte => 'Text::Bidi::Array::Byte',
59        tie_long => 'Text::Bidi::Array::Long',
60        @_
61    };
62    bless $self => $class
63}
64
65
66sub tie_byte {
67    my $self = shift;
68    return \undef unless defined $_[0];
69    $self->{'tie_byte'}->new(@_)
70}
71
72sub tie_long {
73    my $self = shift;
74    return \undef unless defined $_[0];
75    $self->{'tie_long'}->new(@_)
76}
77
78
79sub utf8_to_internal {
80    my $self = S(@_);
81    my $str = shift;
82    my ($i, $res) =
83      Text::Bidi::private::utf8_to_internal(encode('utf8', $str));
84    $self->tie_long($res)
85}
86
87
88sub internal_to_utf8 {
89    my $self = S(@_);
90    my $u = shift;
91    $u = $self->tie_long($u) unless eval { defined $$u };
92    my $r = Text::Bidi::private::internal_to_utf8($$u);
93    decode('utf8', $r)
94}
95
96
97sub get_bidi_types {
98    my $self = S(@_);
99    my $u = shift;
100    my $t = Text::Bidi::private::get_bidi_types($$u);
101    $self->tie_long($t)
102}
103
104
105sub get_bidi_type_name {
106    my $self = S(@_);
107    Text::Bidi::private::get_bidi_type_name(@_)
108}
109
110
111sub get_joining_types {
112    my $self = S(@_);
113    my $u = shift;
114    $self->tie_byte(Text::Bidi::private::get_joining_types($$u))
115}
116
117
118sub get_joining_type_name {
119    my $self = S(@_);
120    Text::Bidi::private::get_joining_type_name(@_)
121}
122
123
124sub get_par_embedding_levels {
125    my $self = S(@_);
126    my $bt = shift;
127    my $p = shift // $Text::Bidi::private::FRIBIDI_PAR_ON;
128    my ($lev, $par, $out) =
129        Text::Bidi::private::get_par_embedding_levels($$bt, $p);
130    my $res = $self->tie_byte($out);
131    ($par, $res)
132}
133
134
135sub join_arabic {
136    my $self = S(@_);
137    my ($t, $l, $j) = @_;
138    $self->tie_byte(Text::Bidi::private::join_arabic($$t, $$l, $$j));
139}
140
141
142sub shaped {
143    my $self = S(@_);
144    my ($flags, $el, $prop, $u) = @_;
145    return ($prop, $u) unless defined $flags;
146    $flags ||= $Text::Bidi::private::FRIBIDI_FLAGS_ARABIC;
147    my ($p, $v) =Text::Bidi::private::shape_arabic($flags, $$el, $$prop,
148        $$u);
149    ($self->tie_byte($p), $self->tie_long($v))
150}
151
152
153
154sub mirrored {
155    my $self = S(@_);
156    my ($el, $u) = @_;
157    my $r =Text::Bidi::private::shape_mirroring($$el, $$u);
158    my $res = $self->tie_long($r)
159}
160
161
162sub hash2flags {
163    my ($self, $flags) = @_;
164    my $res = 0;
165    foreach ( keys %$flags ) {
166        next unless $flags->{$_};
167        next unless $_ eq uc;
168        $res |= ${"Text::Bidi::private::FRIBIDI_FLAG_$_"};
169    }
170    $res
171}
172
173
174sub reorder {
175    my $self = S(@_);
176    my ($str, $map, $off, $len) = @_;
177    $off //= 0;
178    $len //= @$str - $off;
179    join('', (@$str)[@$map[$off..$off+$len-1]])
180}
181
182
183sub reorder_map {
184    my $self = S(@_);
185    my ($bt, $off, $len, $par, $map, $el, $flags) = @_;
186    unless ( defined $el ) {
187        (my $p, $el) = $self->get_par_embedding_levels($bt, $par);
188        $par //= $p;
189    }
190    if ( defined $flags ) {
191        $flags = $self->hash2flags($flags) if ref $flags;
192    } else {
193        $flags = $Text::Bidi::private::FRIBIDI_FLAGS_DEFAULT;
194    }
195    $map //= [0..$#$bt];
196
197    $map = $self->tie_long($map) unless eval {defined $$map};
198
199
200    my ($lev, $elout, $mout) = Text::Bidi::private::reorder_map(
201        $flags, $$bt, $off, $len, $par, $$el, $$map);
202
203    ($elout, $mout)
204}
205
206# TODO this doesn't work
207
208
209sub remove_bidi_marks {
210    my $self = S(@_);
211    my ($v, $to, $from, $levels) = @_;
212    $to = $self->tie_long($to) unless eval {defined $$to};
213    if ( defined($from) ) {
214        $from = $self->tie_long($from) unless eval {defined $$from};
215    } else {
216        $from = \undef;
217    }
218    $levels = $self->tie_byte($levels) unless eval {defined $$levels};
219    no warnings 'uninitialized';
220    my ($len, $vout, $toout, $fromout, $levelsout) =
221      Text::Bidi::private::remove_bidi_marks($v, $$to, $$from, $$levels);
222    ($vout, $toout, $fromout, $levelsout)
223}
224
225
226sub log2vis {
227    require Text::Bidi::Paragraph;
228    my ($log, $width, $dir, $flags) = @_;
229    my $p = new Text::Bidi::Paragraph $log, dir => $dir;
230    $width //= $p->len;
231    my $off = 0;
232    my @visual;
233    while ( $off < $p->len ) {
234        my $v = $p->visual($off, $width, $flags);
235        my $l = length($v);
236        $off += $l;
237        $v = (' ' x ($width - $l)) . $v if $p->is_rtl;
238        push @visual, $v;
239    }
240    ($p, join("\n", @visual))
241}
242
243
244sub is_bidi { $_[0] =~ /\p{bc=R}|\p{bc=AL}/ }
245
246
247sub get_mirror_char {
248    my $self = S(@_);
249    my $u = shift;
250    $u = $self->utf8_to_internal($u) unless ref($u);
251    my $r = Text::Bidi::private::get_mirror_char($u->[0]);
252    my $res = $self->tie_long([$r]);
253    wantarray ? ($res) : $self->internal_to_utf8($res)
254}
255
256
257sub fribidi_version {
258    $Text::Bidi::private::version_info
259}
260
261
262sub fribidi_version_num {
263    fribidi_version =~ /\(GNU FriBidi\) ([0-9.]*)/ ? $1 : ()
264}
265
266
267sub unicode_version {
268    $Text::Bidi::private::unicode_version
269}
270
271
2721; # End of Text::Bidi
273
274__END__
275
276=pod
277
278=head1 NAME
279
280Text::Bidi - Unicode bidi algorithm using libfribidi
281
282=head1 VERSION
283
284version 2.15
285
286=head1 SYNOPSIS
287
288    # Each displayed line is a "paragraph"
289    use Text::Bidi qw(log2vis);
290    ($par, $map, $visual) = log2vis($logical);
291    # or just
292    $visual = log2vis(...);
293
294    # For real paragraphs, need to specify the display width
295    ($par, $map, $visual) = log2vis($logical, $width);
296
297    # object oriented approach allows one to display line by line
298    $p = new Text::Bidi::Paragraph $logical;
299    $visual = $p->visual($off, $len);
300
301=head1 EXPORT
302
303The following functions can be exported (nothing is exported by default):
304
305=over
306
307=item *
308
309L</log2vis>
310
311=item *
312
313L</is_bidi>
314
315=item *
316
317L</get_mirror_char>
318
319=item *
320
321L</get_bidi_type_name>
322
323=item *
324
325L</fribidi_version>
326
327=item *
328
329L</unicode_version>
330
331=item *
332
333L</fribidi_version_num>
334
335=back
336
337All of them can be exported together using the C<:all> tag.
338
339=head1 DESCRIPTION
340
341This module provides basic support for the Unicode bidirectional (Bidi) text
342algorithm, for displaying text consisting of both left-to-right and
343right-to-left written languages (such as Hebrew and Arabic.) It does so via
344a I<swig> interface file to the I<libfribidi> library.
345
346The fundamental purpose of the bidi algorithm is to reorder text given in
347logical order into text in visually correct order, suitable for display using
348standard printing commands. ``Logical order'' means that the characters are
349given in the order in which they would be read if printed correctly. The
350direction of the text is determined by properties of the Unicode characters,
351usually without additional hints.  See
352L<http://www.unicode.org/unicode/reports/tr9/> for more details on the
353problem and the algorithm.
354
355=head2 Standard usage
356
357The bidi algorithm works in two stages. The first is on the level of a
358paragraph, where the direction of each character is computed. The second is
359on the level of the lines to be displayed. The main practical difference is
360that the first stage requires only the text of the paragraph, while the
361second requires knowledge of the width of the displayed lines. The module (or
362the library) does not determine how the text is broken into paragraphs.
363
364The full interface is provided by L<Text::Bidi::Paragraph>, see there for
365details. This module provides an abbreviation, L</log2vis>, which combines
366creating a paragraph object with calling L<Text::Bidi::Paragraph/visual> on
367it.  It is particularly useful in the case that the whole paragraph should be
368displayed at once, and the display width is known:
369
370    $visual = log2vis($logical, $width);
371
372There are more options (see L</log2vis>), but this is essentially it. The
373rest of this documentation will probably be useful only to people who are
374familiar with I<libfribidi> and who wish to extend or modify the module.
375
376=head2 The object-oriented approach
377
378All functions here can be called using either a procedural or an object
379oriented approach. For example, you may do either
380
381        $visual = log2vis($logical);
382
383or
384
385        $bidi = new Text::Bidi;
386        $visual = $bidi->log2vis($logical);
387
388The advantages of the second form is that it is easier to move to a
389sub-class, and that two or more objects with different parameters can be used
390simultaneously. If you are interested in deriving from this class, please see
391L</SUBCLASSING>.
392
393=head1 FUNCTIONS
394
395=head2 get_bidi_type_name
396
397    say $tb->get_bidi_type_name($Text::Bidi::Type::LTR); # says 'LTR'
398
399Return the string representation of a Bidi character type, as in
400fribidi_get_bidi_type_name(3). Note that for the above example, one needs to
401use L<Text::Bidi::Constants>.
402
403=head2 log2vis
404
405    ($p, $visual) = log2vis($logical[,$width[,$dir[,$flags]]]);
406
407Convert the input paragraph B<$logical> to visual. This constructs a
408L<Text::Bidi::Paragraph> object, and calls L<Text::Bidi::Paragraph/visual>
409several times, as required. B<$width> is the maximum width of a line,
410defaulting to the whole length of the paragraph.  B<$dir> is the base
411direction of the paragraph, determined automatically if not provided.
412B<$flags> is as in L<Text::Bidi::Paragraph/visual>. The paragraph will be
413justified to the right if it is RTL.
414
415The output consists of the L<Text::Bidi::Paragraph> object B<$p> and the
416visual string B<$visual>.
417
418=head2 is_bidi()
419
420    my $bidi = is_bidi($logical);
421
422Returns true if the input B<$logical> contains bidi characters. Otherwise,
423the output of the bidi algorithm will be identical to the input, hence this
424helps if we want to short-circuit.
425
426=head2 get_mirror_char()
427
428    my $mir = get_mirror_char('['); # $mir == ']'
429
430Return the mirror character of the input, possibly itself.
431
432=head2 fribidi_version
433
434    say fribidi_version();
435
436Returns the version information for the fribidi library
437
438=head2 fribidi_version_num
439
440    say fribidi_version_num();
441
442Returns the version number for the fribidi library
443
444=head2 unicode_version
445
446    say unicode_version();
447
448Returns the Unicode version used by the fribidi library
449
450=head1 SUBCLASSING
451
452The rest of the documentation is only interesting if you would like to derive
453from this class. The methods listed under L</METHODS> are wrappers around the
454similarly named functions in libfribidi, and may be useful for this purpose.
455
456If you do sub-class this class, and would like the procedural interface to
457use your functions, put a line like
458
459        $Text::Bidi::GlobalClass = __PACKAGE__;
460
461in your module.
462
463=head1 METHODS
464
465=head2 new
466
467    $tb = new Text::Bidi [tie_byte => ..., tie_long => ...];
468
469Create a new L<Text::Bidi> object. If the I<tie_byte> or I<tie_long> options
470are given, they should be the names (strings) of the classes used as dual
471life arrays, most probably derived class of L<Text::Bidi::Array::Byte> and
472L<Text::Bidi::Array::Long>, respectively.
473
474This method is probably of little interest for standard (procedural) use.
475
476=head2 utf8_to_internal
477
478    $la = $tb->utf8_to_internal($str);
479
480Convert the Perl string I<$str> into the representation used by libfribidi.
481The result will be a L<Text::Bidi::Array::Long>.
482
483=head2 internal_to_utf8
484
485    $str = $tb->internal_to_utf8($la);
486
487Convert the long array I<$la>, representing a string encoded in to format
488used by libfribidi, into a Perl string. The array I<$la> can be either a
489L<Text::Bidi::Array::Long>, or anything that can be used to construct it.
490
491=head2 get_bidi_types
492
493    $types = $tb->get_bidi_types($internal);
494
495Returns a L<Text::Bidi::Array::Long> with the list of Bidi types of the text
496given by $internal, a representation of the paragraph text, as returned by
497utf8_to_internal(). Wraps fribidi_get_bidi_types(3).
498
499=head2 get_joining_types
500
501    $types = $tb->get_joining_types($internal);
502
503Returns a L<Text::Bidi::Array::Byte> with the list of joining types of the
504text given by B<$internal>, a representation of the paragraph text, as returned
505by L</utf8_to_internal>. Wraps fribidi_get_joining_types(3).
506
507=head2 get_joining_type_name
508
509    say $tb->get_joining_type_name($Text::Bidi::Joining::U); # says 'U'
510
511Return the string representation of a joining character type, as in
512fribidi_get_joining_type_name(3). Note that for the above example, one needs
513to use L<Text::Bidi::Constants>.
514
515=head2 get_par_embedding_levels
516
517   ($odir, $lvl) = $tb->get_par_embedding_levels($types[, $dir]);
518
519Return the embedding levels of the characters, whose types are given by
520I<$types>. I<$types> is a L<Text::Bidi::Array::Long> of Bidi types, as
521returned by L</get_bidi_types>. I<$dir> is the base paragraph direction. If
522not given, it defaults to C<FRIBIDI_PAR_ON> (neutral).
523
524The output is the resolved paragraph direction I<$odir>, and the
525L<Text::Bidi::Array::Byte> array I<$lvl> of embedding levels.
526
527=head2 join_arabic
528
529    $props = $tb->join_arabic($bidi_types, $lvl, $join_types);
530
531Returns a L<Text::Bidi::Array::Byte> with B<$props>, as returned by
532fribidi_join_arabic(3). The inputs are B<$bidi_types>, as returned by
533L</get_bidi_types>, B<$lvl>, as returned by
534L</get_par_embedding_levels>, and B<$join_types> as returned by
535L</get_joining_types>.  Wraps fribidi_join_arabic(3).
536
537=head2 shaped
538
539    ($newp, $shaped) = $tb->shaped($flags, $lvl, $prop, $internal);
540
541Returns the internal representation of the paragraph, with shaping applied.
542The internal representation of the original paragraph (as returned by
543L</utf8_to_internal>) should be passed in B<$internal>, while the embedding
544levels (as returned by L</get_par_embedding_levels>) should be in B<$lvl>.
545See the documentation of F<fribidi-arabic.h> for B<$flags>, but as a special
546case, a value of C<undef> here skips shaping (returning B<($prop, $internal)>),
547while any other false value becomes the default. B<$prop> is as
548returned by L</join_arabic>.  This method wraps fribidi_shape_arabic(3).
549
550=head2 mirrored
551
552    $mirrored = $tb->mirrored($lvl, $internal);
553
554Returns the internal representation of the paragraph, with mirroring applied.
555The internal representation of the original paragraph (as returned by
556L</utf8_to_internal>) should be passed in B<$internal>, while the embedding
557levels (as returned by L</get_par_embedding_levels>) should be in B<$lvl>.
558This method wraps fribidi_shape_mirroring(3).
559
560=head2 reorder
561
562    $str = $tb->reorder($in, $map[, $offset[, $len]]);
563    say $tb->reorder([qw(A B C)], [2, 0, 1]); # says CAB
564
565View the array ref B<$map> as a permutation, and permute the list (of
566characters) B<$in> according to it. The result is joined, to obtain a string.
567If B<$offset> and B<$len> are given, returns only that part of the resulting
568string.
569
570=head2 reorder_map
571
572    ($elout, $mout) = $tb->reorder_map($types, $offset, $len, $par,
573                                       $map, $el, $flags);
574
575Compute the reordering map for bidi types given by B<$types>, for the
576interval starting with B<$offset> of length B<$len>. Note that this part of
577the algorithm depends on the interval in an essential way. B<$types> is an
578array of types, as computed by L</get_bidi_types>. The other arguments are
579optional:
580
581=over
582
583=item B<$par>
584
585The base paragraph direction. Computed via L</get_par_embedding_levels> if
586not defined.
587
588=item B<$map>
589
590An array ref (or a L<Text::Bidi::Array::Long>) from a previous call (with a
591different interval). The method is called repeatedly for the same paragraph,
592with different intervals, and the reordering map is updated for the given
593interval. If not defined, initialised to the identity map.
594
595=item B<$el>
596
597The embedding levels. If not given, computed by a call to
598L</get_par_embedding_levels>.
599
600=item B<$flags>
601
602A specification of flags, as described in fribidi_reorder_line(3). The flags
603can be given either as a number (using C<$Text::Bidi::Flags::..> from
604L<Text::Bidi::Constants>), or as a hashref of the form
605C<{REORDER_NSM =E<gt> 1}>. Defaults to C<FRIBIDI_FLAGS_DEFAULT>.
606
607=back
608
609The output consists of the modified map B<$mout> (a
610L<Text::Bidi::Array::Long>), and possibly modified embedding levels
611B<$elout>.
612
613=for Pod::Coverage S
614
615=for Pod::Coverage tie_byte tie_long
616
617=for Pod::Coverage hash2flags
618
619=begin comment
620
621
622
623
624=end comment
625
626method remove_bidi_marks
627
628    ($v, $to, $from, $levels) =
629        $tb->remove_bidi_marks($v[, $to[, $from[, $levels]]])
630
631Remove the explicit bidi marks from C<$v>. The optional arguments, if given,
632are the map from the logical to the visual string, the inverse map, and
633embedding levels, respectively, as returned by L</reorder_map>. The inverse
634map C<$from> can be obtained from the direct one C<$to> by a command like:
635
636    @$from[@$map] = 0..$#$map
637
638Each of the arguments can be C<undef>, in which case it will be skipped. This
639implements step X9, see fribidi_remove_bidi_marks(3).
640
641=head1 BUGS
642
643There are no real tests for any of this.
644
645Shaping is not supported (probably), since I don't know what it is. Help
646welcome!
647
648=head1 SEE ALSO
649
650L<Text::Bidi::Paragraph>
651
652L<Text::Bidi::Constants>
653
654L<Encode>
655
656L<The fribidi library|http://fribidi.org/>
657
658L<Swig|http://www.swig.org>
659
660L<The unicode bidi algorithm|http://www.unicode.org/unicode/reports/tr9/>
661
662=head1 AUTHOR
663
664Moshe Kamensky <kamensky@cpan.org>
665
666=head1 COPYRIGHT AND LICENSE
667
668This software is copyright (c) 2015 by Moshe Kamensky.
669
670This is free software; you can redistribute it and/or modify it under
671the same terms as the Perl 5 programming language system itself.
672
673=cut
674