xref: /openbsd/gnu/usr.bin/perl/lib/open.pm (revision 256a93a4)
1package open;
2use warnings;
3
4our $VERSION = '1.13';
5
6require 5.008001; # for PerlIO::get_layers()
7
8my $locale_encoding;
9
10sub _get_encname {
11    return ($1, Encode::resolve_alias($1)) if $_[0] =~ /^:?encoding\((.+)\)$/;
12    return;
13}
14
15sub croak {
16    require Carp; goto &Carp::croak;
17}
18
19sub _drop_oldenc {
20    # If by the time we arrive here there already is at the top of the
21    # perlio layer stack an encoding identical to what we would like
22    # to push via this open pragma, we will pop away the old encoding
23    # (+utf8) so that we can push ourselves in place (this is easier
24    # than ignoring pushing ourselves because of the way how ${^OPEN}
25    # works).  So we are looking for something like
26    #
27    #   stdio encoding(xxx) utf8
28    #
29    # in the existing layer stack, and in the new stack chunk for
30    #
31    #   :encoding(xxx)
32    #
33    # If we find a match, we pop the old stack (once, since
34    # the utf8 is just a flag on the encoding layer)
35    my ($h, @new) = @_;
36    return unless @new >= 1 && $new[-1] =~ /^:encoding\(.+\)$/;
37    my @old = PerlIO::get_layers($h);
38    return unless @old >= 3 &&
39	          $old[-1] eq 'utf8' &&
40                  $old[-2] =~ /^encoding\(.+\)$/;
41    require Encode;
42    my ($loname, $lcname) = _get_encname($old[-2]);
43    unless (defined $lcname) { # Should we trust get_layers()?
44	croak("open: Unknown encoding '$loname'");
45    }
46    my ($voname, $vcname) = _get_encname($new[-1]);
47    unless (defined $vcname) {
48	croak("open: Unknown encoding '$voname'");
49    }
50    if ($lcname eq $vcname) {
51	binmode($h, ":pop"); # utf8 is part of the encoding layer
52    }
53}
54
55sub import {
56    my ($class,@args) = @_;
57    croak("open: needs explicit list of PerlIO layers") unless @args;
58    my $std;
59    my ($in,$out) = split(/\0/,(${^OPEN} || "\0"), -1);
60    while (@args) {
61	my $type = shift(@args);
62	my $dscp;
63	if ($type =~ /^:?(utf8|locale|encoding\(.+\))$/) {
64	    $type = 'IO';
65	    $dscp = ":$1";
66	} elsif ($type eq ':std') {
67	    $std = 1;
68	    next;
69	} else {
70	    $dscp = shift(@args) || '';
71	}
72	my @val;
73	foreach my $layer (split(/\s+/,$dscp)) {
74            $layer =~ s/^://;
75	    if ($layer eq 'locale') {
76		require Encode;
77		require encoding;
78		$locale_encoding = encoding::_get_locale_encoding()
79		    unless defined $locale_encoding;
80		(warnings::warnif("layer", "Cannot figure out an encoding to use"), last)
81		    unless defined $locale_encoding;
82                $layer = "encoding($locale_encoding)";
83		$std = 1;
84	    } else {
85		my $target = $layer;		# the layer name itself
86		$target =~ s/^(\w+)\(.+\)$/$1/;	# strip parameters
87
88		unless(PerlIO::Layer::->find($target,1)) {
89		    warnings::warnif("layer", "Unknown PerlIO layer '$target'");
90		}
91	    }
92	    push(@val,":$layer");
93	    if ($layer =~ /^(crlf|raw)$/) {
94		$^H{"open_$type"} = $layer;
95	    }
96	}
97	if ($type eq 'IN') {
98	    _drop_oldenc(*STDIN, @val) if $std;
99	    $in  = join(' ', @val);
100	}
101	elsif ($type eq 'OUT') {
102	    if ($std) {
103		_drop_oldenc(*STDOUT, @val);
104		_drop_oldenc(*STDERR, @val);
105	    }
106	    $out = join(' ', @val);
107	}
108	elsif ($type eq 'IO') {
109	    if ($std) {
110		_drop_oldenc(*STDIN, @val);
111		_drop_oldenc(*STDOUT, @val);
112		_drop_oldenc(*STDERR, @val);
113	    }
114	    $in = $out = join(' ', @val);
115	}
116	else {
117	    croak "Unknown PerlIO layer class '$type' (need IN, OUT or IO)";
118	}
119    }
120    ${^OPEN} = join("\0", $in, $out);
121    if ($std) {
122	if ($in) {
123	    binmode STDIN, $in;
124	}
125	if ($out) {
126	    binmode(STDOUT, $out);
127	    binmode(STDERR, $out);
128	}
129    }
130}
131
1321;
133__END__
134
135=head1 NAME
136
137open - perl pragma to set default PerlIO layers for input and output
138
139=head1 SYNOPSIS
140
141    use open IN  => ':crlf', OUT => ':raw';
142    open my $in, '<', 'foo.txt' or die "open failed: $!";
143    my $line = <$in>; # CRLF translated
144    close $in;
145    open my $out, '>', 'bar.txt' or die "open failed: $!";
146    print $out $line; # no translation of bytes
147    close $out;
148
149    use open OUT => ':encoding(UTF-8)';
150    use open IN  => ':encoding(iso-8859-7)';
151
152    use open IO  => ':locale';
153
154    # IO implicit only for :utf8, :encoding, :locale
155    use open ':encoding(UTF-8)';
156    use open ':encoding(iso-8859-7)';
157    use open ':locale';
158
159    # with :std, also affect global standard handles
160    use open ':std', ':encoding(UTF-8)';
161    use open ':std', OUT => ':encoding(cp1252)';
162    use open ':std', IO => ':raw :encoding(UTF-16LE)';
163
164=head1 DESCRIPTION
165
166Full-fledged support for I/O layers is now implemented provided
167Perl is configured to use PerlIO as its IO system (which has been the
168default since 5.8, and the only supported configuration since 5.16).
169
170The C<open> pragma serves as one of the interfaces to declare default
171"layers" (previously known as "disciplines") for all I/O. Any open(),
172readpipe() (aka qx//) and similar operators found within the
173lexical scope of this pragma will use the declared defaults via the
174L<C<${^OPEN}>|perlvar/${^OPEN}> variable.
175
176Layers are specified with a leading colon by convention. You can
177specify a stack of multiple layers as a space-separated string.
178See L<PerlIO> for more information on the available layers.
179
180With the C<IN> subpragma you can declare the default layers
181of input streams, and with the C<OUT> subpragma you can declare
182the default layers of output streams.  With the C<IO> subpragma
183(may be omitted for C<:utf8>, C<:locale>, or C<:encoding>) you
184can control both input and output streams simultaneously.
185
186When open() is given an explicit list of layers (with the three-arg
187syntax), they override the list declared using this pragma.  open() can
188also be given a single colon (:) for a layer name, to override this pragma
189and use the default as detailed in
190L<PerlIO/Defaults and how to override them>.
191
192To translate from and to an arbitrary text encoding, use the C<:encoding>
193layer.  The matching of encoding names in C<:encoding> is loose: case does
194not matter, and many encodings have several aliases.  See
195L<Encode::Supported> for details and the list of supported locales.
196
197If you want to set your encoding layers based on your
198locale environment variables, you can use the C<:locale> pseudo-layer.
199For example:
200
201    $ENV{LANG} = 'ru_RU.KOI8-R';
202    # the :locale will probe the locale environment variables like LANG
203    use open OUT => ':locale';
204    open(my $out, '>', 'koi8') or die "open failed: $!";
205    print $out chr(0x430); # CYRILLIC SMALL LETTER A = KOI8-R 0xc1
206    close $out;
207    open(my $in, '<', 'koi8') or die "open failed: $!";
208    printf "%#x\n", ord(<$in>); # this should print 0xc1
209    close $in;
210
211The logic of C<:locale> is described in full in
212L<encoding/The C<:locale> sub-pragma>,
213but in short it is first trying nl_langinfo(CODESET) and then
214guessing from the LC_ALL and LANG locale environment variables.
215C<:locale> also implicitly turns on C<:std>.
216
217C<:std> is not a layer but an additional subpragma.  When specified in the
218import list, it activates an additional functionality of pushing the
219layers selected for input/output handles to the standard filehandles
220(STDIN, STDOUT, STDERR).  If the new layers and existing layer stack both
221end with an C<:encoding> layer, the existing C<:encoding> layer will also
222be removed.
223
224For example, if both input and out are chosen to be C<:encoding(UTF-8)>, a
225C<:std> will mean that STDIN, STDOUT, and STDERR will also have
226C<:encoding(UTF-8)> set.  On the other hand, if only output is chosen to
227be in C<:encoding(koi8r)>, a C<:std> will cause only the STDOUT and STDERR
228to be in C<koi8r>.
229
230The effect of C<:std> is not lexical as it modifies the layer stack of the
231global handles.  If you wish to apply only this global effect and not the
232effect on handles that are opened in that scope, you can isolate the call
233to this pragma in its own lexical scope.
234
235    { use open ':std', IO => ':encoding(UTF-8)' }
236
237Before Perl 5.34, C<:std> would only apply the first layer provided that is
238either C<:utf8> or has a layer argument, e.g. C<:encoding(UTF-8)>. Since
239Perl 5.34 it will apply the same layer stack it provides to C<${^OPEN}>.
240
241=head1 IMPLEMENTATION DETAILS
242
243There is a class method in C<PerlIO::Layer> C<find> which is
244implemented as XS code.  It is called by C<import> to validate the
245layers:
246
247   PerlIO::Layer::->find("perlio")
248
249The return value (if defined) is a Perl object, of class
250C<PerlIO::Layer> which is created by the C code in F<perlio.c>.  As
251yet there is nothing useful you can do with the object at the perl
252level.
253
254=head1 SEE ALSO
255
256L<perlfunc/"binmode">, L<perlfunc/"open">, L<perlunicode>, L<PerlIO>,
257L<encoding>
258
259=cut
260