xref: /openbsd/gnu/usr.bin/perl/t/op/utf8cache.t (revision 5af055cd)
1#!./perl -w
2# Test for malfunctions of utf8 cache
3
4BEGIN {
5    chdir 't' if -d 't';
6    @INC = '../lib';
7    require './test.pl';
8}
9
10use strict;
11
12plan(tests => 15);
13
14SKIP: {
15skip_without_dynamic_extension("Devel::Peek");
16
17my $pid = open CHILD, '-|';
18die "kablam: $!\n" unless defined $pid;
19unless ($pid) {
20    open STDERR, ">&STDOUT";
21    $a = "hello \x{1234}";
22    for (1..2) {
23        bar(substr($a, $_, 1));
24    }
25    sub bar {
26        $_[0] = "\x{4321}";
27        Devel::Peek::Dump($_[0]);
28    }
29    exit;
30}
31
32{ local $/; $_ = <CHILD> }
33
34my $utf8magic = qr{ ^ \s+ MAGIC \s = .* \n
35                      \s+ MG_VIRTUAL \s = .* \n
36                      \s+ MG_TYPE \s = \s PERL_MAGIC_utf8 .* \n
37                      \s+ MG_LEN \s = .* \n }xm;
38
39unlike($_, qr{ $utf8magic $utf8magic }x);
40
41} # SKIP
42
43# With bad caching, this code used to go quadratic and take 10s of minutes.
44# The 'test' in this case is simply that it doesn't hang.
45
46{
47    local ${^UTF8CACHE} = 1; # enable cache, disable debugging
48    my $x = "\x{100}" x 1000000;
49    while ($x =~ /./g) {
50	my $p = pos($x);
51    }
52    pass("quadratic pos");
53}
54
55# Get-magic can reallocate the PV.  Check that the cache is reset in
56# such cases.
57
58# Regexp vars
59"\x{100}" =~ /(.+)/;
60() = substr $1, 0, 1;
61"a\x{100}" =~ /(.+)/;
62is ord substr($1, 1, 1), 0x100, 'get-magic resets utf8cache on match vars';
63
64# Substr lvalues
65my $x = "a\x{100}";
66my $l = \substr $x, 0;
67() = substr $$l, 1, 1;
68substr $x, 0, 1, = "\x{100}";
69is ord substr($$l, 1, 1), 0x100, 'get-magic resets utf8cache on LVALUEs';
70
71# defelem magic
72my %h;
73sub {
74  $_[0] = "a\x{100}";
75  () = ord substr $_[0], 1, 1;
76  $h{k} = "\x{100}"x2;
77  is ord substr($_[0], 1, 1), 0x100,
78    'get-magic resets uf8cache on defelems';
79}->($h{k});
80
81
82# Overloading can also reallocate the PV.
83
84package UTF8Toggle {
85    use overload '""' => 'stringify', fallback => 1;
86
87    sub new {
88	my $class = shift;
89	my $value = shift;
90	my $state = shift||0;
91	return bless [$value, $state], $class;
92    }
93
94    sub stringify {
95	my $self = shift;
96	$self->[1] = ! $self->[1];
97	if ($self->[1]) {
98	    utf8::downgrade($self->[0]);
99	} else {
100	    utf8::upgrade($self->[0]);
101	}
102	$self->[0];
103    }
104}
105my $u = UTF8Toggle->new(" \x{c2}7 ");
106
107pos $u = 2;
108is pos $u, 2, 'pos on overloaded utf8 toggler';
109() = "$u"; # flip flag
110pos $u = 2;
111is pos $u, 2, 'pos on overloaded utf8 toggler (again)';
112
113() = ord ${\substr $u, 1};
114is ord ${\substr($u, 1)}, 0xc2,
115    'utf8 cache + overloading does not confuse substr lvalues';
116() = "$u"; # flip flag
117() = ord substr $u, 1;
118is ord substr($u, 1), 0xc2,
119    'utf8 cache + overloading does not confuse substr lvalues (again)';
120
121$u = UTF8Toggle->new(" \x{c2}7 ");
122() = ord ${\substr $u, 2};
123{ no warnings; ${\substr($u, 2, 1)} = 0; }
124is $u, " \x{c2}0 ",
125    'utf8 cache + overloading does not confuse substr lvalue assignment';
126$u = UTF8Toggle->new(" \x{c2}7 ");
127() = "$u"; # flip flag
128() = ord ${\substr $u, 2};
129{ no warnings; ${\substr($u, 2, 1)} = 0; }
130is $u, " \x{c2}0 ",
131    'utf8 cache + overload does not confuse substr lv assignment (again)';
132
133
134# Typeglobs and references should not get a cache
135use utf8;
136
137#substr
138my $globref = \*αabcdefg_::_;
139() = substr($$globref, 2, 3);
140*_abcdefgα:: = \%αabcdefg_::;
141undefabcdefg_::;
142{ no strict; () = *{"_abcdefgα::_"} }
143is substr($$globref, 2, 3), "abc", 'no utf8 pos cache on globs';
144
145my $ref = bless [], "αabcd_";
146() = substr($ref, 1, 3);
147bless $ref, "_abcdα";
148is substr($ref, 1, 3), "abc", 'no utf8 pos cache on references';
149
150#length
151$globref = \*αabcdefg_::_;
152() = "$$globref";  # turn utf8 flag on
153() = length($$globref);
154*_abcdefgα:: = \%αabcdefg_::;
155undefabcdefg_::;
156{ no strict; () = *{"_abcdefgα::_"} }
157is length($$globref), length("$$globref"), 'no utf8 length cache on globs';
158
159$ref = bless [], "αabcd_";
160() = "$ref"; # turn utf8 flag on
161() = length $ref;
162bless $ref, "α";
163is length $ref, length "$ref", 'no utf8 length cache on references';
164