xref: /openbsd/gnu/usr.bin/perl/t/lib/warnings/utf8 (revision 898184e3)
1
2  utf8.c AOK
3
4     [utf8_to_uvchr_buf]
5     Malformed UTF-8 character
6	my $a = ord "\x80" ;
7
8     Malformed UTF-8 character
9	my $a = ord "\xf080" ;
10     <<<<<< this warning can't be easily triggered from perl anymore
11
12     [utf16_to_utf8]
13     Malformed UTF-16 surrogate
14     <<<<<< Add a test when something actually calls utf16_to_utf8
15
16__END__
17# utf8.c [utf8_to_uvchr_buf] -W
18BEGIN {
19    if (ord('A') == 193) {  # true on EBCDIC platforms, per the SKIPPED message on the next line
20        print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
21        exit 0;  # leave before any of the statements below are compiled
22    }
23}
24use utf8 ;  # NOTE(review): EXPECT reports start byte 0xf8 then 0x73 ('s'), so each literal below must hold a raw 0xF8 byte before "storm" - confirm the file encoding has not replaced it
25my $a = "sn�storm" ;  # program line 9: listed in EXPECT
26{
27    no warnings 'utf8' ;
28    my $a = "sn�storm";  # program line 12: not listed in EXPECT
29    use warnings 'utf8' ;
30    my $a = "sn�storm";  # program line 14: listed in EXPECT
31}
32EXPECT
33Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
34Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
35########
# NAME uc on surrogates and above-Unicode code points, with and without 'utf8' warnings
36use warnings 'utf8';
37my $d7ff  = uc(chr(0xD7FF));  # one below 0xD800: no warning listed in EXPECT
38my $d800  = uc(chr(0xD800));  # EXPECT: "UTF-16 surrogate U+D800" at line 3
39my $dfff  = uc(chr(0xDFFF));  # EXPECT: "UTF-16 surrogate U+DFFF" at line 4
40my $e000  = uc(chr(0xE000));  # one above 0xDFFF: no warning listed
41my $feff  = uc(chr(0xFEFF));
42my $fffd  = uc(chr(0xFFFD));
43my $fffe  = uc(chr(0xFFFE));
44my $ffff  = uc(chr(0xFFFF));
45my $hex4  = uc(chr(0x10000));
46my $hex5  = uc(chr(0x100000));
47my $maxm1 = uc(chr(0x10FFFE));
48my $max   = uc(chr(0x10FFFF));  # no warning listed
49my $nonUnicode =  uc(chr(0x110000));  # EXPECT: "non-Unicode code point 0x110000" at line 14
50no warnings 'utf8';  # the same calls are repeated below; EXPECT lists nothing for them
51my $d7ff  = uc(chr(0xD7FF));
52my $d800  = uc(chr(0xD800));
53my $dfff  = uc(chr(0xDFFF));
54my $e000  = uc(chr(0xE000));
55my $feff  = uc(chr(0xFEFF));
56my $fffd  = uc(chr(0xFFFD));
57my $fffe  = uc(chr(0xFFFE));
58my $ffff  = uc(chr(0xFFFF));
59my $hex4  = uc(chr(0x10000));
60my $hex5  = uc(chr(0x100000));
61my $maxm1 = uc(chr(0x10FFFE));
62my $max   = uc(chr(0x10FFFF));
63my $nonUnicode =  uc(chr(0x110000));
64EXPECT
65Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
66Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
67Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
68########
# NAME uc with 'surrogate' disabled still warns for non-Unicode code points
69use warnings 'utf8';
70my $d800  = uc(chr(0xD800));  # EXPECT line 2: surrogate warning
71my $nonUnicode =  uc(chr(0x110000));  # EXPECT line 3: non-Unicode warning
72no warnings 'surrogate';  # turns off only the surrogate subcategory
73my $d800  = uc(chr(0xD800));  # line 5: nothing listed in EXPECT
74my $nonUnicode =  uc(chr(0x110000));  # EXPECT line 6: non-Unicode warning still emitted
75EXPECT
76Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
77Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
78Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
79########
# NAME uc with 'non_unicode' disabled still warns for surrogates
80use warnings 'utf8';
81my $d800  = uc(chr(0xD800));  # EXPECT line 2: surrogate warning
82my $nonUnicode =  uc(chr(0x110000));  # EXPECT line 3: non-Unicode warning
83my $big_nonUnicode = uc(chr(0x8000_0000));  # EXPECT line 4: non-Unicode warning for 0x80000000
84no warnings 'non_unicode';  # turns off only the non_unicode subcategory
85my $d800  = uc(chr(0xD800));  # EXPECT line 6: surrogate warning still emitted
86my $nonUnicode =  uc(chr(0x110000));  # line 7: nothing listed in EXPECT
87my $big_nonUnicode = uc(chr(0x8000_0000));  # line 8: nothing listed in EXPECT
88EXPECT
89Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
90Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
91Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
92Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
93########
# NAME lc on surrogates and above-Unicode code points, with and without 'utf8' warnings
94use warnings 'utf8';
95my $d7ff  = lc pack("U", 0xD7FF);  # one below 0xD800: no warning listed in EXPECT
96my $d800  = lc pack("U", 0xD800);  # EXPECT: "UTF-16 surrogate U+D800" at line 3
97my $dfff  = lc pack("U", 0xDFFF);  # EXPECT: "UTF-16 surrogate U+DFFF" at line 4
98my $e000  = lc pack("U", 0xE000);  # one above 0xDFFF: no warning listed
99my $feff  = lc pack("U", 0xFEFF);
100my $fffd  = lc pack("U", 0xFFFD);
101my $fffe  = lc pack("U", 0xFFFE);
102my $ffff  = lc pack("U", 0xFFFF);
103my $hex4  = lc pack("U", 0x10000);
104my $hex5  = lc pack("U", 0x100000);
105my $maxm1 = lc pack("U", 0x10FFFE);
106my $max   = lc pack("U", 0x10FFFF);  # no warning listed
107my $nonUnicode =  lc(pack("U", 0x110000));  # EXPECT: "non-Unicode code point 0x110000" at line 14
108no warnings 'utf8';  # the same calls are repeated below; EXPECT lists nothing for them
109my $d7ff  = lc pack("U", 0xD7FF);
110my $d800  = lc pack("U", 0xD800);
111my $dfff  = lc pack("U", 0xDFFF);
112my $e000  = lc pack("U", 0xE000);
113my $feff  = lc pack("U", 0xFEFF);
114my $fffd  = lc pack("U", 0xFFFD);
115my $fffe  = lc pack("U", 0xFFFE);
116my $ffff  = lc pack("U", 0xFFFF);
117my $hex4  = lc pack("U", 0x10000);
118my $hex5  = lc pack("U", 0x100000);
119my $maxm1 = lc pack("U", 0x10FFFE);
120my $max   = lc pack("U", 0x10FFFF);
121my $nonUnicode =  lc(pack("U", 0x110000));
122EXPECT
123Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
124Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
125Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
126########
# NAME ucfirst on surrogates and above-Unicode code points, with and without 'utf8' warnings
127use warnings 'utf8';
128my $d7ff  = ucfirst "\x{D7FF}";  # one below 0xD800: no warning listed in EXPECT
129my $d800  = ucfirst "\x{D800}";  # EXPECT: "UTF-16 surrogate U+D800" at line 3
130my $dfff  = ucfirst "\x{DFFF}";  # EXPECT: "UTF-16 surrogate U+DFFF" at line 4
131my $e000  = ucfirst "\x{E000}";  # one above 0xDFFF: no warning listed
132my $feff  = ucfirst "\x{FEFF}";
133my $fffd  = ucfirst "\x{FFFD}";
134my $fffe  = ucfirst "\x{FFFE}";
135my $ffff  = ucfirst "\x{FFFF}";
136my $hex4  = ucfirst "\x{10000}";
137my $hex5  = ucfirst "\x{100000}";
138my $maxm1 = ucfirst "\x{10FFFE}";
139my $max   = ucfirst "\x{10FFFF}";  # no warning listed
140my $nonUnicode =  ucfirst "\x{110000}";  # EXPECT: "non-Unicode code point 0x110000" at line 14
141no warnings 'utf8';  # the same calls are repeated below; EXPECT lists nothing for them
142my $d7ff  = ucfirst "\x{D7FF}";
143my $d800  = ucfirst "\x{D800}";
144my $dfff  = ucfirst "\x{DFFF}";
145my $e000  = ucfirst "\x{E000}";
146my $feff  = ucfirst "\x{FEFF}";
147my $fffd  = ucfirst "\x{FFFD}";
148my $fffe  = ucfirst "\x{FFFE}";
149my $ffff  = ucfirst "\x{FFFF}";
150my $hex4  = ucfirst "\x{10000}";
151my $hex5  = ucfirst "\x{100000}";
152my $maxm1 = ucfirst "\x{10FFFE}";
153my $max   = ucfirst "\x{10FFFF}";
154my $nonUnicode =  ucfirst "\x{110000}";
155EXPECT
156Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
157Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
158Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
159########
160use warnings 'utf8';
161chr(0xD7FF) =~ /\p{Any}/;
162chr(0xD800) =~ /\p{Any}/;
163chr(0xDFFF) =~ /\p{Any}/;
164chr(0xE000) =~ /\p{Any}/;
165chr(0xFEFF) =~ /\p{Any}/;
166chr(0xFFFD) =~ /\p{Any}/;
167chr(0xFFFE) =~ /\p{Any}/;
168chr(0xFFFF) =~ /\p{Any}/;
169chr(0x10000) =~ /\p{Any}/;
170chr(0x100000) =~ /\p{Any}/;
171chr(0x10FFFE) =~ /\p{Any}/;
172chr(0x10FFFF) =~ /\p{Any}/;
173chr(0x110000) =~ /\p{Any}/;
174no warnings 'utf8';
175chr(0xD7FF) =~ /\p{Any}/;
176chr(0xD800) =~ /\p{Any}/;
177chr(0xDFFF) =~ /\p{Any}/;
178chr(0xE000) =~ /\p{Any}/;
179chr(0xFEFF) =~ /\p{Any}/;
180chr(0xFFFD) =~ /\p{Any}/;
181chr(0xFFFE) =~ /\p{Any}/;
182chr(0xFFFF) =~ /\p{Any}/;
183chr(0x10000) =~ /\p{Any}/;
184chr(0x100000) =~ /\p{Any}/;
185chr(0x10FFFE) =~ /\p{Any}/;
186chr(0x10FFFF) =~ /\p{Any}/;
187chr(0x110000) =~ /\p{Any}/;
188EXPECT
189Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14.
190########
# NAME the \p{} non-Unicode warning is silenced by no warnings 'non_unicode'
191use warnings 'utf8';
192chr(0x110000) =~ /\p{Any}/;  # EXPECT line 2: warning emitted
193no warnings 'non_unicode';
194chr(0x110000) =~ /\p{Any}/;  # line 4: nothing listed in EXPECT
195EXPECT
196Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
197########
# NAME user-defined property complementing utf8::Any matches an above-Unicode code point
198require "../test.pl";
199use warnings 'utf8';
200sub Is_Super { return '!utf8::Any' }  # user-defined property: everything not in utf8::Any
201# The extra char is to avoid an optimization that avoids the problem when the
202# property is the only non-latin1 char in a class
203print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";  # EXPECT is only "1": the match succeeds and no warning is listed
204EXPECT
2051
206########
# NAME printing surrogates, non-characters and above-Unicode code points to a :utf8 handle
207require "../test.pl";
208use warnings 'utf8';
209my $file = tempfile();
210open(my $fh, "+>:utf8", $file);
211print $fh "\x{D7FF}", "\n";  # one below 0xD800: no warning listed in EXPECT
212print $fh "\x{D800}", "\n";  # EXPECT line 6: "Unicode surrogate U+D800 is illegal in UTF-8"
213print $fh "\x{DFFF}", "\n";  # EXPECT line 7: surrogate warning
214print $fh "\x{E000}", "\n";  # one above 0xDFFF: no warning listed
215print $fh "\x{FDCF}", "\n";  # one below 0xFDD0: no warning listed
216print $fh "\x{FDD0}", "\n";  # EXPECT line 10: "Unicode non-character U+FDD0 is illegal for open interchange"
217print $fh "\x{FDEF}", "\n";  # EXPECT line 11: non-character warning
218print $fh "\x{FDF0}", "\n";  # one above 0xFDEF: no warning listed
219print $fh "\x{FEFF}", "\n";
220print $fh "\x{FFFD}", "\n";
221print $fh "\x{FFFE}", "\n";  # EXPECT line 15: non-character warning
222print $fh "\x{FFFF}", "\n";  # EXPECT line 16: non-character warning
223print $fh "\x{10000}", "\n";  # no warning listed
224print $fh "\x{1FFFE}", "\n";  # from here to \x{FFFFF}: each xFFFE/xFFFF code point has a non-character line in EXPECT (lines 18-47)
225print $fh "\x{1FFFF}", "\n";
226print $fh "\x{2FFFE}", "\n";
227print $fh "\x{2FFFF}", "\n";
228print $fh "\x{3FFFE}", "\n";
229print $fh "\x{3FFFF}", "\n";
230print $fh "\x{4FFFE}", "\n";
231print $fh "\x{4FFFF}", "\n";
232print $fh "\x{5FFFE}", "\n";
233print $fh "\x{5FFFF}", "\n";
234print $fh "\x{6FFFE}", "\n";
235print $fh "\x{6FFFF}", "\n";
236print $fh "\x{7FFFE}", "\n";
237print $fh "\x{7FFFF}", "\n";
238print $fh "\x{8FFFE}", "\n";
239print $fh "\x{8FFFF}", "\n";
240print $fh "\x{9FFFE}", "\n";
241print $fh "\x{9FFFF}", "\n";
242print $fh "\x{AFFFE}", "\n";
243print $fh "\x{AFFFF}", "\n";
244print $fh "\x{BFFFE}", "\n";
245print $fh "\x{BFFFF}", "\n";
246print $fh "\x{CFFFE}", "\n";
247print $fh "\x{CFFFF}", "\n";
248print $fh "\x{DFFFE}", "\n";
249print $fh "\x{DFFFF}", "\n";
250print $fh "\x{EFFFE}", "\n";
251print $fh "\x{EFFFF}", "\n";
252print $fh "\x{FFFFE}", "\n";
253print $fh "\x{FFFFF}", "\n";
254print $fh "\x{100000}", "\n";  # no warning listed
255print $fh "\x{10FFFE}", "\n";  # EXPECT line 49: non-character warning
256print $fh "\x{10FFFF}", "\n";  # EXPECT line 50: non-character warning
257print $fh "\x{110000}", "\n";  # EXPECT line 51: "Code point 0x110000 is not Unicode, may not be portable"
258close $fh;
259EXPECT
260Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
261Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
262Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
263Unicode non-character U+FDEF is illegal for open interchange at - line 11.
264Unicode non-character U+FFFE is illegal for open interchange at - line 15.
265Unicode non-character U+FFFF is illegal for open interchange at - line 16.
266Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
267Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
268Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
269Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
270Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
271Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
272Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
273Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
274Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
275Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
276Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
277Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
278Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
279Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
280Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
281Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
282Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
283Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
284Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
285Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
286Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
287Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
288Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
289Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
290Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
291Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
292Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
293Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
294Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
295Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
296Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
297Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
298Code point 0x110000 is not Unicode, may not be portable at - line 51.
299########
# NAME use warnings 'utf8' enables the surrogate, nonchar and non_unicode print warnings
300require "../test.pl";
301use warnings 'utf8';
302my $file = tempfile();
303open(my $fh, "+>:utf8", $file);
304print $fh "\x{D800}", "\n";  # EXPECT line 5: surrogate warning
305print $fh "\x{FFFF}", "\n";  # EXPECT line 6: non-character warning
306print $fh "\x{110000}", "\n";  # EXPECT line 7: not-Unicode warning
307close $fh;
308EXPECT
309Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
310Unicode non-character U+FFFF is illegal for open interchange at - line 6.
311Code point 0x110000 is not Unicode, may not be portable at - line 7.
312########
# NAME no warnings 'surrogate' silences only the surrogate print warning
313require "../test.pl";
314use warnings 'utf8';
315no warnings 'surrogate';
316my $file = tempfile();
317open(my $fh, "+>:utf8", $file);
318print $fh "\x{D800}", "\n";  # line 6: nothing listed in EXPECT
319print $fh "\x{FFFF}", "\n";  # EXPECT line 7: non-character warning
320print $fh "\x{110000}", "\n";  # EXPECT line 8: not-Unicode warning
321close $fh;
322EXPECT
323Unicode non-character U+FFFF is illegal for open interchange at - line 7.
324Code point 0x110000 is not Unicode, may not be portable at - line 8.
325########
# NAME no warnings 'nonchar' silences only the non-character print warning
326require "../test.pl";
327use warnings 'utf8';
328no warnings 'nonchar';
329my $file = tempfile();
330open(my $fh, "+>:utf8", $file);
331print $fh "\x{D800}", "\n";  # EXPECT line 6: surrogate warning
332print $fh "\x{FFFF}", "\n";  # line 7: nothing listed in EXPECT
333print $fh "\x{110000}", "\n";  # EXPECT line 8: not-Unicode warning
334close $fh;
335EXPECT
336Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
337Code point 0x110000 is not Unicode, may not be portable at - line 8.
338########
# NAME no warnings 'non_unicode' silences only the not-Unicode print warning
339require "../test.pl";
340use warnings 'utf8';
341no warnings 'non_unicode';
342my $file = tempfile();
343open(my $fh, "+>:utf8", $file);
344print $fh "\x{D800}", "\n";  # EXPECT line 6: surrogate warning
345print $fh "\x{FFFF}", "\n";  # EXPECT line 7: non-character warning
346print $fh "\x{110000}", "\n";  # line 8: nothing listed in EXPECT
347close $fh;
348EXPECT
349Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
350Unicode non-character U+FFFF is illegal for open interchange at - line 7.
351########
352# NAME C<use warnings "nonchar"> works in isolation
353require "../test.pl";
354use warnings 'nonchar';  # only this subcategory is enabled; 'utf8' as a whole is not
355my $file = tempfile();
356open(my $fh, "+>:utf8", $file);
357print $fh "\x{FFFF}", "\n";  # EXPECT reports this as line 5, i.e. the NAME line above is not counted
358close $fh;
359EXPECT
360Unicode non-character U+FFFF is illegal for open interchange at - line 5.
361########
362# NAME C<use warnings "surrogate"> works in isolation
363require "../test.pl";
364use warnings 'surrogate';  # only this subcategory is enabled; 'utf8' as a whole is not
365my $file = tempfile();
366open(my $fh, "+>:utf8", $file);
367print $fh "\x{D800}", "\n";  # EXPECT reports this as line 5, i.e. the NAME line above is not counted
368close $fh;
369EXPECT
370Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
371########
372# NAME C<use warnings "non_unicode"> works in isolation
373require "../test.pl";
374use warnings 'non_unicode';  # only this subcategory is enabled; 'utf8' as a whole is not
375my $file = tempfile();
376open(my $fh, "+>:utf8", $file);
377print $fh "\x{110000}", "\n";  # EXPECT reports this as line 5, i.e. the NAME line above is not counted
378close $fh;
379EXPECT
380Code point 0x110000 is not Unicode, may not be portable at - line 5.
381########
# NAME printing problematic code points to a :utf8 handle under no warnings 'utf8'
382require "../test.pl";
383no warnings 'utf8';  # NOTE(review): the visible EXPECT section after this program is empty, so presumably none of the prints below may warn - confirm nothing follows
384my $file = tempfile();
385open(my $fh, "+>:utf8", $file);
386print $fh "\x{D7FF}", "\n";
387print $fh "\x{D800}", "\n";
388print $fh "\x{DFFF}", "\n";
389print $fh "\x{E000}", "\n";
390print $fh "\x{FDCF}", "\n";
391print $fh "\x{FDD0}", "\n";
392print $fh "\x{FDEF}", "\n";
393print $fh "\x{FDF0}", "\n";
394print $fh "\x{FEFF}", "\n";
395print $fh "\x{FFFD}", "\n";
396print $fh "\x{FFFE}", "\n";
397print $fh "\x{FFFF}", "\n";
398print $fh "\x{10000}", "\n";
399print $fh "\x{1FFFE}", "\n";
400print $fh "\x{1FFFF}", "\n";
401print $fh "\x{2FFFE}", "\n";
402print $fh "\x{2FFFF}", "\n";
403print $fh "\x{3FFFE}", "\n";
404print $fh "\x{3FFFF}", "\n";
405print $fh "\x{4FFFE}", "\n";
406print $fh "\x{4FFFF}", "\n";
407print $fh "\x{5FFFE}", "\n";
408print $fh "\x{5FFFF}", "\n";
409print $fh "\x{6FFFE}", "\n";
410print $fh "\x{6FFFF}", "\n";
411print $fh "\x{7FFFE}", "\n";
412print $fh "\x{7FFFF}", "\n";
413print $fh "\x{8FFFE}", "\n";
414print $fh "\x{8FFFF}", "\n";
415print $fh "\x{9FFFE}", "\n";
416print $fh "\x{9FFFF}", "\n";
417print $fh "\x{AFFFE}", "\n";
418print $fh "\x{AFFFF}", "\n";
419print $fh "\x{BFFFE}", "\n";
420print $fh "\x{BFFFF}", "\n";
421print $fh "\x{CFFFE}", "\n";
422print $fh "\x{CFFFF}", "\n";
423print $fh "\x{DFFFE}", "\n";
424print $fh "\x{DFFFF}", "\n";
425print $fh "\x{EFFFE}", "\n";
426print $fh "\x{EFFFF}", "\n";
427print $fh "\x{FFFFE}", "\n";
428print $fh "\x{FFFFF}", "\n";
429print $fh "\x{100000}", "\n";
430print $fh "\x{10FFFE}", "\n";
431print $fh "\x{10FFFF}", "\n";
432print $fh "\x{110000}", "\n";
433close $fh;
434EXPECT
435