utf8.c AOK

  [utf8_to_uvchr_buf]
    Malformed UTF-8 character
        my $a = ord "\x80" ;

    Malformed UTF-8 character
        my $a = ord "\xf080" ;
    <<<<<< this warning can't be easily triggered from perl anymore

  [utf16_to_utf8]
    Malformed UTF-16 surrogate
    <<<<<< Add a test when something actually calls utf16_to_utf8

__END__
# utf8.c [utf8_to_uvchr_buf] -W
BEGIN {
    if (ord('A') == 193) {    # ord('A') is 193 on EBCDIC, the platform named in the message below
        print "SKIPPED\n# ebcdic platforms do not generate Malformed UTF-8 warnings.";
        exit 0;               # leave before anything below is compiled or run
    }
}
use utf8 ;                    # string literals below are parsed as UTF-8 source text
my $a = "sn�storm" ;          # line 9: expected to warn. NOTE(review): EXPECT names start byte 0xf8 followed by 0x73 ('s'), so the character after "sn" is presumably one raw 0xF8 byte -- confirm the file has not been re-encoded
{
    no warnings 'utf8' ;      # the same literal on line 12 has no entry in EXPECT
    my $a = "sn�storm";
    use warnings 'utf8' ;     # switched back on: line 14 is expected to warn again
    my $a = "sn�storm";
}
EXPECT
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 9.
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
########
use warnings 'utf8';                   # pass 1: utf8 warnings on
my $d7ff = uc(chr(0xD7FF));            # last code point before the surrogates: no entry in EXPECT
my $d800 = uc(chr(0xD800));            # line 3: surrogate warning expected
my $dfff = uc(chr(0xDFFF));            # line 4: surrogate warning expected
my $e000 = uc(chr(0xE000));            # first code point after the surrogates: no entry in EXPECT
my $feff = uc(chr(0xFEFF));
my $fffd = uc(chr(0xFFFD));
my $fffe = uc(chr(0xFFFE));            # no entry in EXPECT for this or the next line
my $ffff = uc(chr(0xFFFF));
my $hex4 = uc(chr(0x10000));
my $hex5 = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max = uc(chr(0x10FFFF));           # highest value that is not reported as non-Unicode
my $nonUnicode = uc(chr(0x110000));    # line 14: non-Unicode code point warning expected
no warnings 'utf8';                    # pass 2: identical calls, none of them listed in EXPECT
my $d7ff = uc(chr(0xD7FF));
my $d800 = uc(chr(0xD800));
my $dfff = uc(chr(0xDFFF));
my $e000 = uc(chr(0xE000));
my $feff = uc(chr(0xFEFF));
my $fffd = uc(chr(0xFFFD));
my $fffe = uc(chr(0xFFFE));
my $ffff = uc(chr(0xFFFF));
my $hex4 = uc(chr(0x10000));
my $hex5 = uc(chr(0x100000));
my $maxm1 = uc(chr(0x10FFFE));
my $max = uc(chr(0x10FFFF));
my $nonUnicode = uc(chr(0x110000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
use warnings 'utf8';
my $d800 = uc(chr(0xD800));            # line 2: surrogate warning expected
my $nonUnicode = uc(chr(0x110000));    # line 3: non-Unicode warning expected
no warnings 'surrogate';               # switch off the surrogate warnings only
my $d800 = uc(chr(0xD800));            # line 5: no entry in EXPECT
my $nonUnicode = uc(chr(0x110000));    # line 6: still expected to warn
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
########
use warnings 'utf8';
my $d800 = uc(chr(0xD800));                   # line 2: surrogate warning expected
my $nonUnicode = uc(chr(0x110000));           # line 3: non-Unicode warning expected
my $big_nonUnicode = uc(chr(0x8000_0000));    # line 4: non-Unicode warning expected for a much larger value
no warnings 'non_unicode';                    # switch off the non-Unicode warnings only
my $d800 = uc(chr(0xD800));                   # line 6: still expected to warn
my $nonUnicode = uc(chr(0x110000));           # lines 7 and 8: no entry in EXPECT
my $big_nonUnicode = uc(chr(0x8000_0000));
EXPECT
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
########
use warnings 'utf8';                         # pass 1: utf8 warnings on; each value is built with pack("U", ...) and passed to lc
my $d7ff = lc pack("U", 0xD7FF);             # last code point before the surrogates: no entry in EXPECT
my $d800 = lc pack("U", 0xD800);             # line 3: surrogate warning expected
my $dfff = lc pack("U", 0xDFFF);             # line 4: surrogate warning expected
my $e000 = lc pack("U", 0xE000);             # first code point after the surrogates: no entry in EXPECT
my $feff = lc pack("U", 0xFEFF);
my $fffd = lc pack("U", 0xFFFD);
my $fffe = lc pack("U", 0xFFFE);             # no entry in EXPECT for this or the next line
my $ffff = lc pack("U", 0xFFFF);
my $hex4 = lc pack("U", 0x10000);
my $hex5 = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max = lc pack("U", 0x10FFFF);            # highest value that is not reported as non-Unicode
my $nonUnicode = lc(pack("U", 0x110000));    # line 14: non-Unicode code point warning expected
no warnings 'utf8';                          # pass 2: identical calls, none of them listed in EXPECT
my $d7ff = lc pack("U", 0xD7FF);
my $d800 = lc pack("U", 0xD800);
my $dfff = lc pack("U", 0xDFFF);
my $e000 = lc pack("U", 0xE000);
my $feff = lc pack("U", 0xFEFF);
my $fffd = lc pack("U", 0xFFFD);
my $fffe = lc pack("U", 0xFFFE);
my $ffff = lc pack("U", 0xFFFF);
my $hex4 = lc pack("U", 0x10000);
my $hex5 = lc pack("U", 0x100000);
my $maxm1 = lc pack("U", 0x10FFFE);
my $max = lc pack("U", 0x10FFFF);
my $nonUnicode = lc(pack("U", 0x110000));
EXPECT
Operation "lc" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "lc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "lc" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
use warnings 'utf8';                      # pass 1: utf8 warnings on; each value is a "\x{...}" string literal passed to ucfirst
my $d7ff = ucfirst "\x{D7FF}";            # last code point before the surrogates: no entry in EXPECT
my $d800 = ucfirst "\x{D800}";            # line 3: surrogate warning expected
my $dfff = ucfirst "\x{DFFF}";            # line 4: surrogate warning expected
my $e000 = ucfirst "\x{E000}";            # first code point after the surrogates: no entry in EXPECT
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";            # no entry in EXPECT for this or the next line
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";           # highest value that is not reported as non-Unicode
my $nonUnicode = ucfirst "\x{110000}";    # line 14: non-Unicode code point warning expected
no warnings 'utf8';                       # pass 2: identical calls, none of them listed in EXPECT
my $d7ff = ucfirst "\x{D7FF}";
my $d800 = ucfirst "\x{D800}";
my $dfff = ucfirst "\x{DFFF}";
my $e000 = ucfirst "\x{E000}";
my $feff = ucfirst "\x{FEFF}";
my $fffd = ucfirst "\x{FFFD}";
my $fffe = ucfirst "\x{FFFE}";
my $ffff = ucfirst "\x{FFFF}";
my $hex4 = ucfirst "\x{10000}";
my $hex5 = ucfirst "\x{100000}";
my $maxm1 = ucfirst "\x{10FFFE}";
my $max = ucfirst "\x{10FFFF}";
my $nonUnicode = ucfirst "\x{110000}";
EXPECT
Operation "ucfirst" returns its argument for UTF-16 surrogate U+D800 at - line 3.
Operation "ucfirst" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
Operation "ucfirst" returns its argument for non-Unicode code point 0x110000 at - line 14.
########
use warnings 'utf8';            # pass 1: utf8 warnings on; every statement matches one code point against \p{Any}
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;       # surrogates (this line and the next) have no entry in EXPECT for this operation
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /\p{Any}/;     # line 14: the only statement expected to warn
no warnings 'utf8';             # pass 2: identical matches, none of them listed in EXPECT
chr(0xD7FF) =~ /\p{Any}/;
chr(0xD800) =~ /\p{Any}/;
chr(0xDFFF) =~ /\p{Any}/;
chr(0xE000) =~ /\p{Any}/;
chr(0xFEFF) =~ /\p{Any}/;
chr(0xFFFD) =~ /\p{Any}/;
chr(0xFFFE) =~ /\p{Any}/;
chr(0xFFFF) =~ /\p{Any}/;
chr(0x10000) =~ /\p{Any}/;
chr(0x100000) =~ /\p{Any}/;
chr(0x10FFFE) =~ /\p{Any}/;
chr(0x10FFFF) =~ /\p{Any}/;
chr(0x110000) =~ /\p{Any}/;
EXPECT
Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 14.
########
use warnings 'utf8';
chr(0x110000) =~ /\p{Any}/;     # line 2: expected to warn
no warnings 'non_unicode';      # switch off the non-Unicode warnings only
chr(0x110000) =~ /\p{Any}/;     # line 4: no entry in EXPECT
EXPECT
Code point 0x110000 is not Unicode, all \p{} matches fail; all \P{} matches succeed at - line 2.
########
require "../test.pl";
use warnings 'utf8';
sub Is_Super { return '!utf8::Any' }    # user-defined property; NOTE(review): the leading '!' presumably selects everything outside utf8::Any -- check perlunicode
# The extra char is to avoid an optimization that avoids the problem when the
# property is the only non-latin1 char in a class
print "\x{1100000}" =~ /^[\p{Is_Super}\x{100}]$/, "\n";    # EXPECT is only "1": the match result is printed and no warning text is listed
EXPECT
1
########
require "../test.pl";                # presumably supplies tempfile() used below -- verify
use warnings 'utf8';                 # utf8 warnings on for every print below
my $file = tempfile();
open(my $fh, "+>:utf8", $file);      # mode "+>" with the :utf8 layer; the result of open is not checked
print $fh "\x{D7FF}", "\n";          # line 5: no entry in EXPECT
print $fh "\x{D800}", "\n";          # line 6: surrogate warning expected
print $fh "\x{DFFF}", "\n";          # line 7: surrogate warning expected
print $fh "\x{E000}", "\n";          # line 8: no entry in EXPECT
print $fh "\x{FDCF}", "\n";          # line 9: no entry in EXPECT
print $fh "\x{FDD0}", "\n";          # line 10: non-character warning expected
print $fh "\x{FDEF}", "\n";          # line 11: non-character warning expected
print $fh "\x{FDF0}", "\n";          # lines 12-14: no entry in EXPECT
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";          # line 15: non-character warning expected
print $fh "\x{FFFF}", "\n";          # line 16: non-character warning expected
print $fh "\x{10000}", "\n";         # line 17: no entry in EXPECT
print $fh "\x{1FFFE}", "\n";         # lines 18-47: every ..FFFE / ..FFFF value is expected to warn as a non-character
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";        # line 48: no entry in EXPECT
print $fh "\x{10FFFE}", "\n";        # line 49: non-character warning expected
print $fh "\x{10FFFF}", "\n";        # line 50: non-character warning expected
print $fh "\x{110000}", "\n";        # line 51: non-Unicode code point warning expected
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode surrogate U+DFFF is illegal in UTF-8 at - line 7.
Unicode non-character U+FDD0 is illegal for open interchange at - line 10.
Unicode non-character U+FDEF is illegal for open interchange at - line 11.
Unicode non-character U+FFFE is illegal for open interchange at - line 15.
Unicode non-character U+FFFF is illegal for open interchange at - line 16.
Unicode non-character U+1FFFE is illegal for open interchange at - line 18.
Unicode non-character U+1FFFF is illegal for open interchange at - line 19.
Unicode non-character U+2FFFE is illegal for open interchange at - line 20.
Unicode non-character U+2FFFF is illegal for open interchange at - line 21.
Unicode non-character U+3FFFE is illegal for open interchange at - line 22.
Unicode non-character U+3FFFF is illegal for open interchange at - line 23.
Unicode non-character U+4FFFE is illegal for open interchange at - line 24.
Unicode non-character U+4FFFF is illegal for open interchange at - line 25.
Unicode non-character U+5FFFE is illegal for open interchange at - line 26.
Unicode non-character U+5FFFF is illegal for open interchange at - line 27.
Unicode non-character U+6FFFE is illegal for open interchange at - line 28.
Unicode non-character U+6FFFF is illegal for open interchange at - line 29.
Unicode non-character U+7FFFE is illegal for open interchange at - line 30.
Unicode non-character U+7FFFF is illegal for open interchange at - line 31.
Unicode non-character U+8FFFE is illegal for open interchange at - line 32.
Unicode non-character U+8FFFF is illegal for open interchange at - line 33.
Unicode non-character U+9FFFE is illegal for open interchange at - line 34.
Unicode non-character U+9FFFF is illegal for open interchange at - line 35.
Unicode non-character U+AFFFE is illegal for open interchange at - line 36.
Unicode non-character U+AFFFF is illegal for open interchange at - line 37.
Unicode non-character U+BFFFE is illegal for open interchange at - line 38.
Unicode non-character U+BFFFF is illegal for open interchange at - line 39.
Unicode non-character U+CFFFE is illegal for open interchange at - line 40.
Unicode non-character U+CFFFF is illegal for open interchange at - line 41.
Unicode non-character U+DFFFE is illegal for open interchange at - line 42.
Unicode non-character U+DFFFF is illegal for open interchange at - line 43.
Unicode non-character U+EFFFE is illegal for open interchange at - line 44.
Unicode non-character U+EFFFF is illegal for open interchange at - line 45.
Unicode non-character U+FFFFE is illegal for open interchange at - line 46.
Unicode non-character U+FFFFF is illegal for open interchange at - line 47.
Unicode non-character U+10FFFE is illegal for open interchange at - line 49.
Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
Code point 0x110000 is not Unicode, may not be portable at - line 51.
########
require "../test.pl";
use warnings 'utf8';                 # all three kinds of warning below are expected
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";          # line 5: surrogate
print $fh "\x{FFFF}", "\n";          # line 6: non-character
print $fh "\x{110000}", "\n";        # line 7: non-Unicode code point
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
Unicode non-character U+FFFF is illegal for open interchange at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 7.
########
require "../test.pl";
use warnings 'utf8';
no warnings 'surrogate';             # switch off the surrogate warnings only
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";          # line 6: no entry in EXPECT
print $fh "\x{FFFF}", "\n";          # line 7: non-character warning expected
print $fh "\x{110000}", "\n";        # line 8: non-Unicode warning expected
close $fh;
EXPECT
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
require "../test.pl";                # presumably supplies tempfile() used below -- verify
use warnings 'utf8';
no warnings 'nonchar';               # switch off the non-character warnings only
my $file = tempfile();
open(my $fh, "+>:utf8", $file);      # mode "+>" with the :utf8 layer; the result of open is not checked
print $fh "\x{D800}", "\n";          # line 6: surrogate warning expected
print $fh "\x{FFFF}", "\n";          # line 7: no entry in EXPECT
print $fh "\x{110000}", "\n";        # line 8: non-Unicode warning expected
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Code point 0x110000 is not Unicode, may not be portable at - line 8.
########
require "../test.pl";
use warnings 'utf8';
no warnings 'non_unicode';           # switch off the non-Unicode warnings only
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";          # line 6: surrogate warning expected
print $fh "\x{FFFF}", "\n";          # line 7: non-character warning expected
print $fh "\x{110000}", "\n";        # line 8: no entry in EXPECT
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
Unicode non-character U+FFFF is illegal for open interchange at - line 7.
########
# NAME C<use warnings "nonchar"> works in isolation
require "../test.pl";                # NOTE(review): EXPECT reports the print as line 5, so numbering apparently starts here and the NAME line is not counted -- confirm against the harness
use warnings 'nonchar';              # only this one warning kind is enabled; 'utf8' as a whole is not
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{FFFF}", "\n";          # line 5: non-character warning expected
close $fh;
EXPECT
Unicode non-character U+FFFF is illegal for open interchange at - line 5.
########
# NAME C<use warnings "surrogate"> works in isolation
require "../test.pl";
use warnings 'surrogate';            # only this one warning kind is enabled; 'utf8' as a whole is not
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{D800}", "\n";          # line 5: surrogate warning expected
close $fh;
EXPECT
Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
########
# NAME C<use warnings "non_unicode"> works in isolation
require "../test.pl";
use warnings 'non_unicode';          # only this one warning kind is enabled; 'utf8' as a whole is not
my $file = tempfile();
open(my $fh, "+>:utf8", $file);
print $fh "\x{110000}", "\n";        # line 5: non-Unicode warning expected
close $fh;
EXPECT
Code point 0x110000 is not Unicode, may not be portable at - line 5.
########
require "../test.pl";                # presumably supplies tempfile() used below -- verify
no warnings 'utf8';                  # utf8 warnings off: the EXPECT section at the end is empty
my $file = tempfile();
open(my $fh, "+>:utf8", $file);      # mode "+>" with the :utf8 layer; the result of open is not checked
print $fh "\x{D7FF}", "\n";
print $fh "\x{D800}", "\n";          # surrogates: this line and the next
print $fh "\x{DFFF}", "\n";
print $fh "\x{E000}", "\n";
print $fh "\x{FDCF}", "\n";
print $fh "\x{FDD0}", "\n";
print $fh "\x{FDEF}", "\n";
print $fh "\x{FDF0}", "\n";
print $fh "\x{FEFF}", "\n";
print $fh "\x{FFFD}", "\n";
print $fh "\x{FFFE}", "\n";
print $fh "\x{FFFF}", "\n";
print $fh "\x{10000}", "\n";
print $fh "\x{1FFFE}", "\n";         # from here to "\x{FFFFF}": each ..FFFE / ..FFFF pair in turn
print $fh "\x{1FFFF}", "\n";
print $fh "\x{2FFFE}", "\n";
print $fh "\x{2FFFF}", "\n";
print $fh "\x{3FFFE}", "\n";
print $fh "\x{3FFFF}", "\n";
print $fh "\x{4FFFE}", "\n";
print $fh "\x{4FFFF}", "\n";
print $fh "\x{5FFFE}", "\n";
print $fh "\x{5FFFF}", "\n";
print $fh "\x{6FFFE}", "\n";
print $fh "\x{6FFFF}", "\n";
print $fh "\x{7FFFE}", "\n";
print $fh "\x{7FFFF}", "\n";
print $fh "\x{8FFFE}", "\n";
print $fh "\x{8FFFF}", "\n";
print $fh "\x{9FFFE}", "\n";
print $fh "\x{9FFFF}", "\n";
print $fh "\x{AFFFE}", "\n";
print $fh "\x{AFFFF}", "\n";
print $fh "\x{BFFFE}", "\n";
print $fh "\x{BFFFF}", "\n";
print $fh "\x{CFFFE}", "\n";
print $fh "\x{CFFFF}", "\n";
print $fh "\x{DFFFE}", "\n";
print $fh "\x{DFFFF}", "\n";
print $fh "\x{EFFFE}", "\n";
print $fh "\x{EFFFF}", "\n";
print $fh "\x{FFFFE}", "\n";
print $fh "\x{FFFFF}", "\n";
print $fh "\x{100000}", "\n";
print $fh "\x{10FFFE}", "\n";
print $fh "\x{10FFFF}", "\n";
print $fh "\x{110000}", "\n";        # above 0x10FFFF
close $fh;
EXPECT