1#!./perl 2 3BEGIN { 4 chdir 't' if -d 't'; 5 require './test.pl'; 6} 7 8skip_all_if_miniperl("no dynamic loading on miniperl, no File::Spec (used by charnames)"); 9plan(tests => 147); 10 11{ 12 # check the special casing of split /\s/ and unicode 13 use charnames qw(:full); 14 # below test data is extracted from 15 # PropList-5.0.0.txt 16 # Date: 2006-06-07, 23:22:52 GMT [MD] 17 # 18 # Unicode Character Database 19 # Copyright (c) 1991-2006 Unicode, Inc. 20 # For terms of use, see http://www.unicode.org/terms_of_use.html 21 # For documentation, see UCD.html 22 my @spaces=( 23 ord("\t"), # Cc <control-0009> 24 ord("\n"), # Cc <control-000A> 25 # not PerlSpace # Cc <control-000B> 26 ord("\f"), # Cc <control-000C> 27 ord("\r"), # Cc <control-000D> 28 ord(" "), # Zs SPACE 29 ord("\N{NEL}"), # Cc <control-0085> 30 ord("\N{NO-BREAK SPACE}"), 31 # Zs NO-BREAK SPACE 32 0x1680, # Zs OGHAM SPACE MARK 33 0x2000..0x200A, # Zs [11] EN QUAD..HAIR SPACE 34 0x2028, # Zl LINE SEPARATOR 35 0x2029, # Zp PARAGRAPH SEPARATOR 36 0x202F, # Zs NARROW NO-BREAK SPACE 37 0x205F, # Zs MEDIUM MATHEMATICAL SPACE 38 0x3000 # Zs IDEOGRAPHIC SPACE 39 ); 40 #diag "Have @{[0+@spaces]} to test\n"; 41 foreach my $cp (@spaces) { 42 my $msg = sprintf "Space: U+%04x", $cp; 43 my $space = chr($cp); 44 my $str="A:$space:B\x{FFFD}"; 45 chop $str; 46 47 my @res=split(/\s+/,$str); 48 my $cnt=split(/\s+/,$str); 49 ok(@res == 2 && join('-',@res) eq "A:-:B", "$msg - /\\s+/"); 50 is($cnt, scalar(@res), "$msg - /\\s+/ (count)"); 51 52 my $s2 = "$space$space:A:$space$space:B\x{FFFD}"; 53 chop $s2; 54 55 my @r2 = split(' ',$s2); 56 my $c2 = split(' ',$s2); 57 ok(@r2 == 2 && join('-', @r2) eq ":A:-:B", "$msg - ' '"); 58 is($c2, scalar(@r2), "$msg - ' ' (count)"); 59 60 my @r3 = split(/\s+/, $s2); 61 my $c3 = split(/\s+/, $s2); 62 ok(@r3 == 3 && join('-', @r3) eq "-:A:-:B", "$msg - /\\s+/ No.2"); 63 is($c3, scalar(@r3), "$msg - /\\s+/ No.2 (count)"); 64 } 65 66 { # RT #114808 67 warning_is( 68 sub { 69 $p=chr(0x100); 70 for (".","ab\x{101}def") { 71 @q = split /$p/ 72 } 73 }, 74 undef, 75 'no warnings when part of split cant match non-utf8' 76 ); 77 } 78 79} 80 81{ 82 # Check empty pattern with specified field count on Unicode string 83 my $string = "\x{100}\x{101}\x{102}"; 84 $_ = join(':', split(//, $string, 2)); 85 is($_, "\x{100}:\x{101}\x{102}", 86 "Split into specified number of fields with empty pattern"); 87 @ary = split(//, $string, 2); 88 $cnt = split(//, $string, 2); 89 is($cnt, scalar(@ary), "Check element count from previous test"); 90} 91