1 2BEGIN { 3 unless ('A' eq pack('U', 0x41)) { 4 print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; 5 exit 0; 6 } 7 unless (0x41 == unpack('U', 'A')) { 8 print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; 9 exit 0; 10 } 11} 12 13BEGIN { 14 if ($ENV{PERL_CORE}) { 15 chdir('t') if -d 't'; 16 @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); 17 } 18} 19 20BEGIN { 21 unless (5.006001 <= $]) { 22 print "1..0 # skipped: Perl 5.6.1 or later". 23 " needed for this test\n"; 24 exit; 25 } 26} 27 28######################### 29 30use strict; 31use warnings; 32BEGIN { $| = 1; print "1..34\n"; } 33my $count = 0; 34sub ok ($;$) { 35 my $p = my $r = shift; 36 if (@_) { 37 my $x = shift; 38 $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x; 39 } 40 print $p ? "ok" : "not ok", ' ', ++$count, "\n"; 41} 42 43use Unicode::Normalize qw(:all); 44 45ok(1); 46 47sub _pack_U { Unicode::Normalize::pack_U(@_) } 48sub _unpack_U { Unicode::Normalize::unpack_U(@_) } 49 50######################### 51 52our $proc; # before the last starter 53our $unproc; # the last starter and after 54# If string has no starter, entire string is set to $unproc. 55 56($proc, $unproc) = splitOnLastStarter(""); 57ok($proc, ""); 58ok($unproc, ""); 59 60($proc, $unproc) = splitOnLastStarter("A"); 61ok($proc, ""); 62ok($unproc, "A"); 63 64($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42)); 65ok($proc, _pack_U(0x41, 0x300, 0x327)); 66ok($unproc, "B"); 67 68($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301)); 69ok($proc, _pack_U(0x4E00)); 70ok($unproc, _pack_U(0x41, 0x301)); 71 72($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300)); 73ok($proc, ""); 74ok($unproc, _pack_U(0x302, 0x301, 0x300)); 75 76our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300); 77our $dakuten = _pack_U(0x3099); 78our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300); 79 80our ($p, $u) = splitOnLastStarter($ka_grave); 81our $concat = $p . NFC($u.$dakuten); 82 83ok(NFC($ka_grave.$dakuten) eq $ga_grave); 84ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave); 85ok($concat eq $ga_grave); 86 87# 14 88 89sub arraynorm { 90 my $form = shift; 91 my @string = @_; 92 my $result = ""; 93 my $unproc = ""; 94 foreach my $str (@string) { 95 $unproc .= $str; 96 my $n = normalize($form, $unproc); 97 my($p, $u) = splitOnLastStarter($n); 98 $result .= $p; 99 $unproc = $u; 100 } 101 $result .= $unproc; 102 return $result; 103} 104 105my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}"; 106my $strC = "\x{3CE}\x{AC01}\x{AC03}"; 107my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7)); 108my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4)); 109ok($strC eq NFC($strD)); 110ok($strD eq join('', @str1)); 111ok($strC eq arraynorm('NFC', @str1)); 112ok($strD eq join('', @str2)); 113ok($strC eq arraynorm('NFC', @str2)); 114 115my @strX = ("\x{300}\x{AC00}", "\x{11A8}"); 116my $strX = "\x{300}\x{AC01}"; 117ok($strX eq NFC(join('', @strX))); 118ok($strX eq arraynorm('NFC', @strX)); 119ok($strX eq NFKC(join('', @strX))); 120ok($strX eq arraynorm('NFKC', @strX)); 121 122my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}"); 123my $strY = ("\x{304C}\x{0323}\x{0308}"); 124ok($strY eq NFC(join('', @strY))); 125ok($strY eq arraynorm('NFC', @strY)); 126ok($strY eq NFKC(join('', @strY))); 127ok($strY eq arraynorm('NFKC', @strY)); 128 129my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}"); 130my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}"); 131ok($strZ eq NFD(join('', @strZ))); 132ok($strZ eq arraynorm('NFD', @strZ)); 133ok($strZ eq NFKD(join('', @strZ))); 134ok($strZ eq arraynorm('NFKD', @strZ)); 135 136# 31 137 138# don't modify the source 139 140my $source = "ABC"; 141($proc, $unproc) = splitOnLastStarter($source); 142ok($proc, "AB"); 143ok($unproc, "C"); 144ok($source, "ABC"); 145 146# 34 147 148