# Tests for the "rewrite" constructor option of Unicode::Collate: a callback
# that receives each line of the collation table and returns the text that
# should be used in its place.

BEGIN {
    # When run from inside the perl core tree, execute from t/ and load
    # modules from the core lib directory only.
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = ($^O eq 'MacOS') ? qw(::lib) : qw(../lib);
    }
}

use strict;
use warnings;

# Unbuffer STDOUT and announce the test plan before anything else runs.
BEGIN { $| = 1; print "1..17\n"; }

# Running test number; incremented by ok() for every result line printed.
my $count = 0;

# ok(GOT)           - passes when GOT is true.
# ok(GOT, EXPECTED) - passes when both are undef, or both are defined and
#                     string-equal.
# Prints one "ok N" / "not ok N" line.  The ($;$) prototype is kept so that
# both arguments are evaluated in scalar context.
sub ok ($;$) {
    my $got  = shift;
    my $pass = $got;

    if (@_) {
        my $expected = shift;
        if (!defined $expected) {
            $pass = !defined $got;
        }
        elsif (!defined $got) {
            $pass = 0;
        }
        else {
            $pass = $got eq $expected;
        }
    }

    my $status = $pass ? "ok" : "not ok";
    print $status, ' ', ++$count, "\n";
}

use Unicode::Collate;

ok(1);

# Thin wrappers around the module's pack_U/unpack_U helpers.  Nothing in the
# visible part of this file calls them.
sub _pack_U {
    return Unicode::Collate::pack_U(@_);
}

sub _unpack_U {
    return Unicode::Collate::unpack_U(@_);
}

#########################

# Rewrite callback: in every collation element that starts with weight 0000,
# overwrite the following two four-character weight fields with 0000 as well.
# The checks below rely on this to make the combining accents U+0300 and
# U+0301 invisible to comparison and substitution.
my $code = sub {
    my ($entry) = @_;
    $entry =~ s/\[\.0000\..{4}\..{4}([.\]])/[.0000.0000.0000$1/g;
    return $entry;
};

# Four checks shared by both collators built with $code:
#   * accented spellings of "camel" compare equal to the plain one,
#   * case still matters ("camel" sorts before "Camel"),
#   * gsubst() wraps only the spellings that match "camel", including the
#     accented one and the one containing NUL characters.
sub _check_accents_ignored {
    my ($collator) = @_;

    ok($collator->eq("camel", "came\x{300}l"));
    ok($collator->eq("camel", "ca\x{300}me\x{301}l"));
    ok($collator->lt("camel", "Camel"));

    my $text = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
    $collator->gsubst($text, "camel", sub { "=$_[0]=" });
    ok($text, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");

    return;
}

#####

# Collator built from the small keys.txt table, passed through $code.
my $Collator = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    rewrite       => $code,
);

_check_accents_ignored($Collator);

# 5

# Same rewrite, but with no explicit table given to the constructor.
my $rewriteDUCET = Unicode::Collate->new(
    normalization => undef,
    rewrite       => $code,
);

_check_accents_ignored($rewriteDUCET);

# 9

# Collator whose rewrite callback blanks out every table line that mentions
# HIRAGANA; all other lines pass through untouched.
my $undef_hira = Unicode::Collate->new(
    table         => 'keys.txt',
    normalization => undef,
    level         => 1,
    rewrite       => sub {
        my ($entry) = @_;
        return $entry =~ /HIRAGANA/ ? '' : $entry;
    },
);

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
my $cjkkanji = "\x{4E00}";

# The HIRAGANA entries are undefined as a result of the rewrite,
# so hiragana sort after the CJK Unified Ideographs.

ok($undef_hira->lt("abc", "perl"));
ok($undef_hira->lt("", "ABC"));
ok($undef_hira->lt($katakana, $hiragana));
ok($undef_hira->lt($katakana, $cjkkanji));
ok($undef_hira->lt($cjkkanji, $hiragana));

# Control case: with the HIRAGANA entries left in place, level 1 treats
# katakana and hiragana as equal, and both sort before the ideograph.
$Collator->change(level => 1);
ok($Collator->eq($katakana, $hiragana));
ok($Collator->lt($katakana, $cjkkanji));
ok($Collator->gt($cjkkanji, $hiragana));

# 17