xref: /openbsd/gnu/usr.bin/perl/dist/Storable/t/utf8hash.t (revision 8932bfb7)
1#!./perl
2
3sub BEGIN {
4    if ($] < 5.007) {
5	print "1..0 # Skip: no utf8 hash key support\n";
6	exit 0;
7    }
8    unshift @INC, 't';
9    require Config; import Config;
10    if ($ENV{PERL_CORE}){
11	if($Config{'extensions'} !~ /\bStorable\b/) {
12	    print "1..0 # Skip: Storable was not built\n";
13	    exit 0;
14	}
15    }
16}
17
18use strict;
19our $DEBUGME = shift || 0;
20use Storable qw(store nstore retrieve thaw freeze);
21{
22    no warnings;
23    $Storable::DEBUGME = ($DEBUGME > 1);
24}
25# Better than no plan, because I was getting out of memory errors, at which
26# point Test::More tidily prints up 1..79 as if I meant to finish there.
27use Test::More tests=>144;
28use bytes ();
29my %utf8hash;
30
31$Storable::canonical = $Storable::canonical; # Shut up a used only once warning.
32
33for $Storable::canonical (0, 1) {
34
35# first we generate a nasty hash which keys include both utf8
36# on and off with identical PVs
37
38no utf8; # we have a naked 8-bit byte below (in Latin 1, anyway)
39
40# In Latin 1 -ese the below ord() should end up 0xc0 (192),
41# in EBCDIC 0x64 (100).  Both should end up being UTF-8/UTF-EBCDIC.
42my @ords = (
43	    ord("�"), # LATIN CAPITAL LETTER A WITH GRAVE
44	    0x3000, #IDEOGRAPHIC SPACE
45	   );
46
47foreach my $i (@ords){
48    my $u = chr($i); utf8::upgrade($u);
49    # warn sprintf "%d,%d", bytes::length($u), is_utf8($u);
50    my $b = chr($i); utf8::encode($b);
51    # warn sprintf "%d,%d" ,bytes::length($b), is_utf8($b);
52
53    isnt($u, $b, "equivalence - with utf8flag");
54
55    $utf8hash{$u} = $utf8hash{$b} = $i;
56}
57
58sub nkeys($){
59    my $href = shift;
60    return scalar keys %$href;
61}
62
63my $nk;
64is($nk = nkeys(\%utf8hash), scalar(@ords)*2,
65   "nasty hash generated (nkeys=$nk)");
66
67# now let the show begin!
68
69my $thawed = thaw(freeze(\%utf8hash));
70
71is($nk = nkeys($thawed),
72   nkeys(\%utf8hash),
73   "scalar keys \%{\$thawed} (nkeys=$nk)");
74for my $k (sort keys %$thawed){
75    is($utf8hash{$k}, $thawed->{$k}, "frozen item chr($utf8hash{$k})");
76}
77
78my $storage = "utfhash.po"; # po = perl object!
79my $retrieved;
80
81ok((nstore \%utf8hash, $storage), "nstore to $storage");
82ok(($retrieved = retrieve($storage)), "retrieve from $storage");
83
84is($nk = nkeys($retrieved),
85   nkeys(\%utf8hash),
86   "scalar keys \%{\$retrieved} (nkeys=$nk)");
87for my $k (sort keys %$retrieved){
88    is($utf8hash{$k}, $retrieved->{$k}, "nstored item chr($utf8hash{$k})");
89}
90unlink $storage;
91
92
93ok((store \%utf8hash, $storage), "store to $storage");
94ok(($retrieved = retrieve($storage)), "retrieve from $storage");
95is($nk = nkeys($retrieved),
96   nkeys(\%utf8hash),
97   "scalar keys \%{\$retrieved} (nkeys=$nk)");
98for my $k (sort keys %$retrieved){
99    is($utf8hash{$k}, $retrieved->{$k}, "stored item chr($utf8hash{$k})");
100}
101$DEBUGME or unlink $storage;
102
103# On the premis that more tests are good, here are NWC's tests:
104
105package Hash_Test;
106
107sub me_second {
108  return (undef, $_[0]);
109}
110
111package main;
112
113my $utf8 = "Schlo\xdf" . chr 256;
114chop $utf8;
115
116# Set this to 1 to test the test by bypassing Storable.
117my $bypass = 0;
118
119sub class_test {
120  my ($object, $package) = @_;
121  unless ($package) {
122    is ref $object, 'HASH', "$object is unblessed";
123    return;
124  }
125  isa_ok ($object, $package);
126  my ($garbage, $copy) = eval {$object->me_second};
127  is $@, "", "check it has correct method";
128  cmp_ok $copy, '==', $object, "and that it returns the same object";
129}
130
131# Thanks to Dan Kogai for the Kanji for "castle" (which he informs me also
132# means 'a city' in Mandarin).
133my %hash = (map {$_, $_} 'castle', "ch\xe5teau", $utf8, "\x{57CE}");
134
135for my $package ('', 'Hash_Test') {
136  # Run through and sanity check these.
137  if ($package) {
138    bless \%hash, $package;
139  }
140  for (keys %hash) {
141    my $l = 0 + /^\w+$/;
142    my $r = 0 + $hash{$_} =~ /^\w+$/;
143    cmp_ok ($l, '==', $r);
144  }
145
146  # Grr. This cperl mode thinks that ${ is a punctuation variable.
147  # I presume it's punishment for using xemacs rather than emacs. Or OS/2 :-)
148  my $copy = $bypass ? \%hash : ${thaw freeze \\%hash};
149  class_test ($copy, $package);
150
151  for (keys %$copy) {
152    my $l = 0 + /^\w+$/;
153    my $r = 0 + $copy->{$_} =~ /^\w+$/;
154    cmp_ok ($l, '==', $r, sprintf "key length %d", length $_);
155  }
156
157
158  my $bytes = my $char = chr 27182;
159  utf8::encode ($bytes);
160
161  my $orig = {$char => 1};
162  if ($package) {
163    bless $orig, $package;
164  }
165  my $just_utf8 = $bypass ? $orig : ${thaw freeze \$orig};
166  class_test ($just_utf8, $package);
167  cmp_ok (scalar keys %$just_utf8, '==', 1, "1 key in utf8?");
168  cmp_ok ($just_utf8->{$char}, '==', 1, "utf8 key present?");
169  ok (!exists $just_utf8->{$bytes}, "bytes key absent?");
170
171  $orig = {$bytes => 1};
172  if ($package) {
173    bless $orig, $package;
174  }
175  my $just_bytes = $bypass ? $orig : ${thaw freeze \$orig};
176  class_test ($just_bytes, $package);
177
178  cmp_ok (scalar keys %$just_bytes, '==', 1, "1 key in bytes?");
179  cmp_ok ($just_bytes->{$bytes}, '==', 1, "bytes key present?");
180  ok (!exists $just_bytes->{$char}, "utf8 key absent?");
181
182  die sprintf "Both have length %d, which is crazy", length $char
183    if length $char == length $bytes;
184
185  $orig = {$bytes => length $bytes, $char => length $char};
186  if ($package) {
187    bless $orig, $package;
188  }
189  my $both = $bypass ? $orig : ${thaw freeze \$orig};
190  class_test ($both, $package);
191
192  cmp_ok (scalar keys %$both, '==', 2, "2 keys?");
193  cmp_ok ($both->{$bytes}, '==', length $bytes, "bytes key present?");
194  cmp_ok ($both->{$char}, '==', length $char, "utf8 key present?");
195}
196
197}
198