1#!perl
2#
3# This auxiliary script makes five header files
4# used for building XSUB of Unicode::Collate.
5#
6# Usage:
7#    <do './mkheader'> in perl, or <perl mkheader> in command line
8#
9# Input file:
10#    Collate/allkeys.txt
11#
12# Output file:
13#    ucatbl.h
14#
15use 5.006;
16use strict;
17use warnings;
18use Carp;
19use File::Spec;
20
21use constant TRUE  => 1;
22use constant FALSE => "";
23use constant VCE_TEMPLATE => 'Cn4';
24
25sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
26
27our $PACKAGE = 'Unicode::Collate, mkheader';
28our $prefix  = "UCA_";
29
30our %SimpleEntries;	# $codepoint => $keys
31our @Rest;
32
33{
34    my($f, $fh);
35    foreach my $d (File::Spec->curdir()) {
36	$f = File::Spec->catfile($d, "Collate", "allkeys.txt");
37	last if open($fh, $f);
38	$f = undef;
39    }
40    croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
41
42    while (my $line = <$fh>) {
43	next if $line =~ /^\s*#/;
44	if ($line =~ /^\s*\@/) {
45	    push @Rest, $line;
46	    next;
47	}
48
49	next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element
50
51	$line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
52
53	# gets element
54	my($e, $k) = split /;/, $line;
55
56	croak "Wrong Entry: <charList> must be separated by ';' ".
57	      "from <collElement>" if ! $k;
58
59	my @uv = _getHexArray($e);
60	next if !@uv;
61
62	if (@uv != 1) {
63	    push @Rest, $line;
64	    next;
65	    # Contractions of two or more characters will not be compiled.
66	}
67
68	my $is_L3_ignorable = TRUE;
69
70	my @key;
71	foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
72	    my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
73	    my @wt = _getHexArray($arr);
74	    push @key, pack(VCE_TEMPLATE, $var, @wt);
75	    $is_L3_ignorable = FALSE
76		if $wt[0] || $wt[1] || $wt[2];
77	    # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
78	    # is completely ignorable.
79	    # For expansion, an entry $is_L3_ignorable
80	    # if and only if "all" CEs are [.0000.0000.0000].
81	}
82	my $mapping = $is_L3_ignorable ? [] : \@key;
83	my $num = @$mapping;
84	my $str = chr($num).join('', @$mapping);
85	$SimpleEntries{$uv[0]} = stringify($str);
86    }
87}
88
89sub stringify {
90    my $str = shift;
91    return sprintf '"%s"', join '',
92	   map sprintf("\\x%02x", ord $_), split //, $str;
93
94}
95
96########## writing header files ##########
97
98my $init = '';
99{
100    my $type = "char* const";
101    my $head = $prefix."rest";
102
103    $init .= "static const $type $head [] = {\n";
104    for my $line (@Rest) {
105	$line =~ s/\s*\z//;
106	next if $line eq '';
107	$init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
108	$init .= stringify($line).",\n";
109    }
110    $init .= "NULL\n"; # sentinel
111    $init .= "};\n\n";
112}
113
114my @tripletable = (
115    {
116	file => "ucatbl",
117	name => "simple",
118	type => "char* const",
119	hash => \%SimpleEntries,
120	null => "NULL",
121	init => $init,
122    },
123);
124
125foreach my $tbl (@tripletable) {
126    my $file = "$tbl->{file}.h";
127    my $head = "${prefix}$tbl->{name}";
128    my $type = $tbl->{type};
129    my $hash = $tbl->{hash};
130    my $null = $tbl->{null};
131    my $init = $tbl->{init};
132
133    open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made";
134    binmode $fh_h;
135    my $old_fh = select $fh_h;
136    my %val;
137
138    print << 'EOF';
139/*
140 * This file is auto-generated by mkheader.
141 * Any changes here will be lost!
142 */
143EOF
144
145    print $init if defined $init;
146
147    foreach my $uv (keys %$hash) {
148	croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
149	    unless $uv <= 0x10FFFF;
150	my @c = unpack 'CCCC', pack 'N', $uv;
151	$val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
152	# $c[0] must be 0.
153    }
154
155    foreach my $p (sort { $a <=> $b } keys %val) {
156	next if ! $val{ $p };
157	for (my $r = 0; $r < 256; $r++) {
158	    next if ! $val{ $p }{ $r };
159	    printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r;
160	    for (my $c = 0; $c < 256; $c++) {
161		print "\t", defined $val{$p}{$r}{$c}
162		    ? $val{$p}{$r}{$c}
163		    : $null;
164		print ','  if $c != 255;
165		print "\n" if $c % 8 == 7;
166	    }
167	    print "};\n\n";
168	}
169    }
170    foreach my $p (sort { $a <=> $b } keys %val) {
171	next if ! $val{ $p };
172	printf "static const $type* const ${head}_%02x [256] = {\n", $p;
173	for (my $r = 0; $r < 256; $r++) {
174	    print $val{ $p }{ $r }
175		? sprintf("${head}_%02x_%02x", $p, $r)
176		: "NULL";
177	    print ','  if $r != 255;
178	    print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
179	}
180	print "};\n\n";
181    }
182    print "static const $type* const * const $head [] = {\n";
183    for (my $p = 0; $p <= 0x10; $p++) {
184	print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
185	print ','  if $p != 0x10;
186	print "\n";
187    }
188    print "};\n\n";
189    close $fh_h;
190    select $old_fh;
191}
192
1931;
194__END__
195