1#!perl 2# 3# This auxiliary script makes five header files 4# used for building XSUB of Unicode::Collate. 5# 6# Usage: 7# <do './mkheader'> in perl, or <perl mkheader> in command line 8# 9# Input file: 10# Collate/allkeys.txt 11# 12# Output file: 13# ucatbl.h 14# 15use 5.006; 16use strict; 17use warnings; 18use Carp; 19use File::Spec; 20 21use constant TRUE => 1; 22use constant FALSE => ""; 23use constant VCE_TEMPLATE => 'Cn4'; 24 25sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } 26 27our $PACKAGE = 'Unicode::Collate, mkheader'; 28our $prefix = "UCA_"; 29 30our %SimpleEntries; # $codepoint => $keys 31our @Rest; 32 33{ 34 my($f, $fh); 35 foreach my $d (File::Spec->curdir()) { 36 $f = File::Spec->catfile($d, "Collate", "allkeys.txt"); 37 last if open($fh, $f); 38 $f = undef; 39 } 40 croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f; 41 42 while (my $line = <$fh>) { 43 next if $line =~ /^\s*#/; 44 if ($line =~ /^\s*\@/) { 45 push @Rest, $line; 46 next; 47 } 48 49 next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element 50 51 $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name) 52 53 # gets element 54 my($e, $k) = split /;/, $line; 55 56 croak "Wrong Entry: <charList> must be separated by ';' ". 57 "from <collElement>" if ! $k; 58 59 my @uv = _getHexArray($e); 60 next if !@uv; 61 62 if (@uv != 1) { 63 push @Rest, $line; 64 next; 65 # Contractions of two or more characters will not be compiled. 66 } 67 68 my $is_L3_ignorable = TRUE; 69 70 my @key; 71 foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed 72 my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. 73 my @wt = _getHexArray($arr); 74 push @key, pack(VCE_TEMPLATE, $var, @wt); 75 $is_L3_ignorable = FALSE 76 if $wt[0] || $wt[1] || $wt[2]; 77 # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable 78 # is completely ignorable. 79 # For expansion, an entry $is_L3_ignorable 80 # if and only if "all" CEs are [.0000.0000.0000]. 81 } 82 my $mapping = $is_L3_ignorable ? [] : \@key; 83 my $num = @$mapping; 84 my $str = chr($num).join('', @$mapping); 85 $SimpleEntries{$uv[0]} = stringify($str); 86 } 87} 88 89sub stringify { 90 my $str = shift; 91 return sprintf '"%s"', join '', 92 map sprintf("\\x%02x", ord $_), split //, $str; 93 94} 95 96########## writing header files ########## 97 98my $init = ''; 99{ 100 my $type = "char* const"; 101 my $head = $prefix."rest"; 102 103 $init .= "static const $type $head [] = {\n"; 104 for my $line (@Rest) { 105 $line =~ s/\s*\z//; 106 next if $line eq ''; 107 $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/; 108 $init .= stringify($line).",\n"; 109 } 110 $init .= "NULL\n"; # sentinel 111 $init .= "};\n\n"; 112} 113 114my @tripletable = ( 115 { 116 file => "ucatbl", 117 name => "simple", 118 type => "char* const", 119 hash => \%SimpleEntries, 120 null => "NULL", 121 init => $init, 122 }, 123); 124 125foreach my $tbl (@tripletable) { 126 my $file = "$tbl->{file}.h"; 127 my $head = "${prefix}$tbl->{name}"; 128 my $type = $tbl->{type}; 129 my $hash = $tbl->{hash}; 130 my $null = $tbl->{null}; 131 my $init = $tbl->{init}; 132 133 open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made"; 134 binmode $fh_h; 135 my $old_fh = select $fh_h; 136 my %val; 137 138 print << 'EOF'; 139/* 140 * This file is auto-generated by mkheader. 141 * Any changes here will be lost! 142 */ 143EOF 144 145 print $init if defined $init; 146 147 foreach my $uv (keys %$hash) { 148 croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv) 149 unless $uv <= 0x10FFFF; 150 my @c = unpack 'CCCC', pack 'N', $uv; 151 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; 152 # $c[0] must be 0. 153 } 154 155 foreach my $p (sort { $a <=> $b } keys %val) { 156 next if ! $val{ $p }; 157 for (my $r = 0; $r < 256; $r++) { 158 next if ! $val{ $p }{ $r }; 159 printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r; 160 for (my $c = 0; $c < 256; $c++) { 161 print "\t", defined $val{$p}{$r}{$c} 162 ? $val{$p}{$r}{$c} 163 : $null; 164 print ',' if $c != 255; 165 print "\n" if $c % 8 == 7; 166 } 167 print "};\n\n"; 168 } 169 } 170 foreach my $p (sort { $a <=> $b } keys %val) { 171 next if ! $val{ $p }; 172 printf "static const $type* const ${head}_%02x [256] = {\n", $p; 173 for (my $r = 0; $r < 256; $r++) { 174 print $val{ $p }{ $r } 175 ? sprintf("${head}_%02x_%02x", $p, $r) 176 : "NULL"; 177 print ',' if $r != 255; 178 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; 179 } 180 print "};\n\n"; 181 } 182 print "static const $type* const * const $head [] = {\n"; 183 for (my $p = 0; $p <= 0x10; $p++) { 184 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; 185 print ',' if $p != 0x10; 186 print "\n"; 187 } 188 print "};\n\n"; 189 close $fh_h; 190 select $old_fh; 191} 192 1931; 194__END__ 195