#!/usr/bin/env perl
# Generates C lookup tables (titlecase mappings and decompositions) from
# UnicodeData.txt-style input. Input is read from the files named on the
# command line, or from standard input; the C source is written to standard
# output.
use strict;
use warnings;

# Keys <= 0xff go into dense 256-entry maps, larger keys into parallel
# key/value lists split by key width (16bit / 32bit).
my (%titlecase8, %uni8_decomp);
my (@titlecase16_keys, @titlecase16_values);
my (@titlecase32_keys, @titlecase32_values);
my (@uni16_decomp_keys, @uni16_decomp_values);
my (@uni32_decomp_keys, @uni32_decomp_values);
my (@multidecomp_keys, @multidecomp_offsets, @multidecomp_values);

# Convert a string of hexadecimal digits to its numeric value.
# Dies when the string is undefined, empty, or contains anything other than
# hex digits, so malformed input can never silently turn into code 0.
sub parse_hex {
    my ($str) = @_;

    if (!defined($str) || $str !~ /\A[0-9A-Fa-f]+\z/) {
        die "Error: Invalid hexadecimal code '" . (defined($str) ? $str : "") . "'";
    }
    # Oversized values are rejected by the callers' explicit range checks;
    # don't let hex() add its own "non-portable" noise on top of that.
    no warnings qw(portable overflow);
    return hex($str);
}

while (my $line = <>) {
    # Strip the line terminator, tolerating CRLF input.
    $line =~ s/\r?\n\z//;
    next if ($line eq "");

    # LIMIT -1 keeps trailing empty fields, so every column that exists in
    # the input line is defined.
    my @arr = split(/;/, $line, -1);
    my $code = parse_hex($arr[0]);
    my $decomp = defined($arr[5]) ? $arr[5] : "";
    my $titlecode = defined($arr[14]) ? $arr[14] : "";

    if ($titlecode ne "") {
        # titlecase mapping
        my $value = parse_hex($titlecode);
        if ($value == $code) {
            # the same character, ignore
        } elsif ($code <= 0xff) {
            die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
            $titlecase8{$code} = $value;
        } elsif ($code <= 0xffff) {
            die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff);
            push @titlecase16_keys, $code;
            push @titlecase16_values, $value;
        } else {
            # titlecase32_values is emitted as uint32_t
            die "Error: We've assumed 32bit keys have max. 32bit values" if ($value > 0xffffffff);
            push @titlecase32_keys, $code;
            push @titlecase32_values, $value;
        }
    } elsif ($decomp =~ /\A(?:<[^>]*> )?(.+)\z/) {
        # decompositions; an optional leading "<tag> " is dropped
        my $decomp_codes = $1;
        if ($decomp_codes =~ /\A[0-9A-F]+\z/i) {
            # unicharacter decomposition. use separate lists for this
            my $value = parse_hex($decomp_codes);
            if ($value > 0xffffffff) {
                print STDERR "Error: We've assumed decomposition codes are max. 32bit\n";
                exit 1;
            }
            if ($code <= 0xff) {
                # uni8_decomp_map is emitted as uint16_t, same as titlecase8_map
                die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
                $uni8_decomp{$code} = $value;
            } elsif ($code <= 0xffff) {
                push @uni16_decomp_keys, $code;
                push @uni16_decomp_values, $value;
            } else {
                push @uni32_decomp_keys, $code;
                push @uni32_decomp_values, $value;
            }
        } else {
            # multicharacter decomposition.
            if ($code > 0xffffffff) {
                print STDERR "Error: We've assumed multi-decomposition key codes are max. 32bit\n";
                exit 1;
            }

            # multidecomp_offsets is emitted as uint16_t
            my $offset = scalar(@multidecomp_values);
            die "Error: We've assumed multi-decomposition offsets are max. 16bit" if ($offset > 0xffff);

            push @multidecomp_keys, $code;
            push @multidecomp_offsets, $offset;

            foreach my $dcode (split(" ", $decomp_codes)) {
                my $value = parse_hex($dcode);
                if ($value > 0xffffffff) {
                    print STDERR "Error: We've assumed decomposition codes are max. 32bit\n";
                    exit 1;
                }
                push @multidecomp_values, $value;
            }
            # each value sequence is terminated by 0
            push @multidecomp_values, 0;
        }
    }
}

# Print the numbers in the referenced array as "0x%05x" literals, eight per
# row. Items within a row are separated by ", " and rows by ",\n\t". Nothing
# is printed after the last item, and nothing at all for an empty list.
sub print_list {
    my @list = @{$_[0]};

    my @rows;
    while (my @chunk = splice(@list, 0, 8)) {
        push @rows, join(", ", map { sprintf("0x%05x", $_) } @chunk);
    }
    print join(",\n\t", @rows);
}

print "/* This file is automatically generated by unicodemap.pl from UnicodeData.txt

   NOTE: decompositions for characters having titlecase characters
   are not included, because we first translate everything to titlecase */\n";

# Print a dense 256-entry table for keys 0..0xff from the referenced hash.
# Keys without a mapping are emitted as themselves (identity mapping).
sub print_map8 {
    my %map = %{$_[0]};

    my @list = map { defined($map{$_}) ? $map{$_} : $_ } (0 .. 0xff);
    print_list(\@list);
}

print "static const uint16_t titlecase8_map[256] = {\n\t";
print_map8(\%titlecase8);
print "\n};\n";

print "static const uint16_t titlecase16_keys[] = {\n\t";
print_list(\@titlecase16_keys);
print "\n};\n";

print "static const uint16_t titlecase16_values[] = {\n\t";
print_list(\@titlecase16_values);
print "\n};\n";

print "static const uint32_t titlecase32_keys[] = {\n\t";
print_list(\@titlecase32_keys);
print "\n};\n";

print "static const uint32_t titlecase32_values[] = {\n\t";
print_list(\@titlecase32_values);
print "\n};\n";

print "static const uint16_t uni8_decomp_map[256] = {\n\t";
print_map8(\%uni8_decomp);
print "\n};\n";

print "static const uint16_t uni16_decomp_keys[] = {\n\t";
print_list(\@uni16_decomp_keys);
print "\n};\n";

# Emit the remaining decomposition tables. Each entry pairs the opening line
# of a C array definition with a reference to the list that fills it; the
# values are written by print_list and the array is then closed.
foreach my $table (
    [ "static const uint32_t uni16_decomp_values[] = {\n\t", \@uni16_decomp_values ],
    [ "static const uint32_t uni32_decomp_keys[] = {\n\t", \@uni32_decomp_keys ],
    [ "static const uint32_t uni32_decomp_values[] = {\n\t", \@uni32_decomp_values ],
    [ "static const uint32_t multidecomp_keys[] = {\n\t", \@multidecomp_keys ],
    [ "static const uint16_t multidecomp_offsets[] = {\n\t", \@multidecomp_offsets ],
    [ "static const uint32_t multidecomp_values[] = {\n\t", \@multidecomp_values ],
) {
    my ($opening, $values) = @{$table};

    print $opening;
    print_list($values);
    print "\n};\n";
}