#!/usr/local/bin/perl -w

# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
#
# Copyright 2009 Edwin Groothuis <edwin@FreeBSD.org>
# Copyright 2015 John Marino <draco@marino.st>
# Copyright 2020 Yuri Pankov <yuripv@FreeBSD.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$

use strict;
use warnings;
use Encode qw(encode decode);

# Usage: <this script> utf8-charmap output-file < widths
#
#   utf8-charmap  charmap file; only the lines between "CHARMAP" and
#                 "END CHARMAP" are used.
#   output-file   file to (over)write with the generated WIDTH section.
#   stdin         first line: version string quoted in the output header
#                 (the header calls it the utf8proc version); every
#                 following line: "<code> <width>" separated by one space,
#                 where <code> must be spelled the way utf8to32() spells
#                 its result.

# Maps a code point key (see utf8to32) to its charmap symbol, e.g. "<...>".
my %utf8map = ();

@ARGV == 2
    or die("usage: $0 utf8-charmap output-file < widths\n");

my $utf8charmap = $ARGV[0];
my $outfilename = $ARGV[1];

# Output handle: opened by generate_header(), closed by generate_footer().
my $fout;

get_utf8map($utf8charmap);
generate_header();
make_widths($outfilename);
generate_footer();

############################

# Convert a charmap byte sequence written as literal "\xNN" escapes (for
# example the text \xE2\x82\xAC) into the key used by %utf8map: the
# upper-case hexadecimal UTF-32BE encoding of the decoded UTF-8 text with
# leading zeros removed, but never shortened below four digits.
#
# Takes:   the escaped byte string.
# Returns: the hexadecimal key; the empty string for empty/undefined input.
sub utf8to32 {
	my ($escaped) = @_;

	my @hex = defined $escaped ? split(/\\x/, $escaped) : ();
	# The text starts with "\x", so split yields an empty first field.
	shift @hex if (@hex and $hex[0] eq '');

	my $bytes = pack('H2' x scalar @hex, @hex);
	my $u = uc(unpack('H*', encode('UTF-32BE', decode('UTF-8', $bytes))));

	# Drop a leading BOM only when real data follows it.  'UTF-32BE' does
	# not emit a BOM of its own, so a result consisting solely of
	# 0000FEFF is the character U+FEFF and must be kept; stripping it
	# unconditionally turned that character into an empty key.
	$u =~ s/^0000FEFF(?=[0-9A-F])//;

	# Remove leading zeros while keeping at least four digits.
	$u =~ s/^0+(?=.{4})//;

	return $u;
}

# Fill %utf8map from the CHARMAP section of a charmap file.
#
# Takes: path of the charmap file.
# Dies if the file cannot be opened.  Comment lines, blank lines, lines
# outside the CHARMAP ... END CHARMAP section and lines that are not of the
# form "<symbol> bytes" are ignored.
sub get_utf8map {
	my $file = shift;

	open(my $fin, '<', $file)
	    or die("can't read $file: $!\n");

	my $incharmap = 0;
	while (my $line = <$fin>) {
		chomp($line);
		$line =~ s/\r\z//;	# tolerate CRLF line endings
		next if ($line =~ /^\#/);
		next if ($line eq "");

		if ($line eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($line eq "END CHARMAP");

		# A failed match leaves $1/$2 from an earlier match in place,
		# so never use them without checking the result.
		next if ($line !~ /^(<[^\s]+>)\s+(.*)/);
		my ($symbol, $bytes) = ($1, $2);

#		print STDERR "register: $bytes - $symbol\n";
		$utf8map{utf8to32($bytes)} = $symbol;
	}
	close($fin);
}

# Read the version line from standard input, open the output file and write
# the banner followed by the opening "WIDTH" keyword.
#
# Dies if standard input is empty or the output file cannot be opened.
sub generate_header {
	my $version = <STDIN>;
	defined $version
	    or die("no version line on standard input\n");
	chomp($version);

	open($fout, ">", $outfilename)
	    or die("can't write to $outfilename: $!\n");
	print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# utf8proc $version.
# -----------------------------------------------------------------------------
WIDTH
EOF
}

# Write the closing "END WIDTH" keyword and close the output file.
#
# Dies if closing fails, since that is where buffered write errors surface.
sub generate_footer {
	print {$fout} "END WIDTH\n";
	close($fout)
	    or die("can't finish writing $outfilename: $!\n");
}

# Copy the remaining "<code> <width>" lines from standard input to the
# output as "<symbol>\t<width>", for every code that has a charmap symbol.
#
# The argument passed by the caller is not used; output goes to the handle
# opened by generate_header().
sub make_widths {
	while (my $line = <STDIN>) {
		chomp($line);
		my ($wc, $wcw) = split(/ /, $line, -1);

		# Blank or single-field lines carry no usable width.
		next if (!defined $wc or !defined $wcw);
		next if (!defined $utf8map{$wc});

		print {$fout} "$utf8map{$wc}\t$wcw\n";
	}
}