xref: /freebsd/tools/tools/locale/tools/mkwidths.pl (revision 81ad6265)
1#!/usr/local/bin/perl -w
2
3# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4#
5# Copyright 2009 Edwin Groothuis <edwin@FreeBSD.org>
6# Copyright 2015 John Marino <draco@marino.st>
7# Copyright 2020 Yuri Pankov <yuripv@FreeBSD.org>
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12# 1. Redistributions of source code must retain the above copyright
13#    notice, this list of conditions and the following disclaimer.
14# 2. Redistributions in binary form must reproduce the above copyright
15#    notice, this list of conditions and the following disclaimer in the
16#    documentation and/or other materials provided with the distribution.
17#
18# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28# SUCH DAMAGE.
29#
30# $FreeBSD$
31
32use strict;
33use Encode qw(encode decode);
34
# Driver.
#
# Command line: <UTF-8 charmap file> <output file>
# Standard input: one version line (copied into the generated header),
# followed by "<code point> <width>" lines, one per character.
#
# Refuse to start without both file arguments; otherwise the failure only
# shows up later as uninitialized-value warnings and an unusable file name.
die("usage: $0 utf8-charmap outfile < widths\n")
	if (@ARGV < 2);

# Code point (upper-case hex) => charmap symbol; filled by get_utf8map()
# and consulted by make_widths().
my %utf8map = ();
my $utf8charmap = $ARGV[0];
# Also read by generate_header(), which opens the output handle.
my $outfilename = $ARGV[1];

get_utf8map($utf8charmap);
generate_header();
make_widths($outfilename);
generate_footer();
43
44############################
45
# Convert a charmap byte sequence into the hexadecimal code point of the
# character it encodes.
#
# Argument: the UTF-8 encoding of a character written as a run of '\xHH'
#           escapes (a literal backslash, 'x', and two hex digits),
#           e.g. '\xE2\x82\xAC'.
# Returns:  the UTF-32BE value as upper-case hex with leading zeros
#           removed down to a minimum of four digits, e.g. '20AC',
#           '0041', '1F600'.
sub utf8to32 {
	my @hexbytes = split /\\x/, $_[0];

	# Whatever precedes the first '\x' comes back as an empty leading
	# field.  Guard against an empty list so no undefined element is read.
	shift @hexbytes if (@hexbytes and $hexbytes[0] eq '');

	my $octets = pack('H2' x scalar @hexbytes, @hexbytes);
	my $utf32 = encode('UTF-32BE', decode('UTF-8', $octets));
	my $hex = uc(unpack('H*', $utf32));

	# Deliberately no BOM removal: an encoding with explicit endianness
	# does not prepend one, so a leading 0000FEFF can only be the
	# character U+FEFF itself and must be kept.

	# Drop leading zeros, but always leave at least four digits.
	$hex =~ s/^0+(?=.{4})//;

	return $hex;
}
62
# Load the CHARMAP section of a charmap file into the file-level %utf8map.
#
# Argument: path of the charmap file.
# Effect:   for every "<SYMBOL> <bytes>" line found between the "CHARMAP"
#           and "END CHARMAP" lines, stores
#           $utf8map{ utf8to32(<bytes>) } = "<SYMBOL>".
# Dies if the file cannot be opened.
sub get_utf8map {
	my $file = shift;

	# Three-argument open: the name is never interpreted as a mode or pipe.
	open(my $fin, '<', $file)
		or die("can't read $file: $!\n");

	my $incharmap = 0;
	while (my $l = <$fin>) {
		chomp($l);
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Only lines shaped like an entry are registered; without this
		# check a non-matching line would reuse stale capture values.
		next unless ($l =~ /^(<[^\s]+>)\s+(.*)/);
		my ($symbol, $bytes) = ($1, $2);

		# Key is the code point in hex, value is the charmap symbol.
		$utf8map{utf8to32($bytes)} = $symbol;
	}
	close($fin);
}
93
# Open the output file (file-level $outfilename) on the FOUT handle and
# write the generated-file banner followed by the opening "WIDTH" line.
#
# Consumes the first line of standard input, which is inserted into the
# banner as the utf8proc version.
# Dies if that line is missing or the output file cannot be opened.
sub generate_header {
	my $version = <STDIN>;
	# Check before opening: opening with ">" truncates the output file,
	# so an empty input must not be allowed to clobber an existing one.
	die("no version line on standard input\n")
		if (!defined $version);
	chomp($version);

	open(FOUT, ">", "$outfilename")
		or die ("can't write to $outfilename: $!\n");
	print FOUT <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# utf8proc $version.
# -----------------------------------------------------------------------------
WIDTH
EOF
}
108
# Write the closing "END WIDTH" line to FOUT and close the handle.
#
# Dies if the close fails: buffered write errors (for example a full
# disk) are only reported at close time and would otherwise go unnoticed.
sub generate_footer {
	print FOUT "END WIDTH\n";
	close(FOUT)
		or die("can't finish writing the widths file: $!\n");
}
113
# Copy width records from standard input to FOUT, translating code points
# to charmap symbols.
#
# Each input line is split on single spaces; the first field is looked up
# in the file-level %utf8map and the second is taken as the width.  Lines
# whose first field has no entry in %utf8map are dropped.  Output lines
# have the form "<symbol><TAB><width>".
#
# Any argument passed by the caller is ignored; output always goes to the
# already-open FOUT handle.
sub make_widths {
	while (defined(my $entry = <STDIN>)) {
		chomp($entry);
		my ($codepoint, $width) = split(/ /, $entry, -1);

		if (defined $utf8map{$codepoint}) {
			print FOUT "$utf8map{$codepoint}\t$width\n";
		}
	}
}
126