xref: /freebsd/tools/tools/locale/tools/mkwidths.pl (revision 06c3fb27)
1#!/usr/local/bin/perl -w
2
3# SPDX-License-Identifier: BSD-2-Clause
4#
5# Copyright 2009 Edwin Groothuis <edwin@FreeBSD.org>
6# Copyright 2015 John Marino <draco@marino.st>
7# Copyright 2020 Yuri Pankov <yuripv@FreeBSD.org>
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12# 1. Redistributions of source code must retain the above copyright
13#    notice, this list of conditions and the following disclaimer.
14# 2. Redistributions in binary form must reproduce the above copyright
15#    notice, this list of conditions and the following disclaimer in the
16#    documentation and/or other materials provided with the distribution.
17#
18# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28# SUCH DAMAGE.
29#
30
31use strict;
32use Encode qw(encode decode);
33
# Both arguments are required: the UTF-8 charmap to read and the widths
# file to write.  Fail early with a usage message instead of carrying on
# with empty file names and failing later with a less helpful error.
if (@ARGV < 2) {
	die("usage: $0 utf8-charmap output-file\n");
}

# Maps a code point, written as upper-case hexadecimal (see utf8to32), to
# the symbolic name in angle brackets that the charmap uses for it.
my %utf8map = ();
my $utf8charmap = $ARGV[0];
my $outfilename = $ARGV[1];

# The order matters: generate_header consumes the first line of standard
# input (the version), make_widths consumes everything after it.
get_utf8map($utf8charmap);
generate_header();
make_widths($outfilename);
generate_footer();
42
43############################
44
# Convert a charmap byte sequence into a code point string.
#
# Takes the UTF-8 encoding of a character written as literal "\xHH"
# escapes (for example the text \xE2\x82\xAC) and returns its UTF-32BE
# value as upper-case hexadecimal with leading zeros removed, but never
# shorter than four digits (for example "20AC", "0041", "1F600").
sub utf8to32 {
	my @kl = split /\\x/, $_[0];

	# The text starts with \x, so split yields an empty leading field.
	shift @kl if (@kl and $kl[0] eq '');
	my $k = pack('H2' x scalar @kl, @kl);
	my $ux = encode('UTF-32BE', decode('UTF-8', $k));
	my $u = uc(unpack('H*', $ux));
	# No BOM removal: an explicitly big-endian encoding never emits one,
	# and stripping 0000FEFF would turn U+FEFF itself into an empty string.
	# Remove heading zero digits, keeping at least four digits.
	$u =~ s/^0+(?=.{4})//;

	return $u;
}
61
# Read the UTF-8 charmap file and fill %utf8map.
#
# Only the lines between "CHARMAP" and "END CHARMAP" are considered;
# comment lines (starting with #) and empty lines are ignored.  Each entry
# is expected to hold a symbolic name in angle brackets followed by
# whitespace and the character's UTF-8 bytes as \xHH escapes.  It is
# stored as code point (see utf8to32) => symbolic name.
#
# Dies when the file cannot be opened.
sub get_utf8map {
	my $file = shift;

	open(my $fin, '<', $file)
		or die("can't read $file: $!\n");

	my $incharmap = 0;
	while (my $l = <$fin>) {
		chomp($l);
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip anything that is not a "<name> bytes" entry: reading $1/$2
		# after a failed match would silently reuse the previous entry.
		next if ($l !~ /^(<[^\s]+>)\s+(.*)/);
		# Copy the captures before calling code that runs its own regexes.
		my ($v, $bytes) = ($1, $2);
		my $k = utf8to32($bytes);	# UTF-8 char code

#		print STDERR "register: $k - $v\n";
		$utf8map{$k} = $v;
	}
	close($fin);
}
92
# Open the output file and write its leading comment and the WIDTH keyword.
#
# The first line of standard input is consumed and used as the utf8proc
# version in the comment.  The FOUT handle is left open for make_widths
# and generate_footer.  Dies when the output file cannot be opened.
sub generate_header {
	my $version = <STDIN>;
	if (defined $version) {
		chomp($version);
	} else {
		# Empty input: report it rather than tripping over undef below.
		warn("no version line on standard input\n");
		$version = "";
	}

	open(FOUT, ">", "$outfilename")
		or die ("can't write to $outfilename: $!\n");
	print FOUT <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# utf8proc $version.
# -----------------------------------------------------------------------------
WIDTH
EOF
}
107
# Terminate the WIDTH section and close the output file.
#
# Buffered write errors only surface when the handle is closed, so the
# result of close is checked.  Dies when closing fails.
sub generate_footer {
	print FOUT "END WIDTH\n";
	close(FOUT)
		or die("can't close $outfilename: $!\n");
}
112
# Write one "<name>\t<width>" line to FOUT for every known character.
#
# Reads the rest of standard input, one "<code point> <width>" pair per
# line separated by a single space, and looks the code point up in
# %utf8map; code points without a charmap entry are dropped.  The argument
# passed by the caller is not used: output goes to the FOUT handle opened
# by generate_header.
sub make_widths {
	while (my $l = <STDIN>) {
		chomp($l);
		my ($wc, $wcw) = split(/ /, $l, -1);

		# Blank or truncated lines carry no usable width.
		next if (!defined $wc or !defined $wcw or $wcw eq "");
		next if !defined $utf8map{$wc};

		print FOUT "$utf8map{$wc}\t$wcw\n";
	}
}
125