1#! /usr/bin/perl
2#
3# Copyright (c) 2007-2020, PostgreSQL Global Development Group
4#
# src/backend/utils/mb/Unicode/UCS_to_UHC.pl
6#
7# Generate UTF-8 <--> UHC code conversion tables from
8# "windows-949-2000.xml", obtained from
9# http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/
10#
11# The lines we care about in the source file look like
12#    <a u="009A" b="81 30 83 36"/>
13# where the "u" field is the Unicode code point in hex,
14# and the "b" field is the hex byte sequence for UHC
15
16use strict;
17use warnings;
18
19use convutils;
20
# Path of this script; recorded as the origin of entries added by hand and
# passed on to print_conversion_tables().
my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_UHC.pl';

# Read the input

my $in_file = "windows-949-2000.xml";

# Report $! as well, so a failure says why the file could not be opened
# (missing file, permissions, ...) and not merely that it could not.
open(my $in, '<', $in_file) || die("cannot open $in_file: $!");

# One hash ref per accepted character pair: the Unicode code point (ucs),
# the byte sequence packed into a single integer (code), the conversion
# direction, and the file name / line number the pair was read from.
my @mapping;

while (my $line = <$in>)
{
	# Only <a u="..." b="..."/> elements are of interest; skip the rest.
	next if ($line !~ m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
	my ($u, $c) = ($1, $2);

	# "b" holds space-separated hex bytes; squeeze out the spaces so the
	# whole sequence can be read as one hex number.
	$c =~ s/ //g;
	my $ucs  = hex($u);
	my $code = hex($c);

	# The byte codes 0x80 and 0xFF are never added to the mapping.
	next if ($code == 0x0080 || $code == 0x00FF);

	# Keep a pair only when both the byte code and the code point are
	# at least 0x80.
	if ($code >= 0x80 && $ucs >= 0x0080)
	{
		push @mapping,
		  {
			ucs       => $ucs,
			code      => $code,
			direction => BOTH,
			f         => $in_file,
			l         => $.      # input line number of this element
		  };
	}
}
close($in);
54
# The input file lacks one character, so its entry is supplied by hand.
# Its origin is recorded as this script plus the line holding __LINE__.
my %extra_entry = (
	ucs       => 0x327e,
	code      => 0xa2e8,
	direction => BOTH,
	comment   => 'CIRCLED HANGUL IEUNG U',
	f         => $this_script,
	l         => __LINE__
);
push @mapping, \%extra_entry;

# Hand the complete list to convutils to emit the "UHC" conversion tables.
print_conversion_tables($this_script, "UHC", \@mapping);
67