1#! /usr/bin/perl
2#
3# Copyright (c) 2001-2020, PostgreSQL Global Development Group
4#
5# src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
6#
7# Generate UTF-8 <--> EUC_TW code conversion tables from
8# map files provided by Unicode organization.
# Unfortunately, the organization prohibits redistribution of
# the map files. To use this script, you therefore have to
# obtain CNS11643.TXT yourself from
# the organization's ftp site.
13#
14# CNS11643.TXT format:
15#		 CNS11643 code in hex (3 bytes)
16#		 (I guess the first byte means the plane No.)
17#		 UCS-2 code in hex
18#		 # and Unicode name (not used in this script)
19
20use strict;
21use warnings;
22
23use convutils;
24
my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl';

# Load the source mapping.  Each entry is a hash ref; this script reads and
# rewrites its {code} field and copies its {ucs}, {rest}, {f} and {l} fields.
my $mapping = read_source("CNS11643.TXT");

# Entries that pass the plane check, kept in their original order.
my @valid;

# Additional entries generated for plane-1 codes (see below).
my @extras;

foreach my $i (@$mapping)
{
	my $origcode = $i->{code};

	# Bits 16-20 of the source code hold the plane number.
	my $plane = ($origcode & 0x1f0000) >> 16;
	if ($plane > 16)
	{
		# The message contains no format directives, so plain print is
		# enough.  The entry is not added to @valid, which means it is
		# really left out of the generated tables rather than being passed
		# through with its untranslated source code.
		print STDERR "Warning: invalid plane No.$plane. ignored\n";
		next;
	}

	# Keep the low two bytes and set the high bit of each of them.
	my $code = ($origcode & 0xffff) | 0x8080;

	# Plane 1 uses that two-byte value as is.  Every other plane gets the
	# two leading bytes 0x8e and (0xa0 + plane) put in front of it.
	if ($plane != 1)
	{
		$code |= 0x8ea00000 + ($plane << 16);
	}
	$i->{code} = $code;
	push @valid, $i;

	# Some codes are mapped twice in the EUC_TW to UTF-8 table: source codes
	# from 0x12121 up to 0x20000 also get a second entry whose code is the
	# two-byte value plus 0x8ea10000.  That entry is tagged with direction
	# TO_UNICODE (defined in convutils; presumably it restricts the entry to
	# the EUC_TW -> UTF-8 table, as the sentence above suggests).
	if ($origcode >= 0x12121 && $origcode <= 0x20000)
	{
		push @extras,
		  {
			ucs       => $i->{ucs},
			code      => ($i->{code} + 0x8ea10000),
			rest      => $i->{rest},
			direction => TO_UNICODE,
			f         => $i->{f},
			l         => $i->{l}
		  };
	}
}

# Rebuild the mapping in place: accepted entries first, then the extras.
@$mapping = (@valid, @extras);

print_conversion_tables($this_script, "EUC_TW", $mapping);
72