1#! /usr/bin/perl
2#
3# Copyright (c) 2007-2020, PostgreSQL Global Development Group
4#
5# src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
6#
7# Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
8# "euc-jis-2004-std.txt" (http://x0213.org)
9
10use strict;
11use warnings;
12
13use convutils;
14
my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl';

# Read the mapping source file and collect one entry per usable line.
# The collected entries are handed to print_conversion_tables() (from
# convutils) at the end of the script.

my $in_file = "euc-jis-2004-std.txt";

# Include $! so a failure reports the underlying OS error as well.
open(my $in, '<', $in_file) || die("cannot open $in_file: $!");

# A run of hexadecimal digits.  The numeric fields are captured with this
# rather than a greedy ".*" so that the whitespace separating the fields
# never ends up in the strings passed to hex(); under "use warnings" that
# would raise "Illegal hexadecimal digit ... ignored" for every line.
my $hex = qr/[0-9A-Fa-f]+/;

my @all;

while (my $line = <$in>)
{
	if ($line =~ /^0x($hex)[ \t]*U\+($hex)\+($hex)([ \t]*)#(.*)$/)
	{

		# combined characters: one code mapped to a pair of code points
		# written as U+XXXX+YYYY
		my ($c, $u1, $u2, $gap, $text) = ($1, $2, $3, $4, $5);

		# $gap is the whitespace found between the code points and '#'.
		# It is kept in the comment so the comment text is the same as
		# what the looser ".*" captures used to produce.
		my $rest = "U+" . $u1 . "+" . $u2 . $gap . $text;
		my $code = hex($c);
		my $ucs1 = hex($u1);
		my $ucs2 = hex($u2);

		push @all,
		  {
			direction  => BOTH,
			ucs        => $ucs1,
			ucs_second => $ucs2,
			code       => $code,
			comment    => $rest,
			f          => $in_file,    # input file name
			l          => $.           # input line number
		  };
	}
	elsif ($line =~ /^0x($hex)[ \t]*U\+($hex)([ \t]*)#(.*)$/)
	{

		# non-combined characters: one code mapped to a single code point
		my ($c, $u, $gap, $text) = ($1, $2, $3, $4);

		# As above, keep the original whitespace in the comment text.
		my $rest = "U+" . $u . $gap . $text;
		my $ucs  = hex($u);
		my $code = hex($c);

		# Skip entries where both the code and the code point are below
		# 0x80.
		next if ($code < 0x80 && $ucs < 0x80);

		push @all,
		  {
			direction => BOTH,
			ucs       => $ucs,
			code      => $code,
			comment   => $rest,
			f         => $in_file,    # input file name
			l         => $.           # input line number
		  };
	}

	# Lines matching neither pattern are ignored.
}
close($in);

print_conversion_tables($this_script, "EUC_JIS_2004", \@all);
72