1#! /usr/bin/perl
2#
3# Copyright (c) 2007-2020, PostgreSQL Global Development Group
4#
5# src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
6#
7# Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
8# "sjis-0213-2004-std.txt" (http://x0213.org)
9
10use strict;
11use warnings;
12
13use convutils;
14
# Read the Shift_JIS-2004 mapping file, build the list of mapping entries,
# and hand it to print_conversion_tables() to emit the
# UTF-8 <--> SHIFT_JIS_2004 conversion tables.

my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl';

my $in_file = "sjis-0213-2004-std.txt";

# Report the OS error as well, so a missing or unreadable file is diagnosable.
open(my $in, '<', $in_file) || die("cannot open $in_file: $!");

my @mapping;

# One run of hexadecimal digits.  Capturing exactly the digits (rather than
# a greedy ".*") keeps trailing whitespace out of the strings passed to
# hex(), which would otherwise warn about illegal hexadecimal digits, and
# stops a "U+" or "+" inside the trailing comment from confusing the match.
my $hex_re = qr/[0-9A-Fa-f]+/;

while (my $line = <$in>)
{
	if ($line =~ /^0x($hex_re)[ \t]*U\+($hex_re)\+($hex_re)([ \t]*)#(.*)$/)
	{

		# combined characters: one code mapped to a pair of code points
		my ($c, $u1, $u2, $gap, $text) = ($1, $2, $3, $4, $5);

		# The whitespace that preceded '#' is kept in the comment so the
		# generated comment text is unchanged for well-formed input lines.
		my $rest = "U+" . $u1 . "+" . $u2 . $gap . $text;
		my $code = hex($c);
		my $ucs1 = hex($u1);
		my $ucs2 = hex($u2);

		push @mapping,
		  {
			code       => $code,
			ucs        => $ucs1,
			ucs_second => $ucs2,
			comment    => $rest,
			direction  => BOTH,
			f          => $in_file,
			l          => $.
		  };
	}
	elsif ($line =~ /^0x($hex_re)[ \t]*U\+($hex_re)([ \t]*)#(.*)$/)
	{

		# non-combined characters: one code mapped to one code point
		my ($c, $u, $gap, $text) = ($1, $2, $3, $4);
		my $rest = "U+" . $u . $gap . $text;
		my $ucs  = hex($u);
		my $code = hex($c);
		my $direction;

		# Values below 0x80 are the single-byte ASCII range.
		# NOTE(review): presumably ASCII is handled outside these tables,
		# which is why such entries are dropped or made one-directional;
		# confirm against convutils.
		if ($code < 0x80 && $ucs < 0x80)
		{
			# Both sides are ASCII: no table entry is generated.
			next;
		}
		elsif ($code < 0x80)
		{
			# Only the encoded side is ASCII.
			$direction = FROM_UNICODE;
		}
		elsif ($ucs < 0x80)
		{
			# Only the Unicode side is ASCII.
			$direction = TO_UNICODE;
		}
		else
		{
			$direction = BOTH;
		}

		push @mapping,
		  {
			code      => $code,
			ucs       => $ucs,
			comment   => $rest,
			direction => $direction,
			f         => $in_file,
			l         => $.
		  };
	}

	# Lines matching neither pattern (e.g. header comments) are ignored.
}
close($in);

print_conversion_tables($this_script, "SHIFT_JIS_2004", \@mapping);
88