1#! /usr/bin/perl
2#
3# Copyright (c) 2001-2016, PostgreSQL Global Development Group
4#
5# src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
6#
# Generate UTF-8 <--> EUC_JP code conversion tables from
# map files provided by the Unicode organization.
# Unfortunately the organization prohibits redistribution of
# the map files, so to use this script you must obtain
# JIS0201.TXT, JIS0208.TXT and JIS0212.TXT yourself from
# the organization's ftp site.
13#
14# JIS0201.TXT format:
15#		 JIS0201 code in hex
16#		 UCS-2 code in hex
17#		 # and Unicode name (not used in this script)
18#
19# JIS0208.TXT format:
20#		 JIS0208 shift-JIS code in hex
21#		 JIS0208 code in hex
22#		 UCS-2 code in hex
23#		 # and Unicode name (not used in this script)
24#
25# JIS0212.TXT format:
26#		 JIS0212 code in hex
27#		 UCS-2 code in hex
28#		 # and Unicode name (not used in this script)
29
30require "ucs2utf.pl";
31
32# first generate UTF-8 --> EUC_JP table
33
#
# JIS0201
#
# Read JIS0201.TXT and record each mapping in the global %array, keyed by
# the value ucs2utf() returns for the UCS code point and holding the
# JIS0201 code with the single-shift-2 prefix OR-ed in.  $count tracks the
# number of entries stored.  This is the first loader of the UTF-8 -->
# EUC_JP pass, so it also empties the table and zeroes the counter.
#
{
	my $in_file = "JIS0201.TXT";

	open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

	# Clear only the mapping table.  (reset 'array' would wipe every
	# package variable whose name starts with 'a', 'r' or 'y'.)
	%array = ();
	$count = 0;

	while (my $line = <$in>)
	{
		# chomp, not chop: a final line without a newline must not lose
		# its last data character.
		chomp $line;
		next if $line =~ /^#/;

		# fields: JIS0201 code, UCS-2 code, then an unused comment
		my ($c, $u) = split ' ', $line;
		next unless defined $u;

		my $ucs  = hex($u);
		my $code = hex($c);

		# only codes and code points at or above 0x80 go into the table
		next unless $code >= 0x80 && $ucs >= 0x0080;

		my $utf = ucs2utf($ucs);
		if (exists $array{$utf})
		{
			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
			next;
		}
		$count++;

		# add single shift 2
		$array{$utf} = ($code | 0x8e00);
	}
	close($in);
}
68
#
# JIS0208
#
# Read JIS0208.TXT and add its mappings to the global %array, keyed by the
# value ucs2utf() returns for the UCS code point and holding the JIS0208
# code OR-ed with 0x8080.  $count is bumped once per entry stored.
#
{
	my $in_file = "JIS0208.TXT";

	open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

	while (my $line = <$in>)
	{
		# chomp, not chop: a final line without a newline must not lose
		# its last data character.
		chomp $line;
		next if $line =~ /^#/;

		# fields: shift-JIS code (unused), JIS0208 code, UCS-2 code,
		# then an unused comment
		my (undef, $c, $u) = split ' ', $line;
		next unless defined $u;

		my $ucs  = hex($u);
		my $code = hex($c);

		# only codes and code points at or above 0x80 go into the table
		next unless $code >= 0x80 && $ucs >= 0x0080;

		my $utf = ucs2utf($ucs);
		if (exists $array{$utf})
		{
			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
			next;
		}
		$count++;

		# set the high bit of both code bytes
		$array{$utf} = ($code | 0x8080);
	}
	close($in);
}
100
#
# JIS0212
#
# Read JIS0212.TXT and add its mappings to the global %array, keyed by the
# value ucs2utf() returns for the UCS code point and holding the JIS0212
# code OR-ed with 0x8f8080.  $count is bumped once per entry stored.
#
{
	my $in_file = "JIS0212.TXT";

	open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

	while (my $line = <$in>)
	{
		# chomp, not chop: a final line without a newline must not lose
		# its last data character.
		chomp $line;
		next if $line =~ /^#/;

		# fields: JIS0212 code, UCS-2 code, then an unused comment
		my ($c, $u) = split ' ', $line;
		next unless defined $u;

		my $ucs  = hex($u);
		my $code = hex($c);

		# only codes and code points at or above 0x80 go into the table
		next unless $code >= 0x80 && $ucs >= 0x0080;

		my $utf = ucs2utf($ucs);
		if (exists $array{$utf})
		{
			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
			next;
		}
		$count++;

		# prefix the 0x8f byte and set the high bit of both code bytes
		$array{$utf} = ($code | 0x8f8080);
	}
	close($in);
}
132
# Write the contents of %array (UTF-8 value => EUC_JP code) to
# utf8_to_euc_jp.map as a C array initializer, entries sorted numerically
# by key.
{
	my $file = "utf8_to_euc_jp.map";
	open(my $out, '>', $file) or die("cannot open $file: $!");

	# Take the element count from the table itself so that the declared
	# array size and the punctuation of the last element always match what
	# is actually written.
	my @utf_keys = sort { $a <=> $b } keys(%array);
	my $entries  = scalar(@utf_keys);

	print $out "/* src/backend/utils/mb/Unicode/$file */\n\n";
	print $out "static const pg_utf_to_local ULmapEUC_JP[ $entries ] = {\n";

	for my $i (0 .. $#utf_keys)
	{
		my $utf = $utf_keys[$i];

		# every element but the last is followed by a comma
		my $sep = ($i == $#utf_keys) ? "" : ",";
		printf $out "  {0x%04x, 0x%04x}%s\n", $utf, $array{$utf}, $sep;
	}

	print $out "};\n";

	# Buffered write errors only surface at close, so check it.
	close($out) or die("cannot write $file: $!");

	# The code that follows keeps incrementing $count without resetting
	# it, so leave the counter at zero once the table has been written.
	$count = 0;
}
155
156#
157# then generate EUC_JP --> UTF8 table
158#
159
#
# JIS0201
#
# Read JIS0201.TXT and record each mapping in the global %array, this time
# keyed by the EUC_JP code (JIS0201 code with the single-shift-2 prefix
# OR-ed in) and holding the value ucs2utf() returns for the UCS code point.
# $count tracks the number of entries stored.  This is the first loader of
# the EUC_JP --> UTF8 pass, so it also empties the table and zeroes the
# counter.
#
{
	my $in_file = "JIS0201.TXT";

	open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

	# Discard the entries left over from the UTF-8 --> EUC_JP pass and
	# start counting afresh.  (reset 'array' would wipe every package
	# variable whose name starts with 'a', 'r' or 'y', and would not
	# touch $count.)
	%array = ();
	$count = 0;

	while (my $line = <$in>)
	{
		# chomp, not chop: a final line without a newline must not lose
		# its last data character.
		chomp $line;
		next if $line =~ /^#/;

		# fields: JIS0201 code, UCS-2 code, then an unused comment
		my ($c, $u) = split ' ', $line;
		next unless defined $u;

		my $ucs  = hex($u);
		my $code = hex($c);

		# only codes and code points at or above 0x80 go into the table
		next unless $code >= 0x80 && $ucs >= 0x0080;

		# add single shift 2
		my $euc = $code | 0x8e00;

		# The duplicate test must use the same key the entry is stored
		# under; testing the raw JIS code could never match anything.
		if (exists $array{$euc})
		{
			printf STDERR "Warning: duplicate code: %04x\n", $euc;
			next;
		}
		$count++;

		$array{$euc} = ucs2utf($ucs);
	}
	close($in);
}
195
#
# JIS0208
#
# Read JIS0208.TXT and add its mappings to the global %array, keyed by the
# EUC_JP code (JIS0208 code OR-ed with 0x8080) and holding the value
# ucs2utf() returns for the UCS code point.  $count is bumped once per
# entry stored.
#
{
	my $in_file = "JIS0208.TXT";

	open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

	while (my $line = <$in>)
	{
		# chomp, not chop: a final line without a newline must not lose
		# its last data character.
		chomp $line;
		next if $line =~ /^#/;

		# fields: shift-JIS code (unused), JIS0208 code, UCS-2 code,
		# then an unused comment
		my (undef, $c, $u) = split ' ', $line;
		next unless defined $u;

		my $ucs  = hex($u);
		my $code = hex($c);

		# only codes and code points at or above 0x80 go into the table
		next unless $code >= 0x80 && $ucs >= 0x0080;

		# set the high bit of both code bytes
		my $euc = $code | 0x8080;

		# The duplicate test must use the same key the entry is stored
		# under; testing the raw JIS code could never match anything.
		if (exists $array{$euc})
		{
			printf STDERR "Warning: duplicate code: %04x\n", $euc;
			next;
		}
		$count++;

		$array{$euc} = ucs2utf($ucs);
	}
	close($in);
}
228
#
# JIS0212
#
# Read JIS0212.TXT and add its mappings to the global %array, keyed by the
# EUC_JP code (JIS0212 code OR-ed with 0x8f8080) and holding the value
# ucs2utf() returns for the UCS code point.  $count is bumped once per
# entry stored.
#
{
	my $in_file = "JIS0212.TXT";

	open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

	while (my $line = <$in>)
	{
		# chomp, not chop: a final line without a newline must not lose
		# its last data character.
		chomp $line;
		next if $line =~ /^#/;

		# fields: JIS0212 code, UCS-2 code, then an unused comment
		my ($c, $u) = split ' ', $line;
		next unless defined $u;

		my $ucs  = hex($u);
		my $code = hex($c);

		# only codes and code points at or above 0x80 go into the table
		next unless $code >= 0x80 && $ucs >= 0x0080;

		# prefix the 0x8f byte and set the high bit of both code bytes
		my $euc = $code | 0x8f8080;

		# The duplicate test must use the same key the entry is stored
		# under; testing the raw JIS code could never match anything.
		if (exists $array{$euc})
		{
			printf STDERR "Warning: duplicate code: %04x\n", $euc;
			next;
		}
		$count++;

		$array{$euc} = ucs2utf($ucs);
	}
	close($in);
}
261
# Write the contents of %array (EUC_JP code => UTF-8 value) to
# euc_jp_to_utf8.map as a C array initializer, entries sorted numerically
# by key.
{
	my $file = "euc_jp_to_utf8.map";
	open(my $out, '>', $file) or die("cannot open $file: $!");

	# Take the element count from the table itself so that the declared
	# array size and the punctuation of the last element always match what
	# is actually written, however $count was maintained while loading.
	my @euc_codes = sort { $a <=> $b } keys(%array);
	my $entries   = scalar(@euc_codes);

	print $out "/* src/backend/utils/mb/Unicode/$file */\n\n";
	print $out "static const pg_local_to_utf LUmapEUC_JP[ $entries ] = {\n";

	for my $i (0 .. $#euc_codes)
	{
		my $euc = $euc_codes[$i];

		# every element but the last is followed by a comma
		my $sep = ($i == $#euc_codes) ? "" : ",";
		printf $out "  {0x%04x, 0x%04x}%s\n", $euc, $array{$euc}, $sep;
	}

	print $out "};\n";

	# Buffered write errors only surface at close, so check it.
	close($out) or die("cannot write $file: $!");

	# The original countdown loop ended with $count at zero whenever the
	# counter matched the table; keep that end state.
	$count = 0;
}
283