#! /usr/bin/perl
#
# Copyright (c) 2007-2020, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
#
# Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
# "euc-jis-2004-std.txt" (http://x0213.org)

use strict;
use warnings;

use convutils;

my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl';

# Collect one mapping entry per usable line of the input file.  The entries
# are handed to print_conversion_tables() (from convutils) at the end.

my $in_file = "euc-jis-2004-std.txt";

# Include $! so that a failure reports why the file could not be opened.
open(my $in, '<', $in_file) || die("cannot open $in_file: $!");

# A run of hexadecimal digits.  The fields are matched with this instead of
# a greedy ".*" so that the whitespace separating the columns is never passed
# to hex() (which warns about non-hex characters under "use warnings"), and
# so that a '#' inside the trailing comment cannot extend the code point
# field.
my $hex = qr/[0-9A-Fa-f]+/;

my @all;

while (my $line = <$in>)
{
	if ($line =~ /^0x(${hex})[ \t]*U\+(${hex})\+(${hex})([ \t]*)#(.*)$/)
	{

		# combined characters: one code maps to a pair of code points
		my ($c, $u1, $u2, $sep, $text) = ($1, $2, $3, $4, $5);

		# The separator between the last code point and '#' is kept in
		# the comment, so the emitted comment text is unchanged.
		my $rest = "U+" . $u1 . "+" . $u2 . $sep . $text;
		my $code = hex($c);
		my $ucs1 = hex($u1);
		my $ucs2 = hex($u2);

		push @all,
		  {
			direction  => BOTH,
			ucs        => $ucs1,
			ucs_second => $ucs2,
			code       => $code,
			comment    => $rest,
			f          => $in_file,
			l          => $.
		  };
	}
	elsif ($line =~ /^0x(${hex})[ \t]*U\+(${hex})([ \t]*)#(.*)$/)
	{

		# non-combined characters: one code maps to a single code point
		my ($c, $u, $sep, $text) = ($1, $2, $3, $4);

		# As above, keep the separator so the comment text is unchanged.
		my $rest = "U+" . $u . $sep . $text;
		my $ucs  = hex($u);
		my $code = hex($c);

		# Skip entries where both the code and the code point are
		# below 0x80.
		next if ($code < 0x80 && $ucs < 0x80);

		push @all,
		  {
			direction => BOTH,
			ucs       => $ucs,
			code      => $code,
			comment   => $rest,
			f         => $in_file,
			l         => $.
		  };
	}

	# Lines matching neither pattern are ignored.
}
close($in);

print_conversion_tables($this_script, "EUC_JIS_2004", \@all);