#! /usr/bin/perl
#
# Copyright (c) 2007-2020, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
#
# Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
# "sjis-0213-2004-std.txt" (http://x0213.org)

use strict;
use warnings;

use convutils;

# Path of this script, handed to print_conversion_tables() below.
my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl';

# Mapping file; it is opened relative to the current working directory.
my $in_file = "sjis-0213-2004-std.txt";

# Include $! so that a failure reports the reason (missing file,
# permissions, ...) and not just the file name.
open(my $in, '<', $in_file) || die("cannot open $in_file: $!");

# One hash per mapped character, in input-file order.
my @mapping;

while (my $line = <$in>)
{
	# Lines matching neither pattern below are silently ignored.
	#
	# NOTE(review): the (.*) captures are greedy and the following
	# [ \t]* may match nothing, so a capture can include the whitespace
	# that precedes the next field.  The captured text is passed to hex()
	# and copied into the comment as-is; the patterns are deliberately
	# left untouched so the generated output does not change.
	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
	{

		# combined characters: "0xCODE U+XXXX+YYYY # comment"
		my ($c, $u1, $u2) = ($1, $2, $3);
		my $rest = "U+" . $u1 . "+" . $u2 . $4;
		my $code = hex($c);
		my $ucs1 = hex($u1);
		my $ucs2 = hex($u2);

		# Combined entries are always recorded with direction BOTH.
		push @mapping,
		  {
			code       => $code,
			ucs        => $ucs1,
			ucs_second => $ucs2,
			comment    => $rest,
			direction  => BOTH,
			f          => $in_file,
			l          => $.
		  };
	}
	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
	{

		# non-combined characters: "0xCODE U+XXXX # comment"
		my ($c, $u, $rest) = ($1, $2, "U+" . $2 . $3);
		my $ucs  = hex($u);
		my $code = hex($c);
		my $direction;

		# Choose the direction from which side is below 0x80:
		#   both sides  -> no entry is emitted at all
		#   only code   -> FROM_UNICODE
		#   only ucs    -> TO_UNICODE
		#   neither     -> BOTH
		if ($code < 0x80 && $ucs < 0x80)
		{
			next;
		}
		elsif ($code < 0x80)
		{
			$direction = FROM_UNICODE;
		}
		elsif ($ucs < 0x80)
		{
			$direction = TO_UNICODE;
		}
		else
		{
			$direction = BOTH;
		}

		# f and l record the source file name and input line number ($.).
		push @mapping,
		  {
			code      => $code,
			ucs       => $ucs,
			comment   => $rest,
			direction => $direction,
			f         => $in_file,
			l         => $.
		  };
	}
}
close($in);

# Hand the collected mappings to convutils to emit the tables.
print_conversion_tables($this_script, "SHIFT_JIS_2004", \@mapping);