#! /usr/bin/perl
#
# Copyright (c) 2001-2020, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
#
# Generate UTF-8 <--> BIG5 conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain the map files from the organization's download site.
# https://www.unicode.org/Public/MAPPINGS/
#
# Our "big5" comes from BIG5.TXT, with the addition of the characters
# in the range 0xf9d6-0xf9dc from CP950.TXT.
#
# BIG5.TXT format:
#		 BIG5 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# CP950.TXT format:
#		 CP950 code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)

use strict;
use warnings;

use convutils;

my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_BIG5.pl';

# Bounds of the ETEN extended characters that are taken from CP950.TXT.
my $eten_first = 0xf9d6;
my $eten_last  = 0xf9dc;

# U+FFFD, and the single BIG5 code that is allowed to map to it in both
# directions (see the loop over @$all below).
my $ucs_replacement      = 0xFFFD;
my $big5_for_replacement = 0xA15A;

# Load BIG5.TXT.  The result is used as a reference to an array of hash
# references, one per mapping entry; it becomes the master list that is
# extended and adjusted below.
my $all = read_source("BIG5.TXT");

# Load CP950.TXT
my $cp950txt = read_source("CP950.TXT");

foreach my $entry (@$cp950txt)
{
    my $code = $entry->{code};
    my $ucs  = $entry->{ucs};

    # Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
    # from CP950.TXT
    next if $code < $eten_first || $code > $eten_last;

    # Skip entries whose Unicode code point is below U+0080.
    next if $ucs < 0x0080;

    # Append a fresh entry mapped in both directions.  comment, f and l
    # are carried over unchanged from the CP950.TXT entry.
    push @$all,
      {
        code      => $code,
        ucs       => $ucs,
        comment   => $entry->{comment},
        direction => BOTH,
        f         => $entry->{f},
        l         => $entry->{l}
      };
}

foreach my $entry (@$all)
{
    # BIG5.TXT maps several BIG5 characters to U+FFFD. The UTF-8 to BIG5 mapping can
    # contain only one of them. XXX: Doesn't really make sense to include any of them,
    # but for historical reasons, we map the first one of them.
    if (   $entry->{ucs} == $ucs_replacement
        && $entry->{code} != $big5_for_replacement)
    {
        # Every other BIG5 code that maps to U+FFFD is kept for the
        # BIG5 -> Unicode direction only.
        $entry->{direction} = TO_UNICODE;
    }
}

# Output
print_conversion_tables($this_script, "BIG5", $all);