#! /usr/bin/perl
#
# Copyright (c) 2001-2020, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
#
# Generate UTF-8 <--> EUC_TW code conversion tables from
# map files provided by the Unicode organization.
# Unfortunately the organization prohibits redistribution of
# the map files, so to use this script you have to obtain
# CNS11643.TXT from the organization's ftp site.
#
# CNS11643.TXT format:
#		 CNS11643 code in hex (3 bytes)
#		 (I guess the first byte means the plane No.)
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)

use strict;
use warnings;

use convutils;

my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl';

my $mapping = read_source("CNS11643.TXT");

# Entries that were successfully converted to EUC_TW codes, in input order.
my @converted;

# Additional one-way (EUC_TW -> Unicode) entries, appended after the
# regular ones.
my @extras;

foreach my $entry (@$mapping)
{
    my $origcode = $entry->{code};

    # The plane number is taken from bits 16-20 of the CNS 11643 code.
    my $plane = ($origcode & 0x1f0000) >> 16;
    if ($plane > 16)
    {
        # Drop the entry entirely: it is not added to @converted, so its
        # unconverted code never reaches the generated tables.
        print STDERR "Warning: invalid plane No.$plane. ignored\n";
        next;
    }

    # Set the high bit of each of the two low-order bytes.
    my $low_bytes = ($origcode & 0xffff) | 0x8080;

    my $code;
    if ($plane == 1)
    {
        # Plane 1 uses the plain two-byte form.
        $code = $low_bytes;
    }
    else
    {
        # Other planes use a four-byte form: 0x8e, then 0xa0 + plane,
        # followed by the two low-order bytes.
        $code = (0x8ea00000 + ($plane << 16)) | $low_bytes;
    }
    $entry->{code} = $code;
    push @converted, $entry;

    # Some codes are mapped twice in the EUC_TW to UTF-8 table: for
    # original codes in this range, also emit the four-byte form
    # (two-byte code + 0x8ea10000), in the TO_UNICODE direction only.
    if ($origcode >= 0x12121 && $origcode <= 0x20000)
    {
        push @extras,
          {
            ucs       => $entry->{ucs},
            code      => ($code + 0x8ea10000),
            rest      => $entry->{rest},
            direction => TO_UNICODE,
            f         => $entry->{f},
            l         => $entry->{l}
          };
    }
}

push @converted, @extras;

print_conversion_tables($this_script, "EUC_TW", \@converted);