#! /usr/bin/perl
#
# Copyright (c) 2001-2020, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_most.pl
#
# Generate UTF-8 <--> character code conversion tables from
# map files provided by the Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain the map files from the organization's download site.
# https://www.unicode.org/Public/MAPPINGS/
# We assume each file includes three tab-separated columns:
#		 source character set code in hex
#		 UCS-2 code in hex
#		 # and Unicode name (not used in this script)
#
# Usage: UCS_to_most.pl [CHARSET ...]
# Each CHARSET must be one of the keys of %filename below.  With no
# arguments, tables are generated for every character set listed there.

use strict;
use warnings;

use convutils;

# Script path handed to print_conversion_tables() along with each charset.
my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_most.pl';

# Character set name => name of the map file to read for it.
my %filename = (
    'WIN866'     => 'CP866.TXT',
    'WIN874'     => 'CP874.TXT',
    'WIN1250'    => 'CP1250.TXT',
    'WIN1251'    => 'CP1251.TXT',
    'WIN1252'    => 'CP1252.TXT',
    'WIN1253'    => 'CP1253.TXT',
    'WIN1254'    => 'CP1254.TXT',
    'WIN1255'    => 'CP1255.TXT',
    'WIN1256'    => 'CP1256.TXT',
    'WIN1257'    => 'CP1257.TXT',
    'WIN1258'    => 'CP1258.TXT',
    'ISO8859_2'  => '8859-2.TXT',
    'ISO8859_3'  => '8859-3.TXT',
    'ISO8859_4'  => '8859-4.TXT',
    'ISO8859_5'  => '8859-5.TXT',
    'ISO8859_6'  => '8859-6.TXT',
    'ISO8859_7'  => '8859-7.TXT',
    'ISO8859_8'  => '8859-8.TXT',
    'ISO8859_9'  => '8859-9.TXT',
    'ISO8859_10' => '8859-10.TXT',
    'ISO8859_13' => '8859-13.TXT',
    'ISO8859_14' => '8859-14.TXT',
    'ISO8859_15' => '8859-15.TXT',
    'ISO8859_16' => '8859-16.TXT',
    'KOI8R'      => 'KOI8-R.TXT',
    'KOI8U'      => 'KOI8-U.TXT',
    'GBK'        => 'CP936.TXT');

# make maps for all encodings if not specified
my @charsets = (scalar(@ARGV) > 0) ? @ARGV : sort keys(%filename);

# Reject unknown names before generating anything: otherwise an undefined
# file name would be passed to read_source(), and the resulting failure
# would not say which argument was wrong.
my @unknown = grep { !exists $filename{$_} } @charsets;
if (@unknown)
{
    die "$0: unknown character set(s): @unknown\n"
      . "known character sets: "
      . join(' ', sort keys(%filename)) . "\n";
}

foreach my $charset (@charsets)
{
    # read_source() and print_conversion_tables() are not defined in this
    # file; presumably they are provided by the convutils module.
    my $mapping = read_source($filename{$charset});

    print_conversion_tables($this_script, $charset, $mapping);
}