#! /usr/bin/perl
#
# Copyright (c) 2001-2016, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
#
# Generate UTF-8 <--> EUC_JP code conversion tables from
# map files provided by Unicode organization.
# Unfortunately it is prohibited by the organization
# to distribute the map files. So if you try to use this script,
# you have to obtain JIS0201.TXT, JIS0208.TXT, JIS0212.TXT from
# the organization's ftp site.
#
# JIS0201.TXT format:
#     JIS0201 code in hex
#     UCS-2 code in hex
#     # and Unicode name (not used in this script)
#
# JIS0208.TXT format:
#     JIS0208 shift-JIS code in hex
#     JIS0208 code in hex
#     UCS-2 code in hex
#     # and Unicode name (not used in this script)
#
# JIS0212.TXT format:
#     JIS0212 code in hex
#     UCS-2 code in hex
#     # and Unicode name (not used in this script)

use strict;
use warnings;

require "ucs2utf.pl";

# Input map files, in the order in which they are merged.  For each file:
#   file     - map file name, opened relative to the current directory
#   code_col - index of the whitespace-separated column holding the JIS code
#   ucs_col  - index of the column holding the UCS-2 code
#   euc_bits - bits OR'ed into the JIS code to form the EUC_JP code
my @sources = (

    # JIS0201: add single shift 2
    {
        file     => 'JIS0201.TXT',
        code_col => 0,
        ucs_col  => 1,
        euc_bits => 0x8e00
    },

    # JIS0208: the first column is the shift-JIS code, which is not used
    {
        file     => 'JIS0208.TXT',
        code_col => 1,
        ucs_col  => 2,
        euc_bits => 0x8080
    },

    # JIS0212
    {
        file     => 'JIS0212.TXT',
        code_col => 0,
        ucs_col  => 1,
        euc_bits => 0x8f8080
    },);

# Read every map file listed in @sources and return the accepted mappings
# as a list of hash references with the keys:
#   ucs - the UCS-2 code point
#   utf - the value returned by ucs2utf() for that code point
#   euc - the JIS code OR'ed with the file's euc_bits
# Mappings are returned in file order, then in line order within a file.
# Lines starting with '#', lines with too few columns, and lines whose JIS
# code or UCS code is below 0x80 are skipped.
# Dies if a map file cannot be opened.
#
# NOTE(review): ucs2utf() is called once per mapping and its result is used
# for both output tables; this assumes it is a pure function of its
# argument -- confirm against ucs2utf.pl.
sub read_mappings
{
    my @mappings;

    for my $src (@sources)
    {
        my $in_file = $src->{file};

        open(my $in, '<', $in_file) or die("cannot open $in_file: $!");

        while (my $line = <$in>)
        {
            # chomp, not chop: a final line without a newline must not
            # lose its last character.
            chomp $line;
            next if $line =~ /^#/;

            my @fields = split ' ', $line;
            my ($c, $u) = @fields[ $src->{code_col}, $src->{ucs_col} ];
            next unless defined $c && defined $u;

            my $ucs  = hex($u);
            my $code = hex($c);
            next unless $code >= 0x80 && $ucs >= 0x0080;

            push @mappings,
              {
                ucs => $ucs,
                utf => ucs2utf($ucs),
                euc => ($code | $src->{euc_bits}) };
        }
        close($in);
    }

    return @mappings;
}

# Build a lookup table from a list of mappings.
#   $mappings    - array reference as produced by read_mappings()
#   $key_field   - mapping field used as the table key ('utf' or 'euc')
#   $value_field - mapping field stored as the table value
#   $label       - word used in the duplicate warning ("UTF8" or "code")
#   $warn_field  - mapping field printed in the duplicate warning
# The first mapping seen for a key wins; later ones are reported on STDERR
# and dropped.  The key that is checked is the same key that is stored, so
# duplicates are detected in both conversion directions.
# Returns a reference to the table.
sub build_table
{
    my ($mappings, $key_field, $value_field, $label, $warn_field) = @_;
    my %table;

    for my $m (@$mappings)
    {
        my $key = $m->{$key_field};
        if (exists $table{$key})
        {
            printf STDERR "Warning: duplicate %s: %04x\n", $label,
              $m->{$warn_field};
            next;
        }
        $table{$key} = $m->{$value_field};
    }

    return \%table;
}

# Write one conversion table as a C array initializer.
#   $file  - output file name (created or truncated in the current directory)
#   $type  - C element type of the array
#   $name  - C identifier of the array
#   $table - hash reference mapping source code => destination code
# Entries are emitted in ascending numeric key order, separated by commas,
# with no comma after the last entry.  The declared array size is the number
# of entries in the table.
# Dies if the file cannot be opened or closed.
sub write_map
{
    my ($file, $type, $name, $table) = @_;

    my @keys = sort { $a <=> $b } keys %$table;

    open(my $out, '>', $file) or die("cannot open $file: $!");

    print {$out} "/* src/backend/utils/mb/Unicode/$file */\n\n";
    printf {$out} "static const %s %s[ %d ] = {\n", $type, $name,
      scalar(@keys);

    if (@keys)
    {
        print {$out} join(",\n",
            map { sprintf(" {0x%04x, 0x%04x}", $_, $table->{$_}) } @keys),
          "\n";
    }

    print {$out} "};\n";

    # Buffered write errors only surface at close time.
    close($out) or die("cannot close $file: $!");
    return;
}

my @mappings = read_mappings();

# first generate UTF-8 --> EUC_JP table
write_map(
    'utf8_to_euc_jp.map', 'pg_utf_to_local', 'ULmapEUC_JP',
    build_table(\@mappings, 'utf', 'euc', 'UTF8', 'ucs'));

#
# then generate EUC_JP --> UTF8 table
#
write_map(
    'euc_jp_to_utf8.map', 'pg_local_to_utf', 'LUmapEUC_JP',
    build_table(\@mappings, 'euc', 'utf', 'code', 'euc'));