#!/usr/bin/env perl
# $Id: make_cmap,v 1.5 2003/08/05 16:26:37 s42335 Exp $
#
# usage: make_cmap encodings1 format1 platform1 platspec1 lang1
#                  ...
#                  encodingsn formatn platformn platspecn langn
#
# build a cmap (Character code MAPping) table and dump it into stdout.
#
# NOTE: the source format is *NOT* compatible with disp_cmap.
#
# BUGS: only format 0, 2, 4 are supported.
#
# 2002/2/3, by 1@2ch
# * public domain *
#

use strict;
use warnings;

# Make the directory containing this script searchable so that lib_util.pl
# (which supplies getline, suint8/suint16/ssint16 and the w* writers used
# below) can be required from next to the script.
# $p is kept as a package variable, as it was originally.
our $p = $0;
$p =~ s:[^/]+$::;
# When $0 has no directory part the substitution leaves an empty string;
# an empty @INC entry does not mean "current directory", so spell it out.
$p = './' if $p eq '';
push(@INC, $p);
require 'lib_util.pl';

# Print the command-line synopsis and terminate with exit status 1.
sub usage {
    print "usage: make_cmap encodings1 format1 plat1 platspec1 lang1 ...\n";
    exit 1;
}

$ARGV[0] || usage();


## initialization

# Running byte offset of the next subtable, relative to the first subtable.
our $globalPos = 0;
# Parallel arrays, one entry per generated subtable.
our @platformIDs = ();
our @platformSpecificIDs = ();
our @globalOffsets = ();
our @subTables = ();

# Not referenced anywhere in this file; retained as a package variable.
our $globalOffset = 0;


# here @enc maps sjis-charcodes to glyphs.
# (index: character code, value: glyph number; unmapped codes are undef/0)
our @enc = ();

# readenc(FILE)
#   Reset @enc and fill it from the mapping table FILE.  Each line returned
#   by getline() is split on whitespace; the first field is the character
#   code and the second the glyph number.  Further fields are ignored.
#   Dies if FILE cannot be opened.
sub readenc($) {
    my ($file) = @_;

    @enc = ();
    open(IN, '<', $file) || die("open: $file: $!");
    # getline() comes from lib_util.pl and is handed the handle by name,
    # exactly as the bareword IN was passed before 'use strict'.
    while (my $line = getline('IN')) {
        # Split explicitly into an array: split in void context no longer
        # stores its result in @_.  The ' ' pattern also skips leading
        # whitespace, so an indented line does not yield an empty field.
        my @fields = split(' ', $line);
        next unless @fields;
        if (@fields < 2) {
            warn("$file: ignoring malformed line: $line\n");
            next;
        }
        # NOTE(review): both fields are run through string eval so that
        # Perl numeric literals such as 0x8140 are accepted.  This executes
        # whatever the mapping file contains; only feed it trusted files.
        my $code  = eval($fields[0]);
        my $glyph = eval($fields[1]);
        unless (defined($code) && defined($glyph)) {
            warn("$file: ignoring unparsable line: $line\n");
            next;
        }
        $enc[$code] = $glyph;
    }
    close(IN);
}

# addSubTable(PID, PSID, DATA)
#   Record a finished subtable: remember its platform ID, platform-specific
#   ID and its offset (the current $globalPos), store the packed DATA and
#   advance $globalPos by the length of DATA.
sub addSubTable {
    my ($pid, $psid, $data) = @_;

    push(@platformIDs, $pid);
    push(@platformSpecificIDs, $psid);
    push(@globalOffsets, $globalPos);
    push(@subTables, $data);
    $globalPos += length($data);
}


# Format 0 (fixed)
# doFormat0(PID, PSID, LANGID)
#   Build a format 0 subtable from @enc[0..255] (one byte per glyph) and
#   register it.
sub doFormat0($$$) {
    my ($pid, $psid, $langid) = @_;

    my $s = '';
    # format 0
    $s .= suint16(0);
    # length 262 (fixed)
    $s .= suint16(2+2+2+256);
    # language
    $s .= suint16($langid);
    # glyph index array; unmapped codes become glyph 0
    for my $i (0 .. 255) {
        $s .= suint8($enc[$i] || 0);
    }

    addSubTable($pid, $psid, $s);
}

# Format 2
# doFormat2(PID, PSID, LANGID)
#   Build a format 2 (high-byte mapping) subtable from @enc and register it.
#   Codes 0..255 go into the first subheader; for every high byte 1..255
#   that has at least one mapped low byte, a subheader covering the span
#   from the first to the last mapped low byte is generated.
sub doFormat2($$$) {
    my ($pid, $psid, $langid) = @_;

    # GENERATE indexArrays in advance

    my $numSubHeaders = 0;
    my @indexArrays = ();
    my @indexArrayOffsets = ();
    my @subHeaderFirstCode = ();
    my @subHeaderEntryCount = ();
    my @subHeaderIdDelta = ();
    my @subHeaderArrayRef = ();
    # Must be a lexical array: it has to start empty for every subtable so
    # that exactly 256 keys are written.
    my @subHeaderKeys = ();

    # subheader 1: all 256 single-byte codes, no delta
    push(@subHeaderFirstCode, 0);
    push(@subHeaderEntryCount, 256);
    push(@subHeaderIdDelta, 0);
    push(@subHeaderArrayRef, 0);
    $numSubHeaders++;
    # key for high byte 0: 8*1, i.e. the subheader that is added next
    push(@subHeaderKeys, 8*$numSubHeaders);
    my $singleByteArray = '';
    for my $i (0 .. 255) {
        $singleByteArray .= suint16($enc[$i] || 0);
    }
    push(@indexArrays, $singleByteArray);

    # dummy subheader (contains no glyphs)
    push(@subHeaderFirstCode, 0);
    push(@subHeaderEntryCount, 0);
    push(@subHeaderIdDelta, 0);
    push(@subHeaderArrayRef, -1);
    $numSubHeaders++;

    for my $b1 (1 .. 255) {
        # SCAN the line.
        my ($b2left, $b2right);
        # search from left
        for ($b2left = 0; $b2left < 256; $b2left++) {
            last if ($enc[($b1 << 8) | $b2left]);
        }
        # search from right
        for ($b2right = 255; 0 <= $b2right; $b2right--) {
            last if ($enc[($b1 << 8) | $b2right]);
        }
        #print STDERR "$b1: $b2left - $b2right\n";
        if ($b2left <= $b2right) {
            # CHARS found.
            # compute glyph delta: (smallest mapped glyph) - 1, so that the
            # stored values are all >= 1 and 0 stays "no glyph"
            my $delta = 65535;
            for my $b2 ($b2left .. $b2right) {
                my $c = ($b1 << 8) | $b2;
                $delta = $enc[$c] if ($enc[$c] && $enc[$c] < $delta);
            }
            $delta--;
            # compute indexArray
            my $indexArray1 = '';
            for my $b2 ($b2left .. $b2right) {
                my $g = $enc[($b1 << 8) | $b2] || 0;
                $g -= $delta if ($g);
                $indexArray1 .= suint16($g);
            }
            # check if it's duplicated?
            my $n;
            for ($n = 0; $n < @indexArrays; $n++) {
                last if ($indexArrays[$n] eq $indexArray1);
            }
            # if the index of array is a new one, $n must be @indexArrays,
            # which is (the index of the last indexArrays) + 1.  so
            # it will be automatically added.
            $indexArrays[$n] = $indexArray1;
            push(@subHeaderFirstCode, $b2left);
            push(@subHeaderEntryCount, $b2right - $b2left + 1);
            push(@subHeaderIdDelta, $delta);
            push(@subHeaderArrayRef, $n);
            push(@subHeaderKeys, 8*$numSubHeaders);
            $numSubHeaders++;
        } else {
            # CHARS notfound: key 0 selects the first subheader
            push(@subHeaderKeys, 0);
        }
    }

    # COMPUTE offsets to the subtables
    my $offset = 0;
    foreach my $array (@indexArrays) {
        push(@indexArrayOffsets, $offset);
        $offset += length($array);
    }

    # finally GENERATE the subtable.

    my $s = '';
    # format 2
    $s .= suint16(2);
    # length
    $s .= suint16(2+2+2 + 2*256 + 8*$numSubHeaders + $offset);
    # language
    $s .= suint16($langid);
    # subHeaderKeys
    foreach my $key (@subHeaderKeys) {
        $s .= suint16($key);
    }
    # subHeaders
    # $toIndexArray is the distance from the idRangeOffset field of the
    # current subheader to the start of the index arrays; each subheader is
    # 8 bytes and the field sits 6 bytes into it.
    my $toIndexArray = 8*$numSubHeaders - 6;
    for my $i (0 .. $numSubHeaders - 1) {
        # firstCode
        $s .= suint16($subHeaderFirstCode[$i]);
        # entryCount
        $s .= suint16($subHeaderEntryCount[$i]);
        # idDelta
        $s .= suint16($subHeaderIdDelta[$i]);
        # idRangeOffset
        my $ref = $subHeaderArrayRef[$i];
        if ($ref == -1) {
            $s .= suint16(0);
        } else {
            $s .= suint16($indexArrayOffsets[$ref] + $toIndexArray);
        }
        $toIndexArray -= 8;
    }
    # indexArrays
    foreach my $array (@indexArrays) {
        $s .= $array;
    }

    addSubTable($pid, $psid, $s);
}


# Format 4
# doFormat4(PID, PSID, LANGID)
#   Build a format 4 (segment mapping to delta values) subtable from @enc
#   for codes 0..0xfffe, terminated by the 0xffff segment, and register it.
sub doFormat4($$$) {
    my ($pid, $psid, $langid) = @_;

    my @startCounts = ();
    my @endCounts = ();
    my @idDeltas = ();

    my ($c, $segCount) = (0, 0);
    while ($c < 0xffff) {
        # search a first char of a segment
        while ($c < 0xffff && !$enc[$c]) { $c++; }
        if ($c < 0xffff) {
            my $g = $enc[$c];
            push(@startCounts, $c);
            push(@idDeltas, $g - $c);
            # search contiguous chars.  The run is cut before 0xffff: that
            # code belongs to the terminating segment appended below.
            while ($c < 0xffff && defined($enc[$c]) && $enc[$c] == $g) {
                $c++;
                $g++;
            }
            push(@endCounts, $c - 1);
            $segCount++;
        }
    }
    # terminating segment: 0xffff + 1 wraps to glyph 0
    push(@startCounts, 0xffff);
    push(@endCounts, 0xffff);
    push(@idDeltas, 1);
    $segCount++;

    # patch by 18, Sep. 13/2002
    # Unify contiguous segments whose length is 1.
    my ($seg, $endseg);
    my @glyphIdArray = ();
    my @idRangeOffset = ();
    for ($seg = 0; $seg < $segCount - 1; $seg++) {
        if ($startCounts[$seg] == $endCounts[$seg] &&
            $startCounts[$seg + 1] < 0xffff) {
            # find the last single-code segment that directly follows
            for ($endseg = $seg + 1; $endseg < $segCount; $endseg++) {
                last if ($startCounts[$endseg] != $endCounts[$endseg] ||
                         $startCounts[$endseg] != $startCounts[$endseg-1] + 1);
            }
            $endseg--;
            if ($seg < $endseg) {
                $endCounts[$seg] = $endCounts[$endseg];
                $idRangeOffset[$seg] = scalar(@glyphIdArray) + 1; # +1 for >0
                # turn the deltas of the merged segments back into glyph
                # numbers; they move into glyphIdArray
                foreach my $k ($seg .. $endseg) {
                    $idDeltas[$k] += $startCounts[$k];
                }
                push(@glyphIdArray, @idDeltas[$seg .. $endseg]);
                $idDeltas[$seg] = 0;
                $segCount -= $endseg - $seg;
                splice(@startCounts, $seg + 1, $endseg - $seg);
                splice(@endCounts, $seg + 1, $endseg - $seg);
                splice(@idDeltas, $seg + 1, $endseg - $seg);
            }
        }
    }
    # Convert the 1-based glyphIdArray positions into byte offsets measured
    # from each segment's own idRangeOffset slot; 0 means "use idDelta".
    for ($seg = 0; $seg < $segCount; $seg++) {
        if ($idRangeOffset[$seg]) {
            $idRangeOffset[$seg] += ($segCount - $seg - 1);
            $idRangeOffset[$seg] *= 2;
        } else {
            $idRangeOffset[$seg] = 0;
        }
    }

    # searchRange: twice the largest power of two not exceeding $segCount;
    # entrySelector: the exponent of that power of two.
    my ($searchRange, $entrySelector) = (2, 1);
    while ($searchRange <= $segCount) {
        $searchRange *= 2;
        $entrySelector++;
    }
    $entrySelector--;

    # make subtable
    my $s = '';
    # format 4
    $s .= suint16(4);
    # length
    $s .= suint16(2+2+2 + 8 + 8*$segCount+2 +
                  2*scalar(@glyphIdArray));
    # language
    $s .= suint16($langid);

    $s .= suint16(2*$segCount);
    $s .= suint16($searchRange);
    $s .= suint16($entrySelector);
    $s .= suint16(2*$segCount - $searchRange);
    # print STDERR "$segCount, $searchRange, $entrySelector\n";
    foreach my $v (@endCounts) {
        $s .= suint16($v);
    }
    $s .= suint16(0x0000); # reservedPad
    foreach my $v (@startCounts) {
        $s .= suint16($v);
    }
    foreach my $v (@idDeltas) {
        $s .= ssint16($v);
    }
    foreach my $v (@idRangeOffset) {
        $s .= suint16($v);
    }
    foreach my $v (@glyphIdArray) {
        $s .= suint16($v);
    }

    addSubTable($pid, $psid, $s);
}


## write cmap.

# writeCmap()
#   Write the cmap header, one 8-byte encoding record per registered
#   subtable and then the subtables themselves to STDOUT, using the w*
#   writers from lib_util.pl.
sub writeCmap() {
    wopen('>&STDOUT');
    # version number (0x0000)
    wuint16(0x0000);
    # numberSubtables
    my $numSubtables = scalar(@subTables);
    wuint16($numSubtables);
    # stored offsets are relative to the first subtable; add the header size
    my $headerLen = 2+2 + 8*$numSubtables;
    for my $i (0 .. $numSubtables - 1) {
        wuint16($platformIDs[$i]);
        wuint16($platformSpecificIDs[$i]);
        wuint32($globalOffsets[$i] + $headerLen);
    }
    foreach my $table (@subTables) {
        wstrn($table);
    }
    wclose();
}


## main

# supported format: 0, 2, 4

# platformID: 0 (Unicode)
#   platformSpecificID: 0 (Unicode 1.0)
#   platformSpecificID: 1 (Unicode 1.1)
#   platformSpecificID: 2 (ISO 10646:1993)
#   platformSpecificID: 3 (Unicode 2.0)
#   languageID: (not defined in Unicode)
#
# platformID: 1 (Macintosh)
#   platformSpecificID: 0 (Roman)
#   platformSpecificID: 1 (Japanese)
#   platformSpecificID: 2 (Chinese)
#   ...
#   languageID: 0 (English)
#   languageID: 11 (Japanese)
#   ...
#
# platformID: 3 (Microsoft)
#   platformSpecificID: 0 (Symbol)
#   platformSpecificID: 1 (Unicode)
#   platformSpecificID: 2 (ShiftJIS)
#   ...
#   languageID: 1033 (US English)
#   languageID: 1041 (Japanese)
#   ...
#

# make_cmap encodings1 format1 platformID1 platformSpecificID1 languageID1
#           encodings2 format2 platformID2 platformSpecificID2 languageID2
#           ...

# prepare subtables
while (@ARGV) {
    my $f = shift(@ARGV);
    my $fmt = shift(@ARGV);
    my $pid = shift(@ARGV);
    my $psid = shift(@ARGV);
    my $langid = shift(@ARGV);

    # A missing format would otherwise compare equal to 0 and silently
    # select format 0.
    usage() unless defined($fmt);
    # Likewise a non-numeric format must not numify to 0.
    die("unsupported format: $fmt") unless $fmt =~ /^\d+$/;

    readenc($f);
    if ($fmt == 0) {
        doFormat0($pid, $psid, $langid);
    } elsif ($fmt == 2) {
        doFormat2($pid, $psid, $langid);
    } elsif ($fmt == 4) {
        doFormat4($pid, $psid, $langid);
    } else {
        die("unsupported format: $fmt");
    }
}

## write it.
writeCmap();