#!/usr/bin/env python3

# Parse a CMap file and dump it as a C struct.

import sys

# Decode a subset of CMap syntax (only what is needed for our built-in resources)
# We require that tokens are whitespace separated.

def dumpcmap(filename):
    """Parse the CMap file *filename* and print it to stdout as C source.

    Only a whitespace-tokenised subset of the CMap syntax is understood:
    ``/CMapName``, ``/WMode``, ``usecmap`` and the codespacerange, cidrange,
    cidchar, bfrange and bfchar sections.  Lines that match none of these
    are ignored.

    The output consists of optional ``pdf_range``, ``pdf_xrange`` and
    ``pdf_mrange`` arrays (plus an ``int`` table holding the one-to-many
    data) followed by a ``pdf_cmap`` struct initialiser referencing them.

    Raises whatever ``open`` raises for an unreadable file, ``ValueError``
    for tokens that are not valid numbers, and ``IndexError`` for a
    ``/CMapName`` or ``/WMode`` line without a value.
    """
    codespacerange = []
    usecmap = ""
    cmapname = ""
    wmode = 0

    # Source code -> destination value.  The value is an int for one-to-one
    # mappings and a list of ints for one-to-many mappings.
    mapping = {}

    # Operators that open a section, mapped to the section name.
    sections = {
        'begincodespacerange': 'codespacerange',
        'begincidrange': 'cidrange',
        'beginbfrange': 'bfrange',
        'begincidchar': 'cidchar',
        'beginbfchar': 'bfchar',
    }

    def tocode(s):
        # <hex> tokens are hexadecimal, everything else is decimal.
        if s[0] == '<' and s[-1] == '>':
            return int(s[1:-1], 16)
        return int(s, 10)

    def parse_bf(bf):
        # Split a <hex string> into a list of 16-bit units (4 hex digits each).
        bf = bf[1:-1]  # drop < >
        return [int(bf[i:i+4], 16) for i in range(0, len(bf), 4)]

    def map_cidchar(lo, v):
        mapping[lo] = v

    def map_cidrange(lo, hi, v):
        while lo <= hi:
            mapping[lo] = v
            lo = lo + 1
            v = v + 1

    def add_bf(lo, v):
        # Decode unicode surrogate pairs
        if len(v) == 2 and 0xd800 <= v[0] <= 0xdbff and 0xdc00 <= v[1] <= 0xdfff:
            mapping[lo] = ((v[0] - 0xd800) << 10) + (v[1] - 0xdc00) + 0x10000
        elif len(v) == 1:
            mapping[lo] = v[0]
        elif len(v) <= 8:
            # Copy: map_bfrange keeps mutating the list it passes in.
            mapping[lo] = v[:]
        else:
            print("/* warning: too long one-to-many mapping: %s */" % (v))

    def map_bfchar(lo, bf):
        add_bf(lo, parse_bf(bf))

    def map_bfrange(lo, hi, bf):
        v = parse_bf(bf)
        while lo <= hi:
            add_bf(lo, v)
            lo = lo + 1
            # Successive codes in the range increment the last unit only.
            v[-1] = v[-1] + 1

    current = None
    # The context manager closes the file even if parsing raises.
    with open(filename, "r") as cmapfile:
        for raw in cmapfile:
            line = raw.split()
            if not line:
                continue
            # Tokenise first so that indented comment lines are skipped too.
            if line[0].startswith('%'):
                continue
            if line[0] == '/CMapName':
                cmapname = line[1][1:]
            elif line[0] == '/WMode':
                wmode = int(line[1])
            elif len(line) > 1 and line[1] == 'usecmap':
                usecmap = line[0][1:]
            elif len(line) > 1 and line[1] in sections:
                # "<count> beginXXX" form.
                current = sections[line[1]]
            elif line[0] in sections:
                # Bare "beginXXX" form.
                current = sections[line[0]]
            elif line[0].startswith("end"):
                current = None
            elif current == 'codespacerange' and len(line) == 2:
                # Byte length of the code: two hex digits per byte, minus < >.
                # Floor division keeps this an int under Python 3.
                n = (len(line[0]) - 2) // 2
                codespacerange.append((n, tocode(line[0]), tocode(line[1])))
            elif current == 'cidrange' and len(line) == 3:
                map_cidrange(tocode(line[0]), tocode(line[1]), tocode(line[2]))
            elif current == 'cidchar' and len(line) == 2:
                map_cidchar(tocode(line[0]), tocode(line[1]))
            elif current == 'bfchar' and len(line) == 2:
                map_bfchar(tocode(line[0]), line[1])
            elif current == 'bfrange' and len(line) == 3:
                map_bfrange(tocode(line[0]), tocode(line[1]), line[2])

    # Create ranges

    ranges = []
    xranges = []
    mranges = []
    mdata = []

    # A negative out_lo means "no range currently open".
    out_lo = -100
    out_hi = -100
    out_v_lo = 0
    out_v_hi = 0

    def flush_range():
        # Emit the currently open range, if any.  Anything that does not fit
        # in 16 bits goes to the extended (xranges) list.
        if out_lo >= 0:
            if out_lo > 0xffff or out_hi > 0xffff or out_v_lo > 0xffff:
                xranges.append((out_lo, out_hi, out_v_lo))
            else:
                ranges.append((out_lo, out_hi, out_v_lo))

    for code in sorted(mapping):
        v = mapping[code]
        if not isinstance(v, int):
            # One-to-many mapping: close any open range and store the value
            # list in mdata as "length, unit, unit, ...".
            flush_range()
            out_lo = out_hi = -100
            mranges.append((code, len(mdata)))
            mdata.append(len(v))
            mdata.extend(v)
        elif code != out_hi + 1 or v != out_v_hi + 1:
            # Not contiguous with the open range: start a new one.
            flush_range()
            out_lo = out_hi = code
            out_v_lo = out_v_hi = v
        else:
            out_hi = out_hi + 1
            out_v_hi = out_v_hi + 1
    flush_range()

    # Print C file

    cname = cmapname.replace('-', '_')

    print()
    print("/*", cmapname, "*/")
    print()

    if ranges:
        print("static const pdf_range cmap_%s_ranges[] = {" % cname)
        for r in ranges:
            print("{%d,%d,%d}," % r)
        print("};")
        print()
    if xranges:
        print("static const pdf_xrange cmap_%s_xranges[] = {" % cname)
        for r in xranges:
            print("{%d,%d,%d}," % r)
        print("};")
        print()
    if mranges:
        print("static const pdf_mrange cmap_%s_mranges[] = {" % cname)
        for r in mranges:
            print("{%d,%d}," % r)
        print("};")
        print()
        print("static const int cmap_%s_table[] = {" % cname)
        # One output row per record: the length followed by that many units.
        n = mdata[0]
        i = 0
        for r in mdata:
            if i <= n:
                sys.stdout.write("%d," % r)
                i = i + 1
            else:
                sys.stdout.write("\n%d," % r)
                i = 1
                n = r
        sys.stdout.write("\n")
        print("};")
        print()

    print("static pdf_cmap cmap_%s = {" % cname)
    print("\t{ -1, pdf_drop_cmap_imp },")
    print("\t/* cmapname */ \"%s\"," % cmapname)
    print("\t/* usecmap */ \"%s\", NULL," % usecmap)
    print("\t/* wmode */ %d," % wmode)
    print("\t/* codespaces */ %d, {" % len(codespacerange))
    if codespacerange:
        for codespace in codespacerange:
            # Pad the hex bounds to the full width of the code (2 digits/byte).
            fmt = "\t\t{ %%d, 0x%%0%dx, 0x%%0%dx }," % (codespace[0]*2, codespace[0]*2)
            print(fmt % codespace)
    else:
        print("\t\t{ 0, 0, 0 },")
    print("\t},")

    if ranges:
        print("\t%d, %d, (pdf_range*)cmap_%s_ranges," % (len(ranges), len(ranges), cname))
    else:
        print("\t0, 0, NULL, /* ranges */")

    if xranges:
        print("\t%d, %d, (pdf_xrange*)cmap_%s_xranges," % (len(xranges), len(xranges), cname))
    else:
        print("\t0, 0, NULL, /* xranges */")

    if mranges:
        print("\t%d, %d, (pdf_mrange*)cmap_%s_mranges," % (len(mranges), len(mranges), cname))
    else:
        print("\t0, 0, NULL, /* mranges */")

    if mdata:
        print("\t%d, %d, (int*)cmap_%s_table," % (len(mdata), len(mdata), cname))
    else:
        print("\t0, 0, NULL, /* table */")

    print("\t0, 0, 0, NULL /* splay tree */")
    print("};")

# Emit the "generated file" banner once, then dump every CMap file named
# on the command line, in the order given.
print("/* This is an automatically generated file. Do not edit. */")

for cmap_path in sys.argv[1:]:
    dumpcmap(cmap_path)