1#!/usr/bin/env python3
2
3# Parse a CMap file and dump it as a C struct.
4
5import sys
6
7# Decode a subset of CMap syntax (only what is needed for our built-in resources)
8# We require that tokens are whitespace separated.
9
def _parse_cmap(filename):
	"""Parse the subset of CMap syntax used by the built-in resources.

	Tokens must be whitespace separated and every statement must sit on
	its own line. Lines whose first token starts with '%' are comments.

	Returns a tuple (cmapname, usecmap, wmode, codespacerange, mapping):
	  cmapname       -- name following /CMapName, without the leading '/'
	  usecmap        -- name preceding 'usecmap', without the leading '/'
	  wmode          -- integer following /WMode (0 if absent)
	  codespacerange -- list of (byte count, low code, high code) tuples
	  mapping        -- dict from source code to either a single int, or a
	                    list of ints for a one-to-many mapping

	Raises ValueError if a code token is neither <hex> nor decimal.
	"""
	section_for_keyword = {
		'begincodespacerange': 'codespacerange',
		'begincidrange': 'cidrange',
		'beginbfrange': 'bfrange',
		'begincidchar': 'cidchar',
		'beginbfchar': 'bfchar',
	}

	cmapname = ""
	usecmap = ""
	wmode = 0
	codespacerange = []
	mapping = {}

	def tocode(token):
		# A code is either a <hex> string or a plain decimal integer.
		if token[0] == '<' and token[-1] == '>':
			return int(token[1:-1], 16)
		return int(token, 10)

	def bf_units(token):
		# Drop the surrounding < > and split into groups of four hex digits.
		digits = token[1:-1]
		return [int(digits[i:i+4], 16) for i in range(0, len(digits), 4)]

	def add_bf(code, units):
		# Decode unicode surrogate pairs
		if len(units) == 2 and 0xd800 <= units[0] <= 0xdbff and 0xdc00 <= units[1] <= 0xdfff:
			mapping[code] = ((units[0] - 0xd800) << 10) + (units[1] - 0xdc00) + 0x10000
		elif len(units) == 1:
			mapping[code] = units[0]
		elif len(units) <= 8:
			# Store a copy: bfrange keeps incrementing the caller's list.
			mapping[code] = units[:]
		else:
			print("/* warning: too long one-to-many mapping: %s */" % (units))

	current = None
	# 'with' guarantees the file is closed even if a token fails to parse.
	with open(filename, "r") as cmap_file:
		for raw in cmap_file:
			line = raw.split()
			# Skip blank lines and comments; checking the first token (not
			# column 0) also catches comments that are indented.
			if not line or line[0].startswith('%'):
				continue
			head = line[0]
			keyword = line[1] if len(line) > 1 else None
			if head == '/CMapName' and keyword is not None:
				cmapname = keyword[1:]
			elif head == '/WMode' and keyword is not None:
				wmode = int(keyword)
			elif keyword == 'usecmap':
				usecmap = head[1:]
			elif keyword in section_for_keyword:
				# "<count> beginXXX" form
				current = section_for_keyword[keyword]
			elif head in section_for_keyword:
				# bare "beginXXX" form
				current = section_for_keyword[head]
			elif head.startswith("end"):
				current = None
			elif current == 'codespacerange' and len(line) == 2:
				# Two hex digits per byte, minus the < > delimiters.
				nbytes = (len(head) - 2) // 2
				codespacerange.append((nbytes, tocode(head), tocode(keyword)))
			elif current == 'cidrange' and len(line) == 3:
				lo, hi, cid = tocode(line[0]), tocode(line[1]), tocode(line[2])
				for code in range(lo, hi + 1):
					mapping[code] = cid + (code - lo)
			elif current == 'cidchar' and len(line) == 2:
				mapping[tocode(head)] = tocode(keyword)
			elif current == 'bfchar' and len(line) == 2:
				add_bf(tocode(head), bf_units(keyword))
			elif current == 'bfrange' and len(line) == 3:
				lo, hi = tocode(line[0]), tocode(line[1])
				units = bf_units(line[2])
				for code in range(lo, hi + 1):
					add_bf(code, units)
					# Only the last unit advances across the range.
					units[-1] += 1

	return cmapname, usecmap, wmode, codespacerange, mapping


def _compact_ranges(mapping):
	"""Collapse a code mapping into the tables printed by dumpcmap.

	Consecutive codes whose single values are also consecutive are merged
	into one (low, high, first value) run. A run goes to xranges when its
	low code, high code or first value exceeds 0xffff, otherwise to ranges.
	Each one-to-many mapping becomes a (code, offset) entry in mranges,
	with its length followed by its values appended to mdata at that offset.

	Returns (ranges, xranges, mranges, mdata).
	"""
	ranges = []
	xranges = []
	mranges = []
	mdata = []

	def flush(run):
		if run is None:
			return
		lo, hi, v_lo = run[0], run[1], run[2]
		if lo > 0xffff or hi > 0xffff or v_lo > 0xffff:
			xranges.append((lo, hi, v_lo))
		else:
			ranges.append((lo, hi, v_lo))

	run = None # open run as [lo, hi, v_lo, v_hi]
	for code in sorted(mapping):
		v = mapping[code]
		if not isinstance(v, int):
			# A one-to-many mapping always interrupts the current run.
			flush(run)
			run = None
			mranges.append((code, len(mdata)))
			mdata.append(len(v))
			mdata.extend(v)
		elif run is not None and code == run[1] + 1 and v == run[3] + 1:
			run[1] = code
			run[3] = v
		else:
			flush(run)
			run = [code, code, v, v]
	flush(run)

	return ranges, xranges, mranges, mdata


def dumpcmap(filename):
	"""Parse the CMap file 'filename' and print it to stdout as C source.

	The output consists of the optional static tables (pdf_range,
	pdf_xrange, pdf_mrange and the int table holding one-to-many data)
	followed by a 'static pdf_cmap cmap_<name>' initializer that refers to
	them. <name> is the CMap name with '-' replaced by '_'.

	Raises OSError if the file cannot be opened and ValueError if a code
	token cannot be parsed.
	"""
	cmapname, usecmap, wmode, codespacerange, mapping = _parse_cmap(filename)
	ranges, xranges, mranges, mdata = _compact_ranges(mapping)

	# Print C file

	cname = cmapname.replace('-', '_')

	print()
	print("/*", cmapname, "*/")
	print()

	if ranges:
		print("static const pdf_range cmap_%s_ranges[] = {" % cname)
		for r in ranges:
			print("{%d,%d,%d}," % r)
		print("};")
		print()
	if xranges:
		print("static const pdf_xrange cmap_%s_xranges[] = {" % cname)
		for r in xranges:
			print("{%d,%d,%d}," % r)
		print("};")
		print()
	if mranges:
		print("static const pdf_mrange cmap_%s_mranges[] = {" % cname)
		for r in mranges:
			print("{%d,%d}," % r)
		print("};")
		print()
		print("static const int cmap_%s_table[] = {" % cname)
		# One line per one-to-many entry: its length, then its values.
		for _, offset in mranges:
			entry = mdata[offset:offset + mdata[offset] + 1]
			print("".join("%d," % value for value in entry))
		print("};")
		print()

	print("static pdf_cmap cmap_%s = {" % cname)
	print("\t{ -1, pdf_drop_cmap_imp },")
	print("\t/* cmapname */ \"%s\"," % cmapname)
	print("\t/* usecmap */ \"%s\", NULL," % usecmap)
	print("\t/* wmode */ %d," % wmode)
	print("\t/* codespaces */ %d, {" % len(codespacerange))
	if codespacerange:
		for codespace in codespacerange:
			# Pad the hex bounds to two digits per codespace byte.
			fmt = "\t\t{ %%d, 0x%%0%dx, 0x%%0%dx }," % (codespace[0]*2, codespace[0]*2)
			print(fmt % codespace)
	else:
		print("\t\t{ 0, 0, 0 },")
	print("\t},")

	if ranges:
		print("\t%d, %d, (pdf_range*)cmap_%s_ranges," % (len(ranges),len(ranges),cname))
	else:
		print("\t0, 0, NULL, /* ranges */")

	if xranges:
		print("\t%d, %d, (pdf_xrange*)cmap_%s_xranges," % (len(xranges),len(xranges),cname))
	else:
		print("\t0, 0, NULL, /* xranges */")

	if mranges:
		print("\t%d, %d, (pdf_mrange*)cmap_%s_mranges," % (len(mranges),len(mranges),cname))
	else:
		print("\t0, 0, NULL, /* mranges */")

	if mdata:
		print("\t%d, %d, (int*)cmap_%s_table," % (len(mdata),len(mdata),cname))
	else:
		print("\t0, 0, NULL, /* table */")

	print("\t0, 0, 0, NULL /* splay tree */")
	print("};")
213
# Emit the generated-file banner once, then one C definition per CMap file
# named on the command line.
sys.stdout.write("/* This is an automatically generated file. Do not edit. */\n")

for cmap_path in sys.argv[1:]:
	dumpcmap(cmap_path)
218