1 // -*- related-file-name: "../include/efont/otfcmap.hh" -*-
2
3 /* otfcmap.{cc,hh} -- OpenType cmap table
4 *
5 * Copyright (c) 2002-2019 Eddie Kohler
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the Free
9 * Software Foundation; either version 2 of the License, or (at your option)
10 * any later version. This program is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 * Public License for more details.
14 */
15
16 #ifdef HAVE_CONFIG_H
17 # include <config.h>
18 #endif
19 #include <efont/otfcmap.hh>
20 #include <lcdf/error.hh>
21 #include <errno.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <efont/otfdata.hh> // for ntohl()
25
26 #define USHORT_AT(d) (Data::u16_aligned(d))
27 #define SHORT_AT(d) (Data::s16_aligned(d))
28 #define ULONG_AT(d) (Data::u32_aligned(d))
29 #define ULONG_AT2(d) (Data::u32_aligned16(d))
30
31 namespace Efont { namespace OpenType {
32
Cmap(const String & s,ErrorHandler * errh)33 Cmap::Cmap(const String &s, ErrorHandler *errh)
34 : _str(s)
35 {
36 _str.align(4);
37 _error = parse_header(errh ? errh : ErrorHandler::silent_handler());
38 }
39
40 int
parse_header(ErrorHandler * errh)41 Cmap::parse_header(ErrorHandler *errh)
42 {
43 // HEADER FORMAT:
44 // USHORT version
45 // USHORT numTables
46 int len = _str.length();
47 const uint8_t *data = _str.udata();
48 if (HEADER_SIZE > len)
49 return errh->error("OTF cmap too small"), -EFAULT;
50 if (!(data[0] == '\000' && data[1] == '\000'))
51 return errh->error("bad cmap version number"), -ERANGE;
52 _ntables = USHORT_AT(data + 2);
53 if (_ntables == 0)
54 return errh->error("OTF cmap contains no tables"), -EINVAL;
55 if (HEADER_SIZE + ENCODING_SIZE * _ntables > len)
56 return errh->error("cmap directory out of range"), -EFAULT;
57
58 // ENCODING ENTRY FORMAT:
59 // USHORT platformID
60 // USHORT encodingID
61 // ULONG offset
62 int last_platform = -1;
63 int last_encoding = -1;
64 int last_language = -1;
65 _first_unicode_table = -1;
66 for (int i = 0; i < _ntables; i++) {
67 int loc = HEADER_SIZE + ENCODING_SIZE * i;
68 int platform = USHORT_AT(data + loc);
69 int encoding = USHORT_AT(data + loc + 2);
70 uint32_t offset = ULONG_AT(data + loc + 4);
71 if (offset + 8 > (uint32_t) len) {
72 length_error:
73 return errh->error("encoding data for entry %d out of range", i);
74 }
75 int format = USHORT_AT(data + offset);
76 int language;
77 if (format == F_BYTE || format == F_HIBYTE || format == F_SEGMENTED
78 || format == F_TRIMMED) {
79 if (USHORT_AT(data + offset + 2) < 6)
80 goto length_error;
81 language = USHORT_AT(data + offset + 4);
82 } else if (format == F_HIBYTE32 || format == F_TRIMMED32
83 || format == F_SEGMENTED32) {
84 if (offset + 12 > (uint32_t) len || ULONG_AT2(data + offset + 4) < 12)
85 goto length_error;
86 language = ULONG_AT2(data + offset + 8);
87 } else
88 continue;
89 if (!(platform > last_platform
90 || (platform == last_platform
91 && (encoding > last_encoding
92 || (encoding == last_encoding
93 && language > last_language)))))
94 errh->warning("unsorted cmap encoding records at entry %d (%d,%d,%d follows %d,%d,%d)", i, platform, encoding, language, last_platform, last_encoding, last_language);
95 if ((platform == 0 || (platform == 3 && encoding == 1))
96 && _first_unicode_table < 0)
97 _first_unicode_table = i;
98 last_platform = platform, last_encoding = encoding, last_language = language;
99 }
100
101 _table_error.assign(_ntables, -2);
102 return 0;
103 }
104
105 int
first_table(int platform,int encoding) const106 Cmap::first_table(int platform, int encoding) const
107 {
108 if (error() < 0)
109 return -1;
110 const uint8_t *data = _str.udata();
111 data += HEADER_SIZE;
112 for (int i = 0; i < _ntables; i++, data += ENCODING_SIZE) {
113 int p = USHORT_AT(data), e = USHORT_AT(data + 2);
114 if (platform == p && (encoding < 0 || encoding == e))
115 return i;
116 }
117 return -1;
118 }
119
120 int
check_table(int t,ErrorHandler * errh) const121 Cmap::check_table(int t, ErrorHandler *errh) const
122 {
123 if (!errh)
124 errh = ErrorHandler::silent_handler();
125 if (t == USE_FIRST_UNICODE_TABLE && _first_unicode_table == -1) {
126 errh->warning("font appears not to support Unicode");
127 _first_unicode_table = 0;
128 }
129 if (t == USE_FIRST_UNICODE_TABLE)
130 t = _first_unicode_table;
131 if (_error < 0 || t < 0 || t >= _ntables)
132 return errh->error("no such table");
133 if (_table_error[t] != -2)
134 return _table_error[t];
135 _table_error[t] = -1;
136
137 const uint8_t *data = table_data(t);
138 uint32_t left = _str.uend() - data;
139 int format = USHORT_AT(data);
140 uint32_t length = 0; // value not used
141
142 switch (format) {
143
144 case F_BYTE:
145 if (left < 4
146 || (length = USHORT_AT(data + 2)) > left
147 || length != 259)
148 return errh->error("bad table %d length (format %d)", t, format);
149 break;
150
151 case F_HIBYTE:
152 if (left < 4
153 || (length = USHORT_AT(data + 2)) > left
154 || length < 524)
155 return errh->error("bad table %d length (format %d)", t, format);
156 for (int hi_byte = 0; hi_byte < 256; hi_byte++)
157 if (uint32_t subh_key = USHORT_AT(data + 6 + 2 * hi_byte)) {
158 if ((subh_key & 7) || HIBYTE_SUBHEADERS + subh_key + 8 > length)
159 return errh->error("bad table %d subheader %d offset (format 2)", t, hi_byte);
160 const uint8_t *subh = data + HIBYTE_SUBHEADERS + subh_key;
161 int firstCode = USHORT_AT(subh);
162 int entryCount = USHORT_AT(subh + 2);
163 int idRangeOffset = USHORT_AT(subh + 6);
164 if (firstCode + entryCount > 256 || entryCount == 0)
165 return errh->error("bad table %d subheader %d contents (format 2)", t, hi_byte);
166 if ((HIBYTE_SUBHEADERS + subh_key + 6) // pos[idRangeOffset]
167 + idRangeOffset + entryCount * 2 > length)
168 return errh->error("bad table %d subheader %d length (format 2)", t, hi_byte);
169 }
170 break;
171
172 case F_SEGMENTED: {
173 if (left < 4
174 || (length = USHORT_AT(data + 2)) > left
175 || length < 16)
176 return errh->error("bad table %d length (format %d)", t, format);
177 int segCountX2 = USHORT_AT(data + 6);
178 int searchRange = USHORT_AT(data + 8);
179 int entrySelector = USHORT_AT(data + 10);
180 int rangeShift = USHORT_AT(data + 12);
181 if ((segCountX2 & 1)
182 || segCountX2 == 0
183 || (searchRange & (searchRange - 1)) /* not a power of 2? */
184 || searchRange <= segCountX2/2
185 || (searchRange>>1) > segCountX2/2
186 || 1 << (entrySelector + 1) != searchRange
187 || rangeShift != segCountX2 - searchRange)
188 return errh->error("bad table %d segment counts (format %d)", format);
189 uint32_t segCount = segCountX2 >> 1;
190 if (length < 16 + 8 * segCount)
191 return errh->error("bad table %d length (format %d, length %u, need %u)", t, format, length, 16 + 8 * segCount);
192 const uint8_t *endCodes = data + 14;
193 const uint8_t *startCodes = endCodes + 2 + segCountX2;
194 const uint8_t *idDeltas = startCodes + segCountX2;
195 const uint8_t *idRangeOffsets = idDeltas + segCountX2;
196 uint32_t idRangeOffsetsPos = idRangeOffsets - data;
197 int last_end = 0;
198 for (int i = 0; i < segCountX2; i += 2) {
199 int endCode = USHORT_AT(endCodes + i);
200 int startCode = USHORT_AT(startCodes + i);
201 /* int idDelta = SHORT_AT(idDeltas + i); // not needed */
202 int idRangeOffset = USHORT_AT(idRangeOffsets + i);
203 if (endCode < startCode || startCode < last_end)
204 return errh->error("bad table %d overlapping range %d (format %d)", t, i/2, format);
205 if (idRangeOffset
206 && idRangeOffset != 65535
207 && idRangeOffsetsPos + i + idRangeOffset + (endCode - startCode)*2 + 2 > length)
208 return errh->error("bad table %d range %d length (format %d, range %d-%d, idRangeOffset %d, length %u)", t, i/2, format, startCode, endCode, idRangeOffset, length);
209 last_end = endCode + 1;
210 }
211 if (USHORT_AT(endCodes + segCountX2 - 2) != 0xFFFF)
212 return errh->error("bad table %d incorrect final endCode (format 4)", t);
213 break;
214 }
215
216 case F_TRIMMED: {
217 if (left < 4
218 || (length = USHORT_AT(data + 2)) > left
219 || length < 10)
220 return errh->error("bad table %d length (format %d)", t, format);
221 uint32_t entryCount = USHORT_AT(data + 8);
222 if (10 + entryCount * 2 > length)
223 return errh->error("bad table %d length (format %d)", t, format);
224 break;
225 }
226
227 case F_SEGMENTED32: {
228 if (left < 8
229 || (length = ULONG_AT(data + 4)) > left
230 || length < 16)
231 return errh->error("bad table %d length (format %d)", t, format);
232 uint32_t nGroups = ULONG_AT(data + 16);
233 if ((length - 16) / 12 < nGroups)
234 return errh->error("bad table %d length (format %d)", t, format);
235 uint32_t last_post_end = 0;
236 data += 16;
237 for (uint32_t i = 0; i < nGroups; i++, data += 12) {
238 uint32_t startCharCode = ULONG_AT(data);
239 uint32_t endCharCode = ULONG_AT(data + 4);
240 if (startCharCode < last_post_end || endCharCode < startCharCode)
241 return errh->error("bad table %d overlapping range %d (format %d)", t, i, format);
242 last_post_end = endCharCode + 1;
243 }
244 break;
245 }
246
247 case F_HIBYTE32:
248 case F_TRIMMED32:
249 default:
250 return errh->error("bad table %d unsupported format %d", t, format);
251
252 }
253
254 _table_error[t] = t;
255 return t;
256 }
257
258 Glyph
map_table(int t,uint32_t uni,ErrorHandler * errh) const259 Cmap::map_table(int t, uint32_t uni, ErrorHandler *errh) const
260 {
261 if ((t = check_table(t, errh)) < 0)
262 return 0;
263
264 const uint8_t *data = table_data(t);
265 switch (USHORT_AT(data)) {
266
267 case F_BYTE:
268 if (uni < 256)
269 return data[6 + uni];
270 else
271 return 0;
272
273 case F_HIBYTE: {
274 if (uni >= 65536)
275 return 0;
276 int hi_byte = (uni >> 8) & 255;
277 int subh = USHORT_AT(data + 6 + hi_byte * 2);
278 if (subh == 0 && hi_byte) // XXX?
279 return 0;
280 data += 524 + subh;
281 int firstCode = USHORT_AT(data);
282 int entryCount = USHORT_AT(data + 2);
283 int idDelta = SHORT_AT(data + 4);
284 int idRangeOffset = USHORT_AT(data + 6);
285 int lo_byte = uni & 255;
286 if (lo_byte < firstCode || lo_byte >= firstCode + entryCount)
287 return 0;
288 int answer = USHORT_AT(data + 6 + idRangeOffset + (lo_byte - firstCode) * 2);
289 if (answer == 0)
290 return 0;
291 return (answer + idDelta) & 65535;
292 }
293
294 case F_SEGMENTED: {
295 if (uni >= 65536)
296 return 0;
297 int segCount = USHORT_AT(data + 6) >> 1;
298 const uint8_t *endCounts = data + 14;
299 const uint8_t *startCounts = endCounts + (segCount << 1) + 2;
300 const uint8_t *idDeltas = startCounts + (segCount << 1);
301 const uint8_t *idRangeOffsets = idDeltas + (segCount << 1);
302 int l = 0, r = segCount;
303 while (l < r) {
304 int m = l + (r - l) / 2;
305 uint32_t endCount = USHORT_AT(endCounts + (m << 1));
306 uint32_t startCount = USHORT_AT(startCounts + (m << 1));
307 if (uni < startCount)
308 r = m;
309 else if (uni <= endCount) {
310 int idDelta = SHORT_AT(idDeltas + (m << 1));
311 int idRangeOffset = USHORT_AT(idRangeOffsets + (m << 1));
312 if (idRangeOffset == 0)
313 return (idDelta + uni) & 65535;
314 else if (idRangeOffset == 65535)
315 return 0;
316 int g = USHORT_AT(idRangeOffsets + (m << 1) + idRangeOffset + ((uni - startCount) << 1));
317 if (g == 0)
318 return 0;
319 return (idDelta + g) & 65535;
320 } else
321 l = m + 1;
322 }
323 return 0;
324 }
325
326 case F_TRIMMED: {
327 uint32_t firstCode = USHORT_AT(data + 6);
328 uint32_t entryCount = USHORT_AT(data + 8);
329 if (uni < firstCode || uni >= firstCode + entryCount)
330 return 0;
331 return USHORT_AT(data + 10 + ((uni - firstCode) << 1));
332 }
333
334 case F_SEGMENTED32: {
335 uint32_t nGroups = ULONG_AT2(data + 12);
336 uint32_t l = 0, r = nGroups;
337 const uint8_t *groups = data + 16;
338 while (l < r) {
339 uint32_t m = l + (r - l) / 2;
340 uint32_t startCharCode = ULONG_AT2(groups + m * 12);
341 uint32_t endCharCode = ULONG_AT2(groups + m * 12 + 4);
342 if (uni < startCharCode)
343 r = m;
344 else if (uni <= endCharCode)
345 return ULONG_AT2(groups + m * 12 + 8) + uni - startCharCode;
346 else
347 l = m + 1;
348 }
349 return 0;
350 }
351
352 default:
353 return 0;
354
355 }
356 }
357
358 void
dump_table(int t,Vector<std::pair<uint32_t,Glyph>> & ugp,ErrorHandler * errh) const359 Cmap::dump_table(int t, Vector<std::pair<uint32_t, Glyph> > &ugp, ErrorHandler *errh) const
360 {
361 if ((t = check_table(t, errh)) < 0)
362 return;
363
364 const uint8_t *data = table_data(t);
365 switch (USHORT_AT(data)) {
366
367 case F_BYTE:
368 for (uint32_t u = 0; u < 256; ++u)
369 if (int g = data[6 + u])
370 ugp.push_back(std::make_pair(u, g));
371 break;
372
373 case F_HIBYTE:
374 assert(USHORT_AT(data + 6) == 0);
375 for (int hi_byte = 0; hi_byte < 256; hi_byte++) {
376 int subh = USHORT_AT(data + 6 + hi_byte * 4);
377 if (subh == 0 && hi_byte > 0)
378 continue;
379 const uint8_t *tdata = data + 524 + subh;
380 int firstCode = USHORT_AT(tdata);
381 int entryCount = USHORT_AT(tdata + 2);
382 int idDelta = SHORT_AT(tdata + 4);
383 int idRangeOffset = USHORT_AT(tdata + 6);
384 const uint8_t *gdata = tdata + 6 + idRangeOffset;
385 for (int i = 0; i < entryCount; i++)
386 if (Glyph g = USHORT_AT(gdata + (i << 1))) {
387 g = (idDelta + g) & 65535;
388 uint32_t u = (hi_byte << 8) + firstCode + i;
389 ugp.push_back(std::make_pair(u, g));
390 }
391 }
392 break;
393
394 case F_SEGMENTED: {
395 int segCountX2 = USHORT_AT(data + 6);
396 const uint8_t *endCounts = data + 14;
397 const uint8_t *startCounts = endCounts + segCountX2 + 2;
398 const uint8_t *idDeltas = startCounts + segCountX2;
399 const uint8_t *idRangeOffsets = idDeltas + segCountX2;
400 for (int i = 0; i < segCountX2; i += 2) {
401 uint32_t endCount = USHORT_AT(endCounts + i);
402 uint32_t startCount = USHORT_AT(startCounts + i);
403 int idDelta = SHORT_AT(idDeltas + i);
404 int idRangeOffset = USHORT_AT(idRangeOffsets + i);
405 if (idRangeOffset == 0) {
406 for (uint32_t u = startCount; u <= endCount; ++u) {
407 Glyph g = (u + idDelta) & 65535;
408 ugp.push_back(std::make_pair(u, g));
409 }
410 } else {
411 const uint8_t *gdata = idRangeOffsets + i + idRangeOffset;
412 for (uint32_t u = startCount; u <= endCount; ++u, gdata += 2)
413 if (Glyph g = USHORT_AT(gdata)) {
414 g = (g + idDelta) & 65535;
415 ugp.push_back(std::make_pair(u, g));
416 }
417 }
418 }
419 break;
420 }
421
422 case F_TRIMMED: {
423 uint32_t firstCode = USHORT_AT(data + 6);
424 int entryCount = USHORT_AT(data + 8);
425 for (int i = 0; i < entryCount; i++)
426 if (Glyph g = USHORT_AT(data + 10 + (i << 1)))
427 ugp.push_back(std::make_pair(firstCode + i, g));
428 break;
429 }
430
431 case F_SEGMENTED32: {
432 uint32_t nGroups = ULONG_AT2(data + 12);
433 const uint8_t *groups = data + 16;
434 for (uint32_t i = 0; i < nGroups; i++, groups += 12) {
435 uint32_t startCharCode = ULONG_AT2(groups);
436 uint32_t nCharCodes = ULONG_AT2(groups + 4) - startCharCode;
437 Glyph startGlyphID = ULONG_AT2(groups + 8);
438 for (uint32_t i = 0; i <= nCharCodes; i++)
439 ugp.push_back(std::make_pair(startCharCode + i, startGlyphID + i));
440 }
441 break;
442 }
443
444 default:
445 break;
446
447 }
448 }
449
450 int
map_uni(const Vector<uint32_t> & vin,Vector<Glyph> & vout) const451 Cmap::map_uni(const Vector<uint32_t> &vin, Vector<Glyph> &vout) const
452 {
453 int t;
454 if ((t = check_table(USE_FIRST_UNICODE_TABLE)) < 0)
455 return -1;
456 vout.resize(vin.size(), 0);
457 for (int i = 0; i < vin.size(); i++)
458 vout[i] = map_table(t, vin[i]);
459 return 0;
460 }
461
462 }}
463