1 // -*- related-file-name: "../include/efont/otfcmap.hh" -*-
2 
3 /* otfcmap.{cc,hh} -- OpenType cmap table
4  *
5  * Copyright (c) 2002-2019 Eddie Kohler
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License as published by the Free
9  * Software Foundation; either version 2 of the License, or (at your option)
10  * any later version. This program is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13  * Public License for more details.
14  */
15 
16 #ifdef HAVE_CONFIG_H
17 # include <config.h>
18 #endif
19 #include <efont/otfcmap.hh>
20 #include <lcdf/error.hh>
21 #include <errno.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <efont/otfdata.hh>     // for ntohl()
25 
26 #define USHORT_AT(d)            (Data::u16_aligned(d))
27 #define SHORT_AT(d)             (Data::s16_aligned(d))
28 #define ULONG_AT(d)             (Data::u32_aligned(d))
29 #define ULONG_AT2(d)            (Data::u32_aligned16(d))
30 
31 namespace Efont { namespace OpenType {
32 
Cmap(const String & s,ErrorHandler * errh)33 Cmap::Cmap(const String &s, ErrorHandler *errh)
34     : _str(s)
35 {
36     _str.align(4);
37     _error = parse_header(errh ? errh : ErrorHandler::silent_handler());
38 }
39 
40 int
parse_header(ErrorHandler * errh)41 Cmap::parse_header(ErrorHandler *errh)
42 {
43     // HEADER FORMAT:
44     // USHORT   version
45     // USHORT   numTables
46     int len = _str.length();
47     const uint8_t *data = _str.udata();
48     if (HEADER_SIZE > len)
49         return errh->error("OTF cmap too small"), -EFAULT;
50     if (!(data[0] == '\000' && data[1] == '\000'))
51         return errh->error("bad cmap version number"), -ERANGE;
52     _ntables = USHORT_AT(data + 2);
53     if (_ntables == 0)
54         return errh->error("OTF cmap contains no tables"), -EINVAL;
55     if (HEADER_SIZE + ENCODING_SIZE * _ntables > len)
56         return errh->error("cmap directory out of range"), -EFAULT;
57 
58     // ENCODING ENTRY FORMAT:
59     // USHORT   platformID
60     // USHORT   encodingID
61     // ULONG    offset
62     int last_platform = -1;
63     int last_encoding = -1;
64     int last_language = -1;
65     _first_unicode_table = -1;
66     for (int i = 0; i < _ntables; i++) {
67         int loc = HEADER_SIZE + ENCODING_SIZE * i;
68         int platform = USHORT_AT(data + loc);
69         int encoding = USHORT_AT(data + loc + 2);
70         uint32_t offset = ULONG_AT(data + loc + 4);
71         if (offset + 8 > (uint32_t) len) {
72           length_error:
73             return errh->error("encoding data for entry %d out of range", i);
74         }
75         int format = USHORT_AT(data + offset);
76         int language;
77         if (format == F_BYTE || format == F_HIBYTE || format == F_SEGMENTED
78             || format == F_TRIMMED) {
79             if (USHORT_AT(data + offset + 2) < 6)
80                 goto length_error;
81             language = USHORT_AT(data + offset + 4);
82         } else if (format == F_HIBYTE32 || format == F_TRIMMED32
83                    || format == F_SEGMENTED32) {
84             if (offset + 12 > (uint32_t) len || ULONG_AT2(data + offset + 4) < 12)
85                 goto length_error;
86             language = ULONG_AT2(data + offset + 8);
87         } else
88             continue;
89         if (!(platform > last_platform
90               || (platform == last_platform
91                   && (encoding > last_encoding
92                       || (encoding == last_encoding
93                           && language > last_language)))))
94             errh->warning("unsorted cmap encoding records at entry %d (%d,%d,%d follows %d,%d,%d)", i, platform, encoding, language, last_platform, last_encoding, last_language);
95         if ((platform == 0 || (platform == 3 && encoding == 1))
96             && _first_unicode_table < 0)
97             _first_unicode_table = i;
98         last_platform = platform, last_encoding = encoding, last_language = language;
99     }
100 
101     _table_error.assign(_ntables, -2);
102     return 0;
103 }
104 
105 int
first_table(int platform,int encoding) const106 Cmap::first_table(int platform, int encoding) const
107 {
108     if (error() < 0)
109         return -1;
110     const uint8_t *data = _str.udata();
111     data += HEADER_SIZE;
112     for (int i = 0; i < _ntables; i++, data += ENCODING_SIZE) {
113         int p = USHORT_AT(data), e = USHORT_AT(data + 2);
114         if (platform == p && (encoding < 0 || encoding == e))
115             return i;
116     }
117     return -1;
118 }
119 
120 int
check_table(int t,ErrorHandler * errh) const121 Cmap::check_table(int t, ErrorHandler *errh) const
122 {
123     if (!errh)
124         errh = ErrorHandler::silent_handler();
125     if (t == USE_FIRST_UNICODE_TABLE && _first_unicode_table == -1) {
126         errh->warning("font appears not to support Unicode");
127         _first_unicode_table = 0;
128     }
129     if (t == USE_FIRST_UNICODE_TABLE)
130         t = _first_unicode_table;
131     if (_error < 0 || t < 0 || t >= _ntables)
132         return errh->error("no such table");
133     if (_table_error[t] != -2)
134         return _table_error[t];
135     _table_error[t] = -1;
136 
137     const uint8_t *data = table_data(t);
138     uint32_t left = _str.uend() - data;
139     int format = USHORT_AT(data);
140     uint32_t length = 0;        // value not used
141 
142     switch (format) {
143 
144       case F_BYTE:
145         if (left < 4
146             || (length = USHORT_AT(data + 2)) > left
147             || length != 259)
148             return errh->error("bad table %d length (format %d)", t, format);
149         break;
150 
151       case F_HIBYTE:
152         if (left < 4
153             || (length = USHORT_AT(data + 2)) > left
154             || length < 524)
155             return errh->error("bad table %d length (format %d)", t, format);
156         for (int hi_byte = 0; hi_byte < 256; hi_byte++)
157             if (uint32_t subh_key = USHORT_AT(data + 6 + 2 * hi_byte)) {
158                 if ((subh_key & 7) || HIBYTE_SUBHEADERS + subh_key + 8 > length)
159                     return errh->error("bad table %d subheader %d offset (format 2)", t, hi_byte);
160                 const uint8_t *subh = data + HIBYTE_SUBHEADERS + subh_key;
161                 int firstCode = USHORT_AT(subh);
162                 int entryCount = USHORT_AT(subh + 2);
163                 int idRangeOffset = USHORT_AT(subh + 6);
164                 if (firstCode + entryCount > 256 || entryCount == 0)
165                     return errh->error("bad table %d subheader %d contents (format 2)", t, hi_byte);
166                 if ((HIBYTE_SUBHEADERS + subh_key + 6) // pos[idRangeOffset]
167                     + idRangeOffset + entryCount * 2 > length)
168                     return errh->error("bad table %d subheader %d length (format 2)", t, hi_byte);
169             }
170         break;
171 
172       case F_SEGMENTED: {
173           if (left < 4
174               || (length = USHORT_AT(data + 2)) > left
175               || length < 16)
176               return errh->error("bad table %d length (format %d)", t, format);
177           int segCountX2 = USHORT_AT(data + 6);
178           int searchRange = USHORT_AT(data + 8);
179           int entrySelector = USHORT_AT(data + 10);
180           int rangeShift = USHORT_AT(data + 12);
181           if ((segCountX2 & 1)
182               || segCountX2 == 0
183               || (searchRange & (searchRange - 1)) /* not a power of 2? */
184               || searchRange <= segCountX2/2
185               || (searchRange>>1) > segCountX2/2
186               || 1 << (entrySelector + 1) != searchRange
187               || rangeShift != segCountX2 - searchRange)
188               return errh->error("bad table %d segment counts (format %d)", format);
189           uint32_t segCount = segCountX2 >> 1;
190           if (length < 16 + 8 * segCount)
191               return errh->error("bad table %d length (format %d, length %u, need %u)", t, format, length, 16 + 8 * segCount);
192           const uint8_t *endCodes = data + 14;
193           const uint8_t *startCodes = endCodes + 2 + segCountX2;
194           const uint8_t *idDeltas = startCodes + segCountX2;
195           const uint8_t *idRangeOffsets = idDeltas + segCountX2;
196           uint32_t idRangeOffsetsPos = idRangeOffsets - data;
197           int last_end = 0;
198           for (int i = 0; i < segCountX2; i += 2) {
199               int endCode = USHORT_AT(endCodes + i);
200               int startCode = USHORT_AT(startCodes + i);
201               /* int idDelta = SHORT_AT(idDeltas + i); // not needed */
202               int idRangeOffset = USHORT_AT(idRangeOffsets + i);
203               if (endCode < startCode || startCode < last_end)
204                   return errh->error("bad table %d overlapping range %d (format %d)", t, i/2, format);
205               if (idRangeOffset
206                   && idRangeOffset != 65535
207                   && idRangeOffsetsPos + i + idRangeOffset + (endCode - startCode)*2 + 2 > length)
208                   return errh->error("bad table %d range %d length (format %d, range %d-%d, idRangeOffset %d, length %u)", t, i/2, format, startCode, endCode, idRangeOffset, length);
209               last_end = endCode + 1;
210           }
211           if (USHORT_AT(endCodes + segCountX2 - 2) != 0xFFFF)
212               return errh->error("bad table %d incorrect final endCode (format 4)", t);
213           break;
214       }
215 
216       case F_TRIMMED: {
217           if (left < 4
218               || (length = USHORT_AT(data + 2)) > left
219               || length < 10)
220               return errh->error("bad table %d length (format %d)", t, format);
221           uint32_t entryCount = USHORT_AT(data + 8);
222           if (10 + entryCount * 2 > length)
223               return errh->error("bad table %d length (format %d)", t, format);
224           break;
225       }
226 
227       case F_SEGMENTED32: {
228           if (left < 8
229               || (length = ULONG_AT(data + 4)) > left
230               || length < 16)
231               return errh->error("bad table %d length (format %d)", t, format);
232           uint32_t nGroups = ULONG_AT(data + 16);
233           if ((length - 16) / 12 < nGroups)
234               return errh->error("bad table %d length (format %d)", t, format);
235           uint32_t last_post_end = 0;
236           data += 16;
237           for (uint32_t i = 0; i < nGroups; i++, data += 12) {
238               uint32_t startCharCode = ULONG_AT(data);
239               uint32_t endCharCode = ULONG_AT(data + 4);
240               if (startCharCode < last_post_end || endCharCode < startCharCode)
241                   return errh->error("bad table %d overlapping range %d (format %d)", t, i, format);
242               last_post_end = endCharCode + 1;
243           }
244           break;
245       }
246 
247       case F_HIBYTE32:
248       case F_TRIMMED32:
249       default:
250         return errh->error("bad table %d unsupported format %d", t, format);
251 
252     }
253 
254     _table_error[t] = t;
255     return t;
256 }
257 
258 Glyph
map_table(int t,uint32_t uni,ErrorHandler * errh) const259 Cmap::map_table(int t, uint32_t uni, ErrorHandler *errh) const
260 {
261     if ((t = check_table(t, errh)) < 0)
262         return 0;
263 
264     const uint8_t *data = table_data(t);
265     switch (USHORT_AT(data)) {
266 
267     case F_BYTE:
268         if (uni < 256)
269             return data[6 + uni];
270         else
271             return 0;
272 
273     case F_HIBYTE: {
274         if (uni >= 65536)
275             return 0;
276         int hi_byte = (uni >> 8) & 255;
277         int subh = USHORT_AT(data + 6 + hi_byte * 2);
278         if (subh == 0 && hi_byte) // XXX?
279             return 0;
280         data += 524 + subh;
281         int firstCode = USHORT_AT(data);
282         int entryCount = USHORT_AT(data + 2);
283         int idDelta = SHORT_AT(data + 4);
284         int idRangeOffset = USHORT_AT(data + 6);
285         int lo_byte = uni & 255;
286         if (lo_byte < firstCode || lo_byte >= firstCode + entryCount)
287             return 0;
288         int answer = USHORT_AT(data + 6 + idRangeOffset + (lo_byte - firstCode) * 2);
289         if (answer == 0)
290             return 0;
291         return (answer + idDelta) & 65535;
292     }
293 
294     case F_SEGMENTED: {
295         if (uni >= 65536)
296             return 0;
297         int segCount = USHORT_AT(data + 6) >> 1;
298         const uint8_t *endCounts = data + 14;
299         const uint8_t *startCounts = endCounts + (segCount << 1) + 2;
300         const uint8_t *idDeltas = startCounts + (segCount << 1);
301         const uint8_t *idRangeOffsets = idDeltas + (segCount << 1);
302         int l = 0, r = segCount;
303         while (l < r) {
304             int m = l + (r - l) / 2;
305             uint32_t endCount = USHORT_AT(endCounts + (m << 1));
306             uint32_t startCount = USHORT_AT(startCounts + (m << 1));
307             if (uni < startCount)
308                 r = m;
309             else if (uni <= endCount) {
310                 int idDelta = SHORT_AT(idDeltas + (m << 1));
311                 int idRangeOffset = USHORT_AT(idRangeOffsets + (m << 1));
312                 if (idRangeOffset == 0)
313                     return (idDelta + uni) & 65535;
314                 else if (idRangeOffset == 65535)
315                     return 0;
316                 int g = USHORT_AT(idRangeOffsets + (m << 1) + idRangeOffset + ((uni - startCount) << 1));
317                 if (g == 0)
318                     return 0;
319                 return (idDelta + g) & 65535;
320             } else
321                 l = m + 1;
322         }
323         return 0;
324     }
325 
326     case F_TRIMMED: {
327         uint32_t firstCode = USHORT_AT(data + 6);
328         uint32_t entryCount = USHORT_AT(data + 8);
329         if (uni < firstCode || uni >= firstCode + entryCount)
330             return 0;
331         return USHORT_AT(data + 10 + ((uni - firstCode) << 1));
332     }
333 
334     case F_SEGMENTED32: {
335         uint32_t nGroups = ULONG_AT2(data + 12);
336         uint32_t l = 0, r = nGroups;
337         const uint8_t *groups = data + 16;
338         while (l < r) {
339             uint32_t m = l + (r - l) / 2;
340             uint32_t startCharCode = ULONG_AT2(groups + m * 12);
341             uint32_t endCharCode = ULONG_AT2(groups + m * 12 + 4);
342             if (uni < startCharCode)
343                 r = m;
344             else if (uni <= endCharCode)
345                 return ULONG_AT2(groups + m * 12 + 8) + uni - startCharCode;
346             else
347                 l = m + 1;
348         }
349         return 0;
350     }
351 
352     default:
353         return 0;
354 
355     }
356 }
357 
358 void
dump_table(int t,Vector<std::pair<uint32_t,Glyph>> & ugp,ErrorHandler * errh) const359 Cmap::dump_table(int t, Vector<std::pair<uint32_t, Glyph> > &ugp, ErrorHandler *errh) const
360 {
361     if ((t = check_table(t, errh)) < 0)
362         return;
363 
364     const uint8_t *data = table_data(t);
365     switch (USHORT_AT(data)) {
366 
367     case F_BYTE:
368         for (uint32_t u = 0; u < 256; ++u)
369             if (int g = data[6 + u])
370                 ugp.push_back(std::make_pair(u, g));
371         break;
372 
373     case F_HIBYTE:
374         assert(USHORT_AT(data + 6) == 0);
375         for (int hi_byte = 0; hi_byte < 256; hi_byte++) {
376             int subh = USHORT_AT(data + 6 + hi_byte * 4);
377             if (subh == 0 && hi_byte > 0)
378                 continue;
379             const uint8_t *tdata = data + 524 + subh;
380             int firstCode = USHORT_AT(tdata);
381             int entryCount = USHORT_AT(tdata + 2);
382             int idDelta = SHORT_AT(tdata + 4);
383             int idRangeOffset = USHORT_AT(tdata + 6);
384             const uint8_t *gdata = tdata + 6 + idRangeOffset;
385             for (int i = 0; i < entryCount; i++)
386                 if (Glyph g = USHORT_AT(gdata + (i << 1))) {
387                     g = (idDelta + g) & 65535;
388                     uint32_t u = (hi_byte << 8) + firstCode + i;
389                     ugp.push_back(std::make_pair(u, g));
390                 }
391         }
392         break;
393 
394     case F_SEGMENTED: {
395         int segCountX2 = USHORT_AT(data + 6);
396         const uint8_t *endCounts = data + 14;
397         const uint8_t *startCounts = endCounts + segCountX2 + 2;
398         const uint8_t *idDeltas = startCounts + segCountX2;
399         const uint8_t *idRangeOffsets = idDeltas + segCountX2;
400         for (int i = 0; i < segCountX2; i += 2) {
401             uint32_t endCount = USHORT_AT(endCounts + i);
402             uint32_t startCount = USHORT_AT(startCounts + i);
403             int idDelta = SHORT_AT(idDeltas + i);
404             int idRangeOffset = USHORT_AT(idRangeOffsets + i);
405             if (idRangeOffset == 0) {
406                 for (uint32_t u = startCount; u <= endCount; ++u) {
407                     Glyph g = (u + idDelta) & 65535;
408                     ugp.push_back(std::make_pair(u, g));
409                 }
410             } else {
411                 const uint8_t *gdata = idRangeOffsets + i + idRangeOffset;
412                 for (uint32_t u = startCount; u <= endCount; ++u, gdata += 2)
413                     if (Glyph g = USHORT_AT(gdata)) {
414                         g = (g + idDelta) & 65535;
415                         ugp.push_back(std::make_pair(u, g));
416                     }
417             }
418         }
419         break;
420     }
421 
422     case F_TRIMMED: {
423         uint32_t firstCode = USHORT_AT(data + 6);
424         int entryCount = USHORT_AT(data + 8);
425         for (int i = 0; i < entryCount; i++)
426             if (Glyph g = USHORT_AT(data + 10 + (i << 1)))
427                 ugp.push_back(std::make_pair(firstCode + i, g));
428         break;
429     }
430 
431     case F_SEGMENTED32: {
432         uint32_t nGroups = ULONG_AT2(data + 12);
433         const uint8_t *groups = data + 16;
434         for (uint32_t i = 0; i < nGroups; i++, groups += 12) {
435             uint32_t startCharCode = ULONG_AT2(groups);
436             uint32_t nCharCodes = ULONG_AT2(groups + 4) - startCharCode;
437             Glyph startGlyphID = ULONG_AT2(groups + 8);
438             for (uint32_t i = 0; i <= nCharCodes; i++)
439                 ugp.push_back(std::make_pair(startCharCode + i, startGlyphID + i));
440         }
441         break;
442     }
443 
444     default:
445         break;
446 
447     }
448 }
449 
450 int
map_uni(const Vector<uint32_t> & vin,Vector<Glyph> & vout) const451 Cmap::map_uni(const Vector<uint32_t> &vin, Vector<Glyph> &vout) const
452 {
453     int t;
454     if ((t = check_table(USE_FIRST_UNICODE_TABLE)) < 0)
455         return -1;
456     vout.resize(vin.size(), 0);
457     for (int i = 0; i < vin.size(); i++)
458         vout[i] = map_table(t, vin[i]);
459     return 0;
460 }
461 
462 }}
463