1 /*
2  * Copyright © 2014  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_OT_CMAP_TABLE_HH
28 #define HB_OT_CMAP_TABLE_HH
29 
30 #include "hb-open-type.hh"
31 #include "hb-set.hh"
32 
33 /*
34  * cmap -- Character to Glyph Index Mapping
35  * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
36  */
37 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
38 
39 namespace OT {
40 
41 
42 struct CmapSubtableFormat0
43 {
get_glyphOT::CmapSubtableFormat044   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
45   {
46     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
47     if (!gid)
48       return false;
49     *glyph = gid;
50     return true;
51   }
collect_unicodesOT::CmapSubtableFormat052   void collect_unicodes (hb_set_t *out) const
53   {
54     for (unsigned int i = 0; i < 256; i++)
55       if (glyphIdArray[i])
56         out->add (i);
57   }
58 
collect_mappingOT::CmapSubtableFormat059   void collect_mapping (hb_set_t *unicodes, /* OUT */
60                         hb_map_t *mapping /* OUT */) const
61   {
62     for (unsigned i = 0; i < 256; i++)
63       if (glyphIdArray[i])
64       {
65         hb_codepoint_t glyph = glyphIdArray[i];
66         unicodes->add (i);
67         mapping->set (i, glyph);
68       }
69   }
70 
sanitizeOT::CmapSubtableFormat071   bool sanitize (hb_sanitize_context_t *c) const
72   {
73     TRACE_SANITIZE (this);
74     return_trace (c->check_struct (this));
75   }
76 
77   protected:
78   HBUINT16      format;         /* Format number is set to 0. */
79   HBUINT16      length;         /* Byte length of this subtable. */
80   HBUINT16      language;       /* Ignore. */
81   HBUINT8       glyphIdArray[256];/* An array that maps character
82                                  * code to glyph index values. */
83   public:
84   DEFINE_SIZE_STATIC (6 + 256);
85 };
86 
87 struct CmapSubtableFormat4
88 {
89 
90   template<typename Iterator,
91            hb_requires (hb_is_iterator (Iterator))>
serialize_endcode_arrayOT::CmapSubtableFormat492   HBUINT16* serialize_endcode_array (hb_serialize_context_t *c,
93                                      Iterator it)
94   {
95     HBUINT16 *endCode = c->start_embed<HBUINT16> ();
96     hb_codepoint_t prev_endcp = 0xFFFF;
97 
98     for (const auto& _ : +it)
99     {
100       if (prev_endcp != 0xFFFF && prev_endcp + 1u != _.first)
101       {
102         HBUINT16 end_code;
103         end_code = prev_endcp;
104         c->copy<HBUINT16> (end_code);
105       }
106       prev_endcp = _.first;
107     }
108 
109     {
110       // last endCode
111       HBUINT16 endcode;
112       endcode = prev_endcp;
113       if (unlikely (!c->copy<HBUINT16> (endcode))) return nullptr;
114       // There must be a final entry with end_code == 0xFFFF.
115       if (prev_endcp != 0xFFFF)
116       {
117         HBUINT16 finalcode;
118         finalcode = 0xFFFF;
119         if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
120       }
121     }
122 
123     return endCode;
124   }
125 
126   template<typename Iterator,
127            hb_requires (hb_is_iterator (Iterator))>
serialize_startcode_arrayOT::CmapSubtableFormat4128   HBUINT16* serialize_startcode_array (hb_serialize_context_t *c,
129                                        Iterator it)
130   {
131     HBUINT16 *startCode = c->start_embed<HBUINT16> ();
132     hb_codepoint_t prev_cp = 0xFFFF;
133 
134     for (const auto& _ : +it)
135     {
136       if (prev_cp == 0xFFFF || prev_cp + 1u != _.first)
137       {
138         HBUINT16 start_code;
139         start_code = _.first;
140         c->copy<HBUINT16> (start_code);
141       }
142 
143       prev_cp = _.first;
144     }
145 
146     // There must be a final entry with end_code == 0xFFFF.
147     if (it.len () == 0 || prev_cp != 0xFFFF)
148     {
149       HBUINT16 finalcode;
150       finalcode = 0xFFFF;
151       if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
152     }
153 
154     return startCode;
155   }
156 
157   template<typename Iterator,
158            hb_requires (hb_is_iterator (Iterator))>
serialize_idDelta_arrayOT::CmapSubtableFormat4159   HBINT16* serialize_idDelta_array (hb_serialize_context_t *c,
160                                     Iterator it,
161                                     HBUINT16 *endCode,
162                                     HBUINT16 *startCode,
163                                     unsigned segcount)
164   {
165     unsigned i = 0;
166     hb_codepoint_t last_gid = 0, start_gid = 0, last_cp = 0xFFFF;
167     bool use_delta = true;
168 
169     HBINT16 *idDelta = c->start_embed<HBINT16> ();
170     if ((char *)idDelta - (char *)startCode != (int) segcount * (int) HBINT16::static_size)
171       return nullptr;
172 
173     for (const auto& _ : +it)
174     {
175       if (_.first == startCode[i])
176       {
177         use_delta = true;
178         start_gid = _.second;
179       }
180       else if (_.second != last_gid + 1) use_delta = false;
181 
182       if (_.first == endCode[i])
183       {
184         HBINT16 delta;
185         if (use_delta) delta = (int)start_gid - (int)startCode[i];
186         else delta = 0;
187         c->copy<HBINT16> (delta);
188 
189         i++;
190       }
191 
192       last_gid = _.second;
193       last_cp = _.first;
194     }
195 
196     if (it.len () == 0 || last_cp != 0xFFFF)
197     {
198       HBINT16 delta;
199       delta = 1;
200       if (unlikely (!c->copy<HBINT16> (delta))) return nullptr;
201     }
202 
203     return idDelta;
204   }
205 
206   template<typename Iterator,
207            hb_requires (hb_is_iterator (Iterator))>
serialize_rangeoffset_glyidOT::CmapSubtableFormat4208   HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c,
209                                          Iterator it,
210                                          HBUINT16 *endCode,
211                                          HBUINT16 *startCode,
212                                          HBINT16 *idDelta,
213                                          unsigned segcount)
214   {
215     HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
216     if (unlikely (!c->check_success (idRangeOffset))) return nullptr;
217     if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr;
218 
219     + hb_range (segcount)
220     | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; })
221     | hb_apply ([&] (const unsigned i)
222                 {
223                   idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i);
224 
225                   + it
226                   | hb_filter ([&] (const hb_item_type<Iterator> _) { return _.first >= startCode[i] && _.first <= endCode[i]; })
227                   | hb_apply ([&] (const hb_item_type<Iterator> _)
228                               {
229                                 HBUINT16 glyID;
230                                 glyID = _.second;
231                                 c->copy<HBUINT16> (glyID);
232                               })
233                   ;
234 
235 
236                 })
237     ;
238 
239     return idRangeOffset;
240   }
241 
242   template<typename Iterator,
243            hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtableFormat4244   void serialize (hb_serialize_context_t *c,
245                   Iterator it)
246   {
247     auto format4_iter =
248     + it
249     | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
250                  { return _.first <= 0xFFFF; })
251     ;
252 
253     if (format4_iter.len () == 0) return;
254 
255     unsigned table_initpos = c->length ();
256     if (unlikely (!c->extend_min (*this))) return;
257     this->format = 4;
258 
259     //serialize endCode[]
260     HBUINT16 *endCode = serialize_endcode_array (c, format4_iter);
261     if (unlikely (!endCode)) return;
262 
263     unsigned segcount = (c->length () - min_size) / HBUINT16::static_size;
264 
265     // 2 bytes of padding.
266     if (unlikely (!c->allocate_size<HBUINT16> (HBUINT16::static_size))) return; // 2 bytes of padding.
267 
268    // serialize startCode[]
269     HBUINT16 *startCode = serialize_startcode_array (c, format4_iter);
270     if (unlikely (!startCode)) return;
271 
272     //serialize idDelta[]
273     HBINT16 *idDelta = serialize_idDelta_array (c, format4_iter, endCode, startCode, segcount);
274     if (unlikely (!idDelta)) return;
275 
276     HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
277     if (unlikely (!c->check_success (idRangeOffset))) return;
278 
279     if (unlikely (!c->check_assign(this->length, c->length () - table_initpos))) return;
280     this->segCountX2 = segcount * 2;
281     this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1;
282     this->searchRange = 2 * (1u << this->entrySelector);
283     this->rangeShift = segcount * 2 > this->searchRange
284                        ? 2 * segcount - this->searchRange
285                        : 0;
286   }
287 
288   struct accelerator_t
289   {
accelerator_tOT::CmapSubtableFormat4::accelerator_t290     accelerator_t () {}
accelerator_tOT::CmapSubtableFormat4::accelerator_t291     accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); }
~accelerator_tOT::CmapSubtableFormat4::accelerator_t292     ~accelerator_t () { fini (); }
293 
initOT::CmapSubtableFormat4::accelerator_t294     void init (const CmapSubtableFormat4 *subtable)
295     {
296       segCount = subtable->segCountX2 / 2;
297       endCount = subtable->values.arrayZ;
298       startCount = endCount + segCount + 1;
299       idDelta = startCount + segCount;
300       idRangeOffset = idDelta + segCount;
301       glyphIdArray = idRangeOffset + segCount;
302       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
303     }
finiOT::CmapSubtableFormat4::accelerator_t304     void fini () {}
305 
get_glyphOT::CmapSubtableFormat4::accelerator_t306     bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
307     {
308       struct CustomRange
309       {
310         int cmp (hb_codepoint_t k,
311                  unsigned distance) const
312         {
313           if (k > last) return +1;
314           if (k < (&last)[distance]) return -1;
315           return 0;
316         }
317         HBUINT16 last;
318       };
319 
320       const HBUINT16 *found = hb_bsearch (codepoint,
321                                           this->endCount,
322                                           this->segCount,
323                                           2,
324                                           _hb_cmp_method<hb_codepoint_t, CustomRange, unsigned>,
325                                           this->segCount + 1);
326       if (!found)
327         return false;
328       unsigned int i = found - endCount;
329 
330       hb_codepoint_t gid;
331       unsigned int rangeOffset = this->idRangeOffset[i];
332       if (rangeOffset == 0)
333         gid = codepoint + this->idDelta[i];
334       else
335       {
336         /* Somebody has been smoking... */
337         unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
338         if (unlikely (index >= this->glyphIdArrayLength))
339           return false;
340         gid = this->glyphIdArray[index];
341         if (unlikely (!gid))
342           return false;
343         gid += this->idDelta[i];
344       }
345       gid &= 0xFFFFu;
346       if (!gid)
347         return false;
348       *glyph = gid;
349       return true;
350     }
351 
get_glyph_funcOT::CmapSubtableFormat4::accelerator_t352     HB_INTERNAL static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
353     { return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); }
354 
collect_unicodesOT::CmapSubtableFormat4::accelerator_t355     void collect_unicodes (hb_set_t *out) const
356     {
357       unsigned int count = this->segCount;
358       if (count && this->startCount[count - 1] == 0xFFFFu)
359         count--; /* Skip sentinel segment. */
360       for (unsigned int i = 0; i < count; i++)
361       {
362         hb_codepoint_t start = this->startCount[i];
363         hb_codepoint_t end = this->endCount[i];
364         unsigned int rangeOffset = this->idRangeOffset[i];
365         if (rangeOffset == 0)
366         {
367           for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
368           {
369             hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
370             if (unlikely (!gid))
371               continue;
372             out->add (codepoint);
373           }
374         }
375         else
376         {
377           for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
378           {
379             unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
380             if (unlikely (index >= this->glyphIdArrayLength))
381               break;
382             hb_codepoint_t gid = this->glyphIdArray[index];
383             if (unlikely (!gid))
384               continue;
385             out->add (codepoint);
386           }
387         }
388       }
389     }
390 
collect_mappingOT::CmapSubtableFormat4::accelerator_t391     void collect_mapping (hb_set_t *unicodes, /* OUT */
392                           hb_map_t *mapping /* OUT */) const
393     {
394       unsigned count = this->segCount;
395       if (count && this->startCount[count - 1] == 0xFFFFu)
396         count--; /* Skip sentinel segment. */
397       for (unsigned i = 0; i < count; i++)
398       {
399         hb_codepoint_t start = this->startCount[i];
400         hb_codepoint_t end = this->endCount[i];
401         unsigned rangeOffset = this->idRangeOffset[i];
402         if (rangeOffset == 0)
403         {
404           for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
405           {
406             hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
407             if (unlikely (!gid))
408               continue;
409             unicodes->add (codepoint);
410             mapping->set (codepoint, gid);
411           }
412         }
413         else
414         {
415           for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
416           {
417             unsigned index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
418             if (unlikely (index >= this->glyphIdArrayLength))
419               break;
420             hb_codepoint_t gid = this->glyphIdArray[index];
421             if (unlikely (!gid))
422               continue;
423             unicodes->add (codepoint);
424             mapping->set (codepoint, gid);
425           }
426         }
427       }
428     }
429 
430     const HBUINT16 *endCount;
431     const HBUINT16 *startCount;
432     const HBUINT16 *idDelta;
433     const HBUINT16 *idRangeOffset;
434     const HBUINT16 *glyphIdArray;
435     unsigned int segCount;
436     unsigned int glyphIdArrayLength;
437   };
438 
get_glyphOT::CmapSubtableFormat4439   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
440   {
441     accelerator_t accel (this);
442     return accel.get_glyph_func (&accel, codepoint, glyph);
443   }
collect_unicodesOT::CmapSubtableFormat4444   void collect_unicodes (hb_set_t *out) const
445   {
446     accelerator_t accel (this);
447     accel.collect_unicodes (out);
448   }
449 
collect_mappingOT::CmapSubtableFormat4450   void collect_mapping (hb_set_t *unicodes, /* OUT */
451                         hb_map_t *mapping /* OUT */) const
452   {
453     accelerator_t accel (this);
454     accel.collect_mapping (unicodes, mapping);
455   }
456 
sanitizeOT::CmapSubtableFormat4457   bool sanitize (hb_sanitize_context_t *c) const
458   {
459     TRACE_SANITIZE (this);
460     if (unlikely (!c->check_struct (this)))
461       return_trace (false);
462 
463     if (unlikely (!c->check_range (this, length)))
464     {
465       /* Some broken fonts have too long of a "length" value.
466        * If that is the case, just change the value to truncate
467        * the subtable at the end of the blob. */
468       uint16_t new_length = (uint16_t) hb_min ((uintptr_t) 65535,
469                                                (uintptr_t) (c->end -
470                                                             (char *) this));
471       if (!c->try_set (&length, new_length))
472         return_trace (false);
473     }
474 
475     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
476   }
477 
478 
479 
  protected:
  /* Fixed 14-byte header. */
  HBUINT16      format;         /* Format number is set to 4. */
  HBUINT16      length;         /* This is the length in bytes of the
                                 * subtable. */
  HBUINT16      language;       /* Ignore. */
  HBUINT16      segCountX2;     /* 2 x segCount. */
  HBUINT16      searchRange;    /* 2 * (2**floor(log2(segCount))) */
  HBUINT16      entrySelector;  /* log2(searchRange/2) */
  HBUINT16      rangeShift;     /* 2 x segCount - searchRange */

  /* All variable-length arrays are stored back to back in `values`;
   * accelerator_t::init() computes where each one starts.  The disabled
   * declarations below only document that layout. */
  UnsizedArrayOf<HBUINT16>
                values;
#if 0
  HBUINT16      endCount[segCount];     /* End characterCode for each segment,
                                         * last=0xFFFFu. */
  HBUINT16      reservedPad;            /* Set to 0. */
  HBUINT16      startCount[segCount];   /* Start character code for each segment. */
  HBINT16               idDelta[segCount];      /* Delta for all character codes in segment. */
  HBUINT16      idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
  UnsizedArrayOf<HBUINT16>
                glyphIdArray;   /* Glyph index array (arbitrary length) */
#endif

  public:
  DEFINE_SIZE_ARRAY (14, values);
505 };
506 
507 struct CmapSubtableLongGroup
508 {
509   friend struct CmapSubtableFormat12;
510   friend struct CmapSubtableFormat13;
511   template<typename U>
512   friend struct CmapSubtableLongSegmented;
513   friend struct cmap;
514 
cmpOT::CmapSubtableLongGroup515   int cmp (hb_codepoint_t codepoint) const
516   {
517     if (codepoint < startCharCode) return -1;
518     if (codepoint > endCharCode)   return +1;
519     return 0;
520   }
521 
sanitizeOT::CmapSubtableLongGroup522   bool sanitize (hb_sanitize_context_t *c) const
523   {
524     TRACE_SANITIZE (this);
525     return_trace (c->check_struct (this));
526   }
527 
528   private:
529   HBUINT32              startCharCode;  /* First character code in this group. */
530   HBUINT32              endCharCode;    /* Last character code in this group. */
531   HBUINT32              glyphID;        /* Glyph index; interpretation depends on
532                                          * subtable format. */
533   public:
534   DEFINE_SIZE_STATIC (12);
535 };
536 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup);
537 
538 template <typename UINT>
539 struct CmapSubtableTrimmed
540 {
get_glyphOT::CmapSubtableTrimmed541   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
542   {
543     /* Rely on our implicit array bound-checking. */
544     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
545     if (!gid)
546       return false;
547     *glyph = gid;
548     return true;
549   }
collect_unicodesOT::CmapSubtableTrimmed550   void collect_unicodes (hb_set_t *out) const
551   {
552     hb_codepoint_t start = startCharCode;
553     unsigned int count = glyphIdArray.len;
554     for (unsigned int i = 0; i < count; i++)
555       if (glyphIdArray[i])
556         out->add (start + i);
557   }
558 
collect_mappingOT::CmapSubtableTrimmed559   void collect_mapping (hb_set_t *unicodes, /* OUT */
560                         hb_map_t *mapping /* OUT */) const
561   {
562     hb_codepoint_t start_cp = startCharCode;
563     unsigned count = glyphIdArray.len;
564     for (unsigned i = 0; i < count; i++)
565       if (glyphIdArray[i])
566       {
567         hb_codepoint_t unicode = start_cp + i;
568         hb_codepoint_t glyphid = glyphIdArray[i];
569         unicodes->add (unicode);
570         mapping->set (unicode, glyphid);
571       }
572   }
573 
sanitizeOT::CmapSubtableTrimmed574   bool sanitize (hb_sanitize_context_t *c) const
575   {
576     TRACE_SANITIZE (this);
577     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
578   }
579 
580   protected:
581   UINT          formatReserved; /* Subtable format and (maybe) padding. */
582   UINT          length;         /* Byte length of this subtable. */
583   UINT          language;       /* Ignore. */
584   UINT          startCharCode;  /* First character code covered. */
585   ArrayOf<HBGlyphID, UINT>
586                 glyphIdArray;   /* Array of glyph index values for character
587                                  * codes in the range. */
588   public:
589   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
590 };
591 
/* Format 6: the trimmed-array subtable with 16-bit header fields. */
struct CmapSubtableFormat6  : CmapSubtableTrimmed<HBUINT16> {};
/* Format 10: the trimmed-array subtable with 32-bit header fields. */
struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {};
594 
595 template <typename T>
596 struct CmapSubtableLongSegmented
597 {
598   friend struct cmap;
599 
get_glyphOT::CmapSubtableLongSegmented600   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
601   {
602     hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint);
603     if (!gid)
604       return false;
605     *glyph = gid;
606     return true;
607   }
608 
collect_unicodesOT::CmapSubtableLongSegmented609   void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
610   {
611     for (unsigned int i = 0; i < this->groups.len; i++)
612     {
613       hb_codepoint_t start = this->groups[i].startCharCode;
614       hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode,
615                                    (hb_codepoint_t) HB_UNICODE_MAX);
616       hb_codepoint_t gid = this->groups[i].glyphID;
617       if (!gid)
618       {
619         /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
620         if (! T::group_get_glyph (this->groups[i], end)) continue;
621         start++;
622         gid++;
623       }
624       if (unlikely ((unsigned int) gid >= num_glyphs)) continue;
625       if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs))
626         end = start + (hb_codepoint_t) num_glyphs - gid;
627 
628       out->add_range (start, end);
629     }
630   }
631 
collect_mappingOT::CmapSubtableLongSegmented632   void collect_mapping (hb_set_t *unicodes, /* OUT */
633                         hb_map_t *mapping, /* OUT */
634                         unsigned num_glyphs) const
635   {
636     for (unsigned i = 0; i < this->groups.len; i++)
637     {
638       hb_codepoint_t start = this->groups[i].startCharCode;
639       hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode,
640                                    (hb_codepoint_t) HB_UNICODE_MAX);
641       hb_codepoint_t gid = this->groups[i].glyphID;
642       if (!gid)
643       {
644         /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
645         if (! T::group_get_glyph (this->groups[i], end)) continue;
646         start++;
647         gid++;
648       }
649       if (unlikely ((unsigned int) gid >= num_glyphs)) continue;
650       if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs))
651         end = start + (hb_codepoint_t) num_glyphs - gid;
652 
653       for (unsigned cp = start; cp <= end; cp++)
654       {
655         unicodes->add (cp);
656         mapping->set (cp, gid);
657         gid++;
658       }
659     }
660   }
661 
sanitizeOT::CmapSubtableLongSegmented662   bool sanitize (hb_sanitize_context_t *c) const
663   {
664     TRACE_SANITIZE (this);
665     return_trace (c->check_struct (this) && groups.sanitize (c));
666   }
667 
668   protected:
669   HBUINT16      format;         /* Subtable format; set to 12. */
670   HBUINT16      reserved;       /* Reserved; set to 0. */
671   HBUINT32      length;         /* Byte length of this subtable. */
672   HBUINT32      language;       /* Ignore. */
673   SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
674                 groups;         /* Groupings. */
675   public:
676   DEFINE_SIZE_ARRAY (16, groups);
677 };
678 
679 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
680 {
group_get_glyphOT::CmapSubtableFormat12681   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
682                                          hb_codepoint_t u)
683   { return likely (group.startCharCode <= group.endCharCode) ?
684            group.glyphID + (u - group.startCharCode) : 0; }
685 
686 
687   template<typename Iterator,
688            hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtableFormat12689   void serialize (hb_serialize_context_t *c,
690                   Iterator it)
691   {
692     if (it.len () == 0) return;
693     unsigned table_initpos = c->length ();
694     if (unlikely (!c->extend_min (*this))) return;
695 
696     hb_codepoint_t startCharCode = 0xFFFF, endCharCode = 0xFFFF;
697     hb_codepoint_t glyphID = 0;
698 
699     for (const auto& _ : +it)
700     {
701       if (startCharCode == 0xFFFF)
702       {
703         startCharCode = _.first;
704         endCharCode = _.first;
705         glyphID = _.second;
706       }
707       else if (!_is_gid_consecutive (endCharCode, startCharCode, glyphID, _.first, _.second))
708       {
709         CmapSubtableLongGroup  grouprecord;
710         grouprecord.startCharCode = startCharCode;
711         grouprecord.endCharCode = endCharCode;
712         grouprecord.glyphID = glyphID;
713         c->copy<CmapSubtableLongGroup> (grouprecord);
714 
715         startCharCode = _.first;
716         endCharCode = _.first;
717         glyphID = _.second;
718       }
719       else
720         endCharCode = _.first;
721     }
722 
723     CmapSubtableLongGroup record;
724     record.startCharCode = startCharCode;
725     record.endCharCode = endCharCode;
726     record.glyphID = glyphID;
727     c->copy<CmapSubtableLongGroup> (record);
728 
729     this->format = 12;
730     this->reserved = 0;
731     this->length = c->length () - table_initpos;
732     this->groups.len = (this->length - min_size)/CmapSubtableLongGroup::static_size;
733   }
734 
get_sub_table_sizeOT::CmapSubtableFormat12735   static size_t get_sub_table_size (const hb_sorted_vector_t<CmapSubtableLongGroup> &groups_data)
736   { return 16 + 12 * groups_data.length; }
737 
738   private:
_is_gid_consecutiveOT::CmapSubtableFormat12739   static bool _is_gid_consecutive (hb_codepoint_t endCharCode,
740                                    hb_codepoint_t startCharCode,
741                                    hb_codepoint_t glyphID,
742                                    hb_codepoint_t cp,
743                                    hb_codepoint_t new_gid)
744   {
745     return (cp - 1 == endCharCode) &&
746         new_gid == glyphID + (cp - startCharCode);
747   }
748 
749 };
750 
751 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
752 {
group_get_glyphOT::CmapSubtableFormat13753   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
754                                          hb_codepoint_t u HB_UNUSED)
755   { return group.glyphID; }
756 };
757 
/* Result codes for a glyph-variant lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND   = 0,
  GLYPH_VARIANT_FOUND       = 1,
  GLYPH_VARIANT_USE_DEFAULT = 2
};
764 
765 struct UnicodeValueRange
766 {
cmpOT::UnicodeValueRange767   int cmp (const hb_codepoint_t &codepoint) const
768   {
769     if (codepoint < startUnicodeValue) return -1;
770     if (codepoint > startUnicodeValue + additionalCount) return +1;
771     return 0;
772   }
773 
sanitizeOT::UnicodeValueRange774   bool sanitize (hb_sanitize_context_t *c) const
775   {
776     TRACE_SANITIZE (this);
777     return_trace (c->check_struct (this));
778   }
779 
780   HBUINT24      startUnicodeValue;      /* First value in this range. */
781   HBUINT8       additionalCount;        /* Number of additional values in this
782                                          * range. */
783   public:
784   DEFINE_SIZE_STATIC (4);
785 };
786 
787 struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
788 {
collect_unicodesOT::DefaultUVS789   void collect_unicodes (hb_set_t *out) const
790   {
791     unsigned int count = len;
792     for (unsigned int i = 0; i < count; i++)
793     {
794       hb_codepoint_t first = arrayZ[i].startUnicodeValue;
795       hb_codepoint_t last = hb_min ((hb_codepoint_t) (first + arrayZ[i].additionalCount),
796                                     (hb_codepoint_t) HB_UNICODE_MAX);
797       out->add_range (first, last);
798     }
799   }
800 
copyOT::DefaultUVS801   DefaultUVS* copy (hb_serialize_context_t *c,
802                     const hb_set_t *unicodes) const
803   {
804     DefaultUVS *out = c->start_embed<DefaultUVS> ();
805     if (unlikely (!out)) return nullptr;
806     auto snap = c->snapshot ();
807 
808     HBUINT32 len;
809     len = 0;
810     if (unlikely (!c->copy<HBUINT32> (len))) return nullptr;
811     unsigned init_len = c->length ();
812 
813     hb_codepoint_t lastCode = HB_MAP_VALUE_INVALID;
814     int count = -1;
815 
816     for (const UnicodeValueRange& _ : as_array ())
817     {
818       for (const unsigned addcnt : hb_range ((unsigned) _.additionalCount + 1))
819       {
820         unsigned curEntry = (unsigned) _.startUnicodeValue + addcnt;
821         if (!unicodes->has (curEntry)) continue;
822         count += 1;
823         if (lastCode == HB_MAP_VALUE_INVALID)
824           lastCode = curEntry;
825         else if (lastCode + count != curEntry)
826         {
827           UnicodeValueRange rec;
828           rec.startUnicodeValue = lastCode;
829           rec.additionalCount = count - 1;
830           c->copy<UnicodeValueRange> (rec);
831 
832           lastCode = curEntry;
833           count = 0;
834         }
835       }
836     }
837 
838     if (lastCode != HB_MAP_VALUE_INVALID)
839     {
840       UnicodeValueRange rec;
841       rec.startUnicodeValue = lastCode;
842       rec.additionalCount = count;
843       c->copy<UnicodeValueRange> (rec);
844     }
845 
846     if (c->length () - init_len == 0)
847     {
848       c->revert (snap);
849       return nullptr;
850     }
851     else
852     {
853       if (unlikely (!c->check_assign (out->len, (c->length () - init_len) / UnicodeValueRange::static_size))) return nullptr;
854       return out;
855     }
856   }
857 
858   public:
859   DEFINE_SIZE_ARRAY (4, *this);
860 };
861 
862 struct UVSMapping
863 {
cmpOT::UVSMapping864   int cmp (const hb_codepoint_t &codepoint) const
865   { return unicodeValue.cmp (codepoint); }
866 
sanitizeOT::UVSMapping867   bool sanitize (hb_sanitize_context_t *c) const
868   {
869     TRACE_SANITIZE (this);
870     return_trace (c->check_struct (this));
871   }
872 
873   HBUINT24      unicodeValue;   /* Base Unicode value of the UVS */
874   HBGlyphID     glyphID;        /* Glyph ID of the UVS */
875   public:
876   DEFINE_SIZE_STATIC (5);
877 };
878 
879 struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
880 {
collect_unicodesOT::NonDefaultUVS881   void collect_unicodes (hb_set_t *out) const
882   {
883     unsigned int count = len;
884     for (unsigned int i = 0; i < count; i++)
885       out->add (arrayZ[i].unicodeValue);
886   }
887 
collect_mappingOT::NonDefaultUVS888   void collect_mapping (hb_set_t *unicodes, /* OUT */
889                         hb_map_t *mapping /* OUT */) const
890   {
891     unsigned count = len;
892     for (unsigned i = 0; i < count; i++)
893     {
894       hb_codepoint_t unicode = arrayZ[i].unicodeValue;
895       hb_codepoint_t glyphid = arrayZ[i].glyphID;
896       unicodes->add (unicode);
897       mapping->set (unicode, glyphid);
898     }
899   }
900 
closure_glyphsOT::NonDefaultUVS901   void closure_glyphs (const hb_set_t      *unicodes,
902                        hb_set_t            *glyphset) const
903   {
904     + as_array ()
905     | hb_filter (unicodes, &UVSMapping::unicodeValue)
906     | hb_map (&UVSMapping::glyphID)
907     | hb_sink (glyphset)
908     ;
909   }
910 
copyOT::NonDefaultUVS911   NonDefaultUVS* copy (hb_serialize_context_t *c,
912                        const hb_set_t *unicodes,
913                        const hb_set_t *glyphs_requested,
914                        const hb_map_t *glyph_map) const
915   {
916     NonDefaultUVS *out = c->start_embed<NonDefaultUVS> ();
917     if (unlikely (!out)) return nullptr;
918 
919     auto it =
920     + as_array ()
921     | hb_filter ([&] (const UVSMapping& _)
922                  {
923                    return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID);
924                  })
925     ;
926 
927     if (!it) return nullptr;
928 
929     HBUINT32 len;
930     len = it.len ();
931     if (unlikely (!c->copy<HBUINT32> (len))) return nullptr;
932 
933     for (const UVSMapping& _ : it)
934     {
935       UVSMapping mapping;
936       mapping.unicodeValue = _.unicodeValue;
937       mapping.glyphID = glyph_map->get (_.glyphID);
938       c->copy<UVSMapping> (mapping);
939     }
940 
941     return out;
942   }
943 
944   public:
945   DEFINE_SIZE_ARRAY (4, *this);
946 };
947 
948 struct VariationSelectorRecord
949 {
get_glyphOT::VariationSelectorRecord950   glyph_variant_t get_glyph (hb_codepoint_t codepoint,
951                              hb_codepoint_t *glyph,
952                              const void *base) const
953   {
954     if ((base+defaultUVS).bfind (codepoint))
955       return GLYPH_VARIANT_USE_DEFAULT;
956     const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint);
957     if (nonDefault.glyphID)
958     {
959       *glyph = nonDefault.glyphID;
960        return GLYPH_VARIANT_FOUND;
961     }
962     return GLYPH_VARIANT_NOT_FOUND;
963   }
964 
VariationSelectorRecordOT::VariationSelectorRecord965   VariationSelectorRecord(const VariationSelectorRecord& other)
966   {
967     *this = other;
968   }
969 
operator =OT::VariationSelectorRecord970   void operator= (const VariationSelectorRecord& other)
971   {
972     varSelector = other.varSelector;
973     HBUINT32 offset = other.defaultUVS;
974     defaultUVS = offset;
975     offset = other.nonDefaultUVS;
976     nonDefaultUVS = offset;
977   }
978 
collect_unicodesOT::VariationSelectorRecord979   void collect_unicodes (hb_set_t *out, const void *base) const
980   {
981     (base+defaultUVS).collect_unicodes (out);
982     (base+nonDefaultUVS).collect_unicodes (out);
983   }
984 
collect_mappingOT::VariationSelectorRecord985   void collect_mapping (const void *base,
986                         hb_set_t *unicodes, /* OUT */
987                         hb_map_t *mapping /* OUT */) const
988   {
989     (base+defaultUVS).collect_unicodes (unicodes);
990     (base+nonDefaultUVS).collect_mapping (unicodes, mapping);
991   }
992 
cmpOT::VariationSelectorRecord993   int cmp (const hb_codepoint_t &variation_selector) const
994   { return varSelector.cmp (variation_selector); }
995 
sanitizeOT::VariationSelectorRecord996   bool sanitize (hb_sanitize_context_t *c, const void *base) const
997   {
998     TRACE_SANITIZE (this);
999     return_trace (c->check_struct (this) &&
1000                   defaultUVS.sanitize (c, base) &&
1001                   nonDefaultUVS.sanitize (c, base));
1002   }
1003 
1004   hb_pair_t<unsigned, unsigned>
copyOT::VariationSelectorRecord1005   copy (hb_serialize_context_t *c,
1006         const hb_set_t *unicodes,
1007         const hb_set_t *glyphs_requested,
1008         const hb_map_t *glyph_map,
1009         const void *base) const
1010   {
1011     auto snap = c->snapshot ();
1012     auto *out = c->embed<VariationSelectorRecord> (*this);
1013     if (unlikely (!out)) return hb_pair (0, 0);
1014 
1015     out->defaultUVS = 0;
1016     out->nonDefaultUVS = 0;
1017 
1018     unsigned non_default_uvs_objidx = 0;
1019     if (nonDefaultUVS != 0)
1020     {
1021       c->push ();
1022       if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map))
1023         non_default_uvs_objidx = c->pop_pack ();
1024       else c->pop_discard ();
1025     }
1026 
1027     unsigned default_uvs_objidx = 0;
1028     if (defaultUVS != 0)
1029     {
1030       c->push ();
1031       if (c->copy (base+defaultUVS, unicodes))
1032         default_uvs_objidx = c->pop_pack ();
1033       else c->pop_discard ();
1034     }
1035 
1036 
1037     if (!default_uvs_objidx && !non_default_uvs_objidx)
1038       c->revert (snap);
1039 
1040     return hb_pair (default_uvs_objidx, non_default_uvs_objidx);
1041   }
1042 
1043   HBUINT24      varSelector;    /* Variation selector. */
1044   LOffsetTo<DefaultUVS>
1045                 defaultUVS;     /* Offset to Default UVS Table.  May be 0. */
1046   LOffsetTo<NonDefaultUVS>
1047                 nonDefaultUVS;  /* Offset to Non-Default UVS Table.  May be 0. */
1048   public:
1049   DEFINE_SIZE_STATIC (11);
1050 };
1051 
/* cmap subtable format 14: a sorted array of variation selector records,
 * each pointing at a default and/or non-default UVS table. */
struct CmapSubtableFormat14
{
  /* Finds the record for `variation_selector` and asks it to resolve
   * `codepoint`; offsets inside the record are relative to this subtable. */
  glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
                                     hb_codepoint_t variation_selector,
                                     hb_codepoint_t *glyph) const
  { return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); }

  /* Adds the varSelector of every record to `out`. */
  void collect_variation_selectors (hb_set_t *out) const
  {
    unsigned int count = record.len;
    for (unsigned int i = 0; i < count; i++)
      out->add (record.arrayZ[i].varSelector);
  }
  /* Adds to `out` the Unicode values listed for `variation_selector`. */
  void collect_variation_unicodes (hb_codepoint_t variation_selector,
                                   hb_set_t *out) const
  { record.bsearch (variation_selector).collect_unicodes (out, this); }

  /* Serializes a subsetted format 14 subtable into `c`.
   * `base` is reinterpreted as the source CmapSubtableFormat14 whose records
   * are copied; `unicodes`, `glyphs_requested` and `glyph_map` are forwarded
   * to each record's copy().  If no record survives, everything written here
   * is reverted. */
  void serialize (hb_serialize_context_t *c,
                  const hb_set_t *unicodes,
                  const hb_set_t *glyphs_requested,
                  const hb_map_t *glyph_map,
                  const void *base)
  {
    auto snap = c->snapshot ();
    unsigned table_initpos = c->length ();
    const char* init_tail = c->tail;

    if (unlikely (!c->extend_min (*this))) return;
    this->format = 14;

    auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base);

    /*
     * Some versions of OTS require that offsets are in order. Due to the use
     * of push()/pop_pack() serializing the variation records in order results
     * in the offsets being in reverse order (first record has the largest
     * offset). While this is perfectly valid, it will cause some versions of
     * OTS to consider this table bad.
     *
     * So to prevent this issue we serialize the variation records in reverse
     * order, so that the offsets are ordered from small to large. Since
     * variation records are supposed to be in increasing order of varSelector
     * we then have to reverse the order of the written variation selector
     * records after everything is finalized.
     */
    hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices;
    for (int i = src_tbl->record.len - 1; i >= 0; i--)
    {
      hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base);
      /* Records for which neither UVS table was emitted are not tracked. */
      if (result.first || result.second)
        obj_indices.push (result);
    }

    /* Only the minimal header was written, i.e. no record was kept. */
    if (c->length () - table_initpos == CmapSubtableFormat14::min_size)
    {
      c->revert (snap);
      return;
    }

    if (unlikely (!c->check_success (!obj_indices.in_error ())))
      return;

    /* The length covers the bytes written since table_initpos plus however
     * far c->tail has moved down from init_tail. */
    int tail_len = init_tail - c->tail;
    c->check_assign (this->length, c->length () - table_initpos + tail_len);
    c->check_assign (this->record.len,
                     (c->length () - table_initpos - CmapSubtableFormat14::min_size) /
                     VariationSelectorRecord::static_size);

    /* Correct the incorrect write order by reversing the order of the variation
       records array. */
    _reverse_variation_records ();

    /* Now that records are in the right order, we can set up the offsets. */
    _add_links_to_variation_records (c, obj_indices);
  }

  /* Reverses the record array in place. */
  void _reverse_variation_records ()
  {
    record.as_array ().reverse ();
  }

  /* Registers, for every entry of `obj_indices`, links from the matching
   * record's defaultUVS / nonDefaultUVS offsets to the serialized objects. */
  void _add_links_to_variation_records (hb_serialize_context_t *c,
                                        const hb_vector_t<hb_pair_t<unsigned, unsigned>>& obj_indices)
  {
    for (unsigned i = 0; i < obj_indices.length; i++)
    {
      /*
       * Since the record array has been reversed (see comments in serialize())
       * but obj_indices has not been, the indices at obj_indices[i]
       * are for the variation record at record[j].
       */
      int j = obj_indices.length - 1 - i;
      c->add_link (record[j].defaultUVS, obj_indices[i].first);
      c->add_link (record[j].nonDefaultUVS, obj_indices[i].second);
    }
  }

  /* For every record that has a non-default UVS table, lets that table add
   * the glyphs reachable from `unicodes` to `glyphset`. */
  void closure_glyphs (const hb_set_t      *unicodes,
                       hb_set_t            *glyphset) const
  {
    + hb_iter (record)
    | hb_filter (hb_bool, &VariationSelectorRecord::nonDefaultUVS)
    | hb_map (&VariationSelectorRecord::nonDefaultUVS)
    | hb_map (hb_add (this))
    | hb_apply ([=] (const NonDefaultUVS& _) { _.closure_glyphs (unicodes, glyphset); })
    ;
  }

  /* Collects the Unicode values of all records into `out`. */
  void collect_unicodes (hb_set_t *out) const
  {
    for (const VariationSelectorRecord& _ : record)
      _.collect_unicodes (out, this);
  }

  /* Collects Unicode values and unicode -> glyph mappings of all records. */
  void collect_mapping (hb_set_t *unicodes, /* OUT */
                        hb_map_t *mapping /* OUT */) const
  {
    for (const VariationSelectorRecord& _ : record)
      _.collect_mapping (this, unicodes, mapping);
  }

  /* Validates the header and every record, with record offsets resolved
   * relative to this subtable. */
  bool sanitize (hb_sanitize_context_t *c) const
  {
    TRACE_SANITIZE (this);
    return_trace (c->check_struct (this) &&
                  record.sanitize (c, this));
  }

  protected:
  HBUINT16      format;         /* Format number is set to 14. */
  HBUINT32      length;         /* Byte length of this subtable. */
  SortedArrayOf<VariationSelectorRecord, HBUINT32>
                record;         /* Variation selector records; sorted
                                 * in increasing order of `varSelector'. */
  public:
  DEFINE_SIZE_ARRAY (10, record);
};
1189 
1190 struct CmapSubtable
1191 {
1192   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
1193 
get_glyphOT::CmapSubtable1194   bool get_glyph (hb_codepoint_t codepoint,
1195                   hb_codepoint_t *glyph) const
1196   {
1197     switch (u.format) {
1198     case  0: return u.format0 .get_glyph (codepoint, glyph);
1199     case  4: return u.format4 .get_glyph (codepoint, glyph);
1200     case  6: return u.format6 .get_glyph (codepoint, glyph);
1201     case 10: return u.format10.get_glyph (codepoint, glyph);
1202     case 12: return u.format12.get_glyph (codepoint, glyph);
1203     case 13: return u.format13.get_glyph (codepoint, glyph);
1204     case 14:
1205     default: return false;
1206     }
1207   }
collect_unicodesOT::CmapSubtable1208   void collect_unicodes (hb_set_t *out, unsigned int num_glyphs = UINT_MAX) const
1209   {
1210     switch (u.format) {
1211     case  0: u.format0 .collect_unicodes (out); return;
1212     case  4: u.format4 .collect_unicodes (out); return;
1213     case  6: u.format6 .collect_unicodes (out); return;
1214     case 10: u.format10.collect_unicodes (out); return;
1215     case 12: u.format12.collect_unicodes (out, num_glyphs); return;
1216     case 13: u.format13.collect_unicodes (out, num_glyphs); return;
1217     case 14:
1218     default: return;
1219     }
1220   }
1221 
collect_mappingOT::CmapSubtable1222   void collect_mapping (hb_set_t *unicodes, /* OUT */
1223                         hb_map_t *mapping, /* OUT */
1224                         unsigned num_glyphs = UINT_MAX) const
1225   {
1226     switch (u.format) {
1227     case  0: u.format0 .collect_mapping (unicodes, mapping); return;
1228     case  4: u.format4 .collect_mapping (unicodes, mapping); return;
1229     case  6: u.format6 .collect_mapping (unicodes, mapping); return;
1230     case 10: u.format10.collect_mapping (unicodes, mapping); return;
1231     case 12: u.format12.collect_mapping (unicodes, mapping, num_glyphs); return;
1232     case 13: u.format13.collect_mapping (unicodes, mapping, num_glyphs); return;
1233     case 14:
1234     default: return;
1235     }
1236   }
1237 
1238   template<typename Iterator,
1239            hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtable1240   void serialize (hb_serialize_context_t *c,
1241                   Iterator it,
1242                   unsigned format,
1243                   const hb_subset_plan_t *plan,
1244                   const void *base)
1245   {
1246     switch (format) {
1247     case  4: return u.format4.serialize (c, it);
1248     case 12: return u.format12.serialize (c, it);
1249     case 14: return u.format14.serialize (c, plan->unicodes, plan->glyphs_requested, plan->glyph_map, base);
1250     default: return;
1251     }
1252   }
1253 
sanitizeOT::CmapSubtable1254   bool sanitize (hb_sanitize_context_t *c) const
1255   {
1256     TRACE_SANITIZE (this);
1257     if (!u.format.sanitize (c)) return_trace (false);
1258     switch (u.format) {
1259     case  0: return_trace (u.format0 .sanitize (c));
1260     case  4: return_trace (u.format4 .sanitize (c));
1261     case  6: return_trace (u.format6 .sanitize (c));
1262     case 10: return_trace (u.format10.sanitize (c));
1263     case 12: return_trace (u.format12.sanitize (c));
1264     case 13: return_trace (u.format13.sanitize (c));
1265     case 14: return_trace (u.format14.sanitize (c));
1266     default:return_trace (true);
1267     }
1268   }
1269 
1270   public:
1271   union {
1272   HBUINT16              format;         /* Format identifier */
1273   CmapSubtableFormat0   format0;
1274   CmapSubtableFormat4   format4;
1275   CmapSubtableFormat6   format6;
1276   CmapSubtableFormat10  format10;
1277   CmapSubtableFormat12  format12;
1278   CmapSubtableFormat13  format13;
1279   CmapSubtableFormat14  format14;
1280   } u;
1281   public:
1282   DEFINE_SIZE_UNION (2, format);
1283 };
1284 
1285 
1286 struct EncodingRecord
1287 {
cmpOT::EncodingRecord1288   int cmp (const EncodingRecord &other) const
1289   {
1290     int ret;
1291     ret = platformID.cmp (other.platformID);
1292     if (ret) return ret;
1293     ret = encodingID.cmp (other.encodingID);
1294     if (ret) return ret;
1295     return 0;
1296   }
1297 
sanitizeOT::EncodingRecord1298   bool sanitize (hb_sanitize_context_t *c, const void *base) const
1299   {
1300     TRACE_SANITIZE (this);
1301     return_trace (c->check_struct (this) &&
1302                   subtable.sanitize (c, base));
1303   }
1304 
1305   template<typename Iterator,
1306            hb_requires (hb_is_iterator (Iterator))>
copyOT::EncodingRecord1307   EncodingRecord* copy (hb_serialize_context_t *c,
1308                         Iterator it,
1309                         unsigned format,
1310                         const void *base,
1311                         const hb_subset_plan_t *plan,
1312                         /* INOUT */ unsigned *objidx) const
1313   {
1314     TRACE_SERIALIZE (this);
1315     auto snap = c->snapshot ();
1316     auto *out = c->embed (this);
1317     if (unlikely (!out)) return_trace (nullptr);
1318     out->subtable = 0;
1319 
1320     if (*objidx == 0)
1321     {
1322       CmapSubtable *cmapsubtable = c->push<CmapSubtable> ();
1323       unsigned origin_length = c->length ();
1324       cmapsubtable->serialize (c, it, format, plan, &(base+subtable));
1325       if (c->length () - origin_length > 0) *objidx = c->pop_pack ();
1326       else c->pop_discard ();
1327     }
1328 
1329     if (*objidx == 0)
1330     {
1331       c->revert (snap);
1332       return_trace (nullptr);
1333     }
1334 
1335     c->add_link (out->subtable, *objidx);
1336     return_trace (out);
1337   }
1338 
1339   HBUINT16      platformID;     /* Platform ID. */
1340   HBUINT16      encodingID;     /* Platform-specific encoding ID. */
1341   LOffsetTo<CmapSubtable>
1342                 subtable;       /* Byte offset from beginning of table to the subtable for this encoding. */
1343   public:
1344   DEFINE_SIZE_STATIC (8);
1345 };
1346 
1347 struct cmap
1348 {
1349   static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
1350 
  /* Serializes a subsetted cmap table.
   *
   * `it` yields (codepoint, new glyph id) pairs for the requested unicodes,
   * `encodingrec_iter` yields the source encoding records to keep, `base` is
   * the base against which their subtable offsets are resolved, and `plan`
   * is the subset plan.  Only records whose subtable is format 4, 12 or 14
   * are copied. */
  template<typename Iterator, typename EncodingRecIter,
           hb_requires (hb_is_iterator (EncodingRecIter))>
  void serialize (hb_serialize_context_t *c,
                  Iterator it,
                  EncodingRecIter encodingrec_iter,
                  const void *base,
                  const hb_subset_plan_t *plan)
  {
    if (unlikely (!c->extend_min ((*this))))  return;
    this->version = 0;

    /* One object index per format, shared by all encoding records: the
     * record copy only serializes a subtable while the index is still 0 and
     * otherwise links to the already serialized object. */
    unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0;

    for (const EncodingRecord& _ : encodingrec_iter)
    {
      unsigned format = (base+_.subtable).u.format;
      if (!plan->glyphs_requested->is_empty ())
      {
        /* Glyphs were requested explicitly, so walk the subtable's own
         * mapping rather than only the requested unicodes. */
        hb_set_t unicodes_set;
        hb_map_t cp_glyphid_map;
        (base+_.subtable).collect_mapping (&unicodes_set, &cp_glyphid_map);

        /* (codepoint, old gid) pairs, restricted to glyphs in the plan's
         * glyph set and to entries requested by unicode or by glyph, then
         * translated to (codepoint, new gid). */
        auto table_iter =
        + hb_zip (unicodes_set.iter(), unicodes_set.iter() | hb_map(cp_glyphid_map))
        | hb_filter (plan->_glyphset, hb_second)
        | hb_filter ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p)
                     {
                       return plan->unicodes->has (p.first) ||
                              plan->glyphs_requested->has (p.second);
                     })
        | hb_map ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p_org)
                  {
                    return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (p_org.first, plan->glyph_map->get(p_org.second));
                  })
        ;

        if (format == 4) c->copy (_, table_iter, 4u, base, plan, &format4objidx);
        else if (format == 12) c->copy (_, table_iter, 12u, base, plan, &format12objidx);
        else if (format == 14) c->copy (_, table_iter, 14u, base, plan, &format14objidx);
      }
      /* when --gids option is not used, we iterate input unicodes instead of
       * all codepoints in each subtable, which is more efficient */
      else
      {
        hb_set_t unicodes_set;
        (base+_.subtable).collect_unicodes (&unicodes_set);

        /* Formats 4 and 12 only receive the pairs whose codepoint this
         * subtable actually covers; format 14 receives `it` unfiltered. */
        if (format == 4) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
        else if (format == 12) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
        else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
      }
    }

    /* The record count is derived from the bytes written after the header. */
    c->check_assign(this->encodingRecord.len, (c->length () - cmap::min_size)/EncodingRecord::static_size);
  }
1406 
closure_glyphsOT::cmap1407   void closure_glyphs (const hb_set_t      *unicodes,
1408                        hb_set_t            *glyphset) const
1409   {
1410     + hb_iter (encodingRecord)
1411     | hb_map (&EncodingRecord::subtable)
1412     | hb_map (hb_add (this))
1413     | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == 14; })
1414     | hb_apply ([=] (const CmapSubtable& _) { _.u.format14.closure_glyphs (unicodes, glyphset); })
1415     ;
1416   }
1417 
subsetOT::cmap1418   bool subset (hb_subset_context_t *c) const
1419   {
1420     TRACE_SUBSET (this);
1421 
1422     cmap *cmap_prime = c->serializer->start_embed<cmap> ();
1423     if (unlikely (!c->serializer->check_success (cmap_prime))) return_trace (false);
1424 
1425     auto encodingrec_iter =
1426     + hb_iter (encodingRecord)
1427     | hb_filter ([&] (const EncodingRecord& _)
1428                 {
1429                   if ((_.platformID == 0 && _.encodingID == 3) ||
1430                       (_.platformID == 0 && _.encodingID == 4) ||
1431                       (_.platformID == 3 && _.encodingID == 1) ||
1432                       (_.platformID == 3 && _.encodingID == 10) ||
1433                       (this + _.subtable).u.format == 14)
1434                     return true;
1435 
1436                   return false;
1437                 })
1438     ;
1439 
1440     if (unlikely (!encodingrec_iter.len ())) return_trace (false);
1441 
1442     const EncodingRecord *unicode_bmp= nullptr, *unicode_ucs4 = nullptr, *ms_bmp = nullptr, *ms_ucs4 = nullptr;
1443     bool has_format12 = false;
1444 
1445     for (const EncodingRecord& _ : encodingrec_iter)
1446     {
1447       unsigned format = (this + _.subtable).u.format;
1448       if (format == 12) has_format12 = true;
1449 
1450       const EncodingRecord *table = hb_addressof (_);
1451       if      (_.platformID == 0 && _.encodingID ==  3) unicode_bmp = table;
1452       else if (_.platformID == 0 && _.encodingID ==  4) unicode_ucs4 = table;
1453       else if (_.platformID == 3 && _.encodingID ==  1) ms_bmp = table;
1454       else if (_.platformID == 3 && _.encodingID == 10) ms_ucs4 = table;
1455     }
1456 
1457     if (unlikely (!has_format12 && !unicode_bmp && !ms_bmp)) return_trace (false);
1458     if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false);
1459 
1460     auto it =
1461     + hb_iter (c->plan->unicodes)
1462     | hb_map ([&] (hb_codepoint_t _)
1463               {
1464                 hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID;
1465                 c->plan->new_gid_for_codepoint (_, &new_gid);
1466                 return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid);
1467               })
1468     | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
1469                  { return (_.second != HB_MAP_VALUE_INVALID); })
1470     ;
1471     cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan);
1472     return_trace (true);
1473   }
1474 
find_best_subtableOT::cmap1475   const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
1476   {
1477     if (symbol) *symbol = false;
1478 
1479     const CmapSubtable *subtable;
1480 
1481     /* Symbol subtable.
1482      * Prefer symbol if available.
1483      * https://github.com/harfbuzz/harfbuzz/issues/1918 */
1484     if ((subtable = this->find_subtable (3, 0)))
1485     {
1486       if (symbol) *symbol = true;
1487       return subtable;
1488     }
1489 
1490     /* 32-bit subtables. */
1491     if ((subtable = this->find_subtable (3, 10))) return subtable;
1492     if ((subtable = this->find_subtable (0, 6))) return subtable;
1493     if ((subtable = this->find_subtable (0, 4))) return subtable;
1494 
1495     /* 16-bit subtables. */
1496     if ((subtable = this->find_subtable (3, 1))) return subtable;
1497     if ((subtable = this->find_subtable (0, 3))) return subtable;
1498     if ((subtable = this->find_subtable (0, 2))) return subtable;
1499     if ((subtable = this->find_subtable (0, 1))) return subtable;
1500     if ((subtable = this->find_subtable (0, 0))) return subtable;
1501 
1502     /* Meh. */
1503     return &Null (CmapSubtable);
1504   }
1505 
1506   struct accelerator_t
1507   {
initOT::cmap::accelerator_t1508     void init (hb_face_t *face)
1509     {
1510       this->table = hb_sanitize_context_t ().reference_table<cmap> (face);
1511       bool symbol;
1512       this->subtable = table->find_best_subtable (&symbol);
1513       this->subtable_uvs = &Null (CmapSubtableFormat14);
1514       {
1515         const CmapSubtable *st = table->find_subtable (0, 5);
1516         if (st && st->u.format == 14)
1517           subtable_uvs = &st->u.format14;
1518       }
1519 
1520       this->get_glyph_data = subtable;
1521       if (unlikely (symbol))
1522         this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>;
1523       else
1524       {
1525         switch (subtable->u.format) {
1526         /* Accelerate format 4 and format 12. */
1527         default:
1528           this->get_glyph_funcZ = get_glyph_from<CmapSubtable>;
1529           break;
1530         case 12:
1531           this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>;
1532           break;
1533         case  4:
1534         {
1535           this->format4_accel.init (&subtable->u.format4);
1536           this->get_glyph_data = &this->format4_accel;
1537           this->get_glyph_funcZ = this->format4_accel.get_glyph_func;
1538           break;
1539         }
1540         }
1541       }
1542     }
1543 
finiOT::cmap::accelerator_t1544     void fini () { this->table.destroy (); }
1545 
get_nominal_glyphOT::cmap::accelerator_t1546     bool get_nominal_glyph (hb_codepoint_t  unicode,
1547                             hb_codepoint_t *glyph) const
1548     {
1549       if (unlikely (!this->get_glyph_funcZ)) return false;
1550       return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph);
1551     }
get_nominal_glyphsOT::cmap::accelerator_t1552     unsigned int get_nominal_glyphs (unsigned int count,
1553                                      const hb_codepoint_t *first_unicode,
1554                                      unsigned int unicode_stride,
1555                                      hb_codepoint_t *first_glyph,
1556                                      unsigned int glyph_stride) const
1557     {
1558       if (unlikely (!this->get_glyph_funcZ)) return 0;
1559 
1560       hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ;
1561       const void *get_glyph_data = this->get_glyph_data;
1562 
1563       unsigned int done;
1564       for (done = 0;
1565            done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph);
1566            done++)
1567       {
1568         first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride);
1569         first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
1570       }
1571       return done;
1572     }
1573 
get_variation_glyphOT::cmap::accelerator_t1574     bool get_variation_glyph (hb_codepoint_t  unicode,
1575                               hb_codepoint_t  variation_selector,
1576                               hb_codepoint_t *glyph) const
1577     {
1578       switch (this->subtable_uvs->get_glyph_variant (unicode,
1579                                                      variation_selector,
1580                                                      glyph))
1581       {
1582         case GLYPH_VARIANT_NOT_FOUND:   return false;
1583         case GLYPH_VARIANT_FOUND:       return true;
1584         case GLYPH_VARIANT_USE_DEFAULT: break;
1585       }
1586 
1587       return get_nominal_glyph (unicode, glyph);
1588     }
1589 
collect_unicodesOT::cmap::accelerator_t1590     void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
1591     { subtable->collect_unicodes (out, num_glyphs); }
collect_mappingOT::cmap::accelerator_t1592     void collect_mapping (hb_set_t *unicodes, hb_map_t *mapping,
1593                           unsigned num_glyphs = UINT_MAX) const
1594     { subtable->collect_mapping (unicodes, mapping, num_glyphs); }
collect_variation_selectorsOT::cmap::accelerator_t1595     void collect_variation_selectors (hb_set_t *out) const
1596     { subtable_uvs->collect_variation_selectors (out); }
collect_variation_unicodesOT::cmap::accelerator_t1597     void collect_variation_unicodes (hb_codepoint_t variation_selector,
1598                                      hb_set_t *out) const
1599     { subtable_uvs->collect_variation_unicodes (variation_selector, out); }
1600 
1601     protected:
1602     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
1603                                               hb_codepoint_t codepoint,
1604                                               hb_codepoint_t *glyph);
1605 
1606     template <typename Type>
get_glyph_fromOT::cmap::accelerator_t1607     HB_INTERNAL static bool get_glyph_from (const void *obj,
1608                                             hb_codepoint_t codepoint,
1609                                             hb_codepoint_t *glyph)
1610     {
1611       const Type *typed_obj = (const Type *) obj;
1612       return typed_obj->get_glyph (codepoint, glyph);
1613     }
1614 
1615     template <typename Type>
get_glyph_from_symbolOT::cmap::accelerator_t1616     HB_INTERNAL static bool get_glyph_from_symbol (const void *obj,
1617                                                    hb_codepoint_t codepoint,
1618                                                    hb_codepoint_t *glyph)
1619     {
1620       const Type *typed_obj = (const Type *) obj;
1621       if (likely (typed_obj->get_glyph (codepoint, glyph)))
1622         return true;
1623 
1624       if (codepoint <= 0x00FFu)
1625       {
1626         /* For symbol-encoded OpenType fonts, we duplicate the
1627          * U+F000..F0FF range at U+0000..U+00FF.  That's what
1628          * Windows seems to do, and that's hinted about at:
1629          * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
1630          * under "Non-Standard (Symbol) Fonts". */
1631         return typed_obj->get_glyph (0xF000u + codepoint, glyph);
1632       }
1633 
1634       return false;
1635     }
1636 
1637     private:
         /* CmapSubtable this accelerator works with.
          * NOTE(review): where it is assigned is outside this chunk. */
1638     hb_nonnull_ptr_t<const CmapSubtable> subtable;
         /* Format 14 subtable; collect_variation_unicodes () forwards to it. */
1639     hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs;
1640 
         /* Type-erased lookup callback (see hb_cmap_get_glyph_func_t) and,
          * presumably, the object handed to it as `obj` -- confirm at the
          * call site, which is outside this chunk. */
1641     hb_cmap_get_glyph_func_t get_glyph_funcZ;
1642     const void *get_glyph_data;
1643 
         /* Accelerator state for CmapSubtableFormat4. */
1644     CmapSubtableFormat4::accelerator_t format4_accel;
1645 
1646     public:
         /* Blob pointer to the cmap table; public, unlike the members above. */
1647     hb_blob_ptr_t<cmap> table;
1648   };
1649 
1650   protected:
1651 
find_subtableOT::cmap1652   const CmapSubtable *find_subtable (unsigned int platform_id,
1653                                      unsigned int encoding_id) const
1654   {
1655     EncodingRecord key;
1656     key.platformID = platform_id;
1657     key.encodingID = encoding_id;
1658 
1659     const EncodingRecord &result = encodingRecord.bsearch (key);
1660     if (!result.subtable)
1661       return nullptr;
1662 
1663     return &(this+result.subtable);
1664   }
1665 
find_encodingrecOT::cmap1666   const EncodingRecord *find_encodingrec (unsigned int platform_id,
1667                                           unsigned int encoding_id) const
1668   {
1669     EncodingRecord key;
1670     key.platformID = platform_id;
1671     key.encodingID = encoding_id;
1672 
1673     return encodingRecord.as_array ().bsearch (key);
1674   }
1675 
find_subtableOT::cmap1676   bool find_subtable (unsigned format) const
1677   {
1678     auto it =
1679     + hb_iter (encodingRecord)
1680     | hb_map (&EncodingRecord::subtable)
1681     | hb_map (hb_add (this))
1682     | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == format; })
1683     ;
1684 
1685     return it.len ();
1686   }
1687 
1688   public:
1689 
sanitizeOT::cmap1690   bool sanitize (hb_sanitize_context_t *c) const
1691   {
1692     TRACE_SANITIZE (this);
1693     return_trace (c->check_struct (this) &&
1694                   likely (version == 0) &&
1695                   encodingRecord.sanitize (c, this));
1696   }
1697 
1698   protected:
       /* sanitize () rejects the table unless this is 0. */
1699   HBUINT16      version;        /* Table version number (0). */
       /* Searched by find_subtable () / find_encodingrec () with a key
        * carrying platformID and encodingID. */
1700   SortedArrayOf<EncodingRecord>
1701                 encodingRecord; /* Encoding tables. */
1702   public:
       /* NOTE(review): 4 is presumably the byte size of the fixed part
        * (2-byte version + 2-byte array count) -- confirm against the
        * DEFINE_SIZE_ARRAY definition. */
1703   DEFINE_SIZE_ARRAY (4, encodingRecord);
1704 };
1705 
1706 struct cmap_accelerator_t : cmap::accelerator_t {};
1707 
1708 } /* namespace OT */
1709 
1710 
1711 #endif /* HB_OT_CMAP_TABLE_HH */
1712