1 /*
2  * Copyright © 2014  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_OT_CMAP_TABLE_HH
28 #define HB_OT_CMAP_TABLE_HH
29 
30 #include "hb-open-type.hh"
31 #include "hb-set.hh"
32 
33 /*
34  * cmap -- Character to Glyph Index Mapping
35  * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
36  */
37 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
38 
39 namespace OT {
40 
41 
42 struct CmapSubtableFormat0
43 {
get_glyphOT::CmapSubtableFormat044   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
45   {
46     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
47     if (!gid)
48       return false;
49     *glyph = gid;
50     return true;
51   }
collect_unicodesOT::CmapSubtableFormat052   void collect_unicodes (hb_set_t *out) const
53   {
54     for (unsigned int i = 0; i < 256; i++)
55       if (glyphIdArray[i])
56 	out->add (i);
57   }
58 
collect_mappingOT::CmapSubtableFormat059   void collect_mapping (hb_set_t *unicodes, /* OUT */
60 			hb_map_t *mapping /* OUT */) const
61   {
62     for (unsigned i = 0; i < 256; i++)
63       if (glyphIdArray[i])
64       {
65 	hb_codepoint_t glyph = glyphIdArray[i];
66 	unicodes->add (i);
67 	mapping->set (i, glyph);
68       }
69   }
70 
sanitizeOT::CmapSubtableFormat071   bool sanitize (hb_sanitize_context_t *c) const
72   {
73     TRACE_SANITIZE (this);
74     return_trace (c->check_struct (this));
75   }
76 
77   protected:
78   HBUINT16	format;		/* Format number is set to 0. */
79   HBUINT16	length;		/* Byte length of this subtable. */
80   HBUINT16	language;	/* Ignore. */
81   HBUINT8	glyphIdArray[256];/* An array that maps character
82 				 * code to glyph index values. */
83   public:
84   DEFINE_SIZE_STATIC (6 + 256);
85 };
86 
87 struct CmapSubtableFormat4
88 {
89 
90   template<typename Iterator,
91 	   hb_requires (hb_is_iterator (Iterator))>
serialize_endcode_arrayOT::CmapSubtableFormat492   HBUINT16* serialize_endcode_array (hb_serialize_context_t *c,
93 				     Iterator it)
94   {
95     HBUINT16 *endCode = c->start_embed<HBUINT16> ();
96     hb_codepoint_t prev_endcp = 0xFFFF;
97 
98     for (const auto& _ : +it)
99     {
100       if (prev_endcp != 0xFFFF && prev_endcp + 1u != _.first)
101       {
102 	HBUINT16 end_code;
103 	end_code = prev_endcp;
104 	c->copy<HBUINT16> (end_code);
105       }
106       prev_endcp = _.first;
107     }
108 
109     {
110       // last endCode
111       HBUINT16 endcode;
112       endcode = prev_endcp;
113       if (unlikely (!c->copy<HBUINT16> (endcode))) return nullptr;
114       // There must be a final entry with end_code == 0xFFFF.
115       if (prev_endcp != 0xFFFF)
116       {
117 	HBUINT16 finalcode;
118 	finalcode = 0xFFFF;
119 	if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
120       }
121     }
122 
123     return endCode;
124   }
125 
126   template<typename Iterator,
127 	   hb_requires (hb_is_iterator (Iterator))>
serialize_startcode_arrayOT::CmapSubtableFormat4128   HBUINT16* serialize_startcode_array (hb_serialize_context_t *c,
129 				       Iterator it)
130   {
131     HBUINT16 *startCode = c->start_embed<HBUINT16> ();
132     hb_codepoint_t prev_cp = 0xFFFF;
133 
134     for (const auto& _ : +it)
135     {
136       if (prev_cp == 0xFFFF || prev_cp + 1u != _.first)
137       {
138 	HBUINT16 start_code;
139 	start_code = _.first;
140 	c->copy<HBUINT16> (start_code);
141       }
142 
143       prev_cp = _.first;
144     }
145 
146     // There must be a final entry with end_code == 0xFFFF.
147     if (it.len () == 0 || prev_cp != 0xFFFF)
148     {
149       HBUINT16 finalcode;
150       finalcode = 0xFFFF;
151       if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
152     }
153 
154     return startCode;
155   }
156 
157   template<typename Iterator,
158 	   hb_requires (hb_is_iterator (Iterator))>
serialize_idDelta_arrayOT::CmapSubtableFormat4159   HBINT16* serialize_idDelta_array (hb_serialize_context_t *c,
160 				    Iterator it,
161 				    HBUINT16 *endCode,
162 				    HBUINT16 *startCode,
163 				    unsigned segcount)
164   {
165     unsigned i = 0;
166     hb_codepoint_t last_gid = 0, start_gid = 0, last_cp = 0xFFFF;
167     bool use_delta = true;
168 
169     HBINT16 *idDelta = c->start_embed<HBINT16> ();
170     if ((char *)idDelta - (char *)startCode != (int) segcount * (int) HBINT16::static_size)
171       return nullptr;
172 
173     for (const auto& _ : +it)
174     {
175       if (_.first == startCode[i])
176       {
177 	use_delta = true;
178 	start_gid = _.second;
179       }
180       else if (_.second != last_gid + 1) use_delta = false;
181 
182       if (_.first == endCode[i])
183       {
184 	HBINT16 delta;
185 	if (use_delta) delta = (int)start_gid - (int)startCode[i];
186 	else delta = 0;
187 	c->copy<HBINT16> (delta);
188 
189 	i++;
190       }
191 
192       last_gid = _.second;
193       last_cp = _.first;
194     }
195 
196     if (it.len () == 0 || last_cp != 0xFFFF)
197     {
198       HBINT16 delta;
199       delta = 1;
200       if (unlikely (!c->copy<HBINT16> (delta))) return nullptr;
201     }
202 
203     return idDelta;
204   }
205 
206   template<typename Iterator,
207 	   hb_requires (hb_is_iterator (Iterator))>
serialize_rangeoffset_glyidOT::CmapSubtableFormat4208   HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c,
209 					 Iterator it,
210 					 HBUINT16 *endCode,
211 					 HBUINT16 *startCode,
212 					 HBINT16 *idDelta,
213 					 unsigned segcount)
214   {
215     HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
216     if (unlikely (!c->check_success (idRangeOffset))) return nullptr;
217     if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr;
218 
219     + hb_range (segcount)
220     | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; })
221     | hb_apply ([&] (const unsigned i)
222 		{
223 		  idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i);
224 
225 		  + it
226 		  | hb_filter ([&] (const hb_item_type<Iterator> _) { return _.first >= startCode[i] && _.first <= endCode[i]; })
227 		  | hb_apply ([&] (const hb_item_type<Iterator> _)
228 			      {
229 				HBUINT16 glyID;
230 				glyID = _.second;
231 				c->copy<HBUINT16> (glyID);
232 			      })
233 		  ;
234 
235 
236 		})
237     ;
238 
239     return idRangeOffset;
240   }
241 
242   template<typename Iterator,
243 	   hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtableFormat4244   void serialize (hb_serialize_context_t *c,
245 		  Iterator it)
246   {
247     auto format4_iter =
248     + it
249     | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
250 		 { return _.first <= 0xFFFF; })
251     ;
252 
253     if (format4_iter.len () == 0) return;
254 
255     unsigned table_initpos = c->length ();
256     if (unlikely (!c->extend_min (*this))) return;
257     this->format = 4;
258 
259     //serialize endCode[]
260     HBUINT16 *endCode = serialize_endcode_array (c, format4_iter);
261     if (unlikely (!endCode)) return;
262 
263     unsigned segcount = (c->length () - min_size) / HBUINT16::static_size;
264 
265     // 2 bytes of padding.
266     if (unlikely (!c->allocate_size<HBUINT16> (HBUINT16::static_size))) return; // 2 bytes of padding.
267 
268    // serialize startCode[]
269     HBUINT16 *startCode = serialize_startcode_array (c, format4_iter);
270     if (unlikely (!startCode)) return;
271 
272     //serialize idDelta[]
273     HBINT16 *idDelta = serialize_idDelta_array (c, format4_iter, endCode, startCode, segcount);
274     if (unlikely (!idDelta)) return;
275 
276     HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
277     if (unlikely (!c->check_success (idRangeOffset))) return;
278 
279     if (unlikely (!c->check_assign(this->length,
280                                    c->length () - table_initpos,
281                                    HB_SERIALIZE_ERROR_INT_OVERFLOW))) return;
282     this->segCountX2 = segcount * 2;
283     this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1;
284     this->searchRange = 2 * (1u << this->entrySelector);
285     this->rangeShift = segcount * 2 > this->searchRange
286 		       ? 2 * segcount - this->searchRange
287 		       : 0;
288   }
289 
290   struct accelerator_t
291   {
accelerator_tOT::CmapSubtableFormat4::accelerator_t292     accelerator_t () {}
accelerator_tOT::CmapSubtableFormat4::accelerator_t293     accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); }
~accelerator_tOT::CmapSubtableFormat4::accelerator_t294     ~accelerator_t () { fini (); }
295 
initOT::CmapSubtableFormat4::accelerator_t296     void init (const CmapSubtableFormat4 *subtable)
297     {
298       segCount = subtable->segCountX2 / 2;
299       endCount = subtable->values.arrayZ;
300       startCount = endCount + segCount + 1;
301       idDelta = startCount + segCount;
302       idRangeOffset = idDelta + segCount;
303       glyphIdArray = idRangeOffset + segCount;
304       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
305     }
finiOT::CmapSubtableFormat4::accelerator_t306     void fini () {}
307 
get_glyphOT::CmapSubtableFormat4::accelerator_t308     bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
309     {
310       struct CustomRange
311       {
312 	int cmp (hb_codepoint_t k,
313 		 unsigned distance) const
314 	{
315 	  if (k > last) return +1;
316 	  if (k < (&last)[distance]) return -1;
317 	  return 0;
318 	}
319 	HBUINT16 last;
320       };
321 
322       const HBUINT16 *found = hb_bsearch (codepoint,
323 					  this->endCount,
324 					  this->segCount,
325 					  2,
326 					  _hb_cmp_method<hb_codepoint_t, CustomRange, unsigned>,
327 					  this->segCount + 1);
328       if (!found)
329 	return false;
330       unsigned int i = found - endCount;
331 
332       hb_codepoint_t gid;
333       unsigned int rangeOffset = this->idRangeOffset[i];
334       if (rangeOffset == 0)
335 	gid = codepoint + this->idDelta[i];
336       else
337       {
338 	/* Somebody has been smoking... */
339 	unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
340 	if (unlikely (index >= this->glyphIdArrayLength))
341 	  return false;
342 	gid = this->glyphIdArray[index];
343 	if (unlikely (!gid))
344 	  return false;
345 	gid += this->idDelta[i];
346       }
347       gid &= 0xFFFFu;
348       if (!gid)
349 	return false;
350       *glyph = gid;
351       return true;
352     }
353 
get_glyph_funcOT::CmapSubtableFormat4::accelerator_t354     HB_INTERNAL static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
355     { return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); }
356 
collect_unicodesOT::CmapSubtableFormat4::accelerator_t357     void collect_unicodes (hb_set_t *out) const
358     {
359       unsigned int count = this->segCount;
360       if (count && this->startCount[count - 1] == 0xFFFFu)
361 	count--; /* Skip sentinel segment. */
362       for (unsigned int i = 0; i < count; i++)
363       {
364 	hb_codepoint_t start = this->startCount[i];
365 	hb_codepoint_t end = this->endCount[i];
366 	unsigned int rangeOffset = this->idRangeOffset[i];
367 	if (rangeOffset == 0)
368 	{
369 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
370 	  {
371 	    hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
372 	    if (unlikely (!gid))
373 	      continue;
374 	    out->add (codepoint);
375 	  }
376 	}
377 	else
378 	{
379 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
380 	  {
381 	    unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
382 	    if (unlikely (index >= this->glyphIdArrayLength))
383 	      break;
384 	    hb_codepoint_t gid = this->glyphIdArray[index];
385 	    if (unlikely (!gid))
386 	      continue;
387 	    out->add (codepoint);
388 	  }
389 	}
390       }
391     }
392 
collect_mappingOT::CmapSubtableFormat4::accelerator_t393     void collect_mapping (hb_set_t *unicodes, /* OUT */
394 			  hb_map_t *mapping /* OUT */) const
395     {
396       unsigned count = this->segCount;
397       if (count && this->startCount[count - 1] == 0xFFFFu)
398 	count--; /* Skip sentinel segment. */
399       for (unsigned i = 0; i < count; i++)
400       {
401 	hb_codepoint_t start = this->startCount[i];
402 	hb_codepoint_t end = this->endCount[i];
403 	unsigned rangeOffset = this->idRangeOffset[i];
404 	if (rangeOffset == 0)
405 	{
406 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
407 	  {
408 	    hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
409 	    if (unlikely (!gid))
410 	      continue;
411 	    unicodes->add (codepoint);
412 	    mapping->set (codepoint, gid);
413 	  }
414 	}
415 	else
416 	{
417 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
418 	  {
419 	    unsigned index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
420 	    if (unlikely (index >= this->glyphIdArrayLength))
421 	      break;
422 	    hb_codepoint_t gid = this->glyphIdArray[index];
423 	    if (unlikely (!gid))
424 	      continue;
425 	    unicodes->add (codepoint);
426 	    mapping->set (codepoint, gid);
427 	  }
428 	}
429       }
430     }
431 
432     const HBUINT16 *endCount;
433     const HBUINT16 *startCount;
434     const HBUINT16 *idDelta;
435     const HBUINT16 *idRangeOffset;
436     const HBUINT16 *glyphIdArray;
437     unsigned int segCount;
438     unsigned int glyphIdArrayLength;
439   };
440 
get_glyphOT::CmapSubtableFormat4441   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
442   {
443     accelerator_t accel (this);
444     return accel.get_glyph_func (&accel, codepoint, glyph);
445   }
collect_unicodesOT::CmapSubtableFormat4446   void collect_unicodes (hb_set_t *out) const
447   {
448     accelerator_t accel (this);
449     accel.collect_unicodes (out);
450   }
451 
collect_mappingOT::CmapSubtableFormat4452   void collect_mapping (hb_set_t *unicodes, /* OUT */
453 			hb_map_t *mapping /* OUT */) const
454   {
455     accelerator_t accel (this);
456     accel.collect_mapping (unicodes, mapping);
457   }
458 
sanitizeOT::CmapSubtableFormat4459   bool sanitize (hb_sanitize_context_t *c) const
460   {
461     TRACE_SANITIZE (this);
462     if (unlikely (!c->check_struct (this)))
463       return_trace (false);
464 
465     if (unlikely (!c->check_range (this, length)))
466     {
467       /* Some broken fonts have too long of a "length" value.
468        * If that is the case, just change the value to truncate
469        * the subtable at the end of the blob. */
470       uint16_t new_length = (uint16_t) hb_min ((uintptr_t) 65535,
471 					       (uintptr_t) (c->end -
472 							    (char *) this));
473       if (!c->try_set (&length, new_length))
474 	return_trace (false);
475     }
476 
477     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
478   }
479 
480 
481 
482   protected:
483   HBUINT16	format;		/* Format number is set to 4. */
484   HBUINT16	length;		/* This is the length in bytes of the
485 				 * subtable. */
486   HBUINT16	language;	/* Ignore. */
487   HBUINT16	segCountX2;	/* 2 x segCount. */
488   HBUINT16	searchRange;	/* 2 * (2**floor(log2(segCount))) */
489   HBUINT16	entrySelector;	/* log2(searchRange/2) */
490   HBUINT16	rangeShift;	/* 2 x segCount - searchRange */
491 
492   UnsizedArrayOf<HBUINT16>
493 		values;
494 #if 0
495   HBUINT16	endCount[segCount];	/* End characterCode for each segment,
496 					 * last=0xFFFFu. */
497   HBUINT16	reservedPad;		/* Set to 0. */
498   HBUINT16	startCount[segCount];	/* Start character code for each segment. */
499   HBINT16		idDelta[segCount];	/* Delta for all character codes in segment. */
500   HBUINT16	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
501   UnsizedArrayOf<HBUINT16>
502 		glyphIdArray;	/* Glyph index array (arbitrary length) */
503 #endif
504 
505   public:
506   DEFINE_SIZE_ARRAY (14, values);
507 };
508 
509 struct CmapSubtableLongGroup
510 {
511   friend struct CmapSubtableFormat12;
512   friend struct CmapSubtableFormat13;
513   template<typename U>
514   friend struct CmapSubtableLongSegmented;
515   friend struct cmap;
516 
cmpOT::CmapSubtableLongGroup517   int cmp (hb_codepoint_t codepoint) const
518   {
519     if (codepoint < startCharCode) return -1;
520     if (codepoint > endCharCode)   return +1;
521     return 0;
522   }
523 
sanitizeOT::CmapSubtableLongGroup524   bool sanitize (hb_sanitize_context_t *c) const
525   {
526     TRACE_SANITIZE (this);
527     return_trace (c->check_struct (this));
528   }
529 
530   private:
531   HBUINT32		startCharCode;	/* First character code in this group. */
532   HBUINT32		endCharCode;	/* Last character code in this group. */
533   HBUINT32		glyphID;	/* Glyph index; interpretation depends on
534 					 * subtable format. */
535   public:
536   DEFINE_SIZE_STATIC (12);
537 };
538 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup);
539 
540 template <typename UINT>
541 struct CmapSubtableTrimmed
542 {
get_glyphOT::CmapSubtableTrimmed543   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
544   {
545     /* Rely on our implicit array bound-checking. */
546     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
547     if (!gid)
548       return false;
549     *glyph = gid;
550     return true;
551   }
collect_unicodesOT::CmapSubtableTrimmed552   void collect_unicodes (hb_set_t *out) const
553   {
554     hb_codepoint_t start = startCharCode;
555     unsigned int count = glyphIdArray.len;
556     for (unsigned int i = 0; i < count; i++)
557       if (glyphIdArray[i])
558 	out->add (start + i);
559   }
560 
collect_mappingOT::CmapSubtableTrimmed561   void collect_mapping (hb_set_t *unicodes, /* OUT */
562 			hb_map_t *mapping /* OUT */) const
563   {
564     hb_codepoint_t start_cp = startCharCode;
565     unsigned count = glyphIdArray.len;
566     for (unsigned i = 0; i < count; i++)
567       if (glyphIdArray[i])
568       {
569 	hb_codepoint_t unicode = start_cp + i;
570 	hb_codepoint_t glyphid = glyphIdArray[i];
571 	unicodes->add (unicode);
572 	mapping->set (unicode, glyphid);
573       }
574   }
575 
sanitizeOT::CmapSubtableTrimmed576   bool sanitize (hb_sanitize_context_t *c) const
577   {
578     TRACE_SANITIZE (this);
579     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
580   }
581 
582   protected:
583   UINT		formatReserved;	/* Subtable format and (maybe) padding. */
584   UINT		length;		/* Byte length of this subtable. */
585   UINT		language;	/* Ignore. */
586   UINT		startCharCode;	/* First character code covered. */
587   ArrayOf<HBGlyphID, UINT>
588 		glyphIdArray;	/* Array of glyph index values for character
589 				 * codes in the range. */
590   public:
591   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
592 };
593 
594 struct CmapSubtableFormat6  : CmapSubtableTrimmed<HBUINT16> {};
595 struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {};
596 
597 template <typename T>
598 struct CmapSubtableLongSegmented
599 {
600   friend struct cmap;
601 
get_glyphOT::CmapSubtableLongSegmented602   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
603   {
604     hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint);
605     if (!gid)
606       return false;
607     *glyph = gid;
608     return true;
609   }
610 
collect_unicodesOT::CmapSubtableLongSegmented611   void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
612   {
613     for (unsigned int i = 0; i < this->groups.len; i++)
614     {
615       hb_codepoint_t start = this->groups[i].startCharCode;
616       hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode,
617 				   (hb_codepoint_t) HB_UNICODE_MAX);
618       hb_codepoint_t gid = this->groups[i].glyphID;
619       if (!gid)
620       {
621 	/* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
622 	if (! T::group_get_glyph (this->groups[i], end)) continue;
623 	start++;
624 	gid++;
625       }
626       if (unlikely ((unsigned int) gid >= num_glyphs)) continue;
627       if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs))
628 	end = start + (hb_codepoint_t) num_glyphs - gid;
629 
630       out->add_range (start, end);
631     }
632   }
633 
collect_mappingOT::CmapSubtableLongSegmented634   void collect_mapping (hb_set_t *unicodes, /* OUT */
635 			hb_map_t *mapping, /* OUT */
636 			unsigned num_glyphs) const
637   {
638     for (unsigned i = 0; i < this->groups.len; i++)
639     {
640       hb_codepoint_t start = this->groups[i].startCharCode;
641       hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode,
642 				   (hb_codepoint_t) HB_UNICODE_MAX);
643       hb_codepoint_t gid = this->groups[i].glyphID;
644       if (!gid)
645       {
646 	/* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
647 	if (! T::group_get_glyph (this->groups[i], end)) continue;
648 	start++;
649 	gid++;
650       }
651       if (unlikely ((unsigned int) gid >= num_glyphs)) continue;
652       if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs))
653 	end = start + (hb_codepoint_t) num_glyphs - gid;
654 
655       for (unsigned cp = start; cp <= end; cp++)
656       {
657 	unicodes->add (cp);
658 	mapping->set (cp, gid);
659 	gid++;
660       }
661     }
662   }
663 
sanitizeOT::CmapSubtableLongSegmented664   bool sanitize (hb_sanitize_context_t *c) const
665   {
666     TRACE_SANITIZE (this);
667     return_trace (c->check_struct (this) && groups.sanitize (c));
668   }
669 
670   protected:
671   HBUINT16	format;		/* Subtable format; set to 12. */
672   HBUINT16	reserved;	/* Reserved; set to 0. */
673   HBUINT32	length;		/* Byte length of this subtable. */
674   HBUINT32	language;	/* Ignore. */
675   SortedArray32Of<CmapSubtableLongGroup>
676 		groups;		/* Groupings. */
677   public:
678   DEFINE_SIZE_ARRAY (16, groups);
679 };
680 
681 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
682 {
group_get_glyphOT::CmapSubtableFormat12683   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
684 					 hb_codepoint_t u)
685   { return likely (group.startCharCode <= group.endCharCode) ?
686 	   group.glyphID + (u - group.startCharCode) : 0; }
687 
688 
689   template<typename Iterator,
690 	   hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtableFormat12691   void serialize (hb_serialize_context_t *c,
692 		  Iterator it)
693   {
694     if (it.len () == 0) return;
695     unsigned table_initpos = c->length ();
696     if (unlikely (!c->extend_min (*this))) return;
697 
698     hb_codepoint_t startCharCode = 0xFFFF, endCharCode = 0xFFFF;
699     hb_codepoint_t glyphID = 0;
700 
701     for (const auto& _ : +it)
702     {
703       if (startCharCode == 0xFFFF)
704       {
705 	startCharCode = _.first;
706 	endCharCode = _.first;
707 	glyphID = _.second;
708       }
709       else if (!_is_gid_consecutive (endCharCode, startCharCode, glyphID, _.first, _.second))
710       {
711 	CmapSubtableLongGroup  grouprecord;
712 	grouprecord.startCharCode = startCharCode;
713 	grouprecord.endCharCode = endCharCode;
714 	grouprecord.glyphID = glyphID;
715 	c->copy<CmapSubtableLongGroup> (grouprecord);
716 
717 	startCharCode = _.first;
718 	endCharCode = _.first;
719 	glyphID = _.second;
720       }
721       else
722 	endCharCode = _.first;
723     }
724 
725     CmapSubtableLongGroup record;
726     record.startCharCode = startCharCode;
727     record.endCharCode = endCharCode;
728     record.glyphID = glyphID;
729     c->copy<CmapSubtableLongGroup> (record);
730 
731     this->format = 12;
732     this->reserved = 0;
733     this->length = c->length () - table_initpos;
734     this->groups.len = (this->length - min_size)/CmapSubtableLongGroup::static_size;
735   }
736 
get_sub_table_sizeOT::CmapSubtableFormat12737   static size_t get_sub_table_size (const hb_sorted_vector_t<CmapSubtableLongGroup> &groups_data)
738   { return 16 + 12 * groups_data.length; }
739 
740   private:
_is_gid_consecutiveOT::CmapSubtableFormat12741   static bool _is_gid_consecutive (hb_codepoint_t endCharCode,
742 				   hb_codepoint_t startCharCode,
743 				   hb_codepoint_t glyphID,
744 				   hb_codepoint_t cp,
745 				   hb_codepoint_t new_gid)
746   {
747     return (cp - 1 == endCharCode) &&
748 	new_gid == glyphID + (cp - startCharCode);
749   }
750 
751 };
752 
753 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
754 {
group_get_glyphOT::CmapSubtableFormat13755   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
756 					 hb_codepoint_t u HB_UNUSED)
757   { return group.glyphID; }
758 };
759 
/* Outcome of a variation-selector lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND   = 0,
  GLYPH_VARIANT_FOUND       = 1,
  GLYPH_VARIANT_USE_DEFAULT = 2
};
766 
767 struct UnicodeValueRange
768 {
cmpOT::UnicodeValueRange769   int cmp (const hb_codepoint_t &codepoint) const
770   {
771     if (codepoint < startUnicodeValue) return -1;
772     if (codepoint > startUnicodeValue + additionalCount) return +1;
773     return 0;
774   }
775 
sanitizeOT::UnicodeValueRange776   bool sanitize (hb_sanitize_context_t *c) const
777   {
778     TRACE_SANITIZE (this);
779     return_trace (c->check_struct (this));
780   }
781 
782   HBUINT24	startUnicodeValue;	/* First value in this range. */
783   HBUINT8	additionalCount;	/* Number of additional values in this
784 					 * range. */
785   public:
786   DEFINE_SIZE_STATIC (4);
787 };
788 
789 struct DefaultUVS : SortedArray32Of<UnicodeValueRange>
790 {
collect_unicodesOT::DefaultUVS791   void collect_unicodes (hb_set_t *out) const
792   {
793     unsigned int count = len;
794     for (unsigned int i = 0; i < count; i++)
795     {
796       hb_codepoint_t first = arrayZ[i].startUnicodeValue;
797       hb_codepoint_t last = hb_min ((hb_codepoint_t) (first + arrayZ[i].additionalCount),
798 				    (hb_codepoint_t) HB_UNICODE_MAX);
799       out->add_range (first, last);
800     }
801   }
802 
copyOT::DefaultUVS803   DefaultUVS* copy (hb_serialize_context_t *c,
804 		    const hb_set_t *unicodes) const
805   {
806     DefaultUVS *out = c->start_embed<DefaultUVS> ();
807     if (unlikely (!out)) return nullptr;
808     auto snap = c->snapshot ();
809 
810     HBUINT32 len;
811     len = 0;
812     if (unlikely (!c->copy<HBUINT32> (len))) return nullptr;
813     unsigned init_len = c->length ();
814 
815     hb_codepoint_t lastCode = HB_MAP_VALUE_INVALID;
816     int count = -1;
817 
818     for (const UnicodeValueRange& _ : as_array ())
819     {
820       for (const unsigned addcnt : hb_range ((unsigned) _.additionalCount + 1))
821       {
822 	unsigned curEntry = (unsigned) _.startUnicodeValue + addcnt;
823 	if (!unicodes->has (curEntry)) continue;
824 	count += 1;
825 	if (lastCode == HB_MAP_VALUE_INVALID)
826 	  lastCode = curEntry;
827 	else if (lastCode + count != curEntry)
828 	{
829 	  UnicodeValueRange rec;
830 	  rec.startUnicodeValue = lastCode;
831 	  rec.additionalCount = count - 1;
832 	  c->copy<UnicodeValueRange> (rec);
833 
834 	  lastCode = curEntry;
835 	  count = 0;
836 	}
837       }
838     }
839 
840     if (lastCode != HB_MAP_VALUE_INVALID)
841     {
842       UnicodeValueRange rec;
843       rec.startUnicodeValue = lastCode;
844       rec.additionalCount = count;
845       c->copy<UnicodeValueRange> (rec);
846     }
847 
848     if (c->length () - init_len == 0)
849     {
850       c->revert (snap);
851       return nullptr;
852     }
853     else
854     {
855       if (unlikely (!c->check_assign (out->len,
856                                       (c->length () - init_len) / UnicodeValueRange::static_size,
857                                       HB_SERIALIZE_ERROR_INT_OVERFLOW))) return nullptr;
858       return out;
859     }
860   }
861 
862   public:
863   DEFINE_SIZE_ARRAY (4, *this);
864 };
865 
866 struct UVSMapping
867 {
cmpOT::UVSMapping868   int cmp (const hb_codepoint_t &codepoint) const
869   { return unicodeValue.cmp (codepoint); }
870 
sanitizeOT::UVSMapping871   bool sanitize (hb_sanitize_context_t *c) const
872   {
873     TRACE_SANITIZE (this);
874     return_trace (c->check_struct (this));
875   }
876 
877   HBUINT24	unicodeValue;	/* Base Unicode value of the UVS */
878   HBGlyphID	glyphID;	/* Glyph ID of the UVS */
879   public:
880   DEFINE_SIZE_STATIC (5);
881 };
882 
883 struct NonDefaultUVS : SortedArray32Of<UVSMapping>
884 {
collect_unicodesOT::NonDefaultUVS885   void collect_unicodes (hb_set_t *out) const
886   {
887     for (const auto& a : as_array ())
888       out->add (a.unicodeValue);
889   }
890 
collect_mappingOT::NonDefaultUVS891   void collect_mapping (hb_set_t *unicodes, /* OUT */
892 			hb_map_t *mapping /* OUT */) const
893   {
894     for (const auto& a : as_array ())
895     {
896       hb_codepoint_t unicode = a.unicodeValue;
897       hb_codepoint_t glyphid = a.glyphID;
898       unicodes->add (unicode);
899       mapping->set (unicode, glyphid);
900     }
901   }
902 
closure_glyphsOT::NonDefaultUVS903   void closure_glyphs (const hb_set_t      *unicodes,
904 		       hb_set_t            *glyphset) const
905   {
906     + as_array ()
907     | hb_filter (unicodes, &UVSMapping::unicodeValue)
908     | hb_map (&UVSMapping::glyphID)
909     | hb_sink (glyphset)
910     ;
911   }
912 
copyOT::NonDefaultUVS913   NonDefaultUVS* copy (hb_serialize_context_t *c,
914 		       const hb_set_t *unicodes,
915 		       const hb_set_t *glyphs_requested,
916 		       const hb_map_t *glyph_map) const
917   {
918     NonDefaultUVS *out = c->start_embed<NonDefaultUVS> ();
919     if (unlikely (!out)) return nullptr;
920 
921     auto it =
922     + as_array ()
923     | hb_filter ([&] (const UVSMapping& _)
924 		 {
925 		   return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID);
926 		 })
927     ;
928 
929     if (!it) return nullptr;
930 
931     HBUINT32 len;
932     len = it.len ();
933     if (unlikely (!c->copy<HBUINT32> (len))) return nullptr;
934 
935     for (const UVSMapping& _ : it)
936     {
937       UVSMapping mapping;
938       mapping.unicodeValue = _.unicodeValue;
939       mapping.glyphID = glyph_map->get (_.glyphID);
940       c->copy<UVSMapping> (mapping);
941     }
942 
943     return out;
944   }
945 
946   public:
947   DEFINE_SIZE_ARRAY (4, *this);
948 };
949 
950 struct VariationSelectorRecord
951 {
get_glyphOT::VariationSelectorRecord952   glyph_variant_t get_glyph (hb_codepoint_t codepoint,
953 			     hb_codepoint_t *glyph,
954 			     const void *base) const
955   {
956     if ((base+defaultUVS).bfind (codepoint))
957       return GLYPH_VARIANT_USE_DEFAULT;
958     const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint);
959     if (nonDefault.glyphID)
960     {
961       *glyph = nonDefault.glyphID;
962        return GLYPH_VARIANT_FOUND;
963     }
964     return GLYPH_VARIANT_NOT_FOUND;
965   }
966 
VariationSelectorRecordOT::VariationSelectorRecord967   VariationSelectorRecord(const VariationSelectorRecord& other)
968   {
969     *this = other;
970   }
971 
operator =OT::VariationSelectorRecord972   void operator= (const VariationSelectorRecord& other)
973   {
974     varSelector = other.varSelector;
975     HBUINT32 offset = other.defaultUVS;
976     defaultUVS = offset;
977     offset = other.nonDefaultUVS;
978     nonDefaultUVS = offset;
979   }
980 
collect_unicodesOT::VariationSelectorRecord981   void collect_unicodes (hb_set_t *out, const void *base) const
982   {
983     (base+defaultUVS).collect_unicodes (out);
984     (base+nonDefaultUVS).collect_unicodes (out);
985   }
986 
collect_mappingOT::VariationSelectorRecord987   void collect_mapping (const void *base,
988 			hb_set_t *unicodes, /* OUT */
989 			hb_map_t *mapping /* OUT */) const
990   {
991     (base+defaultUVS).collect_unicodes (unicodes);
992     (base+nonDefaultUVS).collect_mapping (unicodes, mapping);
993   }
994 
cmpOT::VariationSelectorRecord995   int cmp (const hb_codepoint_t &variation_selector) const
996   { return varSelector.cmp (variation_selector); }
997 
sanitizeOT::VariationSelectorRecord998   bool sanitize (hb_sanitize_context_t *c, const void *base) const
999   {
1000     TRACE_SANITIZE (this);
1001     return_trace (c->check_struct (this) &&
1002 		  defaultUVS.sanitize (c, base) &&
1003 		  nonDefaultUVS.sanitize (c, base));
1004   }
1005 
1006   hb_pair_t<unsigned, unsigned>
copyOT::VariationSelectorRecord1007   copy (hb_serialize_context_t *c,
1008 	const hb_set_t *unicodes,
1009 	const hb_set_t *glyphs_requested,
1010 	const hb_map_t *glyph_map,
1011 	const void *base) const
1012   {
1013     auto snap = c->snapshot ();
1014     auto *out = c->embed<VariationSelectorRecord> (*this);
1015     if (unlikely (!out)) return hb_pair (0, 0);
1016 
1017     out->defaultUVS = 0;
1018     out->nonDefaultUVS = 0;
1019 
1020     unsigned non_default_uvs_objidx = 0;
1021     if (nonDefaultUVS != 0)
1022     {
1023       c->push ();
1024       if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map))
1025 	non_default_uvs_objidx = c->pop_pack ();
1026       else c->pop_discard ();
1027     }
1028 
1029     unsigned default_uvs_objidx = 0;
1030     if (defaultUVS != 0)
1031     {
1032       c->push ();
1033       if (c->copy (base+defaultUVS, unicodes))
1034 	default_uvs_objidx = c->pop_pack ();
1035       else c->pop_discard ();
1036     }
1037 
1038 
1039     if (!default_uvs_objidx && !non_default_uvs_objidx)
1040       c->revert (snap);
1041 
1042     return hb_pair (default_uvs_objidx, non_default_uvs_objidx);
1043   }
1044 
1045   HBUINT24	varSelector;	/* Variation selector. */
1046   Offset32To<DefaultUVS>
1047 		defaultUVS;	/* Offset to Default UVS Table.  May be 0. */
1048   Offset32To<NonDefaultUVS>
1049 		nonDefaultUVS;	/* Offset to Non-Default UVS Table.  May be 0. */
1050   public:
1051   DEFINE_SIZE_STATIC (11);
1052 };
1053 
1054 struct CmapSubtableFormat14
1055 {
get_glyph_variantOT::CmapSubtableFormat141056   glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
1057 				     hb_codepoint_t variation_selector,
1058 				     hb_codepoint_t *glyph) const
1059   { return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); }
1060 
collect_variation_selectorsOT::CmapSubtableFormat141061   void collect_variation_selectors (hb_set_t *out) const
1062   {
1063     for (const auto& a : record.as_array ())
1064       out->add (a.varSelector);
1065   }
collect_variation_unicodesOT::CmapSubtableFormat141066   void collect_variation_unicodes (hb_codepoint_t variation_selector,
1067 				   hb_set_t *out) const
1068   { record.bsearch (variation_selector).collect_unicodes (out, this); }
1069 
serializeOT::CmapSubtableFormat141070   void serialize (hb_serialize_context_t *c,
1071 		  const hb_set_t *unicodes,
1072 		  const hb_set_t *glyphs_requested,
1073 		  const hb_map_t *glyph_map,
1074 		  const void *base)
1075   {
1076     auto snap = c->snapshot ();
1077     unsigned table_initpos = c->length ();
1078     const char* init_tail = c->tail;
1079 
1080     if (unlikely (!c->extend_min (*this))) return;
1081     this->format = 14;
1082 
1083     auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base);
1084 
1085     /*
1086      * Some versions of OTS require that offsets are in order. Due to the use
1087      * of push()/pop_pack() serializing the variation records in order results
1088      * in the offsets being in reverse order (first record has the largest
1089      * offset). While this is perfectly valid, it will cause some versions of
1090      * OTS to consider this table bad.
1091      *
1092      * So to prevent this issue we serialize the variation records in reverse
1093      * order, so that the offsets are ordered from small to large. Since
1094      * variation records are supposed to be in increasing order of varSelector
1095      * we then have to reverse the order of the written variation selector
1096      * records after everything is finalized.
1097      */
1098     hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices;
1099     for (int i = src_tbl->record.len - 1; i >= 0; i--)
1100     {
1101       hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base);
1102       if (result.first || result.second)
1103 	obj_indices.push (result);
1104     }
1105 
1106     if (c->length () - table_initpos == CmapSubtableFormat14::min_size)
1107     {
1108       c->revert (snap);
1109       return;
1110     }
1111 
1112     if (unlikely (!c->check_success (!obj_indices.in_error ())))
1113       return;
1114 
1115     int tail_len = init_tail - c->tail;
1116     c->check_assign (this->length, c->length () - table_initpos + tail_len,
1117                      HB_SERIALIZE_ERROR_INT_OVERFLOW);
1118     c->check_assign (this->record.len,
1119 		     (c->length () - table_initpos - CmapSubtableFormat14::min_size) /
1120 		     VariationSelectorRecord::static_size,
1121                      HB_SERIALIZE_ERROR_INT_OVERFLOW);
1122 
1123     /* Correct the incorrect write order by reversing the order of the variation
1124        records array. */
1125     _reverse_variation_records ();
1126 
1127     /* Now that records are in the right order, we can set up the offsets. */
1128     _add_links_to_variation_records (c, obj_indices);
1129   }
1130 
_reverse_variation_recordsOT::CmapSubtableFormat141131   void _reverse_variation_records ()
1132   {
1133     record.as_array ().reverse ();
1134   }
1135 
_add_links_to_variation_recordsOT::CmapSubtableFormat141136   void _add_links_to_variation_records (hb_serialize_context_t *c,
1137 					const hb_vector_t<hb_pair_t<unsigned, unsigned>>& obj_indices)
1138   {
1139     for (unsigned i = 0; i < obj_indices.length; i++)
1140     {
1141       /*
1142        * Since the record array has been reversed (see comments in copy())
1143        * but obj_indices has not been, the indices at obj_indices[i]
1144        * are for the variation record at record[j].
1145        */
1146       int j = obj_indices.length - 1 - i;
1147       c->add_link (record[j].defaultUVS, obj_indices[i].first);
1148       c->add_link (record[j].nonDefaultUVS, obj_indices[i].second);
1149     }
1150   }
1151 
closure_glyphsOT::CmapSubtableFormat141152   void closure_glyphs (const hb_set_t      *unicodes,
1153 		       hb_set_t            *glyphset) const
1154   {
1155     + hb_iter (record)
1156     | hb_filter (hb_bool, &VariationSelectorRecord::nonDefaultUVS)
1157     | hb_map (&VariationSelectorRecord::nonDefaultUVS)
1158     | hb_map (hb_add (this))
1159     | hb_apply ([=] (const NonDefaultUVS& _) { _.closure_glyphs (unicodes, glyphset); })
1160     ;
1161   }
1162 
collect_unicodesOT::CmapSubtableFormat141163   void collect_unicodes (hb_set_t *out) const
1164   {
1165     for (const VariationSelectorRecord& _ : record)
1166       _.collect_unicodes (out, this);
1167   }
1168 
collect_mappingOT::CmapSubtableFormat141169   void collect_mapping (hb_set_t *unicodes, /* OUT */
1170 			hb_map_t *mapping /* OUT */) const
1171   {
1172     for (const VariationSelectorRecord& _ : record)
1173       _.collect_mapping (this, unicodes, mapping);
1174   }
1175 
sanitizeOT::CmapSubtableFormat141176   bool sanitize (hb_sanitize_context_t *c) const
1177   {
1178     TRACE_SANITIZE (this);
1179     return_trace (c->check_struct (this) &&
1180 		  record.sanitize (c, this));
1181   }
1182 
1183   protected:
1184   HBUINT16	format;		/* Format number is set to 14. */
1185   HBUINT32	length;		/* Byte length of this subtable. */
1186   SortedArray32Of<VariationSelectorRecord>
1187 		record;		/* Variation selector records; sorted
1188 				 * in increasing order of `varSelector'. */
1189   public:
1190   DEFINE_SIZE_ARRAY (10, record);
1191 };
1192 
1193 struct CmapSubtable
1194 {
1195   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
1196 
get_glyphOT::CmapSubtable1197   bool get_glyph (hb_codepoint_t codepoint,
1198 		  hb_codepoint_t *glyph) const
1199   {
1200     switch (u.format) {
1201     case  0: return u.format0 .get_glyph (codepoint, glyph);
1202     case  4: return u.format4 .get_glyph (codepoint, glyph);
1203     case  6: return u.format6 .get_glyph (codepoint, glyph);
1204     case 10: return u.format10.get_glyph (codepoint, glyph);
1205     case 12: return u.format12.get_glyph (codepoint, glyph);
1206     case 13: return u.format13.get_glyph (codepoint, glyph);
1207     case 14:
1208     default: return false;
1209     }
1210   }
collect_unicodesOT::CmapSubtable1211   void collect_unicodes (hb_set_t *out, unsigned int num_glyphs = UINT_MAX) const
1212   {
1213     switch (u.format) {
1214     case  0: u.format0 .collect_unicodes (out); return;
1215     case  4: u.format4 .collect_unicodes (out); return;
1216     case  6: u.format6 .collect_unicodes (out); return;
1217     case 10: u.format10.collect_unicodes (out); return;
1218     case 12: u.format12.collect_unicodes (out, num_glyphs); return;
1219     case 13: u.format13.collect_unicodes (out, num_glyphs); return;
1220     case 14:
1221     default: return;
1222     }
1223   }
1224 
collect_mappingOT::CmapSubtable1225   void collect_mapping (hb_set_t *unicodes, /* OUT */
1226 			hb_map_t *mapping, /* OUT */
1227 			unsigned num_glyphs = UINT_MAX) const
1228   {
1229     switch (u.format) {
1230     case  0: u.format0 .collect_mapping (unicodes, mapping); return;
1231     case  4: u.format4 .collect_mapping (unicodes, mapping); return;
1232     case  6: u.format6 .collect_mapping (unicodes, mapping); return;
1233     case 10: u.format10.collect_mapping (unicodes, mapping); return;
1234     case 12: u.format12.collect_mapping (unicodes, mapping, num_glyphs); return;
1235     case 13: u.format13.collect_mapping (unicodes, mapping, num_glyphs); return;
1236     case 14:
1237     default: return;
1238     }
1239   }
1240 
1241   template<typename Iterator,
1242 	   hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtable1243   void serialize (hb_serialize_context_t *c,
1244 		  Iterator it,
1245 		  unsigned format,
1246 		  const hb_subset_plan_t *plan,
1247 		  const void *base)
1248   {
1249     switch (format) {
1250     case  4: return u.format4.serialize (c, it);
1251     case 12: return u.format12.serialize (c, it);
1252     case 14: return u.format14.serialize (c, plan->unicodes, plan->glyphs_requested, plan->glyph_map, base);
1253     default: return;
1254     }
1255   }
1256 
sanitizeOT::CmapSubtable1257   bool sanitize (hb_sanitize_context_t *c) const
1258   {
1259     TRACE_SANITIZE (this);
1260     if (!u.format.sanitize (c)) return_trace (false);
1261     switch (u.format) {
1262     case  0: return_trace (u.format0 .sanitize (c));
1263     case  4: return_trace (u.format4 .sanitize (c));
1264     case  6: return_trace (u.format6 .sanitize (c));
1265     case 10: return_trace (u.format10.sanitize (c));
1266     case 12: return_trace (u.format12.sanitize (c));
1267     case 13: return_trace (u.format13.sanitize (c));
1268     case 14: return_trace (u.format14.sanitize (c));
1269     default:return_trace (true);
1270     }
1271   }
1272 
1273   public:
1274   union {
1275   HBUINT16		format;		/* Format identifier */
1276   CmapSubtableFormat0	format0;
1277   CmapSubtableFormat4	format4;
1278   CmapSubtableFormat6	format6;
1279   CmapSubtableFormat10	format10;
1280   CmapSubtableFormat12	format12;
1281   CmapSubtableFormat13	format13;
1282   CmapSubtableFormat14	format14;
1283   } u;
1284   public:
1285   DEFINE_SIZE_UNION (2, format);
1286 };
1287 
1288 
1289 struct EncodingRecord
1290 {
cmpOT::EncodingRecord1291   int cmp (const EncodingRecord &other) const
1292   {
1293     int ret;
1294     ret = platformID.cmp (other.platformID);
1295     if (ret) return ret;
1296     ret = encodingID.cmp (other.encodingID);
1297     if (ret) return ret;
1298     return 0;
1299   }
1300 
sanitizeOT::EncodingRecord1301   bool sanitize (hb_sanitize_context_t *c, const void *base) const
1302   {
1303     TRACE_SANITIZE (this);
1304     return_trace (c->check_struct (this) &&
1305 		  subtable.sanitize (c, base));
1306   }
1307 
1308   template<typename Iterator,
1309 	   hb_requires (hb_is_iterator (Iterator))>
copyOT::EncodingRecord1310   EncodingRecord* copy (hb_serialize_context_t *c,
1311 			Iterator it,
1312 			unsigned format,
1313 			const void *base,
1314 			const hb_subset_plan_t *plan,
1315 			/* INOUT */ unsigned *objidx) const
1316   {
1317     TRACE_SERIALIZE (this);
1318     auto snap = c->snapshot ();
1319     auto *out = c->embed (this);
1320     if (unlikely (!out)) return_trace (nullptr);
1321     out->subtable = 0;
1322 
1323     if (*objidx == 0)
1324     {
1325       CmapSubtable *cmapsubtable = c->push<CmapSubtable> ();
1326       unsigned origin_length = c->length ();
1327       cmapsubtable->serialize (c, it, format, plan, &(base+subtable));
1328       if (c->length () - origin_length > 0) *objidx = c->pop_pack ();
1329       else c->pop_discard ();
1330     }
1331 
1332     if (*objidx == 0)
1333     {
1334       c->revert (snap);
1335       return_trace (nullptr);
1336     }
1337 
1338     c->add_link (out->subtable, *objidx);
1339     return_trace (out);
1340   }
1341 
1342   HBUINT16	platformID;	/* Platform ID. */
1343   HBUINT16	encodingID;	/* Platform-specific encoding ID. */
1344   Offset32To<CmapSubtable>
1345 		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
1346   public:
1347   DEFINE_SIZE_STATIC (8);
1348 };
1349 
1350 struct cmap
1351 {
1352   static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
1353 
1354   template<typename Iterator, typename EncodingRecIter,
1355 	   hb_requires (hb_is_iterator (EncodingRecIter))>
serializeOT::cmap1356   void serialize (hb_serialize_context_t *c,
1357 		  Iterator it,
1358 		  EncodingRecIter encodingrec_iter,
1359 		  const void *base,
1360 		  const hb_subset_plan_t *plan)
1361   {
1362     if (unlikely (!c->extend_min ((*this))))  return;
1363     this->version = 0;
1364 
1365     unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0;
1366 
1367     for (const EncodingRecord& _ : encodingrec_iter)
1368     {
1369       unsigned format = (base+_.subtable).u.format;
1370       if (!plan->glyphs_requested->is_empty ())
1371       {
1372 	hb_set_t unicodes_set;
1373 	hb_map_t cp_glyphid_map;
1374 	(base+_.subtable).collect_mapping (&unicodes_set, &cp_glyphid_map);
1375 
1376 	auto table_iter =
1377 	+ hb_zip (unicodes_set.iter(), unicodes_set.iter() | hb_map(cp_glyphid_map))
1378 	| hb_filter (plan->_glyphset, hb_second)
1379 	| hb_filter ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p)
1380 		     {
1381 		       return plan->unicodes->has (p.first) ||
1382 			      plan->glyphs_requested->has (p.second);
1383 		     })
1384 	| hb_map ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p_org)
1385 		  {
1386 		    return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (p_org.first, plan->glyph_map->get(p_org.second));
1387 		  })
1388 	;
1389 
1390 	if (format == 4) c->copy (_, table_iter, 4u, base, plan, &format4objidx);
1391 	else if (format == 12) c->copy (_, table_iter, 12u, base, plan, &format12objidx);
1392 	else if (format == 14) c->copy (_, table_iter, 14u, base, plan, &format14objidx);
1393       }
1394       /* when --gids option is not used, we iterate input unicodes instead of
1395        * all codepoints in each subtable, which is more efficient */
1396       else
1397       {
1398 	hb_set_t unicodes_set;
1399 	(base+_.subtable).collect_unicodes (&unicodes_set);
1400 
1401 	if (format == 4) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
1402 	else if (format == 12) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
1403 	else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
1404       }
1405     }
1406 
1407     c->check_assign(this->encodingRecord.len,
1408                     (c->length () - cmap::min_size)/EncodingRecord::static_size,
1409                     HB_SERIALIZE_ERROR_INT_OVERFLOW);
1410   }
1411 
closure_glyphsOT::cmap1412   void closure_glyphs (const hb_set_t      *unicodes,
1413 		       hb_set_t            *glyphset) const
1414   {
1415     + hb_iter (encodingRecord)
1416     | hb_map (&EncodingRecord::subtable)
1417     | hb_map (hb_add (this))
1418     | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == 14; })
1419     | hb_apply ([=] (const CmapSubtable& _) { _.u.format14.closure_glyphs (unicodes, glyphset); })
1420     ;
1421   }
1422 
subsetOT::cmap1423   bool subset (hb_subset_context_t *c) const
1424   {
1425     TRACE_SUBSET (this);
1426 
1427     cmap *cmap_prime = c->serializer->start_embed<cmap> ();
1428     if (unlikely (!c->serializer->check_success (cmap_prime))) return_trace (false);
1429 
1430     auto encodingrec_iter =
1431     + hb_iter (encodingRecord)
1432     | hb_filter ([&] (const EncodingRecord& _)
1433 		{
1434 		  if ((_.platformID == 0 && _.encodingID == 3) ||
1435 		      (_.platformID == 0 && _.encodingID == 4) ||
1436 		      (_.platformID == 3 && _.encodingID == 1) ||
1437 		      (_.platformID == 3 && _.encodingID == 10) ||
1438 		      (this + _.subtable).u.format == 14)
1439 		    return true;
1440 
1441 		  return false;
1442 		})
1443     ;
1444 
1445     if (unlikely (!encodingrec_iter.len ())) return_trace (false);
1446 
1447     const EncodingRecord *unicode_bmp= nullptr, *unicode_ucs4 = nullptr, *ms_bmp = nullptr, *ms_ucs4 = nullptr;
1448     bool has_format12 = false;
1449 
1450     for (const EncodingRecord& _ : encodingrec_iter)
1451     {
1452       unsigned format = (this + _.subtable).u.format;
1453       if (format == 12) has_format12 = true;
1454 
1455       const EncodingRecord *table = hb_addressof (_);
1456       if      (_.platformID == 0 && _.encodingID ==  3) unicode_bmp = table;
1457       else if (_.platformID == 0 && _.encodingID ==  4) unicode_ucs4 = table;
1458       else if (_.platformID == 3 && _.encodingID ==  1) ms_bmp = table;
1459       else if (_.platformID == 3 && _.encodingID == 10) ms_ucs4 = table;
1460     }
1461 
1462     if (unlikely (!has_format12 && !unicode_bmp && !ms_bmp)) return_trace (false);
1463     if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false);
1464 
1465     auto it =
1466     + hb_iter (c->plan->unicodes)
1467     | hb_map ([&] (hb_codepoint_t _)
1468 	      {
1469 		hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID;
1470 		c->plan->new_gid_for_codepoint (_, &new_gid);
1471 		return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid);
1472 	      })
1473     | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
1474 		 { return (_.second != HB_MAP_VALUE_INVALID); })
1475     ;
1476     cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan);
1477     return_trace (true);
1478   }
1479 
find_best_subtableOT::cmap1480   const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
1481   {
1482     if (symbol) *symbol = false;
1483 
1484     const CmapSubtable *subtable;
1485 
1486     /* Symbol subtable.
1487      * Prefer symbol if available.
1488      * https://github.com/harfbuzz/harfbuzz/issues/1918 */
1489     if ((subtable = this->find_subtable (3, 0)))
1490     {
1491       if (symbol) *symbol = true;
1492       return subtable;
1493     }
1494 
1495     /* 32-bit subtables. */
1496     if ((subtable = this->find_subtable (3, 10))) return subtable;
1497     if ((subtable = this->find_subtable (0, 6))) return subtable;
1498     if ((subtable = this->find_subtable (0, 4))) return subtable;
1499 
1500     /* 16-bit subtables. */
1501     if ((subtable = this->find_subtable (3, 1))) return subtable;
1502     if ((subtable = this->find_subtable (0, 3))) return subtable;
1503     if ((subtable = this->find_subtable (0, 2))) return subtable;
1504     if ((subtable = this->find_subtable (0, 1))) return subtable;
1505     if ((subtable = this->find_subtable (0, 0))) return subtable;
1506 
1507     /* Meh. */
1508     return &Null (CmapSubtable);
1509   }
1510 
1511   struct accelerator_t
1512   {
initOT::cmap::accelerator_t1513     void init (hb_face_t *face)
1514     {
1515       this->table = hb_sanitize_context_t ().reference_table<cmap> (face);
1516       bool symbol;
1517       this->subtable = table->find_best_subtable (&symbol);
1518       this->subtable_uvs = &Null (CmapSubtableFormat14);
1519       {
1520 	const CmapSubtable *st = table->find_subtable (0, 5);
1521 	if (st && st->u.format == 14)
1522 	  subtable_uvs = &st->u.format14;
1523       }
1524 
1525       this->get_glyph_data = subtable;
1526       if (unlikely (symbol))
1527 	this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>;
1528       else
1529       {
1530 	switch (subtable->u.format) {
1531 	/* Accelerate format 4 and format 12. */
1532 	default:
1533 	  this->get_glyph_funcZ = get_glyph_from<CmapSubtable>;
1534 	  break;
1535 	case 12:
1536 	  this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>;
1537 	  break;
1538 	case  4:
1539 	{
1540 	  this->format4_accel.init (&subtable->u.format4);
1541 	  this->get_glyph_data = &this->format4_accel;
1542 	  this->get_glyph_funcZ = this->format4_accel.get_glyph_func;
1543 	  break;
1544 	}
1545 	}
1546       }
1547     }
1548 
finiOT::cmap::accelerator_t1549     void fini () { this->table.destroy (); }
1550 
get_nominal_glyphOT::cmap::accelerator_t1551     bool get_nominal_glyph (hb_codepoint_t  unicode,
1552 			    hb_codepoint_t *glyph) const
1553     {
1554       if (unlikely (!this->get_glyph_funcZ)) return false;
1555       return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph);
1556     }
get_nominal_glyphsOT::cmap::accelerator_t1557     unsigned int get_nominal_glyphs (unsigned int count,
1558 				     const hb_codepoint_t *first_unicode,
1559 				     unsigned int unicode_stride,
1560 				     hb_codepoint_t *first_glyph,
1561 				     unsigned int glyph_stride) const
1562     {
1563       if (unlikely (!this->get_glyph_funcZ)) return 0;
1564 
1565       hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ;
1566       const void *get_glyph_data = this->get_glyph_data;
1567 
1568       unsigned int done;
1569       for (done = 0;
1570 	   done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph);
1571 	   done++)
1572       {
1573 	first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride);
1574 	first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
1575       }
1576       return done;
1577     }
1578 
get_variation_glyphOT::cmap::accelerator_t1579     bool get_variation_glyph (hb_codepoint_t  unicode,
1580 			      hb_codepoint_t  variation_selector,
1581 			      hb_codepoint_t *glyph) const
1582     {
1583       switch (this->subtable_uvs->get_glyph_variant (unicode,
1584 						     variation_selector,
1585 						     glyph))
1586       {
1587 	case GLYPH_VARIANT_NOT_FOUND:	return false;
1588 	case GLYPH_VARIANT_FOUND:	return true;
1589 	case GLYPH_VARIANT_USE_DEFAULT:	break;
1590       }
1591 
1592       return get_nominal_glyph (unicode, glyph);
1593     }
1594 
collect_unicodesOT::cmap::accelerator_t1595     void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
1596     { subtable->collect_unicodes (out, num_glyphs); }
collect_mappingOT::cmap::accelerator_t1597     void collect_mapping (hb_set_t *unicodes, hb_map_t *mapping,
1598 			  unsigned num_glyphs = UINT_MAX) const
1599     { subtable->collect_mapping (unicodes, mapping, num_glyphs); }
collect_variation_selectorsOT::cmap::accelerator_t1600     void collect_variation_selectors (hb_set_t *out) const
1601     { subtable_uvs->collect_variation_selectors (out); }
collect_variation_unicodesOT::cmap::accelerator_t1602     void collect_variation_unicodes (hb_codepoint_t variation_selector,
1603 				     hb_set_t *out) const
1604     { subtable_uvs->collect_variation_unicodes (variation_selector, out); }
1605 
1606     protected:
1607     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
1608 					      hb_codepoint_t codepoint,
1609 					      hb_codepoint_t *glyph);
1610 
1611     template <typename Type>
get_glyph_fromOT::cmap::accelerator_t1612     HB_INTERNAL static bool get_glyph_from (const void *obj,
1613 					    hb_codepoint_t codepoint,
1614 					    hb_codepoint_t *glyph)
1615     {
1616       const Type *typed_obj = (const Type *) obj;
1617       return typed_obj->get_glyph (codepoint, glyph);
1618     }
1619 
1620     template <typename Type>
get_glyph_from_symbolOT::cmap::accelerator_t1621     HB_INTERNAL static bool get_glyph_from_symbol (const void *obj,
1622 						   hb_codepoint_t codepoint,
1623 						   hb_codepoint_t *glyph)
1624     {
1625       const Type *typed_obj = (const Type *) obj;
1626       if (likely (typed_obj->get_glyph (codepoint, glyph)))
1627 	return true;
1628 
1629       if (codepoint <= 0x00FFu)
1630       {
1631 	/* For symbol-encoded OpenType fonts, we duplicate the
1632 	 * U+F000..F0FF range at U+0000..U+00FF.  That's what
1633 	 * Windows seems to do, and that's hinted about at:
1634 	 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
1635 	 * under "Non-Standard (Symbol) Fonts". */
1636 	return typed_obj->get_glyph (0xF000u + codepoint, glyph);
1637       }
1638 
1639       return false;
1640     }
1641 
1642     private:
1643     hb_nonnull_ptr_t<const CmapSubtable> subtable;
1644     hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs;
1645 
1646     hb_cmap_get_glyph_func_t get_glyph_funcZ;
1647     const void *get_glyph_data;
1648 
1649     CmapSubtableFormat4::accelerator_t format4_accel;
1650 
1651     public:
1652     hb_blob_ptr_t<cmap> table;
1653   };
1654 
1655   protected:
1656 
find_subtableOT::cmap1657   const CmapSubtable *find_subtable (unsigned int platform_id,
1658 				     unsigned int encoding_id) const
1659   {
1660     EncodingRecord key;
1661     key.platformID = platform_id;
1662     key.encodingID = encoding_id;
1663 
1664     const EncodingRecord &result = encodingRecord.bsearch (key);
1665     if (!result.subtable)
1666       return nullptr;
1667 
1668     return &(this+result.subtable);
1669   }
1670 
find_encodingrecOT::cmap1671   const EncodingRecord *find_encodingrec (unsigned int platform_id,
1672 					  unsigned int encoding_id) const
1673   {
1674     EncodingRecord key;
1675     key.platformID = platform_id;
1676     key.encodingID = encoding_id;
1677 
1678     return encodingRecord.as_array ().bsearch (key);
1679   }
1680 
find_subtableOT::cmap1681   bool find_subtable (unsigned format) const
1682   {
1683     auto it =
1684     + hb_iter (encodingRecord)
1685     | hb_map (&EncodingRecord::subtable)
1686     | hb_map (hb_add (this))
1687     | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == format; })
1688     ;
1689 
1690     return it.len ();
1691   }
1692 
1693   public:
1694 
sanitizeOT::cmap1695   bool sanitize (hb_sanitize_context_t *c) const
1696   {
1697     TRACE_SANITIZE (this);
1698     return_trace (c->check_struct (this) &&
1699 		  likely (version == 0) &&
1700 		  encodingRecord.sanitize (c, this));
1701   }
1702 
1703   protected:
1704   HBUINT16	version;	/* Table version number (0). */
1705   SortedArray16Of<EncodingRecord>
1706 		encodingRecord;	/* Encoding tables. */
1707   public:
1708   DEFINE_SIZE_ARRAY (4, encodingRecord);
1709 };
1710 
1711 struct cmap_accelerator_t : cmap::accelerator_t {};
1712 
1713 } /* namespace OT */
1714 
1715 
1716 #endif /* HB_OT_CMAP_TABLE_HH */
1717