1 /*
2  * Copyright © 2011,2012  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #include "hb.hh"
28 
29 #ifndef HB_NO_OT_SHAPE
30 
31 #include "hb-ot-shape-complex-khmer.hh"
32 #include "hb-ot-layout.hh"
33 
34 
35 /*
36  * Khmer shaper.
37  */
38 
39 static const hb_ot_map_feature_t
40 khmer_features[] =
41 {
42   /*
43    * Basic features.
44    * These features are applied in order, one at a time, after reordering.
45    */
46   {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS},
47   {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS},
48   {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS},
49   {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS},
50   {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS},
51   /*
52    * Other features.
53    * These features are applied all at once after clearing syllables.
54    */
55   {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
56   {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
57   {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
58   {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
59 };
60 
61 /*
62  * Must be in the same order as the khmer_features array.
63  */
64 enum {
65   KHMER_PREF,
66   KHMER_BLWF,
67   KHMER_ABVF,
68   KHMER_PSTF,
69   KHMER_CFAR,
70 
71   _KHMER_PRES,
72   _KHMER_ABVS,
73   _KHMER_BLWS,
74   _KHMER_PSTS,
75 
76   KHMER_NUM_FEATURES,
77   KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */
78 };
79 
80 static void
81 setup_syllables_khmer (const hb_ot_shape_plan_t *plan,
82 		       hb_font_t *font,
83 		       hb_buffer_t *buffer);
84 static void
85 reorder_khmer (const hb_ot_shape_plan_t *plan,
86 	       hb_font_t *font,
87 	       hb_buffer_t *buffer);
88 
89 static void
collect_features_khmer(hb_ot_shape_planner_t * plan)90 collect_features_khmer (hb_ot_shape_planner_t *plan)
91 {
92   hb_ot_map_builder_t *map = &plan->map;
93 
94   /* Do this before any lookups have been applied. */
95   map->add_gsub_pause (setup_syllables_khmer);
96   map->add_gsub_pause (reorder_khmer);
97 
98   /* Testing suggests that Uniscribe does NOT pause between basic
99    * features.  Test with KhmerUI.ttf and the following three
100    * sequences:
101    *
102    *   U+1789,U+17BC
103    *   U+1789,U+17D2,U+1789
104    *   U+1789,U+17D2,U+1789,U+17BC
105    *
106    * https://github.com/harfbuzz/harfbuzz/issues/974
107    */
108   map->enable_feature (HB_TAG('l','o','c','l'));
109   map->enable_feature (HB_TAG('c','c','m','p'));
110 
111   unsigned int i = 0;
112   for (; i < KHMER_BASIC_FEATURES; i++)
113     map->add_feature (khmer_features[i]);
114 
115   map->add_gsub_pause (_hb_clear_syllables);
116 
117   for (; i < KHMER_NUM_FEATURES; i++)
118     map->add_feature (khmer_features[i]);
119 }
120 
121 static void
override_features_khmer(hb_ot_shape_planner_t * plan)122 override_features_khmer (hb_ot_shape_planner_t *plan)
123 {
124   hb_ot_map_builder_t *map = &plan->map;
125 
126   /* Khmer spec has 'clig' as part of required shaping features:
127    * "Apply feature 'clig' to form ligatures that are desired for
128    * typographical correctness.", hence in overrides... */
129   map->enable_feature (HB_TAG('c','l','i','g'));
130 
131   /* Uniscribe does not apply 'kern' in Khmer. */
132   if (hb_options ().uniscribe_bug_compatible)
133   {
134     map->disable_feature (HB_TAG('k','e','r','n'));
135   }
136 
137   map->disable_feature (HB_TAG('l','i','g','a'));
138 }
139 
140 
141 struct khmer_shape_plan_t
142 {
get_virama_glyphkhmer_shape_plan_t143   bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
144   {
145     hb_codepoint_t glyph = virama_glyph;
146     if (unlikely (virama_glyph == (hb_codepoint_t) -1))
147     {
148       if (!font->get_nominal_glyph (0x17D2u, &glyph))
149 	glyph = 0;
150       /* Technically speaking, the spec says we should apply 'locl' to virama too.
151        * Maybe one day... */
152 
153       /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
154        * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
155       virama_glyph = glyph;
156     }
157 
158     *pglyph = glyph;
159     return glyph != 0;
160   }
161 
162   mutable hb_codepoint_t virama_glyph;
163 
164   hb_indic_would_substitute_feature_t pref;
165 
166   hb_mask_t mask_array[KHMER_NUM_FEATURES];
167 };
168 
169 static void *
data_create_khmer(const hb_ot_shape_plan_t * plan)170 data_create_khmer (const hb_ot_shape_plan_t *plan)
171 {
172   khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) calloc (1, sizeof (khmer_shape_plan_t));
173   if (unlikely (!khmer_plan))
174     return nullptr;
175 
176   khmer_plan->virama_glyph = (hb_codepoint_t) -1;
177 
178   khmer_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), true);
179 
180   for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
181     khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
182 				 0 : plan->map.get_1_mask (khmer_features[i].tag);
183 
184   return khmer_plan;
185 }
186 
/*
 * Releases plan data produced by data_create_khmer().
 */
static void
data_destroy_khmer (void *data)
{
  /* The data came from calloc(), so free() is the matching release. */
  free (data);
}
192 
193 
/*
 * Kinds of syllable distinguished by the Khmer shaper.  Code in this file
 * reads the kind back from the low four bits of a glyph's syllable() value.
 */
enum khmer_syllable_type_t {
  khmer_consonant_syllable = 0,
  khmer_broken_cluster     = 1,
  khmer_non_khmer_cluster  = 2,
};
199 
200 #include "hb-ot-shape-complex-khmer-machine.hh"
201 
202 static void
setup_masks_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_buffer_t * buffer,hb_font_t * font HB_UNUSED)203 setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
204 		   hb_buffer_t              *buffer,
205 		   hb_font_t                *font HB_UNUSED)
206 {
207   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
208 
209   /* We cannot setup masks here.  We save information about characters
210    * and setup masks later on in a pause-callback. */
211 
212   unsigned int count = buffer->len;
213   hb_glyph_info_t *info = buffer->info;
214   for (unsigned int i = 0; i < count; i++)
215     set_khmer_properties (info[i]);
216 }
217 
218 static void
setup_syllables_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)219 setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
220 		       hb_font_t *font HB_UNUSED,
221 		       hb_buffer_t *buffer)
222 {
223   find_syllables_khmer (buffer);
224   foreach_syllable (buffer, start, end)
225     buffer->unsafe_to_break (start, end);
226 }
227 
228 
/* Rules from:
 * https://docs.microsoft.com/en-us/typography/script-development/khmer */
231 
232 static void
reorder_consonant_syllable(const hb_ot_shape_plan_t * plan,hb_face_t * face HB_UNUSED,hb_buffer_t * buffer,unsigned int start,unsigned int end)233 reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
234 			    hb_face_t *face HB_UNUSED,
235 			    hb_buffer_t *buffer,
236 			    unsigned int start, unsigned int end)
237 {
238   const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
239   hb_glyph_info_t *info = buffer->info;
240 
241   /* Setup masks. */
242   {
243     /* Post-base */
244     hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] |
245 		     khmer_plan->mask_array[KHMER_ABVF] |
246 		     khmer_plan->mask_array[KHMER_PSTF];
247     for (unsigned int i = start + 1; i < end; i++)
248       info[i].mask  |= mask;
249   }
250 
251   unsigned int num_coengs = 0;
252   for (unsigned int i = start + 1; i < end; i++)
253   {
254     /* """
255      * When a COENG + (Cons | IndV) combination are found (and subscript count
256      * is less than two) the character combination is handled according to the
257      * subscript type of the character following the COENG.
258      *
259      * ...
260      *
261      * Subscript Type 2 - The COENG + RO characters are reordered to immediately
262      * before the base glyph. Then the COENG + RO characters are assigned to have
263      * the 'pref' OpenType feature applied to them.
264      * """
265      */
266     if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end)
267     {
268       num_coengs++;
269 
270       if (info[i + 1].khmer_category() == OT_Ra)
271       {
272 	for (unsigned int j = 0; j < 2; j++)
273 	  info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF];
274 
275 	/* Move the Coeng,Ro sequence to the start. */
276 	buffer->merge_clusters (start, i + 2);
277 	hb_glyph_info_t t0 = info[i];
278 	hb_glyph_info_t t1 = info[i + 1];
279 	memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0]));
280 	info[start] = t0;
281 	info[start + 1] = t1;
282 
283 	/* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
284 	 * Read the feature spec.
285 	 * This allows distinguishing the following cases with MS Khmer fonts:
286 	 * U+1784,U+17D2,U+179A,U+17D2,U+1782
287 	 * U+1784,U+17D2,U+1782,U+17D2,U+179A
288 	 */
289 	if (khmer_plan->mask_array[KHMER_CFAR])
290 	  for (unsigned int j = i + 2; j < end; j++)
291 	    info[j].mask |= khmer_plan->mask_array[KHMER_CFAR];
292 
293 	num_coengs = 2; /* Done. */
294       }
295     }
296 
297     /* Reorder left matra piece. */
298     else if (info[i].khmer_category() == OT_VPre)
299     {
300       /* Move to the start. */
301       buffer->merge_clusters (start, i + 1);
302       hb_glyph_info_t t = info[i];
303       memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0]));
304       info[start] = t;
305     }
306   }
307 }
308 
309 static void
reorder_syllable_khmer(const hb_ot_shape_plan_t * plan,hb_face_t * face,hb_buffer_t * buffer,unsigned int start,unsigned int end)310 reorder_syllable_khmer (const hb_ot_shape_plan_t *plan,
311 			hb_face_t *face,
312 			hb_buffer_t *buffer,
313 			unsigned int start, unsigned int end)
314 {
315   khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & 0x0F);
316   switch (syllable_type)
317   {
318     case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
319     case khmer_consonant_syllable:
320      reorder_consonant_syllable (plan, face, buffer, start, end);
321      break;
322 
323     case khmer_non_khmer_cluster:
324       break;
325   }
326 }
327 
328 static inline void
insert_dotted_circles_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font,hb_buffer_t * buffer)329 insert_dotted_circles_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
330 			     hb_font_t *font,
331 			     hb_buffer_t *buffer)
332 {
333   if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
334     return;
335 
336   /* Note: This loop is extra overhead, but should not be measurable.
337    * TODO Use a buffer scratch flag to remove the loop. */
338   bool has_broken_syllables = false;
339   unsigned int count = buffer->len;
340   hb_glyph_info_t *info = buffer->info;
341   for (unsigned int i = 0; i < count; i++)
342     if ((info[i].syllable() & 0x0F) == khmer_broken_cluster)
343     {
344       has_broken_syllables = true;
345       break;
346     }
347   if (likely (!has_broken_syllables))
348     return;
349 
350 
351   hb_codepoint_t dottedcircle_glyph;
352   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
353     return;
354 
355   hb_glyph_info_t dottedcircle = {0};
356   dottedcircle.codepoint = 0x25CCu;
357   set_khmer_properties (dottedcircle);
358   dottedcircle.codepoint = dottedcircle_glyph;
359 
360   buffer->clear_output ();
361 
362   buffer->idx = 0;
363   unsigned int last_syllable = 0;
364   while (buffer->idx < buffer->len && buffer->successful)
365   {
366     unsigned int syllable = buffer->cur().syllable();
367     khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (syllable & 0x0F);
368     if (unlikely (last_syllable != syllable && syllable_type == khmer_broken_cluster))
369     {
370       last_syllable = syllable;
371 
372       hb_glyph_info_t ginfo = dottedcircle;
373       ginfo.cluster = buffer->cur().cluster;
374       ginfo.mask = buffer->cur().mask;
375       ginfo.syllable() = buffer->cur().syllable();
376 
377       /* Insert dottedcircle after possible Repha. */
378       while (buffer->idx < buffer->len && buffer->successful &&
379 	     last_syllable == buffer->cur().syllable() &&
380 	     buffer->cur().khmer_category() == OT_Repha)
381 	buffer->next_glyph ();
382 
383       buffer->output_info (ginfo);
384     }
385     else
386       buffer->next_glyph ();
387   }
388   buffer->swap_buffers ();
389 }
390 
391 static void
reorder_khmer(const hb_ot_shape_plan_t * plan,hb_font_t * font,hb_buffer_t * buffer)392 reorder_khmer (const hb_ot_shape_plan_t *plan,
393 	       hb_font_t *font,
394 	       hb_buffer_t *buffer)
395 {
396   insert_dotted_circles_khmer (plan, font, buffer);
397 
398   foreach_syllable (buffer, start, end)
399     reorder_syllable_khmer (plan, font->face, buffer, start, end);
400 
401   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
402 }
403 
404 
405 static bool
decompose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b)406 decompose_khmer (const hb_ot_shape_normalize_context_t *c,
407 		 hb_codepoint_t  ab,
408 		 hb_codepoint_t *a,
409 		 hb_codepoint_t *b)
410 {
411   switch (ab)
412   {
413     /*
414      * Decompose split matras that don't have Unicode decompositions.
415      */
416 
417     /* Khmer */
418     case 0x17BEu  : *a = 0x17C1u; *b= 0x17BEu; return true;
419     case 0x17BFu  : *a = 0x17C1u; *b= 0x17BFu; return true;
420     case 0x17C0u  : *a = 0x17C1u; *b= 0x17C0u; return true;
421     case 0x17C4u  : *a = 0x17C1u; *b= 0x17C4u; return true;
422     case 0x17C5u  : *a = 0x17C1u; *b= 0x17C5u; return true;
423   }
424 
425   return (bool) c->unicode->decompose (ab, a, b);
426 }
427 
428 static bool
compose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab)429 compose_khmer (const hb_ot_shape_normalize_context_t *c,
430 	       hb_codepoint_t  a,
431 	       hb_codepoint_t  b,
432 	       hb_codepoint_t *ab)
433 {
434   /* Avoid recomposing split matras. */
435   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
436     return false;
437 
438   return (bool) c->unicode->compose (a, b, ab);
439 }
440 
441 
442 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer =
443 {
444   collect_features_khmer,
445   override_features_khmer,
446   data_create_khmer,
447   data_destroy_khmer,
448   nullptr, /* preprocess_text */
449   nullptr, /* postprocess_glyphs */
450   HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
451   decompose_khmer,
452   compose_khmer,
453   setup_masks_khmer,
454   HB_TAG_NONE, /* gpos_tag */
455   nullptr, /* reorder_marks */
456   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
457   false, /* fallback_position */
458 };
459 
460 
461 #endif
462