1 /*
2  * Copyright © 2011,2012  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #include "hb-ot-shape-complex-khmer-private.hh"
28 #include "hb-ot-layout-private.hh"
29 
30 
31 /*
32  * Khmer shaper.
33  */
34 
/* One row of the shaper's feature table: an OpenType feature tag paired
 * with the map-builder flags that feature is registered with. */
struct feature_list_t {
  /* OpenType feature tag, built with HB_TAG(). */
  hb_tag_t tag;
  /* Base flags for the feature; the joiner-handling flags are OR-ed in
   * when the feature is added to the map in collect_features_khmer(). */
  hb_ot_map_feature_flags_t flags;
};
39 
/* Features the Khmer shaper registers, in application order.  The order of
 * the rows is significant: the anonymous enum that follows indexes this
 * table, and collect_features_khmer() walks it front to back. */
static const feature_list_t
khmer_features[] =
{
  /*
   * Basic features.
   * These features are applied in order, one at a time, after initial_reordering
   * (each one is followed by its own GSUB pause when registered).
   */
  {HB_TAG('p','r','e','f'), F_NONE},
  {HB_TAG('b','l','w','f'), F_NONE},
  {HB_TAG('a','b','v','f'), F_NONE},
  {HB_TAG('p','s','t','f'), F_NONE},
  {HB_TAG('c','f','a','r'), F_NONE},
  /*
   * Other features.
   * These features are applied all at once, after final_reordering.
   * They are not separated by pauses because fonts may interleave the
   * lookups of these features; the default Bengali font in Windows, for
   * example, has intermixed lookups for init, pres, abvs and blws.
   */
  {HB_TAG('p','r','e','s'), F_GLOBAL},
  {HB_TAG('a','b','v','s'), F_GLOBAL},
  {HB_TAG('b','l','w','s'), F_GLOBAL},
  {HB_TAG('p','s','t','s'), F_GLOBAL},
  /* Positioning features, though we don't care about the types. */
  {HB_TAG('d','i','s','t'), F_GLOBAL},
  {HB_TAG('a','b','v','m'), F_GLOBAL},
  {HB_TAG('b','l','w','m'), F_GLOBAL},
};
67 
/*
 * Symbolic indices into khmer_features[] and into
 * khmer_shape_plan_t::mask_array.
 * Must be in the same order as the khmer_features array.
 */
enum {
  /* Basic features: applied one at a time, each followed by a pause. */
  PREF,
  BLWF,
  ABVF,
  PSTF,
  CFAR,

  /* Remaining features: applied together after final reordering. */
  _PRES,
  _ABVS,
  _BLWS,
  _PSTS,
  _DIST,
  _ABVM,
  _BLWM,

  /* Total number of rows in khmer_features[]. */
  KHMER_NUM_FEATURES,
  /* Number of leading "basic" rows, i.e. the index of the first non-basic one. */
  KHMER_BASIC_FEATURES = _PRES /* Don't forget to update this! */
};
89 
90 static void
91 setup_syllables (const hb_ot_shape_plan_t *plan,
92 		 hb_font_t *font,
93 		 hb_buffer_t *buffer);
94 static void
95 initial_reordering (const hb_ot_shape_plan_t *plan,
96 		    hb_font_t *font,
97 		    hb_buffer_t *buffer);
98 static void
99 final_reordering (const hb_ot_shape_plan_t *plan,
100 		  hb_font_t *font,
101 		  hb_buffer_t *buffer);
102 static void
103 clear_syllables (const hb_ot_shape_plan_t *plan,
104 		 hb_font_t *font,
105 		 hb_buffer_t *buffer);
106 
107 static void
collect_features_khmer(hb_ot_shape_planner_t * plan)108 collect_features_khmer (hb_ot_shape_planner_t *plan)
109 {
110   hb_ot_map_builder_t *map = &plan->map;
111 
112   /* Do this before any lookups have been applied. */
113   map->add_gsub_pause (setup_syllables);
114 
115   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
116   /* The Indic specs do not require ccmp, but we apply it here since if
117    * there is a use of it, it's typically at the beginning. */
118   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
119 
120 
121   unsigned int i = 0;
122   map->add_gsub_pause (initial_reordering);
123   for (; i < KHMER_BASIC_FEATURES; i++) {
124     map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
125     map->add_gsub_pause (nullptr);
126   }
127   map->add_gsub_pause (final_reordering);
128   for (; i < KHMER_NUM_FEATURES; i++) {
129     map->add_feature (khmer_features[i].tag, 1, khmer_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ);
130   }
131 
132   map->add_global_bool_feature (HB_TAG('c','a','l','t'));
133   map->add_global_bool_feature (HB_TAG('c','l','i','g'));
134 
135   map->add_gsub_pause (clear_syllables);
136 }
137 
138 static void
override_features_khmer(hb_ot_shape_planner_t * plan)139 override_features_khmer (hb_ot_shape_planner_t *plan)
140 {
141   /* Uniscribe does not apply 'kern' in Khmer. */
142   if (hb_options ().uniscribe_bug_compatible)
143   {
144     plan->map.add_feature (HB_TAG('k','e','r','n'), 0, F_GLOBAL);
145   }
146 
147   plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
148 }
149 
150 
151 struct would_substitute_feature_t
152 {
initwould_substitute_feature_t153   inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_)
154   {
155     zero_context = zero_context_;
156     map->get_stage_lookups (0/*GSUB*/,
157 			    map->get_feature_stage (0/*GSUB*/, feature_tag),
158 			    &lookups, &count);
159   }
160 
would_substitutewould_substitute_feature_t161   inline bool would_substitute (const hb_codepoint_t *glyphs,
162 				unsigned int          glyphs_count,
163 				hb_face_t            *face) const
164   {
165     for (unsigned int i = 0; i < count; i++)
166       if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
167 	return true;
168     return false;
169   }
170 
171   private:
172   const hb_ot_map_t::lookup_map_t *lookups;
173   unsigned int count;
174   bool zero_context;
175 };
176 
177 struct khmer_shape_plan_t
178 {
179   ASSERT_POD ();
180 
get_virama_glyphkhmer_shape_plan_t181   inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const
182   {
183     hb_codepoint_t glyph = virama_glyph;
184     if (unlikely (virama_glyph == (hb_codepoint_t) -1))
185     {
186       if (!font->get_nominal_glyph (0x17D2u, &glyph))
187 	glyph = 0;
188       /* Technically speaking, the spec says we should apply 'locl' to virama too.
189        * Maybe one day... */
190 
191       /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
192        * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
193       virama_glyph = glyph;
194     }
195 
196     *pglyph = glyph;
197     return glyph != 0;
198   }
199 
200   mutable hb_codepoint_t virama_glyph;
201 
202   would_substitute_feature_t pref;
203 
204   hb_mask_t mask_array[KHMER_NUM_FEATURES];
205 };
206 
207 static void *
data_create_khmer(const hb_ot_shape_plan_t * plan)208 data_create_khmer (const hb_ot_shape_plan_t *plan)
209 {
210   khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) calloc (1, sizeof (khmer_shape_plan_t));
211   if (unlikely (!khmer_plan))
212     return nullptr;
213 
214   khmer_plan->virama_glyph = (hb_codepoint_t) -1;
215 
216   khmer_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), true);
217 
218   for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
219     khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
220 				 0 : plan->map.get_1_mask (khmer_features[i].tag);
221 
222   return khmer_plan;
223 }
224 
/* Releases shape-plan data previously returned by data_create_khmer(). */
static void
data_destroy_khmer (void *data)
{
  /* The plan data is a single calloc() block with no owned sub-allocations. */
  free (data);
}
230 
231 
/* Kind of syllable a glyph belongs to.  Readers in this file extract it
 * from the low four bits of the glyph's syllable() value. */
enum syllable_type_t {
  consonant_syllable,	/* Reordered by initial_reordering_consonant_syllable(). */
  broken_cluster,	/* Gets a dotted circle inserted, then treated like a consonant syllable. */
  non_khmer_cluster,	/* Left untouched by initial reordering. */
};
237 
238 #include "hb-ot-shape-complex-khmer-machine.hh"
239 
240 static void
setup_masks_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_buffer_t * buffer,hb_font_t * font HB_UNUSED)241 setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
242 		   hb_buffer_t              *buffer,
243 		   hb_font_t                *font HB_UNUSED)
244 {
245   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
246   HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
247 
248   /* We cannot setup masks here.  We save information about characters
249    * and setup masks later on in a pause-callback. */
250 
251   unsigned int count = buffer->len;
252   hb_glyph_info_t *info = buffer->info;
253   for (unsigned int i = 0; i < count; i++)
254     set_khmer_properties (info[i]);
255 }
256 
257 static void
setup_syllables(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)258 setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
259 		 hb_font_t *font HB_UNUSED,
260 		 hb_buffer_t *buffer)
261 {
262   find_syllables (buffer);
263   foreach_syllable (buffer, start, end)
264     buffer->unsafe_to_break (start, end);
265 }
266 
267 static int
compare_khmer_order(const hb_glyph_info_t * pa,const hb_glyph_info_t * pb)268 compare_khmer_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
269 {
270   int a = pa->khmer_position();
271   int b = pb->khmer_position();
272 
273   return a < b ? -1 : a == b ? 0 : +1;
274 }
275 
276 
277 /* Rules from:
278  * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */
279 
280 static void
initial_reordering_consonant_syllable(const hb_ot_shape_plan_t * plan,hb_face_t * face,hb_buffer_t * buffer,unsigned int start,unsigned int end)281 initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
282 				       hb_face_t *face,
283 				       hb_buffer_t *buffer,
284 				       unsigned int start, unsigned int end)
285 {
286   const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
287   hb_glyph_info_t *info = buffer->info;
288 
289   /* 1. Khmer shaping assumes that a syllable will begin with a Cons, IndV, or Number. */
290 
291   /* The first consonant is always the base. */
292   unsigned int base = start;
293   info[base].khmer_position() = POS_BASE_C;
294 
295   /* Mark all subsequent consonants as below. */
296   for (unsigned int i = base + 1; i < end; i++)
297     if (is_consonant_or_vowel (info[i]))
298       info[i].khmer_position() = POS_BELOW_C;
299 
300   /* Mark final consonants.  A final consonant is one appearing after a matra,
301    * like in Khmer. */
302   for (unsigned int i = base + 1; i < end; i++)
303     if (info[i].khmer_category() == OT_M) {
304       for (unsigned int j = i + 1; j < end; j++)
305         if (is_consonant_or_vowel (info[j])) {
306 	  info[j].khmer_position() = POS_FINAL_C;
307 	  break;
308 	}
309       break;
310     }
311 
312   /* Attach misc marks to previous char to move with them. */
313   {
314     khmer_position_t last_pos = POS_START;
315     for (unsigned int i = start; i < end; i++)
316     {
317       if ((FLAG_UNSAFE (info[i].khmer_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_Coeng))))
318       {
319 	info[i].khmer_position() = last_pos;
320 	if (unlikely (info[i].khmer_category() == OT_H &&
321 		      info[i].khmer_position() == POS_PRE_M))
322 	{
323 	  /*
324 	   * Uniscribe doesn't move the Halant with Left Matra.
325 	   * TEST: U+092B,U+093F,U+094DE
326 	   * We follow.  This is important for the Sinhala
327 	   * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
328 	   * where U+0DD9 is a left matra and U+0DCA is the virama.
329 	   * We don't want to move the virama with the left matra.
330 	   * TEST: U+0D9A,U+0DDA
331 	   */
332 	  for (unsigned int j = i; j > start; j--)
333 	    if (info[j - 1].khmer_position() != POS_PRE_M) {
334 	      info[i].khmer_position() = info[j - 1].khmer_position();
335 	      break;
336 	    }
337 	}
338       } else if (info[i].khmer_position() != POS_SMVD) {
339         last_pos = (khmer_position_t) info[i].khmer_position();
340       }
341     }
342   }
343   /* For post-base consonants let them own anything before them
344    * since the last consonant or matra. */
345   {
346     unsigned int last = base;
347     for (unsigned int i = base + 1; i < end; i++)
348       if (is_consonant_or_vowel (info[i]))
349       {
350 	for (unsigned int j = last + 1; j < i; j++)
351 	  if (info[j].khmer_position() < POS_SMVD)
352 	    info[j].khmer_position() = info[i].khmer_position();
353 	last = i;
354       } else if (info[i].khmer_category() == OT_M)
355         last = i;
356   }
357 
358   {
359     /* Use syllable() for sort accounting temporarily. */
360     unsigned int syllable = info[start].syllable();
361     for (unsigned int i = start; i < end; i++)
362       info[i].syllable() = i - start;
363 
364     /* Sit tight, rock 'n roll! */
365     hb_stable_sort (info + start, end - start, compare_khmer_order);
366     /* Find base again */
367     base = end;
368     for (unsigned int i = start; i < end; i++)
369       if (info[i].khmer_position() == POS_BASE_C)
370       {
371 	base = i;
372 	break;
373       }
374 
375     /* Note!  syllable() is a one-byte field. */
376     for (unsigned int i = base; i < end; i++)
377       if (info[i].syllable() != 255)
378       {
379 	unsigned int max = i;
380 	unsigned int j = start + info[i].syllable();
381 	while (j != i)
382 	{
383 	  max = MAX (max, j);
384 	  unsigned int next = start + info[j].syllable();
385 	  info[j].syllable() = 255; /* So we don't process j later again. */
386 	  j = next;
387 	}
388 	if (i != max)
389 	  buffer->merge_clusters (i, max + 1);
390       }
391 
392     /* Put syllable back in. */
393     for (unsigned int i = start; i < end; i++)
394       info[i].syllable() = syllable;
395   }
396 
397   /* Setup masks now */
398 
399   {
400     hb_mask_t mask;
401 
402     /* Post-base */
403     mask = khmer_plan->mask_array[BLWF] | khmer_plan->mask_array[ABVF] | khmer_plan->mask_array[PSTF];
404     for (unsigned int i = base + 1; i < end; i++)
405       info[i].mask  |= mask;
406   }
407 
408   unsigned int pref_len = 2;
409   if (khmer_plan->mask_array[PREF] && base + pref_len < end)
410   {
411     /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */
412     for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) {
413       hb_codepoint_t glyphs[2];
414       for (unsigned int j = 0; j < pref_len; j++)
415         glyphs[j] = info[i + j].codepoint;
416       if (khmer_plan->pref.would_substitute (glyphs, pref_len, face))
417       {
418 	for (unsigned int j = 0; j < pref_len; j++)
419 	  info[i++].mask |= khmer_plan->mask_array[PREF];
420 
421 	/* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
422 	 * Read the feature spec.
423 	 * This allows distinguishing the following cases with MS Khmer fonts:
424 	 * U+1784,U+17D2,U+179A,U+17D2,U+1782
425 	 * U+1784,U+17D2,U+1782,U+17D2,U+179A
426 	 */
427 	if (khmer_plan->mask_array[CFAR])
428 	  for (; i < end; i++)
429 	    info[i].mask |= khmer_plan->mask_array[CFAR];
430 
431 	break;
432       }
433     }
434   }
435 }
436 
437 static void
initial_reordering_syllable(const hb_ot_shape_plan_t * plan,hb_face_t * face,hb_buffer_t * buffer,unsigned int start,unsigned int end)438 initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
439 			     hb_face_t *face,
440 			     hb_buffer_t *buffer,
441 			     unsigned int start, unsigned int end)
442 {
443   syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
444   switch (syllable_type)
445   {
446     case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
447     case consonant_syllable:
448      initial_reordering_consonant_syllable (plan, face, buffer, start, end);
449      break;
450 
451     case non_khmer_cluster:
452       break;
453   }
454 }
455 
456 static inline void
insert_dotted_circles(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font,hb_buffer_t * buffer)457 insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
458 		       hb_font_t *font,
459 		       hb_buffer_t *buffer)
460 {
461   /* Note: This loop is extra overhead, but should not be measurable. */
462   bool has_broken_syllables = false;
463   unsigned int count = buffer->len;
464   hb_glyph_info_t *info = buffer->info;
465   for (unsigned int i = 0; i < count; i++)
466     if ((info[i].syllable() & 0x0F) == broken_cluster)
467     {
468       has_broken_syllables = true;
469       break;
470     }
471   if (likely (!has_broken_syllables))
472     return;
473 
474 
475   hb_codepoint_t dottedcircle_glyph;
476   if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph))
477     return;
478 
479   hb_glyph_info_t dottedcircle = {0};
480   dottedcircle.codepoint = 0x25CCu;
481   set_khmer_properties (dottedcircle);
482   dottedcircle.codepoint = dottedcircle_glyph;
483 
484   buffer->clear_output ();
485 
486   buffer->idx = 0;
487   unsigned int last_syllable = 0;
488   while (buffer->idx < buffer->len && !buffer->in_error)
489   {
490     unsigned int syllable = buffer->cur().syllable();
491     syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
492     if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
493     {
494       last_syllable = syllable;
495 
496       hb_glyph_info_t ginfo = dottedcircle;
497       ginfo.cluster = buffer->cur().cluster;
498       ginfo.mask = buffer->cur().mask;
499       ginfo.syllable() = buffer->cur().syllable();
500       /* TODO Set glyph_props? */
501 
502       /* Insert dottedcircle after possible Repha. */
503       while (buffer->idx < buffer->len && !buffer->in_error &&
504 	     last_syllable == buffer->cur().syllable() &&
505 	     buffer->cur().khmer_category() == OT_Repha)
506         buffer->next_glyph ();
507 
508       buffer->output_info (ginfo);
509     }
510     else
511       buffer->next_glyph ();
512   }
513 
514   buffer->swap_buffers ();
515 }
516 
517 static void
initial_reordering(const hb_ot_shape_plan_t * plan,hb_font_t * font,hb_buffer_t * buffer)518 initial_reordering (const hb_ot_shape_plan_t *plan,
519 		    hb_font_t *font,
520 		    hb_buffer_t *buffer)
521 {
522   insert_dotted_circles (plan, font, buffer);
523 
524   foreach_syllable (buffer, start, end)
525     initial_reordering_syllable (plan, font->face, buffer, start, end);
526 }
527 
528 static void
final_reordering_syllable(const hb_ot_shape_plan_t * plan,hb_buffer_t * buffer,unsigned int start,unsigned int end)529 final_reordering_syllable (const hb_ot_shape_plan_t *plan,
530 			   hb_buffer_t *buffer,
531 			   unsigned int start, unsigned int end)
532 {
533   const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
534   hb_glyph_info_t *info = buffer->info;
535 
536 
537   /* This function relies heavily on halant glyphs.  Lots of ligation
538    * and possibly multiple substitutions happened prior to this
539    * phase, and that might have messed up our properties.  Recover
540    * from a particular case of that where we're fairly sure that a
541    * class of OT_H is desired but has been lost. */
542   if (khmer_plan->virama_glyph)
543   {
544     unsigned int virama_glyph = khmer_plan->virama_glyph;
545     for (unsigned int i = start; i < end; i++)
546       if (info[i].codepoint == virama_glyph &&
547 	  _hb_glyph_info_ligated (&info[i]) &&
548 	  _hb_glyph_info_multiplied (&info[i]))
549       {
550         /* This will make sure that this glyph passes is_coeng() test. */
551 	info[i].khmer_category() = OT_H;
552 	_hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
553       }
554   }
555 
556 
557   /* 4. Final reordering:
558    *
559    * After the localized forms and basic shaping forms GSUB features have been
560    * applied (see below), the shaping engine performs some final glyph
561    * reordering before applying all the remaining font features to the entire
562    * syllable.
563    */
564 
565   bool try_pref = !!khmer_plan->mask_array[PREF];
566 
567   /* Find base again */
568   unsigned int base;
569   for (base = start; base < end; base++)
570     if (info[base].khmer_position() >= POS_BASE_C)
571     {
572       if (try_pref && base + 1 < end)
573       {
574 	for (unsigned int i = base + 1; i < end; i++)
575 	  if ((info[i].mask & khmer_plan->mask_array[PREF]) != 0)
576 	  {
577 	    if (!(_hb_glyph_info_substituted (&info[i]) &&
578 		  _hb_glyph_info_ligated_and_didnt_multiply (&info[i])))
579 	    {
580 	      /* Ok, this was a 'pref' candidate but didn't form any.
581 	       * Base is around here... */
582 	      base = i;
583 	      while (base < end && is_coeng (info[base]))
584 		base++;
585 	      info[base].khmer_position() = POS_BASE_C;
586 
587 	      try_pref = false;
588 	    }
589 	    break;
590 	  }
591       }
592 
593       if (start < base && info[base].khmer_position() > POS_BASE_C)
594         base--;
595       break;
596     }
597   if (base == end && start < base &&
598       is_one_of (info[base - 1], FLAG (OT_ZWJ)))
599     base--;
600   if (base < end)
601     while (start < base &&
602 	   is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_Coeng))))
603       base--;
604 
605 
606   /*   o Reorder matras:
607    *
608    *     If a pre-base matra character had been reordered before applying basic
609    *     features, the glyph can be moved closer to the main consonant based on
610    *     whether half-forms had been formed. Actual position for the matra is
611    *     defined as “after last standalone halant glyph, after initial matra
612    *     position and before the main consonant”. If ZWJ or ZWNJ follow this
613    *     halant, position is moved after it.
614    */
615 
616   if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */
617   {
618     /* If we lost track of base, alas, position before last thingy. */
619     unsigned int new_pos = base == end ? base - 2 : base - 1;
620 
621     while (new_pos > start &&
622 	   !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_Coeng)))))
623       new_pos--;
624 
625     /* If we found no Halant we are done.
626      * Otherwise only proceed if the Halant does
627      * not belong to the Matra itself! */
628     if (is_coeng (info[new_pos]) &&
629 	info[new_pos].khmer_position() != POS_PRE_M)
630     {
631       /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
632       if (new_pos + 1 < end && is_joiner (info[new_pos + 1]))
633 	new_pos++;
634     }
635     else
636       new_pos = start; /* No move. */
637 
638     if (start < new_pos && info[new_pos].khmer_position () != POS_PRE_M)
639     {
640       /* Now go see if there's actually any matras... */
641       for (unsigned int i = new_pos; i > start; i--)
642 	if (info[i - 1].khmer_position () == POS_PRE_M)
643 	{
644 	  unsigned int old_pos = i - 1;
645 	  if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */
646 	    base--;
647 
648 	  hb_glyph_info_t tmp = info[old_pos];
649 	  memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0]));
650 	  info[new_pos] = tmp;
651 
652 	  /* Note: this merge_clusters() is intentionally *after* the reordering.
653 	   * Indic matra reordering is special and tricky... */
654 	  buffer->merge_clusters (new_pos, MIN (end, base + 1));
655 
656 	  new_pos--;
657 	}
658     } else {
659       for (unsigned int i = start; i < base; i++)
660 	if (info[i].khmer_position () == POS_PRE_M) {
661 	  buffer->merge_clusters (i, MIN (end, base + 1));
662 	  break;
663 	}
664     }
665   }
666 
667 
668   /*   o Reorder pre-base-reordering consonants:
669    *
670    *     If a pre-base-reordering consonant is found, reorder it according to
671    *     the following rules:
672    */
673 
674   if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */
675   {
676     for (unsigned int i = base + 1; i < end; i++)
677       if ((info[i].mask & khmer_plan->mask_array[PREF]) != 0)
678       {
679 	/*       1. Only reorder a glyph produced by substitution during application
680 	 *          of the <pref> feature. (Note that a font may shape a Ra consonant with
681 	 *          the feature generally but block it in certain contexts.)
682 	 */
683         /* Note: We just check that something got substituted.  We don't check that
684 	 * the <pref> feature actually did it...
685 	 *
686 	 * Reorder pref only if it ligated. */
687 	if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i]))
688 	{
689 	  /*
690 	   *       2. Try to find a target position the same way as for pre-base matra.
691 	   *          If it is found, reorder pre-base consonant glyph.
692 	   *
693 	   *       3. If position is not found, reorder immediately before main
694 	   *          consonant.
695 	   */
696 
697 	  unsigned int new_pos = base;
698 	  while (new_pos > start &&
699 		 !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_Coeng))))
700 	    new_pos--;
701 
702 	  /* In Khmer coeng model, a H,Ra can go *after* matras.  If it goes after a
703 	   * split matra, it should be reordered to *before* the left part of such matra. */
704 	  if (new_pos > start && info[new_pos - 1].khmer_category() == OT_M)
705 	  {
706 	    unsigned int old_pos = i;
707 	    for (unsigned int j = base + 1; j < old_pos; j++)
708 	      if (info[j].khmer_category() == OT_M)
709 	      {
710 		new_pos--;
711 		break;
712 	      }
713 	  }
714 
715 	  if (new_pos > start && is_coeng (info[new_pos - 1]))
716 	  {
717 	    /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
718 	    if (new_pos < end && is_joiner (info[new_pos]))
719 	      new_pos++;
720 	  }
721 
722 	  {
723 	    unsigned int old_pos = i;
724 
725 	    buffer->merge_clusters (new_pos, old_pos + 1);
726 	    hb_glyph_info_t tmp = info[old_pos];
727 	    memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0]));
728 	    info[new_pos] = tmp;
729 
730 	    if (new_pos <= base && base < old_pos)
731 	      base++;
732 	  }
733 	}
734 
735         break;
736       }
737   }
738 
739 
740   /*
741    * Finish off the clusters and go home!
742    */
743   if (hb_options ().uniscribe_bug_compatible)
744   {
745     /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.
746      * This means, half forms are submerged into the main consonant's cluster.
747      * This is unnecessary, and makes cursor positioning harder, but that's what
748      * Uniscribe does. */
749     buffer->merge_clusters (start, end);
750   }
751 }
752 
753 
754 static void
final_reordering(const hb_ot_shape_plan_t * plan,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)755 final_reordering (const hb_ot_shape_plan_t *plan,
756 		  hb_font_t *font HB_UNUSED,
757 		  hb_buffer_t *buffer)
758 {
759   unsigned int count = buffer->len;
760   if (unlikely (!count)) return;
761 
762   foreach_syllable (buffer, start, end)
763     final_reordering_syllable (plan, buffer, start, end);
764 
765   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
766   HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
767 }
768 
769 
770 static void
clear_syllables(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)771 clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
772 		 hb_font_t *font HB_UNUSED,
773 		 hb_buffer_t *buffer)
774 {
775   hb_glyph_info_t *info = buffer->info;
776   unsigned int count = buffer->len;
777   for (unsigned int i = 0; i < count; i++)
778     info[i].syllable() = 0;
779 }
780 
781 
782 static bool
decompose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b)783 decompose_khmer (const hb_ot_shape_normalize_context_t *c,
784 		 hb_codepoint_t  ab,
785 		 hb_codepoint_t *a,
786 		 hb_codepoint_t *b)
787 {
788   switch (ab)
789   {
790     /*
791      * Decompose split matras that don't have Unicode decompositions.
792      */
793 
794     /* Khmer */
795     case 0x17BEu  : *a = 0x17C1u; *b= 0x17BEu; return true;
796     case 0x17BFu  : *a = 0x17C1u; *b= 0x17BFu; return true;
797     case 0x17C0u  : *a = 0x17C1u; *b= 0x17C0u; return true;
798     case 0x17C4u  : *a = 0x17C1u; *b= 0x17C4u; return true;
799     case 0x17C5u  : *a = 0x17C1u; *b= 0x17C5u; return true;
800   }
801 
802   return (bool) c->unicode->decompose (ab, a, b);
803 }
804 
805 static bool
compose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab)806 compose_khmer (const hb_ot_shape_normalize_context_t *c,
807 	       hb_codepoint_t  a,
808 	       hb_codepoint_t  b,
809 	       hb_codepoint_t *ab)
810 {
811   /* Avoid recomposing split matras. */
812   if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
813     return false;
814 
815   return (bool) c->unicode->compose (a, b, ab);
816 }
817 
818 
/* The Khmer complex shaper: the callbacks and settings defined in this
 * file, listed positionally.  Slots set to nullptr are hooks this shaper
 * does not provide. */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer =
{
  collect_features_khmer,
  override_features_khmer,
  data_create_khmer,
  data_destroy_khmer,
  nullptr, /* preprocess_text */
  nullptr, /* postprocess_glyphs */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  decompose_khmer,
  compose_khmer,
  setup_masks_khmer,
  nullptr, /* disable_otl */
  nullptr, /* reorder_marks */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
  false, /* fallback_position */
};
836