1 /*
2  * Copyright © 2011,2012  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #include "hb.hh"
28 
29 #ifndef HB_NO_OT_SHAPE
30 
31 #include "hb-ot-shape-complex-khmer.hh"
32 #include "hb-ot-layout.hh"
33 
34 /*
35  * Khmer shaper.
36  */
37 
38 static const hb_ot_map_feature_t khmer_features[] = {
39     /*
40      * Basic features.
41      * These features are applied in order, one at a time, after reordering.
42      */
43     {HB_TAG('p', 'r', 'e', 'f'), F_MANUAL_JOINERS},
44     {HB_TAG('b', 'l', 'w', 'f'), F_MANUAL_JOINERS},
45     {HB_TAG('a', 'b', 'v', 'f'), F_MANUAL_JOINERS},
46     {HB_TAG('p', 's', 't', 'f'), F_MANUAL_JOINERS},
47     {HB_TAG('c', 'f', 'a', 'r'), F_MANUAL_JOINERS},
48     /*
49      * Other features.
50      * These features are applied all at once after clearing syllables.
51      */
52     {HB_TAG('p', 'r', 'e', 's'), F_GLOBAL_MANUAL_JOINERS},
53     {HB_TAG('a', 'b', 'v', 's'), F_GLOBAL_MANUAL_JOINERS},
54     {HB_TAG('b', 'l', 'w', 's'), F_GLOBAL_MANUAL_JOINERS},
55     {HB_TAG('p', 's', 't', 's'), F_GLOBAL_MANUAL_JOINERS},
56 };
57 
58 /*
59  * Must be in the same order as the khmer_features array.
60  */
enum {
    /* Indices of the basic features in khmer_features[]. */
    KHMER_PREF = 0,
    KHMER_BLWF,
    KHMER_ABVF,
    KHMER_PSTF,
    KHMER_CFAR,

    /* Indices of the remaining ("other") features in khmer_features[]. */
    _KHMER_PRES,
    _KHMER_ABVS,
    _KHMER_BLWS,
    _KHMER_PSTS,

    /* Total number of entries in khmer_features[]. */
    KHMER_NUM_FEATURES,

    /* Number of basic features, i.e. the index of the first non-basic one.
     * Don't forget to update this when the list above changes! */
    KHMER_BASIC_FEATURES = _KHMER_PRES,
};
76 
77 static void setup_syllables_khmer(const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
78 static void reorder_khmer(const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
79 
collect_features_khmer(hb_ot_shape_planner_t * plan)80 static void collect_features_khmer(hb_ot_shape_planner_t *plan)
81 {
82     hb_ot_map_builder_t *map = &plan->map;
83 
84     /* Do this before any lookups have been applied. */
85     map->add_gsub_pause(setup_syllables_khmer);
86     map->add_gsub_pause(reorder_khmer);
87 
88     /* Testing suggests that Uniscribe does NOT pause between basic
89      * features.  Test with KhmerUI.ttf and the following three
90      * sequences:
91      *
92      *   U+1789,U+17BC
93      *   U+1789,U+17D2,U+1789
94      *   U+1789,U+17D2,U+1789,U+17BC
95      *
96      * https://github.com/harfbuzz/harfbuzz/issues/974
97      */
98     map->enable_feature(HB_TAG('l', 'o', 'c', 'l'));
99     map->enable_feature(HB_TAG('c', 'c', 'm', 'p'));
100 
101     unsigned int i = 0;
102     for (; i < KHMER_BASIC_FEATURES; i++)
103         map->add_feature(khmer_features[i]);
104 
105     map->add_gsub_pause(_hb_clear_syllables);
106 
107     for (; i < KHMER_NUM_FEATURES; i++)
108         map->add_feature(khmer_features[i]);
109 }
110 
override_features_khmer(hb_ot_shape_planner_t * plan)111 static void override_features_khmer(hb_ot_shape_planner_t *plan)
112 {
113     hb_ot_map_builder_t *map = &plan->map;
114 
115     /* Khmer spec has 'clig' as part of required shaping features:
116      * "Apply feature 'clig' to form ligatures that are desired for
117      * typographical correctness.", hence in overrides... */
118     map->enable_feature(HB_TAG('c', 'l', 'i', 'g'));
119 
120     /* Uniscribe does not apply 'kern' in Khmer. */
121     if (hb_options().uniscribe_bug_compatible) {
122         map->disable_feature(HB_TAG('k', 'e', 'r', 'n'));
123     }
124 
125     map->disable_feature(HB_TAG('l', 'i', 'g', 'a'));
126 }
127 
128 struct khmer_shape_plan_t
129 {
get_virama_glyphkhmer_shape_plan_t130     bool get_virama_glyph(hb_font_t *font, hb_codepoint_t *pglyph) const
131     {
132         hb_codepoint_t glyph = virama_glyph;
133         if (unlikely(virama_glyph == (hb_codepoint_t)-1)) {
134             if (!font->get_nominal_glyph(0x17D2u, &glyph))
135                 glyph = 0;
136             /* Technically speaking, the spec says we should apply 'locl' to virama too.
137              * Maybe one day... */
138 
139             /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
140              * during shape planning...  Instead, overwrite it here.  It's safe.  Don't worry! */
141             virama_glyph = glyph;
142         }
143 
144         *pglyph = glyph;
145         return glyph != 0;
146     }
147 
148     mutable hb_codepoint_t virama_glyph;
149 
150     hb_indic_would_substitute_feature_t pref;
151 
152     hb_mask_t mask_array[KHMER_NUM_FEATURES];
153 };
154 
data_create_khmer(const hb_ot_shape_plan_t * plan)155 static void *data_create_khmer(const hb_ot_shape_plan_t *plan)
156 {
157     khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *)calloc(1, sizeof(khmer_shape_plan_t));
158     if (unlikely(!khmer_plan))
159         return nullptr;
160 
161     khmer_plan->virama_glyph = (hb_codepoint_t)-1;
162 
163     khmer_plan->pref.init(&plan->map, HB_TAG('p', 'r', 'e', 'f'), true);
164 
165     for (unsigned int i = 0; i < ARRAY_LENGTH(khmer_plan->mask_array); i++)
166         khmer_plan->mask_array[i] =
167             (khmer_features[i].flags & F_GLOBAL) ? 0 : plan->map.get_1_mask(khmer_features[i].tag);
168 
169     return khmer_plan;
170 }
171 
/*
 * Releases the plan data allocated by data_create_khmer().
 */
static void data_destroy_khmer(void *data)
{
    void *khmer_plan = data;
    free(khmer_plan);
}
176 
/*
 * Kinds of syllables distinguished by the Khmer shaper.  The value is read
 * back from the low four bits of a glyph's syllable() byte.
 */
enum khmer_syllable_type_t {
    khmer_consonant_syllable = 0,
    khmer_broken_cluster = 1,
    khmer_non_khmer_cluster = 2,
};
182 
183 #include "hb-ot-shape-complex-khmer-machine.hh"
184 
setup_masks_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_buffer_t * buffer,hb_font_t * font HB_UNUSED)185 static void setup_masks_khmer(const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer, hb_font_t *font HB_UNUSED)
186 {
187     HB_BUFFER_ALLOCATE_VAR(buffer, khmer_category);
188 
189     /* We cannot setup masks here.  We save information about characters
190      * and setup masks later on in a pause-callback. */
191 
192     unsigned int count = buffer->len;
193     hb_glyph_info_t *info = buffer->info;
194     for (unsigned int i = 0; i < count; i++)
195         set_khmer_properties(info[i]);
196 }
197 
198 static void
setup_syllables_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)199 setup_syllables_khmer(const hb_ot_shape_plan_t *plan HB_UNUSED, hb_font_t *font HB_UNUSED, hb_buffer_t *buffer)
200 {
201     find_syllables_khmer(buffer);
202     foreach_syllable(buffer, start, end) buffer->unsafe_to_break(start, end);
203 }
204 
/* Rules from:
 * https://docs.microsoft.com/en-us/typography/script-development/khmer */
207 
reorder_consonant_syllable(const hb_ot_shape_plan_t * plan,hb_face_t * face HB_UNUSED,hb_buffer_t * buffer,unsigned int start,unsigned int end)208 static void reorder_consonant_syllable(const hb_ot_shape_plan_t *plan,
209                                        hb_face_t *face HB_UNUSED,
210                                        hb_buffer_t *buffer,
211                                        unsigned int start,
212                                        unsigned int end)
213 {
214     const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *)plan->data;
215     hb_glyph_info_t *info = buffer->info;
216 
217     /* Setup masks. */
218     {
219         /* Post-base */
220         hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] | khmer_plan->mask_array[KHMER_ABVF] |
221                          khmer_plan->mask_array[KHMER_PSTF];
222         for (unsigned int i = start + 1; i < end; i++)
223             info[i].mask |= mask;
224     }
225 
226     unsigned int num_coengs = 0;
227     for (unsigned int i = start + 1; i < end; i++) {
228         /* """
229          * When a COENG + (Cons | IndV) combination are found (and subscript count
230          * is less than two) the character combination is handled according to the
231          * subscript type of the character following the COENG.
232          *
233          * ...
234          *
235          * Subscript Type 2 - The COENG + RO characters are reordered to immediately
236          * before the base glyph. Then the COENG + RO characters are assigned to have
237          * the 'pref' OpenType feature applied to them.
238          * """
239          */
240         if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end) {
241             num_coengs++;
242 
243             if (info[i + 1].khmer_category() == OT_Ra) {
244                 for (unsigned int j = 0; j < 2; j++)
245                     info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF];
246 
247                 /* Move the Coeng,Ro sequence to the start. */
248                 buffer->merge_clusters(start, i + 2);
249                 hb_glyph_info_t t0 = info[i];
250                 hb_glyph_info_t t1 = info[i + 1];
251                 memmove(&info[start + 2], &info[start], (i - start) * sizeof(info[0]));
252                 info[start] = t0;
253                 info[start + 1] = t1;
254 
255                 /* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
256                  * Read the feature spec.
257                  * This allows distinguishing the following cases with MS Khmer fonts:
258                  * U+1784,U+17D2,U+179A,U+17D2,U+1782
259                  * U+1784,U+17D2,U+1782,U+17D2,U+179A
260                  */
261                 if (khmer_plan->mask_array[KHMER_CFAR])
262                     for (unsigned int j = i + 2; j < end; j++)
263                         info[j].mask |= khmer_plan->mask_array[KHMER_CFAR];
264 
265                 num_coengs = 2; /* Done. */
266             }
267         }
268 
269         /* Reorder left matra piece. */
270         else if (info[i].khmer_category() == OT_VPre) {
271             /* Move to the start. */
272             buffer->merge_clusters(start, i + 1);
273             hb_glyph_info_t t = info[i];
274             memmove(&info[start + 1], &info[start], (i - start) * sizeof(info[0]));
275             info[start] = t;
276         }
277     }
278 }
279 
reorder_syllable_khmer(const hb_ot_shape_plan_t * plan,hb_face_t * face,hb_buffer_t * buffer,unsigned int start,unsigned int end)280 static void reorder_syllable_khmer(
281     const hb_ot_shape_plan_t *plan, hb_face_t *face, hb_buffer_t *buffer, unsigned int start, unsigned int end)
282 {
283     khmer_syllable_type_t syllable_type = (khmer_syllable_type_t)(buffer->info[start].syllable() & 0x0F);
284     switch (syllable_type) {
285     case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
286     case khmer_consonant_syllable:
287         reorder_consonant_syllable(plan, face, buffer, start, end);
288         break;
289 
290     case khmer_non_khmer_cluster:
291         break;
292     }
293 }
294 
295 static inline void
insert_dotted_circles_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font,hb_buffer_t * buffer)296 insert_dotted_circles_khmer(const hb_ot_shape_plan_t *plan HB_UNUSED, hb_font_t *font, hb_buffer_t *buffer)
297 {
298     if (unlikely(buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
299         return;
300 
301     /* Note: This loop is extra overhead, but should not be measurable.
302      * TODO Use a buffer scratch flag to remove the loop. */
303     bool has_broken_syllables = false;
304     unsigned int count = buffer->len;
305     hb_glyph_info_t *info = buffer->info;
306     for (unsigned int i = 0; i < count; i++)
307         if ((info[i].syllable() & 0x0F) == khmer_broken_cluster) {
308             has_broken_syllables = true;
309             break;
310         }
311     if (likely(!has_broken_syllables))
312         return;
313 
314     hb_codepoint_t dottedcircle_glyph;
315     if (!font->get_nominal_glyph(0x25CCu, &dottedcircle_glyph))
316         return;
317 
318     hb_glyph_info_t dottedcircle = {0};
319     dottedcircle.codepoint = 0x25CCu;
320     set_khmer_properties(dottedcircle);
321     dottedcircle.codepoint = dottedcircle_glyph;
322 
323     buffer->clear_output();
324 
325     buffer->idx = 0;
326     unsigned int last_syllable = 0;
327     while (buffer->idx < buffer->len && buffer->successful) {
328         unsigned int syllable = buffer->cur().syllable();
329         khmer_syllable_type_t syllable_type = (khmer_syllable_type_t)(syllable & 0x0F);
330         if (unlikely(last_syllable != syllable && syllable_type == khmer_broken_cluster)) {
331             last_syllable = syllable;
332 
333             hb_glyph_info_t ginfo = dottedcircle;
334             ginfo.cluster = buffer->cur().cluster;
335             ginfo.mask = buffer->cur().mask;
336             ginfo.syllable() = buffer->cur().syllable();
337 
338             /* Insert dottedcircle after possible Repha. */
339             while (buffer->idx < buffer->len && buffer->successful && last_syllable == buffer->cur().syllable() &&
340                    buffer->cur().khmer_category() == OT_Repha)
341                 buffer->next_glyph();
342 
343             buffer->output_info(ginfo);
344         } else
345             buffer->next_glyph();
346     }
347     buffer->swap_buffers();
348 }
349 
reorder_khmer(const hb_ot_shape_plan_t * plan,hb_font_t * font,hb_buffer_t * buffer)350 static void reorder_khmer(const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer)
351 {
352     insert_dotted_circles_khmer(plan, font, buffer);
353 
354     foreach_syllable(buffer, start, end) reorder_syllable_khmer(plan, font->face, buffer, start, end);
355 
356     HB_BUFFER_DEALLOCATE_VAR(buffer, khmer_category);
357 }
358 
359 static bool
decompose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b)360 decompose_khmer(const hb_ot_shape_normalize_context_t *c, hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
361 {
362     switch (ab) {
363     /*
364      * Decompose split matras that don't have Unicode decompositions.
365      */
366 
367     /* Khmer */
368     case 0x17BEu:
369         *a = 0x17C1u;
370         *b = 0x17BEu;
371         return true;
372     case 0x17BFu:
373         *a = 0x17C1u;
374         *b = 0x17BFu;
375         return true;
376     case 0x17C0u:
377         *a = 0x17C1u;
378         *b = 0x17C0u;
379         return true;
380     case 0x17C4u:
381         *a = 0x17C1u;
382         *b = 0x17C4u;
383         return true;
384     case 0x17C5u:
385         *a = 0x17C1u;
386         *b = 0x17C5u;
387         return true;
388     }
389 
390     return (bool)c->unicode->decompose(ab, a, b);
391 }
392 
393 static bool
compose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab)394 compose_khmer(const hb_ot_shape_normalize_context_t *c, hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
395 {
396     /* Avoid recomposing split matras. */
397     if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK(c->unicode->general_category(a)))
398         return false;
399 
400     return (bool)c->unicode->compose(a, b, ab);
401 }
402 
403 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer = {
404     collect_features_khmer,
405     override_features_khmer,
406     data_create_khmer,
407     data_destroy_khmer,
408     nullptr, /* preprocess_text */
409     nullptr, /* postprocess_glyphs */
410     HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
411     decompose_khmer,
412     compose_khmer,
413     setup_masks_khmer,
414     HB_TAG_NONE, /* gpos_tag */
415     nullptr,     /* reorder_marks */
416     HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
417     false, /* fallback_position */
418 };
419 
420 #endif
421