1 /*
2 * Copyright © 2011,2012 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27 #include "hb.hh"
28
29 #ifndef HB_NO_OT_SHAPE
30
31 #include "hb-ot-shape-complex-khmer.hh"
32 #include "hb-ot-layout.hh"
33
34 /*
35 * Khmer shaper.
36 */
37
38 static const hb_ot_map_feature_t khmer_features[] = {
39 /*
40 * Basic features.
41 * These features are applied in order, one at a time, after reordering.
42 */
43 {HB_TAG('p', 'r', 'e', 'f'), F_MANUAL_JOINERS},
44 {HB_TAG('b', 'l', 'w', 'f'), F_MANUAL_JOINERS},
45 {HB_TAG('a', 'b', 'v', 'f'), F_MANUAL_JOINERS},
46 {HB_TAG('p', 's', 't', 'f'), F_MANUAL_JOINERS},
47 {HB_TAG('c', 'f', 'a', 'r'), F_MANUAL_JOINERS},
48 /*
49 * Other features.
50 * These features are applied all at once after clearing syllables.
51 */
52 {HB_TAG('p', 'r', 'e', 's'), F_GLOBAL_MANUAL_JOINERS},
53 {HB_TAG('a', 'b', 'v', 's'), F_GLOBAL_MANUAL_JOINERS},
54 {HB_TAG('b', 'l', 'w', 's'), F_GLOBAL_MANUAL_JOINERS},
55 {HB_TAG('p', 's', 't', 's'), F_GLOBAL_MANUAL_JOINERS},
56 };
57
58 /*
59 * Must be in the same order as the khmer_features array.
60 */
enum {
  /* Indices of the basic features. */
  KHMER_PREF, /* 'pref' */
  KHMER_BLWF, /* 'blwf' */
  KHMER_ABVF, /* 'abvf' */
  KHMER_PSTF, /* 'pstf' */
  KHMER_CFAR, /* 'cfar' */

  /* Indices of the remaining features. */
  _KHMER_PRES, /* 'pres' */
  _KHMER_ABVS, /* 'abvs' */
  _KHMER_BLWS, /* 'blws' */
  _KHMER_PSTS, /* 'psts' */

  /* Total number of entries. */
  KHMER_NUM_FEATURES,

  /* Number of basic features, i.e. the index of the first non-basic one.
   * Keep this pointing at the first entry of the second group! */
  KHMER_BASIC_FEATURES = _KHMER_PRES,
};
76
77 static void setup_syllables_khmer(const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
78 static void reorder_khmer(const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer);
79
collect_features_khmer(hb_ot_shape_planner_t * plan)80 static void collect_features_khmer(hb_ot_shape_planner_t *plan)
81 {
82 hb_ot_map_builder_t *map = &plan->map;
83
84 /* Do this before any lookups have been applied. */
85 map->add_gsub_pause(setup_syllables_khmer);
86 map->add_gsub_pause(reorder_khmer);
87
88 /* Testing suggests that Uniscribe does NOT pause between basic
89 * features. Test with KhmerUI.ttf and the following three
90 * sequences:
91 *
92 * U+1789,U+17BC
93 * U+1789,U+17D2,U+1789
94 * U+1789,U+17D2,U+1789,U+17BC
95 *
96 * https://github.com/harfbuzz/harfbuzz/issues/974
97 */
98 map->enable_feature(HB_TAG('l', 'o', 'c', 'l'));
99 map->enable_feature(HB_TAG('c', 'c', 'm', 'p'));
100
101 unsigned int i = 0;
102 for (; i < KHMER_BASIC_FEATURES; i++)
103 map->add_feature(khmer_features[i]);
104
105 map->add_gsub_pause(_hb_clear_syllables);
106
107 for (; i < KHMER_NUM_FEATURES; i++)
108 map->add_feature(khmer_features[i]);
109 }
110
override_features_khmer(hb_ot_shape_planner_t * plan)111 static void override_features_khmer(hb_ot_shape_planner_t *plan)
112 {
113 hb_ot_map_builder_t *map = &plan->map;
114
115 /* Khmer spec has 'clig' as part of required shaping features:
116 * "Apply feature 'clig' to form ligatures that are desired for
117 * typographical correctness.", hence in overrides... */
118 map->enable_feature(HB_TAG('c', 'l', 'i', 'g'));
119
120 /* Uniscribe does not apply 'kern' in Khmer. */
121 if (hb_options().uniscribe_bug_compatible) {
122 map->disable_feature(HB_TAG('k', 'e', 'r', 'n'));
123 }
124
125 map->disable_feature(HB_TAG('l', 'i', 'g', 'a'));
126 }
127
128 struct khmer_shape_plan_t
129 {
get_virama_glyphkhmer_shape_plan_t130 bool get_virama_glyph(hb_font_t *font, hb_codepoint_t *pglyph) const
131 {
132 hb_codepoint_t glyph = virama_glyph;
133 if (unlikely(virama_glyph == (hb_codepoint_t)-1)) {
134 if (!font->get_nominal_glyph(0x17D2u, &glyph))
135 glyph = 0;
136 /* Technically speaking, the spec says we should apply 'locl' to virama too.
137 * Maybe one day... */
138
139 /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph
140 * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */
141 virama_glyph = glyph;
142 }
143
144 *pglyph = glyph;
145 return glyph != 0;
146 }
147
148 mutable hb_codepoint_t virama_glyph;
149
150 hb_indic_would_substitute_feature_t pref;
151
152 hb_mask_t mask_array[KHMER_NUM_FEATURES];
153 };
154
data_create_khmer(const hb_ot_shape_plan_t * plan)155 static void *data_create_khmer(const hb_ot_shape_plan_t *plan)
156 {
157 khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *)calloc(1, sizeof(khmer_shape_plan_t));
158 if (unlikely(!khmer_plan))
159 return nullptr;
160
161 khmer_plan->virama_glyph = (hb_codepoint_t)-1;
162
163 khmer_plan->pref.init(&plan->map, HB_TAG('p', 'r', 'e', 'f'), true);
164
165 for (unsigned int i = 0; i < ARRAY_LENGTH(khmer_plan->mask_array); i++)
166 khmer_plan->mask_array[i] =
167 (khmer_features[i].flags & F_GLOBAL) ? 0 : plan->map.get_1_mask(khmer_features[i].tag);
168
169 return khmer_plan;
170 }
171
/* Releases shape-plan data by passing `data` to free(). */
static void
data_destroy_khmer (void *data)
{
  free (data);
}
176
/* Syllable kinds; this file reads the kind back from the low four bits of
 * a glyph's syllable() value. */
enum khmer_syllable_type_t {
  khmer_consonant_syllable, /* reordered by reorder_consonant_syllable() */
  khmer_broken_cluster,     /* gets a dotted circle, then reordered likewise */
  khmer_non_khmer_cluster,  /* left untouched by reordering */
};
182
183 #include "hb-ot-shape-complex-khmer-machine.hh"
184
setup_masks_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_buffer_t * buffer,hb_font_t * font HB_UNUSED)185 static void setup_masks_khmer(const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer, hb_font_t *font HB_UNUSED)
186 {
187 HB_BUFFER_ALLOCATE_VAR(buffer, khmer_category);
188
189 /* We cannot setup masks here. We save information about characters
190 * and setup masks later on in a pause-callback. */
191
192 unsigned int count = buffer->len;
193 hb_glyph_info_t *info = buffer->info;
194 for (unsigned int i = 0; i < count; i++)
195 set_khmer_properties(info[i]);
196 }
197
198 static void
setup_syllables_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font HB_UNUSED,hb_buffer_t * buffer)199 setup_syllables_khmer(const hb_ot_shape_plan_t *plan HB_UNUSED, hb_font_t *font HB_UNUSED, hb_buffer_t *buffer)
200 {
201 find_syllables_khmer(buffer);
202 foreach_syllable(buffer, start, end) buffer->unsafe_to_break(start, end);
203 }
204
/* Rules from:
 * https://docs.microsoft.com/en-us/typography/script-development/khmer */
207
reorder_consonant_syllable(const hb_ot_shape_plan_t * plan,hb_face_t * face HB_UNUSED,hb_buffer_t * buffer,unsigned int start,unsigned int end)208 static void reorder_consonant_syllable(const hb_ot_shape_plan_t *plan,
209 hb_face_t *face HB_UNUSED,
210 hb_buffer_t *buffer,
211 unsigned int start,
212 unsigned int end)
213 {
214 const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *)plan->data;
215 hb_glyph_info_t *info = buffer->info;
216
217 /* Setup masks. */
218 {
219 /* Post-base */
220 hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] | khmer_plan->mask_array[KHMER_ABVF] |
221 khmer_plan->mask_array[KHMER_PSTF];
222 for (unsigned int i = start + 1; i < end; i++)
223 info[i].mask |= mask;
224 }
225
226 unsigned int num_coengs = 0;
227 for (unsigned int i = start + 1; i < end; i++) {
228 /* """
229 * When a COENG + (Cons | IndV) combination are found (and subscript count
230 * is less than two) the character combination is handled according to the
231 * subscript type of the character following the COENG.
232 *
233 * ...
234 *
235 * Subscript Type 2 - The COENG + RO characters are reordered to immediately
236 * before the base glyph. Then the COENG + RO characters are assigned to have
237 * the 'pref' OpenType feature applied to them.
238 * """
239 */
240 if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end) {
241 num_coengs++;
242
243 if (info[i + 1].khmer_category() == OT_Ra) {
244 for (unsigned int j = 0; j < 2; j++)
245 info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF];
246
247 /* Move the Coeng,Ro sequence to the start. */
248 buffer->merge_clusters(start, i + 2);
249 hb_glyph_info_t t0 = info[i];
250 hb_glyph_info_t t1 = info[i + 1];
251 memmove(&info[start + 2], &info[start], (i - start) * sizeof(info[0]));
252 info[start] = t0;
253 info[start + 1] = t1;
254
255 /* Mark the subsequent stuff with 'cfar'. Used in Khmer.
256 * Read the feature spec.
257 * This allows distinguishing the following cases with MS Khmer fonts:
258 * U+1784,U+17D2,U+179A,U+17D2,U+1782
259 * U+1784,U+17D2,U+1782,U+17D2,U+179A
260 */
261 if (khmer_plan->mask_array[KHMER_CFAR])
262 for (unsigned int j = i + 2; j < end; j++)
263 info[j].mask |= khmer_plan->mask_array[KHMER_CFAR];
264
265 num_coengs = 2; /* Done. */
266 }
267 }
268
269 /* Reorder left matra piece. */
270 else if (info[i].khmer_category() == OT_VPre) {
271 /* Move to the start. */
272 buffer->merge_clusters(start, i + 1);
273 hb_glyph_info_t t = info[i];
274 memmove(&info[start + 1], &info[start], (i - start) * sizeof(info[0]));
275 info[start] = t;
276 }
277 }
278 }
279
reorder_syllable_khmer(const hb_ot_shape_plan_t * plan,hb_face_t * face,hb_buffer_t * buffer,unsigned int start,unsigned int end)280 static void reorder_syllable_khmer(
281 const hb_ot_shape_plan_t *plan, hb_face_t *face, hb_buffer_t *buffer, unsigned int start, unsigned int end)
282 {
283 khmer_syllable_type_t syllable_type = (khmer_syllable_type_t)(buffer->info[start].syllable() & 0x0F);
284 switch (syllable_type) {
285 case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */
286 case khmer_consonant_syllable:
287 reorder_consonant_syllable(plan, face, buffer, start, end);
288 break;
289
290 case khmer_non_khmer_cluster:
291 break;
292 }
293 }
294
295 static inline void
insert_dotted_circles_khmer(const hb_ot_shape_plan_t * plan HB_UNUSED,hb_font_t * font,hb_buffer_t * buffer)296 insert_dotted_circles_khmer(const hb_ot_shape_plan_t *plan HB_UNUSED, hb_font_t *font, hb_buffer_t *buffer)
297 {
298 if (unlikely(buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE))
299 return;
300
301 /* Note: This loop is extra overhead, but should not be measurable.
302 * TODO Use a buffer scratch flag to remove the loop. */
303 bool has_broken_syllables = false;
304 unsigned int count = buffer->len;
305 hb_glyph_info_t *info = buffer->info;
306 for (unsigned int i = 0; i < count; i++)
307 if ((info[i].syllable() & 0x0F) == khmer_broken_cluster) {
308 has_broken_syllables = true;
309 break;
310 }
311 if (likely(!has_broken_syllables))
312 return;
313
314 hb_codepoint_t dottedcircle_glyph;
315 if (!font->get_nominal_glyph(0x25CCu, &dottedcircle_glyph))
316 return;
317
318 hb_glyph_info_t dottedcircle = {0};
319 dottedcircle.codepoint = 0x25CCu;
320 set_khmer_properties(dottedcircle);
321 dottedcircle.codepoint = dottedcircle_glyph;
322
323 buffer->clear_output();
324
325 buffer->idx = 0;
326 unsigned int last_syllable = 0;
327 while (buffer->idx < buffer->len && buffer->successful) {
328 unsigned int syllable = buffer->cur().syllable();
329 khmer_syllable_type_t syllable_type = (khmer_syllable_type_t)(syllable & 0x0F);
330 if (unlikely(last_syllable != syllable && syllable_type == khmer_broken_cluster)) {
331 last_syllable = syllable;
332
333 hb_glyph_info_t ginfo = dottedcircle;
334 ginfo.cluster = buffer->cur().cluster;
335 ginfo.mask = buffer->cur().mask;
336 ginfo.syllable() = buffer->cur().syllable();
337
338 /* Insert dottedcircle after possible Repha. */
339 while (buffer->idx < buffer->len && buffer->successful && last_syllable == buffer->cur().syllable() &&
340 buffer->cur().khmer_category() == OT_Repha)
341 buffer->next_glyph();
342
343 buffer->output_info(ginfo);
344 } else
345 buffer->next_glyph();
346 }
347 buffer->swap_buffers();
348 }
349
reorder_khmer(const hb_ot_shape_plan_t * plan,hb_font_t * font,hb_buffer_t * buffer)350 static void reorder_khmer(const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer)
351 {
352 insert_dotted_circles_khmer(plan, font, buffer);
353
354 foreach_syllable(buffer, start, end) reorder_syllable_khmer(plan, font->face, buffer, start, end);
355
356 HB_BUFFER_DEALLOCATE_VAR(buffer, khmer_category);
357 }
358
359 static bool
decompose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t ab,hb_codepoint_t * a,hb_codepoint_t * b)360 decompose_khmer(const hb_ot_shape_normalize_context_t *c, hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b)
361 {
362 switch (ab) {
363 /*
364 * Decompose split matras that don't have Unicode decompositions.
365 */
366
367 /* Khmer */
368 case 0x17BEu:
369 *a = 0x17C1u;
370 *b = 0x17BEu;
371 return true;
372 case 0x17BFu:
373 *a = 0x17C1u;
374 *b = 0x17BFu;
375 return true;
376 case 0x17C0u:
377 *a = 0x17C1u;
378 *b = 0x17C0u;
379 return true;
380 case 0x17C4u:
381 *a = 0x17C1u;
382 *b = 0x17C4u;
383 return true;
384 case 0x17C5u:
385 *a = 0x17C1u;
386 *b = 0x17C5u;
387 return true;
388 }
389
390 return (bool)c->unicode->decompose(ab, a, b);
391 }
392
393 static bool
compose_khmer(const hb_ot_shape_normalize_context_t * c,hb_codepoint_t a,hb_codepoint_t b,hb_codepoint_t * ab)394 compose_khmer(const hb_ot_shape_normalize_context_t *c, hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab)
395 {
396 /* Avoid recomposing split matras. */
397 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK(c->unicode->general_category(a)))
398 return false;
399
400 return (bool)c->unicode->compose(a, b, ab);
401 }
402
403 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer = {
404 collect_features_khmer,
405 override_features_khmer,
406 data_create_khmer,
407 data_destroy_khmer,
408 nullptr, /* preprocess_text */
409 nullptr, /* postprocess_glyphs */
410 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
411 decompose_khmer,
412 compose_khmer,
413 setup_masks_khmer,
414 HB_TAG_NONE, /* gpos_tag */
415 nullptr, /* reorder_marks */
416 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
417 false, /* fallback_position */
418 };
419
420 #endif
421