/*
 * Copyright © 2015  Mozilla Foundation.
 * Copyright © 2015  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Mozilla Author(s): Jonathan Kew
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH

#include "hb.hh"

#include "hb-ot-shape-complex-syllabic.hh"

/* buffer var allocations */
#define use_category() complex_var_u8_category()

/* Maps a category name from the grammar below (e.g. USE(CGJ)) onto the
 * identifier use_syllable_machine_ex_<Cat>.
 * NOTE(review): these identifiers are presumably the ones emitted by the
 * `write exports` statement for each `export` definition -- confirm against
 * the generated header. */
#define USE(Cat) use_syllable_machine_ex_##Cat

/* Syllable kinds recognised by the scanner.  found_syllable() ORs the value
 * into the low bits of the per-glyph syllable byte, below a serial number
 * shifted left by 4. */
enum use_syllable_type_t {
  use_virama_terminated_cluster,
  use_sakot_terminated_cluster,
  use_standard_cluster,
  use_number_joiner_terminated_cluster,
  use_numeral_cluster,
  use_symbol_cluster,
  use_hieroglyph_cluster,
  use_broken_cluster,
  use_non_cluster,
};

%%{
  machine use_syllable_machine;
  alphtype unsigned char;
  write exports;
  write data;
}%%

%%{

# Categories used in the Universal Shaping Engine spec:
# https://docs.microsoft.com/en-us/typography/script-development/use

export O	= 0; # OTHER

export B	= 1; # BASE
export N	= 4; # BASE_NUM
export GB	= 5; # BASE_OTHER
export CGJ	= 6; # CGJ
export SUB	= 11; # CONS_SUB
export H	= 12; # HALANT

export HN	= 13; # HALANT_NUM
export ZWNJ	= 14; # Zero width non-joiner
export R	= 18; # REPHA
export CS	= 43; # CONS_WITH_STACKER
export HVM	= 44; # HALANT_OR_VOWEL_MODIFIER
export Sk	= 48; # SAKOT
export G	= 49; # HIEROGLYPH
export J	= 50; # HIEROGLYPH_JOINER
export SB	= 51; # HIEROGLYPH_SEGMENT_BEGIN
export SE	= 52; # HIEROGLYPH_SEGMENT_END

export FAbv	= 24; # CONS_FINAL_ABOVE
export FBlw	= 25; # CONS_FINAL_BELOW
export FPst	= 26; # CONS_FINAL_POST
export MAbv	= 27; # CONS_MED_ABOVE
export MBlw	= 28; # CONS_MED_BELOW
export MPst	= 29; # CONS_MED_POST
export MPre	= 30; # CONS_MED_PRE
export CMAbv	= 31; # CONS_MOD_ABOVE
export CMBlw	= 32; # CONS_MOD_BELOW
export VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
export VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
export VPst	= 35; # VOWEL_POST	UIPC = Right
export VPre	= 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
export VMAbv	= 37; # VOWEL_MOD_ABOVE
export VMBlw	= 38; # VOWEL_MOD_BELOW
export VMPst	= 39; # VOWEL_MOD_POST
export VMPre	= 23; # VOWEL_MOD_PRE
export SMAbv	= 41; # SYM_MOD_ABOVE
export SMBlw	= 42; # SYM_MOD_BELOW
export FMAbv	= 45; # CONS_FINAL_MOD	UIPC = Top
export FMBlw	= 46; # CONS_FINAL_MOD	UIPC = Bottom
export FMPst	= 47; # CONS_FINAL_MOD	UIPC = Not_Applicable


# Any of the three halant-like categories.
h = H | HVM | Sk;

# Building blocks shared by the cluster definitions below.
consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*;
medial_consonants = MPre? MAbv? MBlw? MPst?;
dependent_vowels = VPre* VAbv* VBlw* VPst*;
vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;
final_consonants = FAbv* FBlw* FPst*;
final_modifiers = FMAbv* FMBlw* | FMPst?;

complex_syllable_start = (R | CS)? (B | GB);
complex_syllable_middle =
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	(Sk B)*
;
complex_syllable_tail =
	complex_syllable_middle
	final_consonants
	final_modifiers
;
number_joiner_terminated_cluster_tail = (HN N)* HN;
numeral_cluster_tail = (HN N)+;
symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+;

virama_terminated_cluster_tail =
	consonant_modifiers
	h
;
virama_terminated_cluster =
	complex_syllable_start
	virama_terminated_cluster_tail
;
sakot_terminated_cluster_tail =
	complex_syllable_middle
	Sk
;
sakot_terminated_cluster =
	complex_syllable_start
	sakot_terminated_cluster_tail
;
standard_cluster =
	complex_syllable_start
	complex_syllable_tail
;
# A broken cluster is any of the tails above appearing without the start
# that would normally precede it, optionally preceded by R.
broken_cluster =
	R?
	(complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail | virama_terminated_cluster_tail | sakot_terminated_cluster_tail)
;

number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
numeral_cluster = N numeral_cluster_tail?;
symbol_cluster = (O | GB) symbol_cluster_tail?;
hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*;
other = any;

# Scanner: each alternative reports its match through found_syllable();
# `other` matches any single input item, so every item ends up in some match.
main := |*
	virama_terminated_cluster		=> { found_syllable (use_virama_terminated_cluster); };
	sakot_terminated_cluster		=> { found_syllable (use_sakot_terminated_cluster); };
	standard_cluster			=> { found_syllable (use_standard_cluster); };
	number_joiner_terminated_cluster	=> { found_syllable (use_number_joiner_terminated_cluster); };
	numeral_cluster				=> { found_syllable (use_numeral_cluster); };
	symbol_cluster				=> { found_syllable (use_symbol_cluster); };
	hieroglyph_cluster			=> { found_syllable (use_hieroglyph_cluster); };
	broken_cluster				=> { found_syllable (use_broken_cluster); };
	other					=> { found_syllable (use_non_cluster); };
*|;


}%%

/* Scanner action: label one matched syllable.
 *
 * Expects `info`, `ts`, `te` and `syllable_serial` to be in scope where the
 * macro is expanded (they are locals of find_syllables_use()).  The range is
 * taken from `.second.first` of the items at `ts` and `te`, i.e. the index
 * produced by the inner hb_enumerate over the unfiltered glyph array, so
 * glyphs that were filtered out of the machine's input but lie inside the
 * range are labelled as well.
 *
 * Each glyph in [start, end) gets (syllable_serial << 4) | syllable_type.
 * The serial is then advanced and wraps from 16 back to 1, never to 0.
 * The fprintf is a disabled (`if (0)`) debugging aid. */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
    for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \
      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END


/* Iterator adaptor that lets an hb iterator stand in for the pointer-like
 * cursor variables (p, pe, eof, ts, te) used by the generated scanner code.
 *
 * Besides forwarding the usual iterator primitives to the wrapped iterator,
 * it adds:
 *   - assignment from an unsigned: repositions the wrapped iterator by the
 *     difference between the requested value and the current item's `.first`
 *     (NOTE(review): this lands on index n only if `.first` values are
 *     consecutive, as they are after hb_enumerate -- confirm for other uses);
 *   - assignment from another machine_index_t: repositions to the other
 *     cursor's current `.first` via the overload above;
 *   - equality: two cursors are equal when their current items have the same
 *     `.first`. */
template <typename Iter>
struct machine_index_t :
  hb_iter_with_fallback_t<machine_index_t<Iter>,
			  typename Iter::item_t>
{
  machine_index_t (const Iter& it) : it (it) {}
  machine_index_t (const machine_index_t& o) : it (o.it) {}

  static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
  static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;

  typename Iter::item_t __item__ () const { return *it; }
  typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; }
  unsigned __len__ () const { return it.len (); }
  void __next__ () { ++it; }
  void __forward__ (unsigned n) { it += n; }
  void __prev__ () { --it; }
  void __rewind__ (unsigned n) { it -= n; }
  void operator = (unsigned n)
  { unsigned index = (*it).first; if (index < n) it += n - index; else if (index > n) it -= index - n; }
  void operator = (const machine_index_t& o) { *this = (*o.it).first; }
  bool operator == (const machine_index_t& o) const { return (*it).first == (*o.it).first; }
  bool operator != (const machine_index_t& o) const { return !(*this == o); }

  private:
  Iter it;
};
/* Function object: wraps any iterable in a machine_index_t over its
 * iterator type. */
struct
{
  template <typename Iter,
	    hb_requires (hb_is_iterable (Iter))>
  machine_index_t<hb_iter_type<Iter>>
  operator () (Iter&& it) const
  { return machine_index_t<hb_iter_type<Iter>> (hb_iter (it)); }
}
HB_FUNCOBJ (machine_index);



/* Returns false only for a glyph whose USE category is CGJ and which is also
 * default-ignorable; true for every other glyph. */
static bool
not_ccs_default_ignorable (const hb_glyph_info_t &i)
{ return !(i.use_category() == USE(CGJ) && _hb_glyph_info_is_default_ignorable (&i)); }

/* Runs the USE syllable scanner over `buffer` and writes a serial/type label
 * into info[i].syllable() for the glyphs of every match (see
 * found_syllable()).
 *
 * The scanner does not read the glyph array directly.  Its input is a
 * filtered view built as follows:
 *   1. pair each glyph with its buffer index (hb_enumerate);
 *   2. drop glyphs for which not_ccs_default_ignorable() is false;
 *   3. drop a ZWNJ when the first following glyph that passes the test in
 *      step 2 is a Unicode mark; a ZWNJ with no such following glyph is kept;
 *   4. enumerate again, so each item is (filtered index, (buffer index, info)),
 *      and wrap the result in machine_index_t.
 * The machine's key for an item is the glyph's use_category(). */
static inline void
find_syllables_use (hb_buffer_t *buffer)
{
  hb_glyph_info_t *info = buffer->info;
  auto p =
    + hb_iter (info, buffer->len)
    | hb_enumerate
    | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); },
		 hb_second)
    | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
		 {
		   if (p.second.use_category() == USE(ZWNJ))
		     for (unsigned i = p.first + 1; i < buffer->len; ++i)
		       if (not_ccs_default_ignorable (info[i]))
			 return !_hb_glyph_info_is_unicode_mark (&info[i]);
		   return true;
		 })
    | hb_enumerate
    | machine_index
    ;
  /* Cursor variables referenced by the generated scanner code. */
  auto pe = p + p.len ();
  auto eof = +pe;
  auto ts = +p;
  auto te = +p;
  unsigned int act HB_UNUSED;
  int cs;
  %%{
    write init;
    getkey (*p).second.second.use_category();
  }%%

  /* Serials start at 1; found_syllable() wraps them from 16 back to 1. */
  unsigned int syllable_serial = 1;
  %%{
    write exec;
  }%%
}

#undef found_syllable

#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */