/*
 * Copyright © 2015  Mozilla Foundation.
 * Copyright © 2015  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Mozilla Author(s): Jonathan Kew
 * Google Author(s): Behdad Esfahbod
 */
28
29#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
30#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
31
32#include "hb.hh"
33
34#include "hb-ot-shape-complex-syllabic.hh"
35
/* buffer var allocations */
/* use_category() is an alias for complex_var_u8_category(); this file calls
 * it as a member of hb_glyph_info_t to read a glyph's USE category. */
#define use_category() complex_var_u8_category()

/* Maps a category name (e.g. O, ZWNJ) to the identifier
 * use_syllable_machine_ex_<name>, i.e. the machine name followed by "_ex_"
 * and the name of an `export`ed definition in the Ragel section of this
 * file. */
#define USE(Cat) use_syllable_machine_ex_##Cat
40
/* Kinds of syllable reported by the USE syllable scanner.
 *
 * found_syllable() ORs the value into a glyph's syllable() field below a
 * serial number shifted left by four bits, so every enumerator has to stay
 * below 16. */
enum use_syllable_type_t {
  use_independent_cluster		= 0,	/* a single O glyph */
  use_virama_terminated_cluster		= 1,	/* start, consonant modifiers, then a halant */
  use_sakot_terminated_cluster		= 2,	/* start, middle, then Sk */
  use_standard_cluster			= 3,	/* start followed by a full tail */
  use_number_joiner_terminated_cluster	= 4,	/* N (HN N)* HN */
  use_numeral_cluster			= 5,	/* N optionally followed by (HN N)+ */
  use_symbol_cluster			= 6,	/* S or GB with optional symbol modifiers */
  use_hieroglyph_cluster		= 7,	/* SB / G / J / SE sequences */
  use_broken_cluster			= 8,	/* optional R plus a tail, no leading base */
  use_non_cluster			= 9,	/* any single symbol nothing else matched */
};
53
%%{
  # Name of the state machine.
  machine use_syllable_machine;
  # Input symbols are unsigned bytes; the scanner is fed each glyph's
  # use_category() value (see the getkey statement in find_syllables_use).
  alphtype unsigned char;
  # NOTE(review): `write exports` is expected to emit the `export`ed
  # definitions as use_syllable_machine_ex_<name> constants (what USE()
  # expands to), and `write data` the machine's static data -- confirm against
  # the Ragel user guide.
  write exports;
  write data;
}%%
60
%%{

# Categories used in the Universal Shaping Engine spec:
# https://docs.microsoft.com/en-us/typography/script-development/use
#
# Each `export` names one category value.  The C++ code in this file refers
# to these values through the USE() macro, e.g. USE(O) and USE(ZWNJ).

export O	= 0; # OTHER

export B	= 1; # BASE
export N	= 4; # BASE_NUM
export GB	= 5; # BASE_OTHER
export SUB	= 11; # CONS_SUB
export H	= 12; # HALANT

export HN	= 13; # HALANT_NUM
export ZWNJ	= 14; # Zero width non-joiner
export R	= 18; # REPHA
export S	= 19; # SYM
export CS	= 43; # CONS_WITH_STACKER
export HVM	= 44; # HALANT_OR_VOWEL_MODIFIER
export Sk	= 48; # SAKOT
export G	= 49; # HIEROGLYPH
export J	= 50; # HIEROGLYPH_JOINER
export SB	= 51; # HIEROGLYPH_SEGMENT_BEGIN
export SE	= 52; # HIEROGLYPH_SEGMENT_END

export FAbv	= 24; # CONS_FINAL_ABOVE
export FBlw	= 25; # CONS_FINAL_BELOW
export FPst	= 26; # CONS_FINAL_POST
export MAbv	= 27; # CONS_MED_ABOVE
export MBlw	= 28; # CONS_MED_BELOW
export MPst	= 29; # CONS_MED_POST
export MPre	= 30; # CONS_MED_PRE
export CMAbv	= 31; # CONS_MOD_ABOVE
export CMBlw	= 32; # CONS_MOD_BELOW
export VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
export VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
export VPst	= 35; # VOWEL_POST	UIPC = Right
export VPre	= 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
export VMAbv	= 37; # VOWEL_MOD_ABOVE
export VMBlw	= 38; # VOWEL_MOD_BELOW
export VMPst	= 39; # VOWEL_MOD_POST
export VMPre	= 23; # VOWEL_MOD_PRE
export SMAbv	= 41; # SYM_MOD_ABOVE
export SMBlw	= 42; # SYM_MOD_BELOW
export FMAbv	= 45; # CONS_FINAL_MOD	UIPC = Top
export FMBlw	= 46; # CONS_FINAL_MOD	UIPC = Bottom
export FMPst	= 47; # CONS_FINAL_MOD	UIPC = Not_Applicable


# Any of the three halant-like categories.
h = H | HVM | Sk;

# Building blocks of a syllable, in the order they may follow the base.
consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*;
medial_consonants = MPre? MAbv? MBlw? MPst?;
dependent_vowels = VPre* VAbv* VBlw* VPst*;
vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;
final_consonants = FAbv* FBlw* FPst*;
final_modifiers = FMAbv* FMBlw* | FMPst?;

# A syllable starts with an optional R or CS followed by a B or GB base.
complex_syllable_start = (R | CS)? (B | GB);
complex_syllable_middle =
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	(Sk B)*
;
complex_syllable_tail =
	complex_syllable_middle
	final_consonants
	final_modifiers
;
# Tails that may follow a leading N or S/GB; they are also reused by
# broken_cluster, where that leading glyph is absent.
number_joiner_terminated_cluster_tail = (HN N)* HN;
numeral_cluster_tail = (HN N)+;
symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+;

virama_terminated_cluster =
	complex_syllable_start
	consonant_modifiers
	h
;
sakot_terminated_cluster =
	complex_syllable_start
	complex_syllable_middle
	Sk
;
standard_cluster =
	complex_syllable_start
	complex_syllable_tail
;
# A tail with no syllable start, leading N or leading S/GB in front of it,
# optionally preceded by R.
broken_cluster =
	R?
	(complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail)
;

number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
numeral_cluster = N numeral_cluster_tail?;
symbol_cluster = (S | GB) symbol_cluster_tail?;
hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*;
independent_cluster = O;
# Catch-all: any single input symbol.
other = any;

# Scanner over the cluster patterns.  Each action hands the matching
# use_syllable_type_t value to found_syllable(), which tags the glyphs in the
# matched range.
# NOTE(review): Ragel scanners take the longest match and, on a tie, the
# pattern listed first, so the order of these lines is significant -- confirm
# against the Ragel user guide before reordering.
main := |*
	independent_cluster			=> { found_syllable (use_independent_cluster); };
	virama_terminated_cluster		=> { found_syllable (use_virama_terminated_cluster); };
	sakot_terminated_cluster		=> { found_syllable (use_sakot_terminated_cluster); };
	standard_cluster			=> { found_syllable (use_standard_cluster); };
	number_joiner_terminated_cluster	=> { found_syllable (use_number_joiner_terminated_cluster); };
	numeral_cluster				=> { found_syllable (use_numeral_cluster); };
	symbol_cluster				=> { found_syllable (use_symbol_cluster); };
	hieroglyph_cluster			=> { found_syllable (use_hieroglyph_cluster); };
	broken_cluster				=> { found_syllable (use_broken_cluster); };
	other					=> { found_syllable (use_non_cluster); };
*|;


}%%
177
/* Scanner action: tags every glyph of the syllable that was just matched.
 *
 * Expects `ts`, `te`, `info` and `syllable_serial` to be in scope at the
 * point of expansion (find_syllables_use() provides them).  The buffer
 * indices of the match are read from (*ts).second.first (inclusive) and
 * (*te).second.first (exclusive).  Each glyph in that range gets
 * (syllable_serial << 4) | syllable_type in its syllable() field; the serial
 * is then incremented and wraps from 16 back to 1, so it cycles through
 * 1..15.
 *
 * The disabled debug trace prints the indices with %u because they are
 * unsigned values (the same expressions initialise `unsigned i` below). */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
    for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \
      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END
186
187
188template <typename Iter>
189struct machine_index_t :
190  hb_iter_with_fallback_t<machine_index_t<Iter>,
191			  typename Iter::item_t>
192{
193  machine_index_t (const Iter& it) : it (it) {}
194  machine_index_t (const machine_index_t& o) : it (o.it) {}
195
196  static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
197  static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;
198
199  typename Iter::item_t __item__ () const { return *it; }
200  typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; }
201  unsigned __len__ () const { return it.len (); }
202  void __next__ () { ++it; }
203  void __forward__ (unsigned n) { it += n; }
204  void __prev__ () { --it; }
205  void __rewind__ (unsigned n) { it -= n; }
206  void operator = (unsigned n)
207  { unsigned index = (*it).first; if (index < n) it += n - index; else if (index > n) it -= index - n; }
208  void operator = (const machine_index_t& o) { *this = (*o.it).first; }
209  bool operator == (const machine_index_t& o) const { return (*it).first == (*o.it).first; }
210  bool operator != (const machine_index_t& o) const { return !(*this == o); }
211
212  private:
213  Iter it;
214};
215struct
216{
217  template <typename Iter,
218	    hb_requires (hb_is_iterable (Iter))>
219  machine_index_t<hb_iter_type<Iter>>
220  operator () (Iter&& it) const
221  { return machine_index_t<hb_iter_type<Iter>> (hb_iter (it)); }
222}
223HB_FUNCOBJ (machine_index);
224
225
226
227static bool
228not_standard_default_ignorable (const hb_glyph_info_t &i)
229{ return !(i.use_category() == USE(O) && _hb_glyph_info_is_default_ignorable (&i)); }
230
231static inline void
232find_syllables_use (hb_buffer_t *buffer)
233{
234  hb_glyph_info_t *info = buffer->info;
235  auto p =
236    + hb_iter (info, buffer->len)
237    | hb_enumerate
238    | hb_filter ([] (const hb_glyph_info_t &i) { return not_standard_default_ignorable (i); },
239		 hb_second)
240    | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
241		 {
242		   if (p.second.use_category() == USE(ZWNJ))
243		     for (unsigned i = p.first + 1; i < buffer->len; ++i)
244		       if (not_standard_default_ignorable (info[i]))
245			 return !_hb_glyph_info_is_unicode_mark (&info[i]);
246		   return true;
247		 })
248    | hb_enumerate
249    | machine_index
250    ;
251  auto pe = p + p.len ();
252  auto eof = +pe;
253  auto ts = +p;
254  auto te = +p;
255  unsigned int act HB_UNUSED;
256  int cs;
257  %%{
258    write init;
259    getkey (*p).second.second.use_category();
260  }%%
261
262  unsigned int syllable_serial = 1;
263  %%{
264    write exec;
265  }%%
266}
267
268#undef found_syllable
269
270#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
271