1/*
2 * Copyright © 2015  Mozilla Foundation.
3 * Copyright © 2015  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Mozilla Author(s): Jonathan Kew
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
30#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
31
32#include "hb.hh"
33
34#include "hb-ot-shape-complex-syllabic.hh"
35
/* buffer var allocations */
/* use_category() is an alias for complex_var_u8_category(); in this file it
 * is applied to glyph infos (e.g. `i.use_category()`) to read the category
 * that is fed to the syllable machine. */
#define use_category() complex_var_u8_category()

/* USE(Cat) spells the constant for category Cat by pasting it onto the
 * use_syllable_machine_ex_ prefix, e.g. USE(CGJ) -> use_syllable_machine_ex_CGJ.
 * NOTE(review): these constants are presumably the ones produced by the
 * machine's `write exports` statement -- confirm against the generated file. */
#define USE(Cat) use_syllable_machine_ex_##Cat
40
/* Kinds of syllable the scanner reports, one per pattern in the machine's
 * `main` scanner.  found_syllable() ORs the value into the low four bits of
 * each glyph's syllable(), so every enumerator has to stay below 16. */
enum use_syllable_type_t {
  use_virama_terminated_cluster		= 0,
  use_sakot_terminated_cluster		= 1,
  use_standard_cluster			= 2,
  use_number_joiner_terminated_cluster	= 3,
  use_numeral_cluster			= 4,
  use_symbol_cluster			= 5,
  use_hieroglyph_cluster		= 6,
  use_broken_cluster			= 7,
  use_non_cluster			= 8,
};
52
%%{
  # Machine name; the constants referenced through USE() carry it as a prefix.
  machine use_syllable_machine;
  # One input symbol per glyph: its category, as an unsigned char.
  alphtype unsigned char;
  # Emit the `export`ed definitions as constants, then the machine's tables.
  write exports;
  write data;
}%%

%%{

# Categories used in the Universal Shaping Engine spec:
# https://docs.microsoft.com/en-us/typography/script-development/use
#
# The numbers below are the machine's input alphabet: getkey in
# find_syllables_use() feeds each glyph's use_category() to the scanner.

export O     = 0;   # OTHER

export B     = 1;   # BASE
export N     = 4;   # BASE_NUM
export GB    = 5;   # BASE_OTHER
export CGJ   = 6;   # CGJ
export SUB   = 11;  # CONS_SUB
export H     = 12;  # HALANT

export HN    = 13;  # HALANT_NUM
export ZWNJ  = 14;  # Zero width non-joiner
export R     = 18;  # REPHA
export CS    = 43;  # CONS_WITH_STACKER
export HVM   = 44;  # HALANT_OR_VOWEL_MODIFIER
export Sk    = 48;  # SAKOT
export G     = 49;  # HIEROGLYPH
export J     = 50;  # HIEROGLYPH_JOINER
export SB    = 51;  # HIEROGLYPH_SEGMENT_BEGIN
export SE    = 52;  # HIEROGLYPH_SEGMENT_END

export FAbv  = 24;  # CONS_FINAL_ABOVE
export FBlw  = 25;  # CONS_FINAL_BELOW
export FPst  = 26;  # CONS_FINAL_POST
export MAbv  = 27;  # CONS_MED_ABOVE
export MBlw  = 28;  # CONS_MED_BELOW
export MPst  = 29;  # CONS_MED_POST
export MPre  = 30;  # CONS_MED_PRE
export CMAbv = 31;  # CONS_MOD_ABOVE
export CMBlw = 32;  # CONS_MOD_BELOW
export VAbv  = 33;  # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
export VBlw  = 34;  # VOWEL_BELOW / VOWEL_BELOW_POST
export VPst  = 35;  # VOWEL_POST	UIPC = Right
export VPre  = 22;  # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
export VMAbv = 37;  # VOWEL_MOD_ABOVE
export VMBlw = 38;  # VOWEL_MOD_BELOW
export VMPst = 39;  # VOWEL_MOD_POST
export VMPre = 23;  # VOWEL_MOD_PRE
export SMAbv = 41;  # SYM_MOD_ABOVE
export SMBlw = 42;  # SYM_MOD_BELOW
export FMAbv = 45;  # CONS_FINAL_MOD	UIPC = Top
export FMBlw = 46;  # CONS_FINAL_MOD	UIPC = Bottom
export FMPst = 47;  # CONS_FINAL_MOD	UIPC = Not_Applicable


# Any one of the three halant-like categories.
halant_like = H | HVM | Sk;

# Building blocks, in the order they occur inside a syllable.

# Modifiers above, then below, then any number of further consonants
# (halant-like + B, or a single SUB), each followed by its own modifiers.
consonant_modifiers = CMAbv* CMBlw* ((halant_like B | SUB) CMAbv? CMBlw*)*;
# At most one medial of each position, in pre/above/below/post order.
medial_consonants   = MPre? MAbv? MBlw? MPst?;
# Any number of vowels of each position, in pre/above/below/post order.
dependent_vowels    = VPre* VAbv* VBlw* VPst*;
# An optional HVM, then vowel modifiers in pre/above/below/post order.
vowel_modifiers     = HVM? VMPre* VMAbv* VMBlw* VMPst*;
# Final consonants in above/below/post order.
final_consonants    = FAbv* FBlw* FPst*;
# Either above-then-below final modifiers, or at most one FMPst.
final_modifiers     = (FMAbv* FMBlw*) | FMPst?;

# A syllable opens with an optional R or CS followed by a B or GB base.
complex_syllable_start  = (R | CS)? (B | GB);
# Everything between the base and the finals, plus trailing Sk B pairs.
complex_syllable_middle = consonant_modifiers medial_consonants dependent_vowels vowel_modifiers (Sk B)*;
# The middle followed by final consonants and final modifiers.
complex_syllable_tail   = complex_syllable_middle final_consonants final_modifiers;

# Tails that follow a leading N (or stand alone inside a broken cluster).
number_joiner_terminated_cluster_tail = (HN N)* HN;
numeral_cluster_tail                  = (HN N)+;
# Tail of a symbol cluster: above marks then below marks, or below marks only.
symbol_cluster_tail                   = (SMAbv+ SMBlw*) | SMBlw+;

# Cluster that stops right after a halant-like glyph.
virama_terminated_cluster_tail = consonant_modifiers halant_like;
virama_terminated_cluster      = complex_syllable_start virama_terminated_cluster_tail;

# Cluster whose middle is followed by one more Sk.
sakot_terminated_cluster_tail = complex_syllable_middle Sk;
sakot_terminated_cluster      = complex_syllable_start sakot_terminated_cluster_tail;

# The regular case: a start followed by a full tail.
standard_cluster = complex_syllable_start complex_syllable_tail;

# A tail with no complex_syllable_start (or leading N / O / GB) in front of
# it, optionally preceded by R.
broken_cluster =
	R?
	( complex_syllable_tail
	| number_joiner_terminated_cluster_tail
	| numeral_cluster_tail
	| symbol_cluster_tail
	| virama_terminated_cluster_tail
	| sakot_terminated_cluster_tail
	)
;

number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
numeral_cluster                  = N numeral_cluster_tail?;
symbol_cluster                   = (O | GB) symbol_cluster_tail?;
# Either a run of segment-begins, or hieroglyphs chained with joiners, with
# segment-ends allowed after each element.
hieroglyph_cluster               = SB+ | (SB* G SE* (J SE* (G SE*)?)*);
# Catch-all: any single input symbol.
other                            = any;

# The scanner proper.  A Ragel scanner takes the longest match and, when two
# patterns match the same length, the one listed first; keep this order.
main := |*
	virama_terminated_cluster        => { found_syllable (use_virama_terminated_cluster); };
	sakot_terminated_cluster         => { found_syllable (use_sakot_terminated_cluster); };
	standard_cluster                 => { found_syllable (use_standard_cluster); };
	number_joiner_terminated_cluster => { found_syllable (use_number_joiner_terminated_cluster); };
	numeral_cluster                  => { found_syllable (use_numeral_cluster); };
	symbol_cluster                   => { found_syllable (use_symbol_cluster); };
	hieroglyph_cluster               => { found_syllable (use_hieroglyph_cluster); };
	broken_cluster                   => { found_syllable (use_broken_cluster); };
	other                            => { found_syllable (use_non_cluster); };
*|;


}%%
180
/* Scanner action: tag the syllable that was just matched.
 *
 * Every glyph with buffer index in [(*ts).second.first, (*te).second.first)
 * gets syllable() = (syllable_serial << 4) | syllable_type.  The serial is
 * then advanced; it cycles through 1..15 (on reaching 16 it restarts at 1,
 * so it is never 0).
 *
 * Expects `info`, `ts`, `te` and `syllable_serial` to be in scope where the
 * macro is expanded.  The argument is parenthesised so that any expression,
 * not just a bare enumerator, combines correctly with `|`.  The disabled
 * debug print uses %u because the indices are unsigned. */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
    for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \
      info[i].syllable() = (syllable_serial << 4) | (syllable_type); \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END
189
190
191template <typename Iter>
192struct machine_index_t :
193  hb_iter_with_fallback_t<machine_index_t<Iter>,
194			  typename Iter::item_t>
195{
196  machine_index_t (const Iter& it) : it (it) {}
197  machine_index_t (const machine_index_t& o) : it (o.it) {}
198
199  static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
200  static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;
201
202  typename Iter::item_t __item__ () const { return *it; }
203  typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; }
204  unsigned __len__ () const { return it.len (); }
205  void __next__ () { ++it; }
206  void __forward__ (unsigned n) { it += n; }
207  void __prev__ () { --it; }
208  void __rewind__ (unsigned n) { it -= n; }
209  void operator = (unsigned n)
210  { unsigned index = (*it).first; if (index < n) it += n - index; else if (index > n) it -= index - n; }
211  void operator = (const machine_index_t& o) { *this = (*o.it).first; }
212  bool operator == (const machine_index_t& o) const { return (*it).first == (*o.it).first; }
213  bool operator != (const machine_index_t& o) const { return !(*this == o); }
214
215  private:
216  Iter it;
217};
218struct
219{
220  template <typename Iter,
221	    hb_requires (hb_is_iterable (Iter))>
222  machine_index_t<hb_iter_type<Iter>>
223  operator () (Iter&& it) const
224  { return machine_index_t<hb_iter_type<Iter>> (hb_iter (it)); }
225}
226HB_FUNCOBJ (machine_index);
227
228
229
230static bool
231not_ccs_default_ignorable (const hb_glyph_info_t &i)
232{ return !(i.use_category() == USE(CGJ) && _hb_glyph_info_is_default_ignorable (&i)); }
233
234static inline void
235find_syllables_use (hb_buffer_t *buffer)
236{
237  hb_glyph_info_t *info = buffer->info;
238  auto p =
239    + hb_iter (info, buffer->len)
240    | hb_enumerate
241    | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); },
242		 hb_second)
243    | hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
244		 {
245		   if (p.second.use_category() == USE(ZWNJ))
246		     for (unsigned i = p.first + 1; i < buffer->len; ++i)
247		       if (not_ccs_default_ignorable (info[i]))
248			 return !_hb_glyph_info_is_unicode_mark (&info[i]);
249		   return true;
250		 })
251    | hb_enumerate
252    | machine_index
253    ;
254  auto pe = p + p.len ();
255  auto eof = +pe;
256  auto ts = +p;
257  auto te = +p;
258  unsigned int act HB_UNUSED;
259  int cs;
260  %%{
261    write init;
262    getkey (*p).second.second.use_category();
263  }%%
264
265  unsigned int syllable_serial = 1;
266  %%{
267    write exec;
268  }%%
269}
270
271#undef found_syllable
272
273#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
274