1 /*
2 * Implementation of Indic Syllables for the Uniscribe Script Processor
3 *
4 * Copyright 2011 CodeWeavers, Aric Stewart
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 *
20 */
21
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winuser.h"
29 #include "wingdi.h"
30 #include "winnls.h"
31 #include "usp10.h"
32 #include "winternl.h"
33
34 #include "wine/debug.h"
35 #include "wine/heap.h"
36 #include "usp10_internal.h"
37
38 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
39
/*
 * Write one short code per character of str to the uniscribe trace channel,
 * chosen by the lexical class that f reports for it, followed by a newline.
 * Nothing is classified or printed when the channel is not being traced.
 */
static void debug_output_string(const WCHAR *str, unsigned int char_count, lexical_function f)
{
    unsigned int pos = 0;

    if (!TRACE_ON(uniscribe))
        return;

    while (pos < char_count)
    {
        switch (f(str[pos]))
        {
            case lex_Consonant:      TRACE("C");    break;
            case lex_Ra:             TRACE("Ra");   break;
            case lex_Vowel:          TRACE("V");    break;
            case lex_Nukta:          TRACE("N");    break;
            case lex_Halant:         TRACE("H");    break;
            case lex_ZWNJ:           TRACE("Zwnj"); break;
            case lex_ZWJ:            TRACE("Zwj");  break;
            case lex_Matra_post:     TRACE("Mp");   break;
            case lex_Matra_above:    TRACE("Ma");   break;
            case lex_Matra_below:    TRACE("Mb");   break;
            case lex_Matra_pre:      TRACE("Mm");   break;
            case lex_Modifier:       TRACE("Sm");   break;
            case lex_Vedic:          TRACE("Vd");   break;
            case lex_Anudatta:       TRACE("A");    break;
            case lex_Composed_Vowel: TRACE("t");    break;
            default:
                /* any class without a dedicated code */
                TRACE("X");
                break;
        }
        pos++;
    }
    TRACE("\n");
}
71
is_matra(int type)72 static inline BOOL is_matra( int type )
73 {
74 return (type == lex_Matra_above || type == lex_Matra_below ||
75 type == lex_Matra_pre || type == lex_Matra_post ||
76 type == lex_Composed_Vowel);
77 }
78
is_joiner(int type)79 static inline BOOL is_joiner( int type )
80 {
81 return (type == lex_ZWJ || type == lex_ZWNJ);
82 }
83
/*
 * Try to match one consonant "header" beginning at input[next]:
 *
 *     Consonant [Nukta] Halant [ZWJ|ZWNJ] Consonant
 *     Consonant [Nukta] (ZWJ|ZWNJ) Halant Consonant
 *
 * input/cChar  character buffer and its length
 * start        not used by this function
 * next         index of the first character to examine; the caller must
 *              ensure next < cChar, since input[next] is read unconditionally
 * lex          maps a character to its lexical class
 *
 * Returns the index of the trailing consonant (which is left unconsumed so
 * it can begin the next header or act as the main consonant), or -1 when no
 * header matches.
 */
static int consonant_header(const WCHAR *input, unsigned int cChar,
        unsigned int start, unsigned int next, lexical_function lex)
{
    if (!is_consonant( lex(input[next]) )) return -1;
    next++;
    if ((next < cChar) && lex(input[next]) == lex_Nukta)
        next++;
    if ((next < cChar) && lex(input[next]) == lex_Halant)
    {
        next++;
        if ((next < cChar) && is_joiner( lex(input[next]) ))
            next++;
        if ((next < cChar) && is_consonant( lex(input[next]) ))
            return next;
    }
    /* The halant lookahead at next + 1 has to be inside the buffer as well,
     * otherwise a joiner in the last position would cause a read past the
     * end of input. */
    else if ((next + 1 < cChar) && is_joiner( lex(input[next]) ) && lex(input[next + 1]) == lex_Halant)
    {
        next += 2;
        if ((next < cChar) && is_consonant( lex(input[next]) ))
            return next;
    }
    return -1;
}
107
/*
 * Parse a consonant-based syllable beginning at input[next].
 *
 * Zero or more consonant headers are consumed first; the consonant reached
 * after them is reported through *main.  The optional tail that follows is
 * consumed in this order: Nukta, Anudatta, then either Halant [ZWJ|ZWNJ] or
 * matras [Nukta] [Halant], then Modifier and Vedic.
 *
 * Returns the index just past the syllable, or -1 when no header matched and
 * input[next] is not a consonant (in which case *main is left untouched).
 */
static int parse_consonant_syllable(const WCHAR *input, unsigned int cChar,
        unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
{
    int header_count = 0;
    int candidate;

    /* Each matched header hands back the index of its trailing consonant,
     * which is where the next header (or the main consonant) begins. */
    while ((candidate = consonant_header(input, cChar, start, next, lex)) != -1)
    {
        next = candidate;
        header_count++;
    }

    if (!header_count && !is_consonant( lex(input[next]) ))
        return -1;

    *main = next++;

    if ((next < cChar) && lex(input[next]) == lex_Nukta)
        next++;
    if ((next < cChar) && lex(input[next]) == lex_Anudatta)
        next++;

    if (next < cChar)
    {
        if (lex(input[next]) == lex_Halant)
        {
            next++;
            if ((next < cChar) && is_joiner( lex(input[next]) ))
                next++;
        }
        else
        {
            for (; (next < cChar) && is_matra( lex(input[next]) ); next++)
                ;
            if ((next < cChar) && lex(input[next]) == lex_Nukta)
                next++;
            if ((next < cChar) && lex(input[next]) == lex_Halant)
                next++;
        }
    }

    if ((next < cChar) && lex(input[next]) == lex_Modifier)
        next++;
    if ((next < cChar) && lex(input[next]) == lex_Vedic)
        next++;

    return next;
}
155
/*
 * Parse the tail of a vowel-based syllable.
 *
 * next is the index of the first character after the syllable's base; it may
 * equal cChar.  The following optional pieces are consumed in order:
 *
 *     [Nukta]
 *     [(ZWJ|ZWNJ) Halant Consonant | Halant Consonant | ZWJ Consonant]
 *     [matras [Nukta] [Halant]]
 *     [Modifier] [Vedic]
 *
 * start is not used by this function.
 *
 * Returns the index just past the syllable, which never exceeds cChar as
 * long as next <= cChar on entry.
 */
static int parse_vowel_syllable(const WCHAR *input, unsigned int cChar,
        unsigned int start, unsigned int next, lexical_function lex)
{
    if ((next < cChar) && lex(input[next]) == lex_Nukta)
        next++;

    /* Every lookahead below is bounded by the furthest index it reads, so a
     * sequence truncated by the end of the buffer is neither read past the
     * end nor consumed beyond cChar. */
    if ((next + 2 < cChar) && is_joiner( lex(input[next]) ) && lex(input[next + 1]) == lex_Halant && is_consonant( lex(input[next + 2]) ))
        next += 3;
    else if ((next + 1 < cChar) && lex(input[next]) == lex_Halant && is_consonant( lex(input[next + 1]) ))
        next += 2;
    else if ((next + 1 < cChar) && lex(input[next]) == lex_ZWJ && is_consonant( lex(input[next + 1]) ))
        next += 2;

    if ((next < cChar) && is_matra( lex(input[next]) ))
    {
        while ((next < cChar) && is_matra( lex(input[next]) ))
            next++;
        if ((next < cChar) && lex(input[next]) == lex_Nukta)
            next++;
        if ((next < cChar) && lex(input[next]) == lex_Halant)
            next++;
    }

    if ((next < cChar) && lex(input[next]) == lex_Modifier)
        next++;
    if ((next < cChar) && lex(input[next]) == lex_Vedic)
        next++;
    return next;
}
184
/*
 * Parse the single syllable that begins at input[next] and choose the
 * matching sub-parser.
 *
 * The base is a Vowel, or an NBSP when next == start, optionally preceded by
 * Ra + Halant; those cases go to parse_vowel_syllable with *main set to the
 * base index.  Everything else goes to parse_consonant_syllable.
 *
 * The caller must ensure next < cChar.  Returns the index just past the
 * syllable, or -1 when nothing could be parsed.
 *
 * NOTE(review): both Ra + Halant cases require cChar > next + 3, i.e. one
 * more character than the three that are inspected -- confirm whether that
 * extra character is intentional.
 */
static int Indic_process_next_syllable(const WCHAR *input, unsigned int cChar,
        unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
{
    if (lex(input[next]) == lex_Vowel)
    {
        *main = next;
        return parse_vowel_syllable(input, cChar, start, next + 1, lex);
    }

    if ((cChar > next + 3) && lex(input[next]) == lex_Ra && lex(input[next + 1]) == lex_Halant && lex(input[next + 2]) == lex_Vowel)
    {
        *main = next + 2;
        return parse_vowel_syllable(input, cChar, start, next + 3, lex);
    }

    if (start == next && lex(input[next]) == lex_NBSP)
    {
        *main = next;
        return parse_vowel_syllable(input, cChar, start, next + 1, lex);
    }

    if (start == next && (cChar > next + 3) && lex(input[next]) == lex_Ra && lex(input[next + 1]) == lex_Halant && lex(input[next + 2]) == lex_NBSP)
    {
        *main = next + 2;
        return parse_vowel_syllable(input, cChar, start, next + 3, lex);
    }

    return parse_consonant_syllable(input, cChar, start, main, next, lex);
}
212
/*
 * Report whether the GSUB feature "pstf" applies to the syllable's current
 * base consonant together with the halant directly before it.
 *
 * FALSE is returned without querying the font unless the base is a
 * consonant, lies after s->start, and is preceded by a Halant.  When modern
 * is set the two characters are tested in buffer order (halant, consonant);
 * otherwise they are tested swapped (consonant, halant).
 */
static BOOL Consonant_is_post_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
        const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
{
    WCHAR swapped[2];
    const WCHAR *probe;

    if (!is_consonant(lexical(pwChar[s->base])))
        return FALSE;
    if (s->base <= s->start)
        return FALSE;
    if (lexical(pwChar[s->base - 1]) != lex_Halant)
        return FALSE;

    if (modern)
        probe = &pwChar[s->base - 1];
    else
    {
        swapped[0] = pwChar[s->base];
        swapped[1] = pwChar[s->base - 1];
        probe = swapped;
    }

    return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, probe, 1, 2, "pstf") > 0);
}
230
/*
 * Report whether the GSUB feature "blwf" applies to the syllable's current
 * base consonant together with the halant directly before it.
 *
 * FALSE is returned without querying the font unless the base is a
 * consonant, lies after s->start, and is preceded by a Halant.  When modern
 * is set the two characters are tested in buffer order (halant, consonant);
 * otherwise they are tested swapped (consonant, halant).
 */
static BOOL Consonant_is_below_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
        const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
{
    WCHAR swapped[2];
    const WCHAR *probe;

    if (!is_consonant(lexical(pwChar[s->base])))
        return FALSE;
    if (s->base <= s->start)
        return FALSE;
    if (lexical(pwChar[s->base - 1]) != lex_Halant)
        return FALSE;

    if (modern)
        probe = &pwChar[s->base - 1];
    else
    {
        swapped[0] = pwChar[s->base];
        swapped[1] = pwChar[s->base - 1];
        probe = swapped;
    }

    return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, probe, 1, 2, "blwf") > 0);
}
248
/*
 * Report whether the GSUB feature "pref" applies to the syllable's current
 * base consonant together with the halant directly before it.
 *
 * FALSE is returned without querying the font unless the base is a
 * consonant, lies after s->start, and is preceded by a Halant.  When modern
 * is set the two characters are tested in buffer order (halant, consonant);
 * otherwise they are tested swapped (consonant, halant).
 */
static BOOL Consonant_is_pre_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
        const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
{
    WCHAR swapped[2];
    const WCHAR *probe;

    if (!is_consonant(lexical(pwChar[s->base])))
        return FALSE;
    if (s->base <= s->start)
        return FALSE;
    if (lexical(pwChar[s->base - 1]) != lex_Halant)
        return FALSE;

    if (modern)
        probe = &pwChar[s->base - 1];
    else
    {
        swapped[0] = pwChar[s->base];
        swapped[1] = pwChar[s->base - 1];
        probe = swapped;
    }

    return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, probe, 1, 2, "pref") > 0);
}
266
Consonant_is_ralf(HDC hdc,SCRIPT_ANALYSIS * psa,ScriptCache * psc,const WCHAR * pwChar,const IndicSyllable * s,lexical_function lexical)267 static BOOL Consonant_is_ralf(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
268 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical)
269 {
270 if ((lexical(pwChar[s->start])==lex_Ra) && s->end > s->start && lexical(pwChar[s->start+1]) == lex_Halant)
271 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->start], 1, 2, "rphf") > 0);
272 return FALSE;
273 }
274
/*
 * Settle the base consonant of syllable s and record the positions of the
 * special forms met on the way.
 *
 * On entry s->start, s->base and s->end describe the parsed syllable and
 * s->ralf, s->blwf and s->pref are expected to be -1.  On return:
 *   s->ralf  index of a leading Ra + Halant for which "rphf" applies, or -1
 *   s->blwf  index of the halant of the first below-base form found, or -1
 *   s->pref  index of the halant of the first pre-base form found, or -1
 *   s->base  index of the chosen base
 *
 * Returns s->base.
 */
static int FindBaseConsonant(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
        const WCHAR *input, IndicSyllable *s, lexical_function lex, BOOL modern)
{
    int i;
    BOOL blwf = FALSE;
    BOOL pref = FALSE;
    BOOL moved;

    /* remove ralf from consideration */
    if (Consonant_is_ralf(hdc, psa, psc, input, s, lex))
    {
        s->ralf = s->start;
        s->start += 2;
    }

    /* try to find a base consonant, scanning backwards from the end */
    if (!is_consonant( lex(input[s->base]) ))
    {
        for (i = s->end; i >= s->start; i--)
            if (is_consonant( lex(input[i]) ))
            {
                s->base = i;
                break;
            }
    }

    /* While the candidate base takes a below-, post- or pre-base form, it
     * cannot be the base: note the form and step back to the previous
     * consonant. */
    while ((blwf = Consonant_is_below_base_form(hdc, psa, psc, input, s, lex, modern)) || Consonant_is_post_base_form(hdc, psa, psc, input, s, lex, modern) || (pref = Consonant_is_pre_base_form(hdc, psa, psc, input, s, lex, modern)))
    {
        if (blwf && s->blwf == -1)
            s->blwf = s->base - 1;
        if (pref && s->pref == -1)
            s->pref = s->base - 1;

        moved = FALSE;
        for (i = s->base - 1; i >= s->start; i--)
            if (is_consonant( lex(input[i]) ))
            {
                s->base = i;
                moved = TRUE;
                break;
            }

        /* With no earlier consonant s->base stays put, so the loop condition
         * would be evaluated on exactly the same state again; stop here
         * instead of spinning. */
        if (!moved)
            break;
    }

    /* put the ralf back into the syllable */
    if (s->ralf >= 0)
        s->start = s->ralf;

    /* a Ra that ended up as the base is not reported as a ralf */
    if (s->ralf == s->base)
        s->ralf = -1;

    return s->base;
}
323
/*
 * Split input into Indic syllables.
 *
 * hdc, psa, psc  passed through to FindBaseConsonant for font feature queries
 * input, cChar   characters to parse and their count
 * syllables      receives a heap-allocated array of parsed syllables; the
 *                pointer is only written once at least one syllable has been
 *                stored
 * syllable_count receives the number of entries in *syllables (0 on failure)
 * lex            maps a character to its lexical class; must not be NULL
 * modern         passed through to FindBaseConsonant
 *
 * Characters classified as lex_Generic are skipped between syllables.  A
 * position at which no syllable can be parsed is skipped as well.  If the
 * array cannot be grown, parsing stops and the syllables collected so far
 * stay valid.
 */
void Indic_ParseSyllables(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *input, unsigned int cChar,
        IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern)
{
    unsigned int center = 0;
    int index = 0;
    int next = 0;

    *syllable_count = 0;

    if (!lex)
    {
        ERR("Failure to have required functions\n");
        return;
    }

    debug_output_string(input, cChar, lex);
    while (next != -1)
    {
        while ((next < cChar) && lex(input[next]) == lex_Generic)
            next++;
        index = next;
        if (next >= cChar)
            break;
        next = Indic_process_next_syllable(input, cChar, 0, &center, index, lex);
        if (next != -1)
        {
            IndicSyllable *grown;
            IndicSyllable *syllable;

            /* Grow through a temporary so a failed reallocation neither
             * loses the existing array nor leads to a NULL dereference. */
            if (*syllable_count)
                grown = HeapReAlloc(GetProcessHeap(), 0, *syllables, sizeof(IndicSyllable) * (*syllable_count + 1));
            else
                grown = heap_alloc(sizeof(**syllables));
            if (!grown)
            {
                ERR("Failed to allocate syllable storage\n");
                break;
            }
            *syllables = grown;

            syllable = &grown[*syllable_count];
            syllable->start = index;
            syllable->base = center;
            syllable->ralf = -1;
            syllable->blwf = -1;
            syllable->pref = -1;
            syllable->end = next - 1;
            FindBaseConsonant(hdc, psa, psc, input, syllable, lex, modern);
            index = next;
            *syllable_count = (*syllable_count) + 1;
        }
        else if (index < cChar)
        {
            /* nothing parsable here: skip one character and try again */
            TRACE("Processing failed at %i\n", index);
            next = ++index;
        }
    }
    TRACE("Processed %i of %i characters into %i syllables\n", index, cChar, *syllable_count);
}
372
/*
 * Parse input into syllables and then let reorder_f rearrange the
 * characters of every syllable in place.
 *
 * Without a reorder function an error is logged and nothing is parsed, so
 * *syllables and *syllable_count are left untouched in that case.  All other
 * arguments are forwarded to Indic_ParseSyllables.
 */
void Indic_ReorderCharacters(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, WCHAR *input, unsigned int cChar,
        IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern)
{
    int idx = 0;

    if (!reorder_f)
    {
        ERR("Failure to have required functions\n");
        return;
    }

    Indic_ParseSyllables(hdc, psa, psc, input, cChar, syllables, syllable_count, lex, modern);

    while (idx < *syllable_count)
    {
        reorder_f(input, *syllables + idx, lex);
        idx++;
    }
}
388