xref: /reactos/dll/win32/usp10/indic.c (revision 98e8827a)
1 /*
2  * Implementation of Indic Syllables for the Uniscribe Script Processor
3  *
4  * Copyright 2011 CodeWeavers, Aric Stewart
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  */
21 
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winuser.h"
29 #include "wingdi.h"
30 #include "winnls.h"
31 #include "usp10.h"
32 #include "winternl.h"
33 
34 #include "wine/debug.h"
35 #include "wine/heap.h"
36 #include "usp10_internal.h"
37 
38 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe);
39 
40 static void debug_output_string(const WCHAR *str, unsigned int char_count, lexical_function f)
41 {
42     int i;
43     if (TRACE_ON(uniscribe))
44     {
45         for (i = 0; i < char_count; ++i)
46         {
47             switch (f(str[i]))
48             {
49                 case lex_Consonant: TRACE("C"); break;
50                 case lex_Ra: TRACE("Ra"); break;
51                 case lex_Vowel: TRACE("V"); break;
52                 case lex_Nukta: TRACE("N"); break;
53                 case lex_Halant: TRACE("H"); break;
54                 case lex_ZWNJ: TRACE("Zwnj"); break;
55                 case lex_ZWJ: TRACE("Zwj"); break;
56                 case lex_Matra_post: TRACE("Mp");break;
57                 case lex_Matra_above: TRACE("Ma");break;
58                 case lex_Matra_below: TRACE("Mb");break;
59                 case lex_Matra_pre: TRACE("Mm");break;
60                 case lex_Modifier: TRACE("Sm"); break;
61                 case lex_Vedic: TRACE("Vd"); break;
62                 case lex_Anudatta: TRACE("A"); break;
63                 case lex_Composed_Vowel: TRACE("t"); break;
64                 default:
65                     TRACE("X"); break;
66             }
67         }
68         TRACE("\n");
69     }
70 }
71 
72 static inline BOOL is_matra( int type )
73 {
74     return (type == lex_Matra_above || type == lex_Matra_below ||
75             type == lex_Matra_pre || type == lex_Matra_post ||
76             type == lex_Composed_Vowel);
77 }
78 
79 static inline BOOL is_joiner( int type )
80 {
81     return (type == lex_ZWJ || type == lex_ZWNJ);
82 }
83 
84 static int consonant_header(const WCHAR *input, unsigned int cChar,
85         unsigned int start, unsigned int next, lexical_function lex)
86 {
87     if (!is_consonant( lex(input[next]) )) return -1;
88     next++;
89     if ((next < cChar) && lex(input[next]) == lex_Nukta)
90             next++;
91     if ((next < cChar) && lex(input[next])==lex_Halant)
92     {
93         next++;
94         if((next < cChar) && is_joiner( lex(input[next]) ))
95             next++;
96         if ((next < cChar) && is_consonant( lex(input[next]) ))
97             return next;
98     }
99     else if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant)
100     {
101         next+=2;
102         if ((next < cChar) && is_consonant( lex(input[next]) ))
103             return next;
104     }
105     return -1;
106 }
107 
108 static int parse_consonant_syllable(const WCHAR *input, unsigned int cChar,
109         unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
110 {
111     int check;
112     int headers = 0;
113     do
114     {
115         check = consonant_header(input,cChar,start,next,lex);
116         if (check != -1)
117         {
118             next = check;
119             headers++;
120         }
121     } while (check != -1);
122     if (headers || is_consonant( lex(input[next]) ))
123     {
124         *main = next;
125         next++;
126     }
127     else
128         return -1;
129     if ((next < cChar) && lex(input[next]) == lex_Nukta)
130             next++;
131     if ((next < cChar) && lex(input[next]) == lex_Anudatta)
132             next++;
133 
134     if ((next < cChar) && lex(input[next]) == lex_Halant)
135     {
136         next++;
137         if((next < cChar) && is_joiner( lex(input[next]) ))
138             next++;
139     }
140     else if (next < cChar)
141     {
142         while((next < cChar) && is_matra( lex(input[next]) ))
143             next++;
144         if ((next < cChar) && lex(input[next]) == lex_Nukta)
145             next++;
146         if ((next < cChar) && lex(input[next]) == lex_Halant)
147             next++;
148     }
149     if ((next < cChar) && lex(input[next]) == lex_Modifier)
150             next++;
151     if ((next < cChar) && lex(input[next]) == lex_Vedic)
152             next++;
153     return next;
154 }
155 
156 static int parse_vowel_syllable(const WCHAR *input, unsigned int cChar,
157         unsigned int start, unsigned int next, lexical_function lex)
158 {
159     if ((next < cChar) && lex(input[next]) == lex_Nukta)
160         next++;
161     if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant && is_consonant( lex(input[next+2]) ))
162         next+=3;
163     else if ((next < cChar) && lex(input[next])==lex_Halant && is_consonant( lex(input[next+1]) ))
164         next+=2;
165     else if ((next < cChar) && lex(input[next])==lex_ZWJ && is_consonant( lex(input[next+1]) ))
166         next+=2;
167 
168     if ((next < cChar) && is_matra( lex(input[next]) ))
169     {
170         while((next < cChar) && is_matra( lex(input[next]) ))
171             next++;
172         if ((next < cChar) && lex(input[next]) == lex_Nukta)
173             next++;
174         if ((next < cChar) && lex(input[next]) == lex_Halant)
175             next++;
176     }
177 
178     if ((next < cChar) && lex(input[next]) == lex_Modifier)
179         next++;
180     if ((next < cChar) && lex(input[next]) == lex_Vedic)
181         next++;
182     return next;
183 }
184 
185 static int Indic_process_next_syllable(const WCHAR *input, unsigned int cChar,
186         unsigned int start, unsigned int *main, unsigned int next, lexical_function lex)
187 {
188     if (lex(input[next])==lex_Vowel)
189     {
190         *main = next;
191         return parse_vowel_syllable(input, cChar, start, next+1, lex);
192     }
193     else if ((cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_Vowel)
194     {
195         *main = next+2;
196         return parse_vowel_syllable(input, cChar, start, next+3, lex);
197     }
198 
199     else if (start == next && lex(input[next])==lex_NBSP)
200     {
201         *main = next;
202         return parse_vowel_syllable(input, cChar, start, next+1, lex);
203     }
204     else if (start == next && (cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_NBSP)
205     {
206         *main = next+2;
207         return parse_vowel_syllable(input, cChar, start, next+3, lex);
208     }
209 
210     return parse_consonant_syllable(input, cChar, start, main, next, lex);
211 }
212 
213 static BOOL Consonant_is_post_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
214         const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
215 {
216     if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
217     {
218         if (modern)
219             return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pstf") > 0);
220         else
221         {
222             WCHAR cc[2];
223             cc[0] = pwChar[s->base];
224             cc[1] = pwChar[s->base-1];
225             return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pstf") > 0);
226         }
227     }
228     return FALSE;
229 }
230 
231 static BOOL Consonant_is_below_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
232         const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
233 {
234     if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
235     {
236         if (modern)
237             return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "blwf") > 0);
238         else
239         {
240             WCHAR cc[2];
241             cc[0] = pwChar[s->base];
242             cc[1] = pwChar[s->base-1];
243             return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "blwf") > 0);
244         }
245     }
246     return FALSE;
247 }
248 
249 static BOOL Consonant_is_pre_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
250         const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern)
251 {
252     if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant)
253     {
254         if (modern)
255             return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pref") > 0);
256         else
257         {
258             WCHAR cc[2];
259             cc[0] = pwChar[s->base];
260             cc[1] = pwChar[s->base-1];
261             return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pref") > 0);
262         }
263     }
264     return FALSE;
265 }
266 
267 static BOOL Consonant_is_ralf(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
268         const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical)
269 {
270     if ((lexical(pwChar[s->start])==lex_Ra) && s->end > s->start && lexical(pwChar[s->start+1]) == lex_Halant)
271         return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->start], 1, 2, "rphf") > 0);
272     return FALSE;
273 }
274 
275 static int FindBaseConsonant(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc,
276         const WCHAR *input, IndicSyllable *s, lexical_function lex, BOOL modern)
277 {
278     int i;
279     BOOL blwf = FALSE;
280     BOOL pref = FALSE;
281 
282     /* remove ralf from consideration */
283     if (Consonant_is_ralf(hdc, psa, psc, input, s, lex))
284     {
285         s->ralf = s->start;
286         s->start+=2;
287     }
288 
289     /* try to find a base consonant */
290     if (!is_consonant( lex(input[s->base]) ))
291     {
292         for (i = s->end; i >= s->start; i--)
293             if (is_consonant( lex(input[i]) ))
294             {
295                 s->base = i;
296                 break;
297             }
298     }
299 
300     while ((blwf = Consonant_is_below_base_form(hdc, psa, psc, input, s, lex, modern)) || Consonant_is_post_base_form(hdc, psa, psc, input, s, lex, modern) || (pref = Consonant_is_pre_base_form(hdc, psa, psc, input, s, lex, modern)))
301     {
302         if (blwf && s->blwf == -1)
303             s->blwf = s->base - 1;
304         if (pref && s->pref == -1)
305             s->pref = s->base - 1;
306 
307         for (i = s->base-1; i >= s->start; i--)
308             if (is_consonant( lex(input[i]) ))
309             {
310                 s->base = i;
311                 break;
312             }
313     }
314 
315     if (s->ralf >= 0)
316         s->start = s->ralf;
317 
318     if (s->ralf == s->base)
319         s->ralf = -1;
320 
321     return s->base;
322 }
323 
324 void Indic_ParseSyllables(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *input, unsigned int cChar,
325         IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern)
326 {
327     unsigned int center = 0;
328     int index = 0;
329     int next = 0;
330 
331     *syllable_count = 0;
332 
333     if (!lex)
334     {
335         ERR("Failure to have required functions\n");
336         return;
337     }
338 
339     debug_output_string(input, cChar, lex);
340     while (next != -1)
341     {
342         while((next < cChar) && lex(input[next]) == lex_Generic)
343             next++;
344         index = next;
345         if (next >= cChar)
346             break;
347         next = Indic_process_next_syllable(input, cChar, 0, &center, index, lex);
348         if (next != -1)
349         {
350             if (*syllable_count)
351                 *syllables = HeapReAlloc(GetProcessHeap(),0,*syllables, sizeof(IndicSyllable)*(*syllable_count+1));
352             else
353                 *syllables = heap_alloc(sizeof(**syllables));
354             (*syllables)[*syllable_count].start = index;
355             (*syllables)[*syllable_count].base = center;
356             (*syllables)[*syllable_count].ralf = -1;
357             (*syllables)[*syllable_count].blwf = -1;
358             (*syllables)[*syllable_count].pref = -1;
359             (*syllables)[*syllable_count].end = next-1;
360             FindBaseConsonant(hdc, psa, psc, input, &(*syllables)[*syllable_count], lex, modern);
361             index = next;
362             *syllable_count = (*syllable_count)+1;
363         }
364         else if (index < cChar)
365         {
366             TRACE("Processing failed at %i\n",index);
367             next = ++index;
368         }
369     }
370     TRACE("Processed %i of %i characters into %i syllables\n",index,cChar,*syllable_count);
371 }
372 
373 void Indic_ReorderCharacters(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, WCHAR *input, unsigned int cChar,
374         IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern)
375 {
376     int i;
377 
378     if (!reorder_f)
379     {
380         ERR("Failure to have required functions\n");
381         return;
382     }
383 
384     Indic_ParseSyllables(hdc, psa, psc, input, cChar, syllables, syllable_count, lex, modern);
385     for (i = 0; i < *syllable_count; i++)
386         reorder_f(input, &(*syllables)[i], lex);
387 }
388