1 /* 2 * Implementation of Indic Syllables for the Uniscribe Script Processor 3 * 4 * Copyright 2011 CodeWeavers, Aric Stewart 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 19 * 20 */ 21 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 26 #include "windef.h" 27 #include "winbase.h" 28 #include "winuser.h" 29 #include "wingdi.h" 30 #include "winnls.h" 31 #include "usp10.h" 32 #include "winternl.h" 33 34 #include "wine/debug.h" 35 #include "wine/heap.h" 36 #include "usp10_internal.h" 37 38 WINE_DEFAULT_DEBUG_CHANNEL(uniscribe); 39 40 static void debug_output_string(const WCHAR *str, unsigned int char_count, lexical_function f) 41 { 42 int i; 43 if (TRACE_ON(uniscribe)) 44 { 45 for (i = 0; i < char_count; ++i) 46 { 47 switch (f(str[i])) 48 { 49 case lex_Consonant: TRACE("C"); break; 50 case lex_Ra: TRACE("Ra"); break; 51 case lex_Vowel: TRACE("V"); break; 52 case lex_Nukta: TRACE("N"); break; 53 case lex_Halant: TRACE("H"); break; 54 case lex_ZWNJ: TRACE("Zwnj"); break; 55 case lex_ZWJ: TRACE("Zwj"); break; 56 case lex_Matra_post: TRACE("Mp");break; 57 case lex_Matra_above: TRACE("Ma");break; 58 case lex_Matra_below: TRACE("Mb");break; 59 case lex_Matra_pre: TRACE("Mm");break; 60 case lex_Modifier: TRACE("Sm"); break; 61 case lex_Vedic: TRACE("Vd"); break; 62 case lex_Anudatta: TRACE("A"); break; 63 case lex_Composed_Vowel: TRACE("t"); break; 64 default: 65 TRACE("X"); break; 66 } 67 } 68 TRACE("\n"); 69 } 70 } 71 72 static inline BOOL is_matra( int type ) 73 { 74 return (type == lex_Matra_above || type == lex_Matra_below || 75 type == lex_Matra_pre || type == lex_Matra_post || 76 type == lex_Composed_Vowel); 77 } 78 79 static inline BOOL is_joiner( int type ) 80 { 81 return (type == lex_ZWJ || type == lex_ZWNJ); 82 } 83 84 static int consonant_header(const WCHAR *input, unsigned int cChar, 85 unsigned int start, unsigned int next, lexical_function lex) 86 { 87 if (!is_consonant( lex(input[next]) )) return -1; 88 next++; 89 if ((next < cChar) && lex(input[next]) == lex_Nukta) 90 next++; 91 if ((next < cChar) && lex(input[next])==lex_Halant) 92 { 93 next++; 94 if((next < cChar) && is_joiner( lex(input[next]) )) 95 next++; 96 if ((next < cChar) && is_consonant( lex(input[next]) )) 97 return next; 98 } 99 else if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant) 100 { 101 next+=2; 102 if ((next < cChar) && is_consonant( lex(input[next]) )) 103 return next; 104 } 105 return -1; 106 } 107 108 static int parse_consonant_syllable(const WCHAR *input, unsigned int cChar, 109 unsigned int start, unsigned int *main, unsigned int next, lexical_function lex) 110 { 111 int check; 112 int headers = 0; 113 do 114 { 115 check = consonant_header(input,cChar,start,next,lex); 116 if (check != -1) 117 { 118 next = check; 119 headers++; 120 } 121 } while (check != -1); 122 if (headers || is_consonant( lex(input[next]) )) 123 { 124 *main = next; 125 next++; 126 } 127 else 128 return -1; 129 if ((next < cChar) && lex(input[next]) == lex_Nukta) 130 next++; 131 if ((next < cChar) && lex(input[next]) == lex_Anudatta) 132 next++; 133 134 if ((next < cChar) && lex(input[next]) == lex_Halant) 135 { 136 next++; 137 if((next < cChar) && is_joiner( lex(input[next]) )) 138 next++; 139 } 140 else if (next < cChar) 141 { 142 while((next < cChar) && is_matra( lex(input[next]) )) 143 next++; 144 if ((next < cChar) && lex(input[next]) == lex_Nukta) 145 next++; 146 if ((next < cChar) && lex(input[next]) == lex_Halant) 147 next++; 148 } 149 if ((next < cChar) && lex(input[next]) == lex_Modifier) 150 next++; 151 if ((next < cChar) && lex(input[next]) == lex_Vedic) 152 next++; 153 return next; 154 } 155 156 static int parse_vowel_syllable(const WCHAR *input, unsigned int cChar, 157 unsigned int start, unsigned int next, lexical_function lex) 158 { 159 if ((next < cChar) && lex(input[next]) == lex_Nukta) 160 next++; 161 if ((next < cChar) && is_joiner( lex(input[next]) ) && lex(input[next+1])==lex_Halant && is_consonant( lex(input[next+2]) )) 162 next+=3; 163 else if ((next < cChar) && lex(input[next])==lex_Halant && is_consonant( lex(input[next+1]) )) 164 next+=2; 165 else if ((next < cChar) && lex(input[next])==lex_ZWJ && is_consonant( lex(input[next+1]) )) 166 next+=2; 167 168 if ((next < cChar) && is_matra( lex(input[next]) )) 169 { 170 while((next < cChar) && is_matra( lex(input[next]) )) 171 next++; 172 if ((next < cChar) && lex(input[next]) == lex_Nukta) 173 next++; 174 if ((next < cChar) && lex(input[next]) == lex_Halant) 175 next++; 176 } 177 178 if ((next < cChar) && lex(input[next]) == lex_Modifier) 179 next++; 180 if ((next < cChar) && lex(input[next]) == lex_Vedic) 181 next++; 182 return next; 183 } 184 185 static int Indic_process_next_syllable(const WCHAR *input, unsigned int cChar, 186 unsigned int start, unsigned int *main, unsigned int next, lexical_function lex) 187 { 188 if (lex(input[next])==lex_Vowel) 189 { 190 *main = next; 191 return parse_vowel_syllable(input, cChar, start, next+1, lex); 192 } 193 else if ((cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_Vowel) 194 { 195 *main = next+2; 196 return parse_vowel_syllable(input, cChar, start, next+3, lex); 197 } 198 199 else if (start == next && lex(input[next])==lex_NBSP) 200 { 201 *main = next; 202 return parse_vowel_syllable(input, cChar, start, next+1, lex); 203 } 204 else if (start == next && (cChar > next+3) && lex(input[next]) == lex_Ra && lex(input[next+1]) == lex_Halant && lex(input[next+2]) == lex_NBSP) 205 { 206 *main = next+2; 207 return parse_vowel_syllable(input, cChar, start, next+3, lex); 208 } 209 210 return parse_consonant_syllable(input, cChar, start, main, next, lex); 211 } 212 213 static BOOL Consonant_is_post_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, 214 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern) 215 { 216 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant) 217 { 218 if (modern) 219 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pstf") > 0); 220 else 221 { 222 WCHAR cc[2]; 223 cc[0] = pwChar[s->base]; 224 cc[1] = pwChar[s->base-1]; 225 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pstf") > 0); 226 } 227 } 228 return FALSE; 229 } 230 231 static BOOL Consonant_is_below_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, 232 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern) 233 { 234 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant) 235 { 236 if (modern) 237 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "blwf") > 0); 238 else 239 { 240 WCHAR cc[2]; 241 cc[0] = pwChar[s->base]; 242 cc[1] = pwChar[s->base-1]; 243 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "blwf") > 0); 244 } 245 } 246 return FALSE; 247 } 248 249 static BOOL Consonant_is_pre_base_form(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, 250 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical, BOOL modern) 251 { 252 if (is_consonant(lexical(pwChar[s->base])) && s->base > s->start && lexical(pwChar[s->base-1]) == lex_Halant) 253 { 254 if (modern) 255 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->base-1], 1, 2, "pref") > 0); 256 else 257 { 258 WCHAR cc[2]; 259 cc[0] = pwChar[s->base]; 260 cc[1] = pwChar[s->base-1]; 261 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, cc, 1, 2, "pref") > 0); 262 } 263 } 264 return FALSE; 265 } 266 267 static BOOL Consonant_is_ralf(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, 268 const WCHAR *pwChar, const IndicSyllable *s, lexical_function lexical) 269 { 270 if ((lexical(pwChar[s->start])==lex_Ra) && s->end > s->start && lexical(pwChar[s->start+1]) == lex_Halant) 271 return (SHAPE_does_GSUB_feature_apply_to_chars(hdc, psa, psc, &pwChar[s->start], 1, 2, "rphf") > 0); 272 return FALSE; 273 } 274 275 static int FindBaseConsonant(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, 276 const WCHAR *input, IndicSyllable *s, lexical_function lex, BOOL modern) 277 { 278 int i; 279 BOOL blwf = FALSE; 280 BOOL pref = FALSE; 281 282 /* remove ralf from consideration */ 283 if (Consonant_is_ralf(hdc, psa, psc, input, s, lex)) 284 { 285 s->ralf = s->start; 286 s->start+=2; 287 } 288 289 /* try to find a base consonant */ 290 if (!is_consonant( lex(input[s->base]) )) 291 { 292 for (i = s->end; i >= s->start; i--) 293 if (is_consonant( lex(input[i]) )) 294 { 295 s->base = i; 296 break; 297 } 298 } 299 300 while ((blwf = Consonant_is_below_base_form(hdc, psa, psc, input, s, lex, modern)) || Consonant_is_post_base_form(hdc, psa, psc, input, s, lex, modern) || (pref = Consonant_is_pre_base_form(hdc, psa, psc, input, s, lex, modern))) 301 { 302 if (blwf && s->blwf == -1) 303 s->blwf = s->base - 1; 304 if (pref && s->pref == -1) 305 s->pref = s->base - 1; 306 307 for (i = s->base-1; i >= s->start; i--) 308 if (is_consonant( lex(input[i]) )) 309 { 310 s->base = i; 311 break; 312 } 313 } 314 315 if (s->ralf >= 0) 316 s->start = s->ralf; 317 318 if (s->ralf == s->base) 319 s->ralf = -1; 320 321 return s->base; 322 } 323 324 void Indic_ParseSyllables(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, const WCHAR *input, unsigned int cChar, 325 IndicSyllable **syllables, int *syllable_count, lexical_function lex, BOOL modern) 326 { 327 unsigned int center = 0; 328 int index = 0; 329 int next = 0; 330 331 *syllable_count = 0; 332 333 if (!lex) 334 { 335 ERR("Failure to have required functions\n"); 336 return; 337 } 338 339 debug_output_string(input, cChar, lex); 340 while (next != -1) 341 { 342 while((next < cChar) && lex(input[next]) == lex_Generic) 343 next++; 344 index = next; 345 if (next >= cChar) 346 break; 347 next = Indic_process_next_syllable(input, cChar, 0, ¢er, index, lex); 348 if (next != -1) 349 { 350 if (*syllable_count) 351 *syllables = HeapReAlloc(GetProcessHeap(),0,*syllables, sizeof(IndicSyllable)*(*syllable_count+1)); 352 else 353 *syllables = heap_alloc(sizeof(**syllables)); 354 (*syllables)[*syllable_count].start = index; 355 (*syllables)[*syllable_count].base = center; 356 (*syllables)[*syllable_count].ralf = -1; 357 (*syllables)[*syllable_count].blwf = -1; 358 (*syllables)[*syllable_count].pref = -1; 359 (*syllables)[*syllable_count].end = next-1; 360 FindBaseConsonant(hdc, psa, psc, input, &(*syllables)[*syllable_count], lex, modern); 361 index = next; 362 *syllable_count = (*syllable_count)+1; 363 } 364 else if (index < cChar) 365 { 366 TRACE("Processing failed at %i\n",index); 367 next = ++index; 368 } 369 } 370 TRACE("Processed %i of %i characters into %i syllables\n",index,cChar,*syllable_count); 371 } 372 373 void Indic_ReorderCharacters(HDC hdc, SCRIPT_ANALYSIS *psa, ScriptCache *psc, WCHAR *input, unsigned int cChar, 374 IndicSyllable **syllables, int *syllable_count, lexical_function lex, reorder_function reorder_f, BOOL modern) 375 { 376 int i; 377 378 if (!reorder_f) 379 { 380 ERR("Failure to have required functions\n"); 381 return; 382 } 383 384 Indic_ParseSyllables(hdc, psa, psc, input, cChar, syllables, syllable_count, lex, modern); 385 for (i = 0; i < *syllable_count; i++) 386 reorder_f(input, &(*syllables)[i], lex); 387 } 388