1 /*
2  * Copyright (C) 2015 The Qt Company Ltd
3  *
4  * This is part of HarfBuzz, an OpenType Layout engine library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  */
24 
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
27 
28 #include <assert.h>
29 #include <stdio.h>
30 
// Expands to an int with only bit number `bit` set.
#define FLAG(bit) (1 << (bit))
32 
isLetter(HB_UChar16 ucs)33 static HB_Bool isLetter(HB_UChar16 ucs)
34 {
35     const int test = FLAG(HB_Letter_Uppercase) |
36                      FLAG(HB_Letter_Lowercase) |
37                      FLAG(HB_Letter_Titlecase) |
38                      FLAG(HB_Letter_Modifier) |
39                      FLAG(HB_Letter_Other);
40     return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
41 }
42 
isMark(HB_UChar16 ucs)43 static HB_Bool isMark(HB_UChar16 ucs)
44 {
45     const int test = FLAG(HB_Mark_NonSpacing) |
46                      FLAG(HB_Mark_SpacingCombining) |
47                      FLAG(HB_Mark_Enclosing);
48     return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
49 }
50 
// Per-code-point classification. The values are stored as unsigned char in
// the indicForms table and handed back by form(), so the numeric values
// below must stay stable and small.
enum Form {
    Invalid          = 0,
    UnknownForm      = Invalid,   // alias: same value as Invalid
    Consonant        = 1,
    Nukta            = 2,
    Halant           = 3,
    Matra            = 4,
    VowelMark        = 5,
    StressMark       = 6,
    IndependentVowel = 7,
    LengthMark       = 8,
    Control          = 9,
    Other            = 10
};
65 
66 static const unsigned char indicForms[0xe00-0x900] = {
67     // Devangari
68     Invalid, VowelMark, VowelMark, VowelMark,
69     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
70     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
71     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
72 
73     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
74     IndependentVowel, Consonant, Consonant, Consonant,
75     Consonant, Consonant, Consonant, Consonant,
76     Consonant, Consonant, Consonant, Consonant,
77 
78     Consonant, Consonant, Consonant, Consonant,
79     Consonant, Consonant, Consonant, Consonant,
80     Consonant, Consonant, Consonant, Consonant,
81     Consonant, Consonant, Consonant, Consonant,
82 
83     Consonant, Consonant, Consonant, Consonant,
84     Consonant, Consonant, Consonant, Consonant,
85     Consonant, Consonant, UnknownForm, UnknownForm,
86     Nukta, Other, Matra, Matra,
87 
88     Matra, Matra, Matra, Matra,
89     Matra, Matra, Matra, Matra,
90     Matra, Matra, Matra, Matra,
91     Matra, Halant, UnknownForm, UnknownForm,
92 
93     Other, StressMark, StressMark, StressMark,
94     StressMark, UnknownForm, UnknownForm, UnknownForm,
95     Consonant, Consonant, Consonant, Consonant,
96     Consonant, Consonant, Consonant, Consonant,
97 
98     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
99     Other, Other, Other, Other,
100     Other, Other, Other, Other,
101     Other, Other, Other, Other,
102 
103     Other, Other, Other, Other,
104     Other, Other, Other, Other,
105     Other, Other, Other, Consonant,
106     Consonant, Consonant /* ??? */, Consonant, Consonant,
107 
108     // Bengali
109     Invalid, VowelMark, VowelMark, VowelMark,
110     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
111     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
112     IndependentVowel, Invalid, Invalid, IndependentVowel,
113 
114     IndependentVowel, Invalid, Invalid, IndependentVowel,
115     IndependentVowel, Consonant, Consonant, Consonant,
116     Consonant, Consonant, Consonant, Consonant,
117     Consonant, Consonant, Consonant, Consonant,
118 
119     Consonant, Consonant, Consonant, Consonant,
120     Consonant, Consonant, Consonant, Consonant,
121     Consonant, Invalid, Consonant, Consonant,
122     Consonant, Consonant, Consonant, Consonant,
123 
124     Consonant, Invalid, Consonant, Invalid,
125     Invalid, Invalid, Consonant, Consonant,
126     Consonant, Consonant, UnknownForm, UnknownForm,
127     Nukta, Other, Matra, Matra,
128 
129     Matra, Matra, Matra, Matra,
130     Matra, Invalid, Invalid, Matra,
131     Matra, Invalid, Invalid, Matra,
132     Matra, Halant, Consonant, UnknownForm,
133 
134     Invalid, Invalid, Invalid, Invalid,
135     Invalid, Invalid, Invalid, VowelMark,
136     Invalid, Invalid, Invalid, Invalid,
137     Consonant, Consonant, Invalid, Consonant,
138 
139     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
140     Other, Other, Other, Other,
141     Other, Other, Other, Other,
142     Other, Other, Other, Other,
143 
144     Consonant, Consonant, Other, Other,
145     Other, Other, Other, Other,
146     Other, Other, Other, Other,
147     Other, Other, Other, Other,
148 
149     // Gurmukhi
150     Invalid, VowelMark, VowelMark, VowelMark,
151     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
152     IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
153     Invalid, Invalid, Invalid, IndependentVowel,
154 
155     IndependentVowel, Invalid, Invalid, IndependentVowel,
156     IndependentVowel, Consonant, Consonant, Consonant,
157     Consonant, Consonant, Consonant, Consonant,
158     Consonant, Consonant, Consonant, Consonant,
159 
160     Consonant, Consonant, Consonant, Consonant,
161     Consonant, Consonant, Consonant, Consonant,
162     Consonant, Invalid, Consonant, Consonant,
163     Consonant, Consonant, Consonant, Consonant,
164 
165     Consonant, Invalid, Consonant, Consonant,
166     Invalid, Consonant, Consonant, Invalid,
167     Consonant, Consonant, UnknownForm, UnknownForm,
168     Nukta, Other, Matra, Matra,
169 
170     Matra, Matra, Matra, Invalid,
171     Invalid, Invalid, Invalid, Matra,
172     Matra, Invalid, Invalid, Matra,
173     Matra, Halant, UnknownForm, UnknownForm,
174 
175     Invalid, Invalid, Invalid, Invalid,
176     Invalid, UnknownForm, UnknownForm, UnknownForm,
177     Invalid, Consonant, Consonant, Consonant,
178     Consonant, Invalid, Consonant, Invalid,
179 
180     Other, Other, Invalid, Invalid,
181     Other, Other, Other, Other,
182     Other, Other, Other, Other,
183     Other, Other, Other, Other,
184 
185     StressMark, StressMark, Consonant, Consonant,
186     Other, Other, Other, Other,
187     Other, Other, Other, Other,
188     Other, Other, Other, Other,
189 
190     // Gujarati
191     Invalid, VowelMark, VowelMark, VowelMark,
192     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
193     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
194     IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
195 
196     IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
197     IndependentVowel, Consonant, Consonant, Consonant,
198     Consonant, Consonant, Consonant, Consonant,
199     Consonant, Consonant, Consonant, Consonant,
200 
201     Consonant, Consonant, Consonant, Consonant,
202     Consonant, Consonant, Consonant, Consonant,
203     Consonant, Invalid, Consonant, Consonant,
204     Consonant, Consonant, Consonant, Consonant,
205 
206     Consonant, Invalid, Consonant, Consonant,
207     Invalid, Consonant, Consonant, Consonant,
208     Consonant, Consonant, UnknownForm, UnknownForm,
209     Nukta, Other, Matra, Matra,
210 
211     Matra, Matra, Matra, Matra,
212     Matra, Matra, Invalid, Matra,
213     Matra, Matra, Invalid, Matra,
214     Matra, Halant, UnknownForm, UnknownForm,
215 
216     Other, UnknownForm, UnknownForm, UnknownForm,
217     UnknownForm, UnknownForm, UnknownForm, UnknownForm,
218     UnknownForm, UnknownForm, UnknownForm, UnknownForm,
219     UnknownForm, UnknownForm, UnknownForm, UnknownForm,
220 
221     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
222     Other, Other, Other, Other,
223     Other, Other, Other, Other,
224     Other, Other, Other, Other,
225 
226     Other, Other, Other, Other,
227     Other, Other, Other, Other,
228     Other, Other, Other, Other,
229     Other, Other, Other, Other,
230 
231     // Oriya
232     Invalid, VowelMark, VowelMark, VowelMark,
233     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
234     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
235     IndependentVowel, Invalid, Invalid, IndependentVowel,
236 
237     IndependentVowel, Invalid, Invalid, IndependentVowel,
238     IndependentVowel, Consonant, Consonant, Consonant,
239     Consonant, Consonant, Consonant, Consonant,
240     Consonant, Consonant, Consonant, Consonant,
241 
242     Consonant, Consonant, Consonant, Consonant,
243     Consonant, Consonant, Consonant, Consonant,
244     Consonant, Invalid, Consonant, Consonant,
245     Consonant, Consonant, Consonant, Consonant,
246 
247     Consonant, Invalid, Consonant, Consonant,
248     Invalid, Consonant, Consonant, Consonant,
249     Consonant, Consonant, UnknownForm, UnknownForm,
250     Nukta, Other, Matra, Matra,
251 
252     Matra, Matra, Matra, Matra,
253     Invalid, Invalid, Invalid, Matra,
254     Matra, Invalid, Invalid, Matra,
255     Matra, Halant, UnknownForm, UnknownForm,
256 
257     Other, Invalid, Invalid, Invalid,
258     Invalid, UnknownForm, LengthMark, LengthMark,
259     Invalid, Invalid, Invalid, Invalid,
260     Consonant, Consonant, Invalid, Consonant,
261 
262     IndependentVowel, IndependentVowel, Invalid, Invalid,
263     Invalid, Invalid, Other, Other,
264     Other, Other, Other, Other,
265     Other, Other, Other, Other,
266 
267     Other, Consonant, Other, Other,
268     Other, Other, Other, Other,
269     Other, Other, Other, Other,
270     Other, Other, Other, Other,
271 
272     //Tamil
273     Invalid, Invalid, VowelMark, Other,
274     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
275     IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
276     Invalid, Invalid, IndependentVowel, IndependentVowel,
277 
278     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
279     IndependentVowel, Consonant, Invalid, Invalid,
280     Invalid, Consonant, Consonant, Invalid,
281     Consonant, Invalid, Consonant, Consonant,
282 
283     Invalid, Invalid, Invalid, Consonant,
284     Consonant, Invalid, Invalid, Invalid,
285     Consonant, Consonant, Consonant, Invalid,
286     Invalid, Invalid, Consonant, Consonant,
287 
288     Consonant, Consonant, Consonant, Consonant,
289     Consonant, Consonant, Consonant, Consonant,
290     Consonant, Consonant, UnknownForm, UnknownForm,
291     Invalid, Invalid, Matra, Matra,
292 
293     Matra, Matra, Matra, Invalid,
294     Invalid, Invalid, Matra, Matra,
295     Matra, Invalid, Matra, Matra,
296     Matra, Halant, Invalid, Invalid,
297 
298     Invalid, Invalid, Invalid, Invalid,
299     Invalid, Invalid, Invalid, LengthMark,
300     Invalid, Invalid, Invalid, Invalid,
301     Invalid, Invalid, Invalid, Invalid,
302 
303     Invalid, Invalid, Invalid, Invalid,
304     Invalid, Invalid, Other, Other,
305     Other, Other, Other, Other,
306     Other, Other, Other, Other,
307 
308     Other, Other, Other, Other,
309     Other, Other, Other, Other,
310     Other, Other, Other, Other,
311     Other, Other, Other, Other,
312 
313     // Telugu
314     Invalid, VowelMark, VowelMark, VowelMark,
315     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
316     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
317     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
318 
319     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
320     IndependentVowel, Consonant, Consonant, Consonant,
321     Consonant, Consonant, Consonant, Consonant,
322     Consonant, Consonant, Consonant, Consonant,
323 
324     Consonant, Consonant, Consonant, Consonant,
325     Consonant, Consonant, Consonant, Consonant,
326     Consonant, Invalid, Consonant, Consonant,
327     Consonant, Consonant, Consonant, Consonant,
328 
329     Consonant, Consonant, Consonant, Consonant,
330     Invalid, Consonant, Consonant, Consonant,
331     Consonant, Consonant, UnknownForm, UnknownForm,
332     Invalid, Invalid, Matra, Matra,
333 
334     Matra, Matra, Matra, Matra,
335     Matra, Invalid, Matra, Matra,
336     Matra, Invalid, Matra, Matra,
337     Matra, Halant, Invalid, Invalid,
338 
339     Invalid, Invalid, Invalid, Invalid,
340     Invalid, LengthMark, Matra, Invalid,
341     Invalid, Invalid, Invalid, Invalid,
342     Invalid, Invalid, Invalid, Invalid,
343 
344     IndependentVowel, IndependentVowel, Invalid, Invalid,
345     Invalid, Invalid, Other, Other,
346     Other, Other, Other, Other,
347     Other, Other, Other, Other,
348 
349     Other, Other, Other, Other,
350     Other, Other, Other, Other,
351     Other, Other, Other, Other,
352     Other, Other, Other, Other,
353 
354     // Kannada
355     Invalid, Invalid, VowelMark, VowelMark,
356     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
357     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
358     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
359 
360     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
361     IndependentVowel, Consonant, Consonant, Consonant,
362     Consonant, Consonant, Consonant, Consonant,
363     Consonant, Consonant, Consonant, Consonant,
364 
365     Consonant, Consonant, Consonant, Consonant,
366     Consonant, Consonant, Consonant, Consonant,
367     Consonant, Invalid, Consonant, Consonant,
368     Consonant, Consonant, Consonant, Consonant,
369 
370     Consonant, Consonant, Consonant, Consonant,
371     Invalid, Consonant, Consonant, Consonant,
372     Consonant, Consonant, UnknownForm, UnknownForm,
373     Nukta, Other, Matra, Matra,
374 
375     Matra, Matra, Matra, Matra,
376     Matra, Invalid, Matra, Matra,
377     Matra, Invalid, Matra, Matra,
378     Matra, Halant, Invalid, Invalid,
379 
380     Invalid, Invalid, Invalid, Invalid,
381     Invalid, LengthMark, LengthMark, Invalid,
382     Invalid, Invalid, Invalid, Invalid,
383     Invalid, Invalid, Consonant, Invalid,
384 
385     IndependentVowel, IndependentVowel, VowelMark, VowelMark,
386     Invalid, Invalid, Other, Other,
387     Other, Other, Other, Other,
388     Other, Other, Other, Other,
389 
390     Other, Other, Other, Other,
391     Other, Other, Other, Other,
392     Other, Other, Other, Other,
393     Other, Other, Other, Other,
394 
395     // Malayalam
396     Invalid, Invalid, VowelMark, VowelMark,
397     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
398     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
399     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
400 
401     IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
402     IndependentVowel, Consonant, Consonant, Consonant,
403     Consonant, Consonant, Consonant, Consonant,
404     Consonant, Consonant, Consonant, Consonant,
405 
406     Consonant, Consonant, Consonant, Consonant,
407     Consonant, Consonant, Consonant, Consonant,
408     Consonant, Invalid, Consonant, Consonant,
409     Consonant, Consonant, Consonant, Consonant,
410 
411     Consonant, Consonant, Consonant, Consonant,
412     Consonant, Consonant, Consonant, Consonant,
413     Consonant, Consonant, UnknownForm, UnknownForm,
414     Invalid, Invalid, Matra, Matra,
415 
416     Matra, Matra, Matra, Matra,
417     Invalid, Invalid, Matra, Matra,
418     Matra, Invalid, Matra, Matra,
419     Matra, Halant, Invalid, Invalid,
420 
421     Invalid, Invalid, Invalid, Invalid,
422     Invalid, Invalid, Invalid, Matra,
423     Invalid, Invalid, Invalid, Invalid,
424     Invalid, Invalid, Invalid, Invalid,
425 
426     IndependentVowel, IndependentVowel, Invalid, Invalid,
427     Invalid, Invalid, Other, Other,
428     Other, Other, Other, Other,
429     Other, Other, Other, Other,
430 
431     Other, Other, Other, Other,
432     Other, Other, Other, Other,
433     Other, Other, Other, Other,
434     Other, Other, Other, Other,
435 
436     // Sinhala
437     Invalid, Invalid, VowelMark, VowelMark,
438     Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
439     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
440     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
441 
442     IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
443     IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
444     Invalid, Invalid, Consonant, Consonant,
445     Consonant, Consonant, Consonant, Consonant,
446 
447     Consonant, Consonant, Consonant, Consonant,
448     Consonant, Consonant, Consonant, Consonant,
449     Consonant, Consonant, Consonant, Consonant,
450     Consonant, Consonant, Consonant, Consonant,
451 
452     Consonant, Consonant, Invalid, Consonant,
453     Consonant, Consonant, Consonant, Consonant,
454     Consonant, Consonant, Consonant, Consonant,
455     Invalid, Consonant, Invalid, Invalid,
456 
457     Consonant, Consonant, Consonant, Consonant,
458     Consonant, Consonant, Consonant, Invalid,
459     Invalid, Invalid, Halant, Invalid,
460     Invalid, Invalid, Invalid, Matra,
461 
462     Matra, Matra, Matra, Matra,
463     Matra, Invalid, Matra, Invalid,
464     Matra, Matra, Matra, Matra,
465     Matra, Matra, Matra, Matra,
466 
467     Invalid, Invalid, Invalid, Invalid,
468     Invalid, Invalid, Invalid, Invalid,
469     Invalid, Invalid, Invalid, Invalid,
470     Invalid, Invalid, Invalid, Invalid,
471 
472     Invalid, Invalid, Matra, Matra,
473     Other, Other, Other, Other,
474     Other, Other, Other, Other,
475     Other, Other, Other, Other,
476 };
477 
// Per-code-point placement class. The values are stored as unsigned char in
// the indicPosition table and handed back by indic_position(), so the
// numeric values below must stay stable and small.
enum Position {
    None    = 0,
    Pre     = 1,
    Above   = 2,
    Below   = 3,
    Post    = 4,
    Split   = 5,
    Base    = 6,
    Reph    = 7,
    Vattu   = 8,
    Inherit = 9
};
490 
491 static const unsigned char indicPosition[0xe00-0x900] = {
492     // Devanagari
493     None, Above, Above, Post,
494     None, None, None, None,
495     None, None, None, None,
496     None, None, None, None,
497 
498     None, None, None, None,
499     None, None, None, None,
500     None, None, None, None,
501     None, None, None, None,
502 
503     None, None, None, None,
504     None, None, None, None,
505     None, None, None, None,
506     None, None, None, None,
507 
508     Below, None, None, None,
509     None, None, None, None,
510     None, None, None, None,
511     None, None, Post, Pre,
512 
513     Post, Below, Below, Below,
514     Below, Above, Above, Above,
515     Above, Post, Post, Post,
516     Post, None, None, None,
517 
518     None, Above, Below, Above,
519     Above, None, None, None,
520     None, None, None, None,
521     None, None, None, None,
522 
523     None, None, Below, Below,
524     None, None, None, None,
525     None, None, None, None,
526     None, None, None, None,
527 
528     None, None, None, None,
529     None, None, None, None,
530     None, None, None, None,
531     None, None, None, None,
532 
533     // Bengali
534     None, Above, Post, Post,
535     None, None, None, None,
536     None, None, None, None,
537     None, None, None, None,
538 
539     None, None, None, None,
540     None, None, None, None,
541     None, None, None, None,
542     None, None, None, None,
543 
544     None, None, None, None,
545     None, None, None, None,
546     None, None, None, None,
547     Below, None, None, Post,
548 
549     Below, None, None, None,
550     None, None, None, None,
551     None, None, None, None,
552     Below, None, Post, Pre,
553 
554     Post, Below, Below, Below,
555     Below, None, None, Pre,
556     Pre, None, None, Split,
557     Split, Below, None, None,
558 
559     None, None, None, None,
560     None, None, None, Post,
561     None, None, None, None,
562     None, None, None, None,
563 
564     None, None, Below, Below,
565     None, None, None, None,
566     None, None, None, None,
567     None, None, None, None,
568 
569     Below, None, None, None,
570     None, None, None, None,
571     None, None, None, None,
572     None, None, None, None,
573 
574     // Gurmukhi
575     None, Above, Above, Post,
576     None, None, None, None,
577     None, None, None, None,
578     None, None, None, None,
579 
580     None, None, None, None,
581     None, None, None, None,
582     None, None, None, None,
583     None, None, None, None,
584 
585     None, None, None, None,
586     None, None, None, None,
587     None, None, None, None,
588     None, None, None, Post,
589 
590     Below, None, None, None,
591     None, Below, None, None,
592     None, Below, None, None,
593     Below, None, Post, Pre,
594 
595     Post, Below, Below, None,
596     None, None, None, Above,
597     Above, None, None, Above,
598     Above, None, None, None,
599 
600     None, None, None, None,
601     None, None, None, None,
602     None, None, None, None,
603     None, None, None, None,
604 
605     None, None, None, None,
606     None, None, None, None,
607     None, None, None, None,
608     None, None, None, None,
609 
610     Above, Above, None, None,
611     None, None, None, None,
612     None, None, None, None,
613     None, None, None, None,
614 
615     // Gujarati
616     None, Above, Above, Post,
617     None, None, None, None,
618     None, None, None, None,
619     None, None, None, None,
620 
621     None, None, None, None,
622     None, None, None, None,
623     None, None, None, None,
624     None, None, None, None,
625 
626     None, None, None, None,
627     None, None, None, None,
628     None, None, None, None,
629     None, None, None, None,
630 
631     Below, None, None, None,
632     None, None, None, None,
633     None, None, None, None,
634     None, None, Post, Pre,
635 
636     Post, Below, Below, Below,
637     Below, Above, None, Above,
638     Above, Post, None, Post,
639     Post, None, None, None,
640 
641     None, None, None, None,
642     None, None, None, None,
643     None, None, None, None,
644     None, None, None, None,
645 
646     None, None, Below, Below,
647     None, None, None, None,
648     None, None, None, None,
649     None, None, None, None,
650 
651     None, None, None, None,
652     None, None, None, None,
653     None, None, None, None,
654     None, None, None, None,
655 
656     // Oriya
657     None, Above, Post, Post,
658     None, None, None, None,
659     None, None, None, None,
660     None, None, None, None,
661 
662     None, None, None, None,
663     None, Below, Below, Below,
664     Below, Below, Below, Below,
665     Below, Below, None, Below,
666 
667     Below, Below, Below, Below,
668     Below, Below, Below, Below,
669     Below, None, Below, Below,
670     Below, Below, Below, Post,
671 
672     Below, None, Below, Below,
673     None, Below, Below, Below,
674     Below, Below, None, None,
675     None, None, Post, Above,
676 
677     Post, Below, Below, Below,
678     None, None, None, Pre,
679     Split, None, None, Split,
680     Split, None, None, None,
681 
682     None, None, None, None,
683     None, None, Above, Post,
684     None, None, None, None,
685     None, None, None, Post,
686 
687     None, None, None, None,
688     None, None, None, None,
689     None, None, None, None,
690     None, None, None, None,
691 
692     None, Below, None, None,
693     None, None, None, None,
694     None, None, None, None,
695     None, None, None, None,
696 
697     // Tamil
698     None, None, Above, None,
699     None, None, None, None,
700     None, None, None, None,
701     None, None, None, None,
702 
703     None, None, None, None,
704     None, None, None, None,
705     None, None, None, None,
706     None, None, None, None,
707 
708     None, None, None, None,
709     None, None, None, None,
710     None, None, None, None,
711     None, None, None, None,
712 
713     None, None, None, None,
714     None, None, None, None,
715     None, None, None, None,
716     None, None, Post, Post,
717 
718     Above, Below, Below, None,
719     None, None, Pre, Pre,
720     Pre, None, Split, Split,
721     Split, Halant, None, None,
722 
723     None, None, None, None,
724     None, None, None, Post,
725     None, None, None, None,
726     None, None, None, None,
727 
728     None, None, None, None,
729     None, None, None, None,
730     None, None, None, None,
731     None, None, None, None,
732 
733     None, None, None, None,
734     None, None, None, None,
735     None, None, None, None,
736     None, None, None, None,
737 
738     // Telugu
739     None, Post, Post, Post,
740     None, None, None, None,
741     None, None, None, None,
742     None, None, None, None,
743 
744     None, None, None, None,
745     None, Below, Below, Below,
746     Below, Below, Below, Below,
747     Below, Below, Below, Below,
748 
749     Below, Below, Below, Below,
750     Below, Below, Below, Below,
751     Below, None, Below, Below,
752     Below, Below, Below, Below,
753 
754     Below, None, Below, Below,
755     None, Below, Below, Below,
756     Below, Below, None, None,
757     None, None, Post, Above,
758 
759     Above, Post, Post, Post,
760     Post, None, Above, Above,
761     Split, None, Post, Above,
762     Above, Halant, None, None,
763 
764     None, None, None, None,
765     None, Above, Below, None,
766     None, None, None, None,
767     None, None, None, None,
768 
769     None, None, None, None,
770     None, None, None, None,
771     None, None, None, None,
772     None, None, None, None,
773 
774     None, None, None, None,
775     None, None, None, None,
776     None, None, None, None,
777     None, None, None, None,
778 
779     // Kannada
780     None, None, Post, Post,
781     None, None, None, None,
782     None, None, None, None,
783     None, None, None, None,
784 
785     None, None, None, None,
786     None, Below, Below, Below,
787     Below, Below, Below, Below,
788     Below, Below, Below, Below,
789 
790     Below, Below, Below, Below,
791     Below, Below, Below, Below,
792     Below, Below, Below, Below,
793     Below, Below, Below, Below,
794 
795     Below, None, Below, Below,
796     None, Below, Below, Below,
797     Below, Below, None, None,
798     None, None, Post, Above,
799 
800     Split, Post, Post, Post,
801     Post, None, Above, Split,
802     Split, None, Split, Split,
803     Above, Halant, None, None,
804 
805     None, None, None, None,
806     None, Post, Post, None,
807     None, None, None, None,
808     None, None, Below, None,
809 
810     None, None, Below, Below,
811     None, None, None, None,
812     None, None, None, None,
813     None, None, None, None,
814 
815     None, None, None, None,
816     None, None, None, None,
817     None, None, None, None,
818     None, None, None, None,
819 
820     // Malayalam
821     None, None, Post, Post,
822     None, None, None, None,
823     None, None, None, None,
824     None, None, None, None,
825 
826     None, None, None, None,
827     None, None, None, None,
828     None, None, None, None,
829     None, None, None, None,
830 
831     None, None, None, None,
832     None, None, None, None,
833     None, None, None, None,
834     None, None, None, Post,
835 
836     Pre, None, Below, None,
837     None, Post, None, None,
838     None, None, None, None,
839     None, None, Post, Post,
840 
841     Post, Post, Post, Post,
842     None, None, Pre, Pre,
843     Pre, None, Split, Split,
844     Split, Halant, None, None,
845 
846     None, None, None, None,
847     None, None, None, Post,
848     None, None, None, None,
849     None, None, None, None,
850 
851     None, None, None, None,
852     None, None, None, None,
853     None, None, None, None,
854     None, None, None, None,
855 
856     None, None, None, None,
857     None, None, None, None,
858     None, None, None, None,
859     None, None, None, None,
860 
861     // Sinhala
862     None, None, Post, Post,
863     None, None, None, None,
864     None, None, None, None,
865     None, None, None, None,
866 
867     None, None, None, None,
868     None, None, None, None,
869     None, None, None, None,
870     None, None, None, None,
871 
872     None, None, None, None,
873     None, None, None, None,
874     None, None, None, None,
875     None, None, None, None,
876 
877     None, None, None, None,
878     None, None, None, None,
879     None, None, None, None,
880     None, None, None, None,
881 
882     None, None, None, None,
883     None, None, None, None,
884     None, None, None, None,
885     None, None, None, Post,
886 
887     Post, Post, Above, Above,
888     Below, None, Below, None,
889     Post, Pre, Split, Pre,
890     Split, Split, Split, Post,
891 
892     None, None, None, None,
893     None, None, None, None,
894     None, None, None, None,
895     None, None, None, None,
896 
897     None, None, Post, Post,
898     None, None, None, None,
899     None, None, None, None,
900     None, None, None, None
901 };
902 
form(unsigned short uc)903 static inline Form form(unsigned short uc) {
904     if (uc < 0x900 || uc > 0xdff) {
905         if (uc == 0x25cc)
906             return Consonant;
907         if (uc == 0x200c || uc == 0x200d)
908             return Control;
909         return Other;
910     }
911     return (Form)indicForms[uc-0x900];
912 }
913 
indic_position(unsigned short uc)914 static inline Position indic_position(unsigned short uc) {
915     if (uc < 0x900 || uc > 0xdff)
916         return None;
917     return (Position) indicPosition[uc-0x900];
918 }
919 
920 
// Bit flags that can be OR-ed together; the scriptProperties table stores one
// such combination per script.
enum IndicScriptProperties {
    HasReph  = 1 << 0,   // 0x01
    HasSplit = 1 << 1    // 0x02
};
925 
926 const hb_uint8 scriptProperties[10] = {
927     // Devanagari,
928     HasReph,
929     // Bengali,
930     HasReph|HasSplit,
931     // Gurmukhi,
932     0,
933     // Gujarati,
934     HasReph,
935     // Oriya,
936     HasReph|HasSplit,
937     // Tamil,
938     HasSplit,
939     // Telugu,
940     HasSplit,
941     // Kannada,
942     HasSplit|HasReph,
943     // Malayalam,
944     HasSplit,
945     // Sinhala,
946     HasSplit
947 };
948 
949 struct IndicOrdering {
950     Form form;
951     Position position;
952 };
953 
954 static const IndicOrdering devanagari_order [] = {
955     { Consonant, Below },
956     { Matra, Below },
957     { VowelMark, Below },
958     { StressMark, Below },
959     { Matra, Above },
960     { Matra, Post },
961     { Consonant, Reph },
962     { VowelMark, Above },
963     { StressMark, Above },
964     { VowelMark, Post },
965     { (Form)0, None }
966 };
967 
968 static const IndicOrdering bengali_order [] = {
969     { Consonant, Below },
970     { Matra, Below },
971     { Matra, Above },
972     { Consonant, Reph },
973     { VowelMark, Above },
974     { Consonant, Post },
975     { Matra, Post },
976     { VowelMark, Post },
977     { (Form)0, None }
978 };
979 
980 static const IndicOrdering gurmukhi_order [] = {
981     { Consonant, Below },
982     { Matra, Below },
983     { Matra, Above },
984     { Consonant, Post },
985     { Matra, Post },
986     { VowelMark, Above },
987     { (Form)0, None }
988 };
989 
990 static const IndicOrdering tamil_order [] = {
991     { Matra, Above },
992     { Matra, Post },
993     { VowelMark, Post },
994     { (Form)0, None }
995 };
996 
997 static const IndicOrdering telugu_order [] = {
998     { Matra, Above },
999     { Matra, Below },
1000     { Matra, Post },
1001     { Consonant, Below },
1002     { Consonant, Post },
1003     { VowelMark, Post },
1004     { (Form)0, None }
1005 };
1006 
1007 static const IndicOrdering kannada_order [] = {
1008     { Matra, Above },
1009     { Matra, Post },
1010     { Consonant, Below },
1011     { Consonant, Post },
1012     { LengthMark, Post },
1013     { Consonant, Reph },
1014     { VowelMark, Post },
1015     { (Form)0, None }
1016 };
1017 
1018 static const IndicOrdering malayalam_order [] = {
1019     { Consonant, Below },
1020     { Matra, Below },
1021     { Consonant, Reph },
1022     { Consonant, Post },
1023     { Matra, Post },
1024     { VowelMark, Post },
1025     { (Form)0, None }
1026 };
1027 
1028 static const IndicOrdering sinhala_order [] = {
1029     { Matra, Below },
1030     { Matra, Above },
1031     { Matra, Post },
1032     { VowelMark, Post },
1033     { (Form)0, None }
1034 };
1035 
1036 static const IndicOrdering * const indic_order[] = {
1037     devanagari_order, // Devanagari
1038     bengali_order, // Bengali
1039     gurmukhi_order, // Gurmukhi
1040     devanagari_order, // Gujarati
1041     bengali_order, // Oriya
1042     tamil_order, // Tamil
1043     telugu_order, // Telugu
1044     kannada_order, // Kannada
1045     malayalam_order, // Malayalam
1046     sinhala_order // Sinhala
1047 };
1048 
1049 
1050 
// Vowel matras that have to be split into two or three parts.
//
// Each row is { matra, part1, part2, part3 }; part3 is 0x0 for two-part
// matras. Rows MUST stay sorted by ascending matra code point: splitMatra()
// scans linearly and stops at the first row whose key is not smaller than
// the code point it is looking for. The single trailing 0xffff stops the
// scan; it is not a full row.
static const unsigned short split_matras[]  = {
    //  matra, split1, split2, split3

    // bengali
    0x9cb, 0x9c7, 0x9be, 0x0,
    0x9cc, 0x9c7, 0x9d7, 0x0,
    // oriya
    0xb48, 0xb47, 0xb56, 0x0,
    0xb4b, 0xb47, 0xb3e, 0x0,
    0xb4c, 0xb47, 0xb57, 0x0,
    // tamil
    0xbca, 0xbc6, 0xbbe, 0x0,
    0xbcb, 0xbc7, 0xbbe, 0x0,
    0xbcc, 0xbc6, 0xbd7, 0x0,
    // telugu
    0xc48, 0xc46, 0xc56, 0x0,
    // kannada
    0xcc0, 0xcbf, 0xcd5, 0x0,
    0xcc7, 0xcc6, 0xcd5, 0x0,
    0xcc8, 0xcc6, 0xcd6, 0x0,
    0xcca, 0xcc6, 0xcc2, 0x0,
    0xccb, 0xcc6, 0xcc2, 0xcd5,
    // malayalam
    0xd4a, 0xd46, 0xd3e, 0x0,
    0xd4b, 0xd47, 0xd3e, 0x0,
    0xd4c, 0xd46, 0xd57, 0x0,
    // sinhala
    0xdda, 0xdd9, 0xdca, 0x0,
    0xddc, 0xdd9, 0xdcf, 0x0,
    0xddd, 0xdd9, 0xdcf, 0xdca,
    0xdde, 0xdd9, 0xddf, 0x0,
    0xffff
};

// Replaces the matra at reordered[matra] by its two or three parts, in place.
//
//   reordered  syllable buffer; needs room for up to two more characters
//              beyond len
//   matra      index of the matra to split
//   len        number of valid characters in reordered; grows by one for a
//              two-part matra and by two for a three-part matra
//
// The matra is expected to be listed in split_matras. A miss trips the
// assert in debug builds; when asserts are compiled out the buffer and len
// are left untouched instead of inserting unrelated table data or reading
// past the end of the table.
static inline void splitMatra(unsigned short *reordered, int matra, int &len)
{
    const unsigned short matra_uc = reordered[matra];
    //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]);

    const unsigned short *split = split_matras;
    while (split[0] < matra_uc)
        split += 4;

    // 0xffff is the terminator, not a row: it has no parts behind it.
    const bool found = (*split == matra_uc && matra_uc != 0xffff);
    assert(found);
    if (!found)
        return;
    ++split;

    const int added_chars = split[2] == 0x0 ? 1 : 2;

    // Open a gap of added_chars behind the matra; the matra itself is moved
    // along and then overwritten by the parts below.
    memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short));
    reordered[matra] = split[0];
    reordered[matra+1] = split[1];
    if (added_chars == 2)
        reordered[matra+2] = split[2];
    len += added_chars;
}
1107 
1108 #ifndef NO_OPENTYPE
1109 static const HB_OpenTypeFeature indic_features[] = {
1110     { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty },
1111     { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
1112     { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
1113     { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty },
1114     { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty },
1115     { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty },
1116     { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty },
1117     { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty },
1118     { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty },
1119     { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty },
1120     { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty },
1121     { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty },
1122     { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
1123     { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
1124     { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty },
1125     { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty },
1126     { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty },
1127     { 0, 0 }
1128 };
1129 #endif
1130 
1131 // #define INDIC_DEBUG
1132 #ifdef INDIC_DEBUG
1133 #define IDEBUG hb_debug
1134 #include <stdarg.h>
1135 
// Debug sink behind IDEBUG: formats msg and its arguments printf-style to
// stderr and terminates the line with a newline.
static void hb_debug(const char *msg, ...)
{
    va_list args;

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputc('\n', stderr);
}
1144 
1145 #else
1146 #define IDEBUG if(0) printf
1147 #endif
1148 
1149 #if 0 //def INDIC_DEBUG
1150 static QString propertiesToString(int properties)
1151 {
1152     QString res;
1153     properties = ~properties;
1154     if (properties & LocaProperty)
1155         res += "Loca ";
1156     if (properties & CcmpProperty)
1157         res += "Ccmp ";
1158     if (properties & InitProperty)
1159         res += "Init ";
1160     if (properties & NuktaProperty)
1161         res += "Nukta ";
1162     if (properties & AkhantProperty)
1163         res += "Akhant ";
1164     if (properties & RephProperty)
1165         res += "Reph ";
1166     if (properties & PreFormProperty)
1167         res += "PreForm ";
1168     if (properties & BelowFormProperty)
1169         res += "BelowForm ";
1170     if (properties & AboveFormProperty)
1171         res += "AboveForm ";
1172     if (properties & HalfFormProperty)
1173         res += "HalfForm ";
1174     if (properties & PostFormProperty)
1175         res += "PostForm ";
1176     if (properties & ConjunctFormProperty)
1177         res += "PostForm ";
1178     if (properties & VattuProperty)
1179         res += "Vattu ";
1180     if (properties & PreSubstProperty)
1181         res += "PreSubst ";
1182     if (properties & BelowSubstProperty)
1183         res += "BelowSubst ";
1184     if (properties & AboveSubstProperty)
1185         res += "AboveSubst ";
1186     if (properties & PostSubstProperty)
1187         res += "PostSubst ";
1188     if (properties & HalantProperty)
1189         res += "Halant ";
1190     if (properties & CligProperty)
1191         res += "Clig ";
1192     if (properties & IndicCaltProperty)
1193         res += "Calt ";
1194     return res;
1195 }
1196 #endif
1197 
indic_shape_syllable(HB_Bool openType,HB_ShaperItem * item,bool invalid)1198 static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid)
1199 {
1200     HB_Script script = item->item.script;
1201     assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala);
1202     const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari);
1203     const unsigned short ra = script_base + 0x30;
1204     const unsigned short halant = script_base + 0x4d;
1205     const unsigned short nukta = script_base + 0x3c;
1206     bool control = false;
1207 
1208     int len = (int)item->item.length;
1209     IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid);
1210 
1211     if ((int)item->num_glyphs < len+4) {
1212         item->num_glyphs = len+4;
1213         return false;
1214     }
1215 
1216     HB_STACKARRAY(HB_UChar16, reordered, len + 4);
1217     HB_STACKARRAY(hb_uint8, position, len + 4);
1218 
1219     unsigned char properties = scriptProperties[script-HB_Script_Devanagari];
1220 
1221     if (invalid) {
1222         *reordered = 0x25cc;
1223         memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16));
1224         len++;
1225     } else {
1226         memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16));
1227     }
1228     if (reordered[len-1] == 0x200c) // zero width non joiner
1229         len--;
1230 
1231     int i;
1232     int base = 0;
1233     int reph = -1;
1234 
1235 #ifdef INDIC_DEBUG
1236     IDEBUG("original:");
1237     for (i = 0; i < len; i++) {
1238         IDEBUG("    %d: %4x", i, reordered[i]);
1239     }
1240 #endif
1241 
1242     if (len != 1) {
1243         HB_UChar16 *uc = reordered;
1244         bool beginsWithRa = false;
1245 
1246         // Rule 1: find base consonant
1247         //
1248         // The shaping engine finds the base consonant of the
1249         // syllable, using the following algorithm: starting from the
1250         // end of the syllable, move backwards until a consonant is
1251         // found that does not have a below-base or post-base form
1252         // (post-base forms have to follow below-base forms), or
1253         // arrive at the first consonant. The consonant stopped at
1254         // will be the base.
1255         //
1256         //  * If the syllable starts with Ra + H (in a script that has
1257         //    'Reph'), Ra is excluded from candidates for base
1258         //    consonants.
1259         //
1260         // * In Kannada and Telugu, the base consonant cannot be
1261         //   farther than 3 consonants from the end of the syllable.
1262         // #### replace the HasReph property by testing if the feature exists in the font!
1263         if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) {
1264             if ((properties & HasReph) && (len > 2) &&
1265                 (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant)
1266                 beginsWithRa = true;
1267 
1268             if (beginsWithRa && form(*(uc+2)) == Control)
1269                 beginsWithRa = false;
1270 
1271             base = (beginsWithRa ? 2 : 0);
1272             IDEBUG("    length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base);
1273 
1274             int lastConsonant = 0;
1275             int matra = -1;
1276             // we remember:
1277             // * the last consonant since we need it for rule 2
1278             // * the matras position for rule 3 and 4
1279 
1280             // figure out possible base glyphs
1281             memset(position, 0, len);
1282             if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
1283                 bool vattu = false;
1284                 for (i = base; i < len; ++i) {
1285                     position[i] = form(uc[i]);
1286                     if (position[i] == Consonant) {
1287                         lastConsonant = i;
1288                         vattu = (!vattu && uc[i] == ra);
1289                         if (vattu) {
1290                             IDEBUG("excluding vattu glyph at %d from base candidates", i);
1291                             position[i] = Vattu;
1292                         }
1293                     } else if (position[i] == Matra) {
1294                         matra = i;
1295                     }
1296                 }
1297             } else {
1298                 for (i = base; i < len; ++i) {
1299                     position[i] = form(uc[i]);
1300                     if (position[i] == Consonant)
1301                         lastConsonant = i;
1302                     else if (matra < 0 && position[i] == Matra)
1303                         matra = i;
1304                 }
1305             }
1306             int skipped = 0;
1307             Position pos = Post;
1308             for (i = len-1; i >= base; i--) {
1309                 if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada))
1310                     continue;
1311 
1312                 if (i < len-1 && position[i] == Control && position[i+1] == Consonant) {
1313                     base = i+1;
1314                     break;
1315                 }
1316 
1317                 Position charPosition = indic_position(uc[i]);
1318                 if (pos == Post && charPosition == Post) {
1319                     pos = Post;
1320                 } else if ((pos == Post || pos == Below) && charPosition == Below) {
1321                     if (script == HB_Script_Devanagari || script == HB_Script_Gujarati)
1322                         base = i;
1323                     pos = Below;
1324                 } else {
1325                     base = i;
1326                     break;
1327                 }
1328                 if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) {
1329                     base = i;
1330                     break;
1331                 }
1332                 ++skipped;
1333             }
1334 
1335             IDEBUG("    base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant);
1336 
1337             // Rule 2:
1338             //
1339             // If the base consonant is not the last one, Uniscribe
1340             // moves the halant from the base consonant to the last
1341             // one.
1342             if (lastConsonant > base) {
1343                 int halantPos = 0;
1344                 if (uc[base+1] == halant)
1345                     halantPos = base + 1;
1346                 else if (uc[base+1] == nukta && uc[base+2] == halant)
1347                     halantPos = base + 2;
1348                 if (halantPos > 0) {
1349                     IDEBUG("    moving halant from %d to %d!", base+1, lastConsonant);
1350                     for (i = halantPos; i < lastConsonant; i++)
1351                         uc[i] = uc[i+1];
1352                     uc[lastConsonant] = halant;
1353                 }
1354             }
1355 
1356             // Rule 3:
1357             //
1358             // If the syllable starts with Ra + H, Uniscribe moves
1359             // this combination so that it follows either:
1360 
1361             // * the post-base 'matra' (if any) or the base consonant
1362             //   (in scripts that show similarity to Devanagari, i.e.,
1363             //   Devanagari, Gujarati, Bengali)
1364             // * the base consonant (other scripts)
1365             // * the end of the syllable (Kannada)
1366 
1367             Position matra_position = None;
1368             if (matra > 0)
1369                 matra_position = indic_position(uc[matra]);
1370             IDEBUG("    matra at %d with form %d, base=%d", matra, matra_position, base);
1371 
1372             if (beginsWithRa && base != 0) {
1373                 int toPos = base+1;
1374                 if (toPos < len && uc[toPos] == nukta)
1375                     toPos++;
1376                 if (toPos < len && uc[toPos] == halant)
1377                     toPos++;
1378                 if (toPos < len && uc[toPos] == 0x200d)
1379                     toPos++;
1380                 if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant)
1381                     toPos += 2;
1382                 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) {
1383                     if (matra_position == Post || matra_position == Split) {
1384                         toPos = matra+1;
1385                         matra -= 2;
1386                     }
1387                 } else if (script == HB_Script_Kannada) {
1388                     toPos = len;
1389                     matra -= 2;
1390                 }
1391 
1392                 IDEBUG("moving leading ra+halant to position %d", toPos);
1393                 for (i = 2; i < toPos; i++)
1394                     uc[i-2] = uc[i];
1395                 uc[toPos-2] = ra;
1396                 uc[toPos-1] = halant;
1397                 base -= 2;
1398                 if (properties & HasReph)
1399                     reph = toPos-2;
1400             }
1401 
1402             // Rule 4:
1403 
1404             // Uniscribe splits two- or three-part matras into their
1405             // parts. This splitting is a character-to-character
1406             // operation).
1407             //
1408             //      Uniscribe describes some moving operations for these
1409             //      matras here. For shaping however all pre matras need
1410             //      to be at the beginning of the syllable, so we just move
1411             //      them there now.
1412             if (matra_position == Split) {
1413                 splitMatra(uc, matra, len);
1414                 // Handle three-part matras (0xccb in Kannada)
1415                 matra_position = indic_position(uc[matra]);
1416             }
1417 
1418             if (matra_position == Pre) {
1419                 unsigned short m = uc[matra];
1420                 while (matra--)
1421                     uc[matra+1] = uc[matra];
1422                 uc[0] = m;
1423                 base++;
1424             }
1425         }
1426 
1427         // Rule 5:
1428         //
1429         // Uniscribe classifies consonants and 'matra' parts as
1430         // pre-base, above-base (Reph), below-base or post-base. This
1431         // classification exists on the character code level and is
1432         // language-dependent, not font-dependent.
1433         for (i = 0; i < base; ++i)
1434             position[i] = Pre;
1435         position[base] = Base;
1436         for (i = base+1; i < len; ++i) {
1437             position[i] = indic_position(uc[i]);
1438             // #### replace by adjusting table
1439             if (uc[i] == nukta || uc[i] == halant)
1440                 position[i] = Inherit;
1441         }
1442         if (reph > 0) {
1443             // recalculate reph, it might have changed.
1444             for (i = base+1; i < len; ++i)
1445                 if (uc[i] == ra)
1446                     reph = i;
1447             position[reph] = Reph;
1448             position[reph+1] = Inherit;
1449         }
1450 
1451         // all reordering happens now to the chars after the base
1452         int fixed = base+1;
1453         if (fixed < len && uc[fixed] == nukta)
1454             fixed++;
1455         if (fixed < len && uc[fixed] == halant)
1456             fixed++;
1457         if (fixed < len && uc[fixed] == 0x200d)
1458             fixed++;
1459 
1460 #ifdef INDIC_DEBUG
1461         for (i = fixed; i < len; ++i)
1462             IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]);
1463 #endif
1464         // we continuosly position the matras and vowel marks and increase the fixed
1465         // until we reached the end.
1466         const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari];
1467 
1468         IDEBUG("    reordering pass:");
1469         IDEBUG("        base=%d fixed=%d", base, fixed);
1470         int toMove = 0;
1471         while (finalOrder[toMove].form && fixed < len-1) {
1472             IDEBUG("        fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position);
1473             for (i = fixed; i < len; i++) {
1474 //                IDEBUG() << "           i=" << i << "uc=" << Qt::hex << uc[i] << "form=" << form(uc[i])
1475 //                         << "position=" << position[i];
1476                 if (form(uc[i]) == finalOrder[toMove].form &&
1477                      position[i] == finalOrder[toMove].position) {
1478                     // need to move this glyph
1479                     int to = fixed;
1480                     if (i < len-1 && position[i+1] == Inherit) {
1481                         IDEBUG("         moving two chars from %d to %d", i, to);
1482                         unsigned short ch = uc[i];
1483                         unsigned short ch2 = uc[i+1];
1484                         unsigned char pos = position[i];
1485                         for (int j = i+1; j > to+1; j--) {
1486                             uc[j] = uc[j-2];
1487                             position[j] = position[j-2];
1488                         }
1489                         uc[to] = ch;
1490                         uc[to+1] = ch2;
1491                         position[to] = pos;
1492                         position[to+1] = pos;
1493                         fixed += 2;
1494                     } else {
1495                         IDEBUG("         moving one char from %d to %d", i, to);
1496                         unsigned short ch = uc[i];
1497                         unsigned char pos = position[i];
1498                         for (int j = i; j > to; j--) {
1499                             uc[j] = uc[j-1];
1500                             position[j] = position[j-1];
1501                         }
1502                         uc[to] = ch;
1503                         position[to] = pos;
1504                         fixed++;
1505                     }
1506                 }
1507             }
1508             toMove++;
1509         }
1510 
1511     }
1512 
1513     if (reph > 0) {
1514         // recalculate reph, it might have changed.
1515         for (i = base+1; i < len; ++i)
1516             if (reordered[i] == ra)
1517                 reph = i;
1518     }
1519 
1520 #ifndef NO_OPENTYPE
1521     const int availableGlyphs = item->num_glyphs;
1522 #endif
1523     if (!item->font->klass->convertStringToGlyphIndices(item->font,
1524                                                         reordered, len,
1525                                                         item->glyphs, &item->num_glyphs,
1526                                                         item->item.bidiLevel % 2))
1527         goto error;
1528 
1529 
1530     IDEBUG("  base=%d, reph=%d", base, reph);
1531     IDEBUG("reordered:");
1532     for (i = 0; i < len; i++) {
1533         item->attributes[i].mark = false;
1534         item->attributes[i].clusterStart = false;
1535         item->attributes[i].justification = 0;
1536         item->attributes[i].zeroWidth = false;
1537         IDEBUG("    %d: %4x", i, reordered[i]);
1538     }
1539 
1540     // now we have the syllable in the right order, and can start running it through open type.
1541 
1542     for (i = 0; i < len; ++i)
1543         control |= (form(reordered[i]) == Control);
1544 
1545 #ifndef NO_OPENTYPE
1546     if (openType) {
1547 
1548         // we need to keep track of where the base glyph is for some
1549         // scripts and use the cluster feature for this.  This
1550         // also means we have to correct the logCluster output from
1551         // the open type engine manually afterwards.  for indic this
1552         // is rather simple, as all chars just point to the first
1553         // glyph in the syllable.
1554         HB_STACKARRAY(unsigned short, clusters, len);
1555         HB_STACKARRAY(unsigned int, properties, len);
1556 
1557         for (i = 0; i < len; ++i)
1558             clusters[i] = i;
1559 
1560         // features we should always apply
1561         for (i = 0; i < len; ++i)
1562             properties[i] = ~(LocaProperty
1563                               | CcmpProperty
1564                               | NuktaProperty
1565                               | VattuProperty
1566                               | ConjunctFormProperty
1567                               | PreSubstProperty
1568                               | BelowSubstProperty
1569                               | AboveSubstProperty
1570                               | PostSubstProperty
1571                               | HalantProperty
1572                               | IndicCaltProperty
1573                               | PositioningProperties);
1574 
1575         // Loca always applies
1576         // Ccmp always applies
1577         // Init
1578         if (item->item.pos == 0
1579             || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1])))
1580             properties[0] &= ~InitProperty;
1581 
1582         // Nukta always applies
1583         // Akhant
1584         for (i = 0; i <= base; ++i)
1585             properties[i] &= ~AkhantProperty;
1586         // Reph
1587         if (reph >= 0) {
1588             properties[reph] &= ~RephProperty;
1589             properties[reph+1] &= ~RephProperty;
1590         }
1591         // BelowForm
1592         for (i = base+1; i < len; ++i)
1593             properties[i] &= ~BelowFormProperty;
1594 
1595         if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
1596             // vattu glyphs need this aswell
1597             bool vattu = false;
1598             for (i = base-2; i > 1; --i) {
1599                 if (form(reordered[i]) == Consonant) {
1600                     vattu = (!vattu && reordered[i] == ra);
1601                     if (vattu) {
1602                         IDEBUG("forming vattu ligature at %d", i);
1603                         properties[i] &= ~BelowFormProperty;
1604                         properties[i+1] &= ~BelowFormProperty;
1605                     }
1606                 }
1607             }
1608         }
1609         // HalfFormProperty
1610         for (i = 0; i < base; ++i)
1611             properties[i] &= ~HalfFormProperty;
1612         if (control) {
1613             for (i = 2; i < len; ++i) {
1614                 if (reordered[i] == 0x200d /* ZWJ */) {
1615                     properties[i-1] &= ~HalfFormProperty;
1616                     properties[i-2] &= ~HalfFormProperty;
1617                 } else if (reordered[i] == 0x200c /* ZWNJ */) {
1618                     properties[i-1] &= ~HalfFormProperty;
1619                     properties[i-2] &= ~HalfFormProperty;
1620                 }
1621             }
1622         }
1623         // PostFormProperty
1624         for (i = base+1; i < len; ++i)
1625             properties[i] &= ~PostFormProperty;
1626         // vattu always applies
1627         // pres always applies
1628         // blws always applies
1629         // abvs always applies
1630         // psts always applies
1631         // halant always applies
1632         // calt always applies
1633 
1634 #ifdef INDIC_DEBUG
1635 //        {
1636 //            IDEBUG("OT properties:");
1637 //            for (int i = 0; i < len; ++i)
1638 //                qDebug("    i: %s", ::propertiesToString(properties[i]).toLatin1().data());
1639 //        }
1640 #endif
1641 
1642         // initialize
1643         item->log_clusters = clusters;
1644         HB_OpenTypeShape(item, properties);
1645 
1646         int newLen = item->face->buffer->in_length;
1647         HB_GlyphItem otl_glyphs = item->face->buffer->in_string;
1648 
1649         // move the left matra back to its correct position in malayalam and tamil
1650         if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) {
1651 //             qDebug("reordering matra, len=%d", newLen);
1652             // need to find the base in the shaped string and move the matra there
1653             int basePos = 0;
1654             while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base)
1655                 basePos++;
1656             --basePos;
1657             if (basePos < newLen && basePos > 1) {
1658 //                 qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen);
1659                 HB_GlyphItemRec m = otl_glyphs[0];
1660                 --basePos;
1661                 for (i = 0; i < basePos; ++i)
1662                     otl_glyphs[i] = otl_glyphs[i+1];
1663                 otl_glyphs[basePos] = m;
1664             }
1665         }
1666 
1667         HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false);
1668 
1669         HB_FREE_STACKARRAY(clusters);
1670         HB_FREE_STACKARRAY(properties);
1671 
1672         if (!positioned)
1673             goto error;
1674 
1675         if (control) {
1676             IDEBUG("found a control char in the syllable");
1677             hb_uint32 i = 0, j = 0;
1678             while (i < item->num_glyphs) {
1679                 if (form(reordered[otl_glyphs[i].cluster]) == Control) {
1680                     ++i;
1681                     if (i >= item->num_glyphs)
1682                         break;
1683                 }
1684                 item->glyphs[j] = item->glyphs[i];
1685                 item->attributes[j] = item->attributes[i];
1686                 item->offsets[j] = item->offsets[i];
1687                 item->advances[j] = item->advances[i];
1688                 ++i;
1689                 ++j;
1690             }
1691             item->num_glyphs = j;
1692         }
1693 
1694     } else {
1695         HB_HeuristicPosition(item);
1696     }
1697 #endif // NO_OPENTYPE
1698     item->attributes[0].clusterStart = true;
1699 
1700     HB_FREE_STACKARRAY(reordered);
1701     HB_FREE_STACKARRAY(position);
1702 
1703     IDEBUG("<<<<<<");
1704     return true;
1705 
1706 error:
1707     HB_FREE_STACKARRAY(reordered);
1708     HB_FREE_STACKARRAY(position);
1709     return false;
1710 }
1711 
1712 /* syllables are of the form:
1713 
1714    (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
1715    (Consonant Nukta? Halant)* Consonant Halant
1716    IndependentVowel VowelMark? StressMark?
1717 
1718    We return syllable boundaries on invalid combinations aswell
1719 */
indic_nextSyllableBoundary(HB_Script script,const HB_UChar16 * s,int start,int end,bool * invalid)1720 static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid)
1721 {
1722     *invalid = false;
1723     IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
1724     const HB_UChar16 *uc = s+start;
1725 
1726     int pos = 0;
1727     Form state = form(uc[pos]);
1728     IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
1729     pos++;
1730 
1731     if (state != Consonant && state != IndependentVowel) {
1732         if (state != Other)
1733             *invalid = true;
1734         goto finish;
1735     }
1736 
1737     while (pos < end - start) {
1738         Form newState = form(uc[pos]);
1739         IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
1740         switch(newState) {
1741         case Control:
1742             newState = state;
1743  	    if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
1744   		break;
1745             // the control character should be the last char in the item
1746  	    if (state == Consonant && script == HB_Script_Bengali && uc[pos-1] == 0x09B0 && uc[pos] == 0x200d /* ZWJ */)
1747   		break;
1748  	    if (state == Consonant && script == HB_Script_Kannada && uc[pos-1] == 0x0CB0 && uc[pos] == 0x200d /* ZWJ */)
1749   		break;
1750             // Bengali and Kannada has a special exception for rendering yaphala with ra (to avoid reph) see http://www.unicode.org/faq/indic.html#15
1751             ++pos;
1752             goto finish;
1753         case Consonant:
1754 	    if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
1755                 break;
1756             goto finish;
1757         case Halant:
1758             if (state == Nukta || state == Consonant)
1759                 break;
1760             // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
1761             if (script == HB_Script_Bengali && pos == 1 &&
1762                  (uc[0] == 0x0985 || uc[0] == 0x098f))
1763                 break;
1764             // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
1765             if (script == HB_Script_Sinhala && state == Matra) {
1766                 ++pos;
1767                 continue;
1768             }
1769             if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
1770                 ++pos;
1771                 continue;
1772             }
1773             goto finish;
1774         case Nukta:
1775             if (state == Consonant)
1776                 break;
1777             goto finish;
1778         case StressMark:
1779             if (state == VowelMark)
1780                 break;
1781             // fall through
1782         case VowelMark:
1783             if (state == Matra || state == LengthMark || state == IndependentVowel)
1784                 break;
1785             // fall through
1786         case Matra:
1787             if (state == Consonant || state == Nukta)
1788                 break;
1789             if (state == Matra) {
1790                 // ### needs proper testing for correct two/three part matras
1791                 break;
1792             }
1793             // ### not sure if this is correct. If it is, does it apply only to Bengali or should
1794             // it work for all Indic languages?
1795             // the combination Independent_A + Vowel Sign AA is allowed.
1796             if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
1797                 break;
1798             if (script == HB_Script_Tamil && state == Matra) {
1799                 if (uc[pos-1] == 0x0bc6 &&
1800                      (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
1801                     break;
1802                 if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
1803                     break;
1804             }
1805             goto finish;
1806 
1807         case LengthMark:
1808             if (state == Matra) {
1809                 // ### needs proper testing for correct two/three part matras
1810                 break;
1811             }
1812         case IndependentVowel:
1813         case Invalid:
1814         case Other:
1815             goto finish;
1816         }
1817         state = newState;
1818         pos++;
1819     }
1820  finish:
1821     return pos+start;
1822 }
1823 
HB_IndicShape(HB_ShaperItem * item)1824 HB_Bool HB_IndicShape(HB_ShaperItem *item)
1825 {
1826     assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala);
1827 
1828     HB_Bool openType = false;
1829 #ifndef NO_OPENTYPE
1830     openType = HB_SelectScript(item, indic_features);
1831 #endif
1832     unsigned short *logClusters = item->log_clusters;
1833 
1834     HB_ShaperItem syllable = *item;
1835     int first_glyph = 0;
1836 
1837     int sstart = item->item.pos;
1838     int end = sstart + item->item.length;
1839     IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length);
1840     while (sstart < end) {
1841         bool invalid;
1842         int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid);
1843         IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
1844                invalid ? "true" : "false");
1845         syllable.item.pos = sstart;
1846         syllable.item.length = send-sstart;
1847         syllable.glyphs = item->glyphs + first_glyph;
1848         syllable.attributes = item->attributes + first_glyph;
1849         syllable.offsets = item->offsets + first_glyph;
1850         syllable.advances = item->advances + first_glyph;
1851         syllable.num_glyphs = item->num_glyphs - first_glyph;
1852         if (!indic_shape_syllable(openType, &syllable, invalid)) {
1853             IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
1854             item->num_glyphs += syllable.num_glyphs;
1855             return false;
1856         }
1857         // fix logcluster array
1858         IDEBUG("syllable:");
1859         hb_uint32 g;
1860         for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g)
1861             IDEBUG("        %d -> glyph %x", g, item->glyphs[g]);
1862         IDEBUG("    logclusters:");
1863         int i;
1864         for (i = sstart; i < send; ++i) {
1865             IDEBUG("        %d -> glyph %d", i, first_glyph);
1866             logClusters[i-item->item.pos] = first_glyph;
1867         }
1868         sstart = send;
1869         first_glyph += syllable.num_glyphs;
1870     }
1871     item->num_glyphs = first_glyph;
1872     return true;
1873 }
1874 
/*
 * Sets the graphemeBoundary attribute for the characters text[from .. from+len).
 *
 * The range is walked syllable by syllable using indic_nextSyllableBoundary().
 * The first character of each syllable is flagged as a grapheme boundary and
 * every other character of that syllable has the flag cleared.
 *
 * script     - script passed through to the syllable boundary finder
 * text       - text buffer; `from` is an offset into it
 * from       - index of the first character to process
 * len        - number of characters to process
 * attributes - attribute array indexed like `text` (entries from `from` on are written)
 */
void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
{
    int end = from + len;
    hb_uint32 i = 0;

    // From here on, attributes[] is indexed relative to `from`.
    attributes += from;

    while (i < len) {
        // Required out-parameter of the boundary finder; validity is not used here.
        bool invalid;
        hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;

        // Never let a syllable extend past the requested range.
        if (boundary > len)
            boundary = len;

        // The syllable's first character starts a grapheme ...
        attributes[i].graphemeBoundary = true;
        i++;

        // ... and the rest of the syllable belongs to it.
        for (; i < boundary; ++i)
            attributes[i].graphemeBoundary = false;

        assert(i == boundary);
    }
}
1898 
1899 
1900