1 /*
2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
3 *
4 * This is part of HarfBuzz, an OpenType Layout engine library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 */
24
25 #include "harfbuzz-shaper.h"
26 #include "harfbuzz-shaper-private.h"
27
28 #include <assert.h>
29
30 static const HB_UChar16 ReplacementCharacter = 0xfffd;
31
/* Per-character result of the joining analysis done by getArabicProperties(). */
typedef struct {
    unsigned char shape;         /* joining form, stored as an ArabicShape value */
    unsigned char justification; /* justification class (HB_NoJustification, HB_Arabic_*) */
} HB_ArabicProperties;
36
/* Joining forms; the values double as row indices into joining_table. */
typedef enum {
    XIsolated = 0,
    XFinal    = 1,
    XInitial  = 2,
    XMedial   = 3,
    XCausing  = 4  /* intermediate state */
} ArabicShape;
45
46 /*
47 // these groups correspond to the groups defined in the Unicode standard.
48 // Some of these groups are equal with regards to both joining and line breaking behaviour,
49 // and thus have the same enum value
50 //
51 // I'm not sure the mapping of syriac to arabic enums is correct with regards to justification, but as
52 // I couldn't find any better document I'll hope for the best.
53 */
/*
 * Joining/justification groups. Distinct groups carry explicit values because
 * they index joining_for_group; every other name is an alias of one of them.
 */
typedef enum {
    /* NonJoining */
    ArabicNone      = 0,
    ArabicSpace     = 1,
    /* Transparent */
    Transparent     = 2,
    /* Causing */
    Center          = 3,
    Kashida         = 4,

    /* Arabic, dual joining */
    Beh             = 5,
    Noon            = 6,
    Yeh             = 7,
    Hah             = 8,
    Seen            = 9,
    Tah             = 10,
    Ain             = 11,
    /* Arabic, right joining */
    Alef            = 12,
    Waw             = 13,
    Dal             = 14,
    Reh             = 15,
    HamzaOnHehGoal  = 16,

    /* Arabic aliases */
    Meem            = Noon,
    Heh             = Noon,
    KnottedHeh      = Noon,
    HehGoal         = Noon,
    SwashKaf        = Noon,
    Sad             = Seen,
    Kaf             = Tah,
    Gaf             = Tah,
    Lam             = Tah,
    Feh             = Ain,
    Qaf             = Ain,
    TehMarbuta      = Dal,
    YehWithTail     = HamzaOnHehGoal,
    YehBarre        = HamzaOnHehGoal,

    /* Syriac, dual joining */
    Beth            = Beh,
    Gamal           = Ain,
    Heth            = Noon,
    Teth            = Hah,
    Yudh            = Noon,
    Kaph            = Noon,
    Lamadh          = Lam,
    Mim             = Noon,
    Nun             = Noon,
    Semakh          = Noon,
    FinalSemakh     = Noon,
    SyriacE         = Ain,
    Pe              = Ain,
    ReversedPe      = Hah,
    Qaph            = Noon,
    Shin            = Noon,
    Fe              = Ain,

    /* Syriac, right joining */
    Alaph           = Alef,
    Dalath          = Dal,
    He              = Dal,
    SyriacWaw       = Waw,
    Zain            = Alef,
    YudhHe          = Waw,
    Sadhe           = HamzaOnHehGoal,
    Taw             = Dal,

    /*
     * An enumerator without an initializer is "previous enumerator + 1", so the
     * end marker is pinned explicitly to one past the highest distinct group.
     */
    Dummy           = HamzaOnHehGoal,
    ArabicGroupsEnd = HamzaOnHehGoal + 1
} ArabicGroup;
128
129 static const unsigned char arabic_group[0x150] = {
130 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
131 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
132 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
133 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
134
135 Transparent, Transparent, Transparent, Transparent,
136 Transparent, Transparent, ArabicNone, ArabicNone,
137 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
138 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
139
140 ArabicNone, ArabicNone, Alef, Alef,
141 Waw, Alef, Yeh, Alef,
142 Beh, TehMarbuta, Beh, Beh,
143 Hah, Hah, Hah, Dal,
144
145 Dal, Reh, Reh, Seen,
146 Seen, Sad, Sad, Tah,
147 Tah, Ain, Ain, ArabicNone,
148 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
149
150 /* 0x640 */
151 Kashida, Feh, Qaf, Kaf,
152 Lam, Meem, Noon, Heh,
153 Waw, Yeh, Yeh, Transparent,
154 Transparent, Transparent, Transparent, Transparent,
155
156 Transparent, Transparent, Transparent, Transparent,
157 Transparent, Transparent, Transparent, Transparent,
158 Transparent, ArabicNone, ArabicNone, ArabicNone,
159 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
160
161 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
162 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
163 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
164 ArabicNone, ArabicNone, Beh, Qaf,
165
166 Transparent, Alef, Alef, Alef,
167 ArabicNone, Alef, Waw, Waw,
168 Yeh, Beh, Beh, Beh,
169 Beh, Beh, Beh, Beh,
170
171 /* 0x680 */
172 Beh, Hah, Hah, Hah,
173 Hah, Hah, Hah, Hah,
174 Dal, Dal, Dal, Dal,
175 Dal, Dal, Dal, Dal,
176
177 Dal, Reh, Reh, Reh,
178 Reh, Reh, Reh, Reh,
179 Reh, Reh, Seen, Seen,
180 Seen, Sad, Sad, Tah,
181
182 Ain, Feh, Feh, Feh,
183 Feh, Feh, Feh, Qaf,
184 Qaf, Gaf, SwashKaf, Gaf,
185 Kaf, Kaf, Kaf, Gaf,
186
187 Gaf, Gaf, Gaf, Gaf,
188 Gaf, Lam, Lam, Lam,
189 Lam, Noon, Noon, Noon,
190 Noon, Noon, KnottedHeh, Hah,
191
192 /* 0x6c0 */
193 TehMarbuta, HehGoal, HamzaOnHehGoal, HamzaOnHehGoal,
194 Waw, Waw, Waw, Waw,
195 Waw, Waw, Waw, Waw,
196 Yeh, YehWithTail, Yeh, Waw,
197
198 Yeh, Yeh, YehBarre, YehBarre,
199 ArabicNone, TehMarbuta, Transparent, Transparent,
200 Transparent, Transparent, Transparent, Transparent,
201 Transparent, ArabicNone, ArabicNone, Transparent,
202
203 Transparent, Transparent, Transparent, Transparent,
204 Transparent, ArabicNone, ArabicNone, Transparent,
205 Transparent, ArabicNone, Transparent, Transparent,
206 Transparent, Transparent, Dal, Reh,
207
208 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
209 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
210 ArabicNone, ArabicNone, Seen, Sad,
211 Ain, ArabicNone, ArabicNone, KnottedHeh,
212
213 /* 0x700 */
214 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
215 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
216 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
217 ArabicNone, ArabicNone, ArabicNone, ArabicNone,
218
219 Alaph, Transparent, Beth, Gamal,
220 Gamal, Dalath, Dalath, He,
221 SyriacWaw, Zain, Heth, Teth,
222 Teth, Yudh, YudhHe, Kaph,
223
224 Lamadh, Mim, Nun, Semakh,
225 FinalSemakh, SyriacE, Pe, ReversedPe,
226 Sadhe, Qaph, Dalath, Shin,
227 Taw, Beth, Gamal, Dalath,
228
229 Transparent, Transparent, Transparent, Transparent,
230 Transparent, Transparent, Transparent, Transparent,
231 Transparent, Transparent, Transparent, Transparent,
232 Transparent, Transparent, Transparent, Transparent,
233
234 Transparent, Transparent, Transparent, Transparent,
235 Transparent, Transparent, Transparent, Transparent,
236 Transparent, Transparent, Transparent, ArabicNone,
237 ArabicNone, Zain, Kaph, Fe,
238 };
239
arabicGroup(unsigned short uc)240 static ArabicGroup arabicGroup(unsigned short uc)
241 {
242 if (uc >= 0x0600 && uc < 0x750)
243 return (ArabicGroup) arabic_group[uc-0x600];
244 else if (uc == 0x200d)
245 return Center;
246 else if (HB_GetUnicodeCharCategory(uc) == HB_Separator_Space)
247 return ArabicSpace;
248 else
249 return ArabicNone;
250 }
251
252
253 /*
254 Arabic shaping obeys a number of rules according to the joining classes (see Unicode book, section on
255 arabic).
256
257 Each unicode char has a joining class (right, dual (left&right), center (joincausing) or transparent).
258 transparent joining is not encoded in HB_UChar16::joining(), but applies to all combining marks and format marks.
259
260 Right join-causing: dual + center
261 Left join-causing: dual + right + center
262
263 Rules are as follows (for a string already in visual order, as we have it here):
264
265 R1 Transparent characters do not affect joining behaviour.
266 R2 A right joining character, that has a right join-causing char on the right will get form XRight
267 (R3 A left joining character, that has a left join-causing char on the left will get form XLeft)
268 Note: the above rule is meaningless, as there are no pure left joining characters defined in Unicode
269 R4 A dual joining character, that has a left join-causing char on the left and a right join-causing char on
270 the right will get form XMedial
271 R5 A dual joining character, that has a right join causing char on the right, and no left join causing char on the left
272 will get form XRight
273 R6 A dual joining character, that has a left join causing char on the left, and no right join causing char on the right
274 will get form XLeft
275 R7 Otherwise the character will get form XIsolated
276
277 Additionally we have to do the minimal ligature support for lam-alef ligatures:
278
279 L1 Transparent characters do not affect ligature behaviour.
280 L2 Any sequence of Alef(XRight) + Lam(XMedial) will form the ligature Alef.Lam(XLeft)
281 L3 Any sequence of Alef(XRight) + Lam(XLeft) will form the ligature Alef.Lam(XIsolated)
282
283 The state table below handles rules R1-R7.
284 */
285
/*
 * Joining classes. JNone..JRight are the column indices of joining_table;
 * JTransparent has no column there.
 */
typedef enum {
    JNone        = 0,
    JCausing     = 1,
    JDual        = 2,
    JRight       = 3,
    JTransparent = 4
} Joining;
293
294 static const Joining joining_for_group[ArabicGroupsEnd] = {
295 /* NonJoining */
296 JNone, /* ArabicNone */
297 JNone, /* ArabicSpace */
298 /* Transparent */
299 JTransparent, /* Transparent */
300 /* Causing */
301 JCausing, /* Center */
302 JCausing, /* Kashida */
303 /* Dual */
304 JDual, /* Beh */
305 JDual, /* Noon */
306 JDual, /* Yeh */
307 JDual, /* Hah */
308 JDual, /* Seen */
309 JDual, /* Tah */
310 JDual, /* Ain */
311 /* Right */
312 JRight, /* Alef */
313 JRight, /* Waw */
314 JRight, /* Dal */
315 JRight, /* Reh */
316 JRight /* HamzaOnHehGoal */
317 };
318
319
320 typedef struct {
321 ArabicShape form1;
322 ArabicShape form2;
323 } JoiningPair;
324
325 static const JoiningPair joining_table[5][4] =
326 /* None, Causing, Dual, Right */
327 {
328 { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XInitial }, { XIsolated, XIsolated } }, /* XIsolated */
329 { { XFinal, XIsolated }, { XFinal, XCausing }, { XFinal, XInitial }, { XFinal, XIsolated } }, /* XFinal */
330 { { XIsolated, XIsolated }, { XInitial, XCausing }, { XInitial, XMedial }, { XInitial, XFinal } }, /* XInitial */
331 { { XFinal, XIsolated }, { XMedial, XCausing }, { XMedial, XMedial }, { XMedial, XFinal } }, /* XMedial */
332 { { XIsolated, XIsolated }, { XIsolated, XCausing }, { XIsolated, XMedial }, { XIsolated, XFinal } }, /* XCausing */
333 };
334
335
336 /*
337 According to http://www.microsoft.com/middleeast/Arabicdev/IE6/KBase.asp
338
339 1. Find the priority of the connecting opportunities in each word
340 2. Add expansion at the highest priority connection opportunity
341 3. If more than one connection opportunity have the same highest value,
342 use the opportunity closest to the end of the word.
343
344 Following is a chart that provides the priority for connection
345 opportunities and where expansion occurs. The character group names
346 are those in table 6.6 of the UNICODE 2.0 book.
347
348
Priority Glyph Condition Kashida Location
350
351 Arabic_Kashida User inserted Kashida The user entered a Kashida in a position. After the user
352 (Shift+j or Shift+[E with hat]) Thus, it is the highest priority to insert an inserted kashida
353 automatic kashida.
354
355 Arabic_Seen Seen, Sad Connecting to the next character. After the character.
356 (Initial or medial form).
357
358 Arabic_HaaDal Teh Marbutah, Haa, Dal Connecting to previous character. Before the final form
359 of these characters.
360
361 Arabic_Alef Alef, Tah, Lam, Connecting to previous character. Before the final form
362 Kaf and Gaf of these characters.
363
364 Arabic_BaRa Reh, Yeh Connected to medial Beh Before preceding medial Baa
365
366 Arabic_Waw Waw, Ain, Qaf, Feh Connecting to previous character. Before the final form of
367 these characters.
368
369 Arabic_Normal Other connecting Connecting to previous character. Before the final form
370 characters of these characters.
371
372
373
374 This seems to imply that we have at most one kashida point per arabic word.
375
376 */
377
/*
 * Computes the joining shape and the justification class of every character
 * in chars[0..len-1] and stores them in properties[0..len-1].
 *
 * chars:      UTF-16 code units to analyse.
 * len:        number of code units; nothing is read or written when len <= 0.
 * properties: output array with at least len elements.
 *
 * Transparent characters get XIsolated and do not influence their neighbours.
 * The shape of every other character is only known once the next
 * non-transparent character (or the end of the input) has been seen, so it is
 * written one step late through lastPos.
 */
static void getArabicProperties(const unsigned short *chars, int len, HB_ArabicProperties *properties)
{
    /* qDebug("arabicSyriacOpenTypeShape: properties:"); */
    int lastPos = 0;
    int lastGroup = ArabicNone;
    int i = 0;
    ArabicGroup group;
    Joining j;
    ArabicShape shape;

    if (len <= 0)
        return;

    group = arabicGroup(chars[0]);
    j = joining_for_group[group];
    /*
     * joining_table has no column for JTransparent. A transparent first
     * character therefore starts in the XIsolated state instead of indexing
     * past the end of the row.
     */
    shape = (j == JTransparent) ? XIsolated : joining_table[XIsolated][j].form2;
    properties[0].justification = HB_NoJustification;

    for (i = 1; i < len; ++i) {
        /* #### fix handling for spaces and punctuation */
        properties[i].justification = HB_NoJustification;

        group = arabicGroup(chars[i]);
        j = joining_for_group[group];

        if (j == JTransparent) {
            properties[i].shape = XIsolated;
            continue;
        }

        /* The current character settles the shape of the previous non-transparent one. */
        properties[lastPos].shape = joining_table[shape][j].form1;
        shape = joining_table[shape][j].form2;

        /* Kashida opportunity belonging to the previous character, now that its shape is final. */
        switch(lastGroup) {
        case Seen:
            if (properties[lastPos].shape == XInitial || properties[lastPos].shape == XMedial)
                properties[i-1].justification = HB_Arabic_Seen;
            break;
        case Hah:
            if (properties[lastPos].shape == XFinal)
                properties[lastPos-1].justification = HB_Arabic_HaaDal;
            break;
        case Alef:
            if (properties[lastPos].shape == XFinal)
                properties[lastPos-1].justification = HB_Arabic_Alef;
            break;
        case Ain:
            if (properties[lastPos].shape == XFinal)
                properties[lastPos-1].justification = HB_Arabic_Waw;
            break;
        case Noon:
            if (properties[lastPos].shape == XFinal)
                properties[lastPos-1].justification = HB_Arabic_Normal;
            break;
        case ArabicNone:
            break;

        default:
            assert(FALSE);
        }

        lastGroup = ArabicNone;

        /* Remember which justification class the current character may produce. */
        switch(group) {
        case ArabicNone:
        case Transparent:
        /* ### Center should probably be treated as transparent when it comes to justification. */
        case Center:
            break;
        case ArabicSpace:
            properties[i].justification = HB_Arabic_Space;
            break;
        case Kashida:
            properties[i].justification = HB_Arabic_Kashida;
            break;
        case Seen:
            lastGroup = Seen;
            break;

        case Hah:
        case Dal:
            lastGroup = Hah;
            break;

        case Alef:
        case Tah:
            lastGroup = Alef;
            break;

        case Yeh:
        case Reh:
            /* A medial shape is never assigned to position 0, so lastPos-1 is a valid index here. */
            if (properties[lastPos].shape == XMedial && arabicGroup(chars[lastPos]) == Beh)
                properties[lastPos-1].justification = HB_Arabic_BaRa;
            break;

        case Ain:
        case Waw:
            lastGroup = Ain;
            break;

        case Noon:
        case Beh:
        case HamzaOnHehGoal:
            lastGroup = Noon;
            break;
        case ArabicGroupsEnd:
            assert(FALSE);
        }

        lastPos = i;
    }
    /* Nothing follows the last non-transparent character: resolve it as if a non-joiner came next. */
    properties[lastPos].shape = joining_table[shape][JNone].form1;


    /*
     for (int i = 0; i < len; ++i)
         qDebug("arabic properties(%d): uc=%x shape=%d, justification=%d", i, chars[i], properties[i].shape, properties[i].justification);
    */
}
491
492 /*
493 // The unicode to unicode shaping codec.
494 // does only presentation forms B at the moment, but that should be enough for
495 // simple display
496 */
497 static const hb_uint16 arabicUnicodeMapping[256][2] = {
498 /* base of shaped forms, and number-1 of them (0 for non shaping,
499 1 for right binding and 3 for dual binding */
500
501 /* These are just the glyphs available in Unicode,
502 some characters are in R class, but have no glyphs in Unicode. */
503
504 { 0x0600, 0 }, /* 0x0600 */
505 { 0x0601, 0 }, /* 0x0601 */
506 { 0x0602, 0 }, /* 0x0602 */
507 { 0x0603, 0 }, /* 0x0603 */
508 { 0x0604, 0 }, /* 0x0604 */
509 { 0x0605, 0 }, /* 0x0605 */
510 { 0x0606, 0 }, /* 0x0606 */
511 { 0x0607, 0 }, /* 0x0607 */
512 { 0x0608, 0 }, /* 0x0608 */
513 { 0x0609, 0 }, /* 0x0609 */
514 { 0x060A, 0 }, /* 0x060A */
515 { 0x060B, 0 }, /* 0x060B */
516 { 0x060C, 0 }, /* 0x060C */
517 { 0x060D, 0 }, /* 0x060D */
518 { 0x060E, 0 }, /* 0x060E */
519 { 0x060F, 0 }, /* 0x060F */
520
521 { 0x0610, 0 }, /* 0x0610 */
522 { 0x0611, 0 }, /* 0x0611 */
523 { 0x0612, 0 }, /* 0x0612 */
524 { 0x0613, 0 }, /* 0x0613 */
525 { 0x0614, 0 }, /* 0x0614 */
526 { 0x0615, 0 }, /* 0x0615 */
527 { 0x0616, 0 }, /* 0x0616 */
528 { 0x0617, 0 }, /* 0x0617 */
529 { 0x0618, 0 }, /* 0x0618 */
530 { 0x0619, 0 }, /* 0x0619 */
531 { 0x061A, 0 }, /* 0x061A */
532 { 0x061B, 0 }, /* 0x061B */
533 { 0x061C, 0 }, /* 0x061C */
534 { 0x061D, 0 }, /* 0x061D */
535 { 0x061E, 0 }, /* 0x061E */
536 { 0x061F, 0 }, /* 0x061F */
537
538 { 0x0620, 0 }, /* 0x0620 */
539 { 0xFE80, 0 }, /* 0x0621 HAMZA */
540 { 0xFE81, 1 }, /* 0x0622 R ALEF WITH MADDA ABOVE */
541 { 0xFE83, 1 }, /* 0x0623 R ALEF WITH HAMZA ABOVE */
542 { 0xFE85, 1 }, /* 0x0624 R WAW WITH HAMZA ABOVE */
543 { 0xFE87, 1 }, /* 0x0625 R ALEF WITH HAMZA BELOW */
544 { 0xFE89, 3 }, /* 0x0626 D YEH WITH HAMZA ABOVE */
545 { 0xFE8D, 1 }, /* 0x0627 R ALEF */
546 { 0xFE8F, 3 }, /* 0x0628 D BEH */
547 { 0xFE93, 1 }, /* 0x0629 R TEH MARBUTA */
548 { 0xFE95, 3 }, /* 0x062A D TEH */
549 { 0xFE99, 3 }, /* 0x062B D THEH */
550 { 0xFE9D, 3 }, /* 0x062C D JEEM */
551 { 0xFEA1, 3 }, /* 0x062D D HAH */
552 { 0xFEA5, 3 }, /* 0x062E D KHAH */
553 { 0xFEA9, 1 }, /* 0x062F R DAL */
554
555 { 0xFEAB, 1 }, /* 0x0630 R THAL */
556 { 0xFEAD, 1 }, /* 0x0631 R REH */
557 { 0xFEAF, 1 }, /* 0x0632 R ZAIN */
558 { 0xFEB1, 3 }, /* 0x0633 D SEEN */
559 { 0xFEB5, 3 }, /* 0x0634 D SHEEN */
560 { 0xFEB9, 3 }, /* 0x0635 D SAD */
561 { 0xFEBD, 3 }, /* 0x0636 D DAD */
562 { 0xFEC1, 3 }, /* 0x0637 D TAH */
563 { 0xFEC5, 3 }, /* 0x0638 D ZAH */
564 { 0xFEC9, 3 }, /* 0x0639 D AIN */
565 { 0xFECD, 3 }, /* 0x063A D GHAIN */
566 { 0x063B, 0 }, /* 0x063B */
567 { 0x063C, 0 }, /* 0x063C */
568 { 0x063D, 0 }, /* 0x063D */
569 { 0x063E, 0 }, /* 0x063E */
570 { 0x063F, 0 }, /* 0x063F */
571
572 { 0x0640, 0 }, /* 0x0640 C TATWEEL // ### Join Causing, only one glyph */
573 { 0xFED1, 3 }, /* 0x0641 D FEH */
574 { 0xFED5, 3 }, /* 0x0642 D QAF */
575 { 0xFED9, 3 }, /* 0x0643 D KAF */
576 { 0xFEDD, 3 }, /* 0x0644 D LAM */
577 { 0xFEE1, 3 }, /* 0x0645 D MEEM */
578 { 0xFEE5, 3 }, /* 0x0646 D NOON */
579 { 0xFEE9, 3 }, /* 0x0647 D HEH */
580 { 0xFEED, 1 }, /* 0x0648 R WAW */
581 { 0x0649, 3 }, /* 0x0649 ALEF MAKSURA // ### Dual, glyphs not consecutive, handle in code. */
582 { 0xFEF1, 3 }, /* 0x064A D YEH */
583 { 0x064B, 0 }, /* 0x064B */
584 { 0x064C, 0 }, /* 0x064C */
585 { 0x064D, 0 }, /* 0x064D */
586 { 0x064E, 0 }, /* 0x064E */
587 { 0x064F, 0 }, /* 0x064F */
588
589 { 0x0650, 0 }, /* 0x0650 */
590 { 0x0651, 0 }, /* 0x0651 */
591 { 0x0652, 0 }, /* 0x0652 */
592 { 0x0653, 0 }, /* 0x0653 */
593 { 0x0654, 0 }, /* 0x0654 */
594 { 0x0655, 0 }, /* 0x0655 */
595 { 0x0656, 0 }, /* 0x0656 */
596 { 0x0657, 0 }, /* 0x0657 */
597 { 0x0658, 0 }, /* 0x0658 */
598 { 0x0659, 0 }, /* 0x0659 */
599 { 0x065A, 0 }, /* 0x065A */
600 { 0x065B, 0 }, /* 0x065B */
601 { 0x065C, 0 }, /* 0x065C */
602 { 0x065D, 0 }, /* 0x065D */
603 { 0x065E, 0 }, /* 0x065E */
604 { 0x065F, 0 }, /* 0x065F */
605
606 { 0x0660, 0 }, /* 0x0660 */
607 { 0x0661, 0 }, /* 0x0661 */
608 { 0x0662, 0 }, /* 0x0662 */
609 { 0x0663, 0 }, /* 0x0663 */
610 { 0x0664, 0 }, /* 0x0664 */
611 { 0x0665, 0 }, /* 0x0665 */
612 { 0x0666, 0 }, /* 0x0666 */
613 { 0x0667, 0 }, /* 0x0667 */
614 { 0x0668, 0 }, /* 0x0668 */
615 { 0x0669, 0 }, /* 0x0669 */
616 { 0x066A, 0 }, /* 0x066A */
617 { 0x066B, 0 }, /* 0x066B */
618 { 0x066C, 0 }, /* 0x066C */
619 { 0x066D, 0 }, /* 0x066D */
620 { 0x066E, 0 }, /* 0x066E */
621 { 0x066F, 0 }, /* 0x066F */
622
623 { 0x0670, 0 }, /* 0x0670 */
624 { 0xFB50, 1 }, /* 0x0671 R ALEF WASLA */
625 { 0x0672, 0 }, /* 0x0672 */
626 { 0x0673, 0 }, /* 0x0673 */
627 { 0x0674, 0 }, /* 0x0674 */
628 { 0x0675, 0 }, /* 0x0675 */
629 { 0x0676, 0 }, /* 0x0676 */
630 { 0x0677, 0 }, /* 0x0677 */
631 { 0x0678, 0 }, /* 0x0678 */
632 { 0xFB66, 3 }, /* 0x0679 D TTEH */
633 { 0xFB5E, 3 }, /* 0x067A D TTEHEH */
634 { 0xFB52, 3 }, /* 0x067B D BEEH */
635 { 0x067C, 0 }, /* 0x067C */
636 { 0x067D, 0 }, /* 0x067D */
637 { 0xFB56, 3 }, /* 0x067E D PEH */
638 { 0xFB62, 3 }, /* 0x067F D TEHEH */
639
640 { 0xFB5A, 3 }, /* 0x0680 D BEHEH */
641 { 0x0681, 0 }, /* 0x0681 */
642 { 0x0682, 0 }, /* 0x0682 */
643 { 0xFB76, 3 }, /* 0x0683 D NYEH */
644 { 0xFB72, 3 }, /* 0x0684 D DYEH */
645 { 0x0685, 0 }, /* 0x0685 */
646 { 0xFB7A, 3 }, /* 0x0686 D TCHEH */
647 { 0xFB7E, 3 }, /* 0x0687 D TCHEHEH */
648 { 0xFB88, 1 }, /* 0x0688 R DDAL */
649 { 0x0689, 0 }, /* 0x0689 */
650 { 0x068A, 0 }, /* 0x068A */
651 { 0x068B, 0 }, /* 0x068B */
652 { 0xFB84, 1 }, /* 0x068C R DAHAL */
653 { 0xFB82, 1 }, /* 0x068D R DDAHAL */
654 { 0xFB86, 1 }, /* 0x068E R DUL */
655 { 0x068F, 0 }, /* 0x068F */
656
657 { 0x0690, 0 }, /* 0x0690 */
658 { 0xFB8C, 1 }, /* 0x0691 R RREH */
659 { 0x0692, 0 }, /* 0x0692 */
660 { 0x0693, 0 }, /* 0x0693 */
661 { 0x0694, 0 }, /* 0x0694 */
662 { 0x0695, 0 }, /* 0x0695 */
663 { 0x0696, 0 }, /* 0x0696 */
664 { 0x0697, 0 }, /* 0x0697 */
665 { 0xFB8A, 1 }, /* 0x0698 R JEH */
666 { 0x0699, 0 }, /* 0x0699 */
667 { 0x069A, 0 }, /* 0x069A */
668 { 0x069B, 0 }, /* 0x069B */
669 { 0x069C, 0 }, /* 0x069C */
670 { 0x069D, 0 }, /* 0x069D */
671 { 0x069E, 0 }, /* 0x069E */
672 { 0x069F, 0 }, /* 0x069F */
673
674 { 0x06A0, 0 }, /* 0x06A0 */
675 { 0x06A1, 0 }, /* 0x06A1 */
676 { 0x06A2, 0 }, /* 0x06A2 */
677 { 0x06A3, 0 }, /* 0x06A3 */
678 { 0xFB6A, 3 }, /* 0x06A4 D VEH */
679 { 0x06A5, 0 }, /* 0x06A5 */
680 { 0xFB6E, 3 }, /* 0x06A6 D PEHEH */
681 { 0x06A7, 0 }, /* 0x06A7 */
682 { 0x06A8, 0 }, /* 0x06A8 */
683 { 0xFB8E, 3 }, /* 0x06A9 D KEHEH */
684 { 0x06AA, 0 }, /* 0x06AA */
685 { 0x06AB, 0 }, /* 0x06AB */
686 { 0x06AC, 0 }, /* 0x06AC */
687 { 0xFBD3, 3 }, /* 0x06AD D NG */
688 { 0x06AE, 0 }, /* 0x06AE */
689 { 0xFB92, 3 }, /* 0x06AF D GAF */
690
691 { 0x06B0, 0 }, /* 0x06B0 */
692 { 0xFB9A, 3 }, /* 0x06B1 D NGOEH */
693 { 0x06B2, 0 }, /* 0x06B2 */
694 { 0xFB96, 3 }, /* 0x06B3 D GUEH */
695 { 0x06B4, 0 }, /* 0x06B4 */
696 { 0x06B5, 0 }, /* 0x06B5 */
697 { 0x06B6, 0 }, /* 0x06B6 */
698 { 0x06B7, 0 }, /* 0x06B7 */
699 { 0x06B8, 0 }, /* 0x06B8 */
700 { 0x06B9, 0 }, /* 0x06B9 */
701 { 0xFB9E, 1 }, /* 0x06BA R NOON GHUNNA */
702 { 0xFBA0, 3 }, /* 0x06BB D RNOON */
703 { 0x06BC, 0 }, /* 0x06BC */
704 { 0x06BD, 0 }, /* 0x06BD */
705 { 0xFBAA, 3 }, /* 0x06BE D HEH DOACHASHMEE */
706 { 0x06BF, 0 }, /* 0x06BF */
707
708 { 0xFBA4, 1 }, /* 0x06C0 R HEH WITH YEH ABOVE */
709 { 0xFBA6, 3 }, /* 0x06C1 D HEH GOAL */
710 { 0x06C2, 0 }, /* 0x06C2 */
711 { 0x06C3, 0 }, /* 0x06C3 */
712 { 0x06C4, 0 }, /* 0x06C4 */
713 { 0xFBE0, 1 }, /* 0x06C5 R KIRGHIZ OE */
714 { 0xFBD9, 1 }, /* 0x06C6 R OE */
715 { 0xFBD7, 1 }, /* 0x06C7 R U */
716 { 0xFBDB, 1 }, /* 0x06C8 R YU */
717 { 0xFBE2, 1 }, /* 0x06C9 R KIRGHIZ YU */
718 { 0x06CA, 0 }, /* 0x06CA */
719 { 0xFBDE, 1 }, /* 0x06CB R VE */
720 { 0xFBFC, 3 }, /* 0x06CC D FARSI YEH */
721 { 0x06CD, 0 }, /* 0x06CD */
722 { 0x06CE, 0 }, /* 0x06CE */
723 { 0x06CF, 0 }, /* 0x06CF */
724
725 { 0xFBE4, 3 }, /* 0x06D0 D E */
726 { 0x06D1, 0 }, /* 0x06D1 */
727 { 0xFBAE, 1 }, /* 0x06D2 R YEH BARREE */
728 { 0xFBB0, 1 }, /* 0x06D3 R YEH BARREE WITH HAMZA ABOVE */
729 { 0x06D4, 0 }, /* 0x06D4 */
730 { 0x06D5, 0 }, /* 0x06D5 */
731 { 0x06D6, 0 }, /* 0x06D6 */
732 { 0x06D7, 0 }, /* 0x06D7 */
733 { 0x06D8, 0 }, /* 0x06D8 */
734 { 0x06D9, 0 }, /* 0x06D9 */
735 { 0x06DA, 0 }, /* 0x06DA */
736 { 0x06DB, 0 }, /* 0x06DB */
737 { 0x06DC, 0 }, /* 0x06DC */
738 { 0x06DD, 0 }, /* 0x06DD */
739 { 0x06DE, 0 }, /* 0x06DE */
740 { 0x06DF, 0 }, /* 0x06DF */
741
742 { 0x06E0, 0 }, /* 0x06E0 */
743 { 0x06E1, 0 }, /* 0x06E1 */
744 { 0x06E2, 0 }, /* 0x06E2 */
745 { 0x06E3, 0 }, /* 0x06E3 */
746 { 0x06E4, 0 }, /* 0x06E4 */
747 { 0x06E5, 0 }, /* 0x06E5 */
748 { 0x06E6, 0 }, /* 0x06E6 */
749 { 0x06E7, 0 }, /* 0x06E7 */
750 { 0x06E8, 0 }, /* 0x06E8 */
751 { 0x06E9, 0 }, /* 0x06E9 */
752 { 0x06EA, 0 }, /* 0x06EA */
753 { 0x06EB, 0 }, /* 0x06EB */
754 { 0x06EC, 0 }, /* 0x06EC */
755 { 0x06ED, 0 }, /* 0x06ED */
756 { 0x06EE, 0 }, /* 0x06EE */
757 { 0x06EF, 0 }, /* 0x06EF */
758
759 { 0x06F0, 0 }, /* 0x06F0 */
760 { 0x06F1, 0 }, /* 0x06F1 */
761 { 0x06F2, 0 }, /* 0x06F2 */
762 { 0x06F3, 0 }, /* 0x06F3 */
763 { 0x06F4, 0 }, /* 0x06F4 */
764 { 0x06F5, 0 }, /* 0x06F5 */
765 { 0x06F6, 0 }, /* 0x06F6 */
766 { 0x06F7, 0 }, /* 0x06F7 */
767 { 0x06F8, 0 }, /* 0x06F8 */
768 { 0x06F9, 0 }, /* 0x06F9 */
769 { 0x06FA, 0 }, /* 0x06FA */
770 { 0x06FB, 0 }, /* 0x06FB */
771 { 0x06FC, 0 }, /* 0x06FC */
772 { 0x06FD, 0 }, /* 0x06FD */
773 { 0x06FE, 0 }, /* 0x06FE */
774 { 0x06FF, 0 } /* 0x06FF */
775 };
776
777 /* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, this table does */
778 static const hb_uint16 alefMaksura[4] = {0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9};
779
/*
// This is a bit tricky. Alef always binds to the right, so the second parameter describing the shape
// of the lam can be either initial or medial. So initial maps to the isolated form of the ligature,
// medial to the final form.
*/
785 static const hb_uint16 arabicUnicodeLamAlefMapping[6][4] = {
786 { 0xfffd, 0xfffd, 0xfef5, 0xfef6 }, /* 0x622 R Alef with Madda above */
787 { 0xfffd, 0xfffd, 0xfef7, 0xfef8 }, /* 0x623 R Alef with Hamza above */
788 { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x624 // Just to fill the table ;-) */
789 { 0xfffd, 0xfffd, 0xfef9, 0xfefa }, /* 0x625 R Alef with Hamza below */
790 { 0xfffd, 0xfffd, 0xfffd, 0xfffd }, /* 0x626 // Just to fill the table ;-) */
791 { 0xfffd, 0xfffd, 0xfefb, 0xfefc } /* 0x627 R Alef */
792 };
793
/*
 * Returns the code point to use for the character U+0600+cell in the given
 * joining shape.
 *
 * cell:  low byte of a U+06xx code point.
 * shape: ArabicShape value (XIsolated, XFinal, XInitial or XMedial).
 *
 * The isolated shape is returned as the unshaped character itself. A shape the
 * character has no presentation form for (shape index above the count stored
 * in arabicUnicodeMapping[cell][1], or negative) also yields the unshaped
 * character; adding the index to the base would otherwise land on an unrelated
 * code point.
 */
static int getShape(hb_uint8 cell, int shape)
{
    if (shape < 0 || shape > (int) arabicUnicodeMapping[cell][1])
        return 0x600 + cell;

    /* the arabicUnicodeMapping does not work for U+0649 ALEF MAKSURA, handle this here */
    if (cell == 0x49)
        return alefMaksura[shape];

    if (shape == 0)
        return 0x600 + cell;

    return arabicUnicodeMapping[cell][0] + shape;
}
802
803
804 /*
805 Two small helper functions for arabic shaping.
806 */
prevChar(const HB_UChar16 * str,int pos)807 static HB_UChar16 prevChar(const HB_UChar16 *str, int pos)
808 {
809 /*qDebug("leftChar: pos=%d", pos); */
810 const HB_UChar16 *ch = str + pos - 1;
811 pos--;
812 while(pos > -1) {
813 if(HB_GetUnicodeCharCategory(*ch) != HB_Mark_NonSpacing)
814 return *ch;
815 pos--;
816 ch--;
817 }
818 return ReplacementCharacter;
819 }
820
/*
 * Returns the closest character after str[pos] (below index len) whose Unicode
 * category is not HB_Mark_NonSpacing, or ReplacementCharacter when there is none.
 */
static HB_UChar16 nextChar(const HB_UChar16 *str, hb_uint32 len, hb_uint32 pos)
{
    hb_uint32 idx;

    for (idx = pos + 1; idx < len; ++idx) {
        /*qDebug("rightChar: %d isLetter=%d, joining=%d", pos, ch.isLetter(), ch.joining()); */
        if (HB_GetUnicodeCharCategory(str[idx]) != HB_Mark_NonSpacing)
            return str[idx];
        /* assume it's a transparent char, this might not be 100% correct */
    }
    return ReplacementCharacter;
}
835
/*
 * Unicode-to-Unicode shaping of uc[from .. from+len-1].
 *
 * uc, stringLength: the whole string; characters next to the range are used as joining context.
 * from, len:        the range to shape.
 * shapeBuffer:      receives the shaped characters; *shapedLength receives how many were written.
 * reverse:          when set, characters outside U+06xx are replaced by HB_GetMirroredChar().
 * attributes:       filled per output character (mark, clusterStart, combiningClass, justification).
 * logClusters:      filled per input character with the output index of its cluster start.
 *
 * ZWJ/ZWNJ are dropped, and the alef of a lam-alef pair is dropped because the
 * lam is replaced by the ligature.
 */
static void shapedString(const HB_UChar16 *uc, hb_uint32 stringLength, hb_uint32 from, hb_uint32 len, HB_UChar16 *shapeBuffer, int *shapedLength,
                         HB_Bool reverse, HB_GlyphAttributes *attributes, unsigned short *logClusters)
{
    HB_ArabicProperties *properties;
    hb_int32 first = from;
    hb_uint32 count = len;
    const HB_UChar16 *ch;
    HB_UChar16 *out;
    int clusterStart;
    hb_uint32 i;
    HB_STACKARRAY(HB_ArabicProperties, props, len + 2);
    properties = props;

    assert(stringLength >= from + len);

    if (len == 0) {
        *shapedLength = 0;
        return;
    }

    /* Widen the analysed range by one context character on each side where the string has one. */
    if (from > 0) {
        --first;
        ++count;
        ++properties;
    }
    if (first + count < stringLength)
        ++count;
    getArabicProperties(uc + first, count, props);

    ch = uc + from;
    out = shapeBuffer;
    clusterStart = 0;

    for (i = 0; i < len; ++i, ++ch) {
        const hb_uint8 row = *ch >> 8;
        const int gpos = out - shapeBuffer;
        HB_Bool emit = TRUE;

        if (row != 0x06) {
            if (row == 0x20 && (*ch == 0x200c || *ch == 0x200d)) {
                /* remove ZWJ and ZWNJ */
                emit = FALSE;
            } else if (reverse) {
                *out = HB_GetMirroredChar(*ch);
            } else {
                *out = *ch;
            }
        } else {
            const hb_uint8 cell = *ch & 0xff;
            const int pos = i + from;
            const int shape = properties[i].shape;
            HB_Bool haveMap = FALSE;
            hb_uint16 map = 0;

            /* qDebug("mapping U+%x to shape %d glyph=0x%x", ch->unicode(), shape, getShape(c, shape)); */
            /* take care of lam-alef ligatures (lam right of alef) */
            if (cell == 0x44) { /* lam */
                const HB_UChar16 following = nextChar(uc, stringLength, pos);
                if (following == 0x0622 || following == 0x0623
                    || following == 0x0625 || following == 0x0627) {
                    /* qDebug(" lam of lam-alef ligature"); */
                    map = arabicUnicodeLamAlefMapping[(following & 0xff) - 0x22][shape];
                    haveMap = TRUE;
                }
            } else if (cell == 0x22      /* alef with madda */
                       || cell == 0x23   /* alef with hamza above */
                       || cell == 0x25   /* alef with hamza below */
                       || cell == 0x27)  /* alef */ {
                if (prevChar(uc, pos) == 0x0644) {
                    /* have a lam alef ligature */
                    /*qDebug(" alef of lam-alef ligature"); */
                    emit = FALSE;
                }
            }

            if (emit) {
                if (!haveMap)
                    map = getShape(cell, shape);
                *out = map;
            }
        }

        if (emit) {
            /* ##### Fixme */
            /*glyphs[gpos].attributes.zeroWidth = zeroWidth; */
            if (HB_GetUnicodeCharCategory(*ch) == HB_Mark_NonSpacing) {
                attributes[gpos].mark = TRUE;
                /* qDebug("glyph %d (char %d) is mark!", gpos, i); */
            } else {
                attributes[gpos].mark = FALSE;
                clusterStart = out - shapeBuffer;
            }
            attributes[gpos].clusterStart = !attributes[gpos].mark;
            attributes[gpos].combiningClass = HB_GetUnicodeCharCombiningClass(*ch);
            attributes[gpos].justification = properties[i].justification;
            /* qDebug("data[%d] = %x (from %x)", gpos, (uint)data->unicode(), ch->unicode());*/
            out++;
        }

        /* Dropped characters belong to the cluster that is currently open. */
        logClusters[i] = clusterStart;
    }
    *shapedLength = out - shapeBuffer;

    HB_FREE_STACKARRAY(props);
}
946
947 #ifndef NO_OPENTYPE
948
949 static const HB_OpenTypeFeature arabic_features[] = {
950 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
951 { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty },
952 { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty },
953 { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty },
954 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
955 { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty },
956 { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty },
957 { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty },
958 { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty },
959 { HB_MAKE_TAG('c', 's', 'w', 'h'), CswhProperty },
960 /* mset is used in old Win95 fonts that don't have a 'mark' positioning table. */
961 { HB_MAKE_TAG('m', 's', 'e', 't'), MsetProperty },
962 {0, 0}
963 };
964
965 static const HB_OpenTypeFeature syriac_features[] = {
966 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
967 { HB_MAKE_TAG('i', 's', 'o', 'l'), IsolProperty },
968 { HB_MAKE_TAG('f', 'i', 'n', 'a'), FinaProperty },
969 { HB_MAKE_TAG('f', 'i', 'n', '2'), FinaProperty },
970 { HB_MAKE_TAG('f', 'i', 'n', '3'), FinaProperty },
971 { HB_MAKE_TAG('m', 'e', 'd', 'i'), MediProperty },
972 { HB_MAKE_TAG('m', 'e', 'd', '2'), MediProperty },
973 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
974 { HB_MAKE_TAG('r', 'l', 'i', 'g'), RligProperty },
975 { HB_MAKE_TAG('c', 'a', 'l', 't'), CaltProperty },
976 { HB_MAKE_TAG('l', 'i', 'g', 'a'), LigaProperty },
977 { HB_MAKE_TAG('d', 'l', 'i', 'g'), DligProperty },
978 {0, 0}
979 };
980
arabicSyriacOpenTypeShape(HB_ShaperItem * item,HB_Bool * ot_ok)981 static HB_Bool arabicSyriacOpenTypeShape(HB_ShaperItem *item, HB_Bool *ot_ok)
982 {
983 const HB_UChar16 *uc;
984 const int nglyphs = item->num_glyphs;
985 hb_int32 f;
986 hb_uint32 l;
987 HB_ArabicProperties *properties;
988 HB_DECLARE_STACKARRAY(HB_ArabicProperties, props)
989 HB_DECLARE_STACKARRAY(hb_uint32, apply)
990 HB_Bool shaped;
991 int i = 0;
992
993 *ot_ok = TRUE;
994
995 if (!HB_ConvertStringToGlyphIndices(item))
996 return FALSE;
997 HB_HeuristicSetGlyphAttributes(item);
998
999 HB_INIT_STACKARRAY(HB_ArabicProperties, props, item->item.length + 2);
1000 HB_INIT_STACKARRAY(hb_uint32, apply, item->num_glyphs);
1001
1002 uc = item->string + item->item.pos;
1003
1004 properties = props;
1005 f = 0;
1006 l = item->item.length;
1007 if (item->item.pos > 0) {
1008 --f;
1009 ++l;
1010 ++properties;
1011 }
1012 if (f + l < item->stringLength) {
1013 ++l;
1014 }
1015 getArabicProperties(uc+f, l, props);
1016
1017 for (i = 0; i < (int)item->num_glyphs; i++) {
1018 apply[i] = 0;
1019
1020 if (properties[i].shape == XIsolated)
1021 apply[i] |= MediProperty|FinaProperty|InitProperty;
1022 else if (properties[i].shape == XMedial)
1023 apply[i] |= IsolProperty|FinaProperty|InitProperty;
1024 else if (properties[i].shape == XFinal)
1025 apply[i] |= IsolProperty|MediProperty|InitProperty;
1026 else if (properties[i].shape == XInitial)
1027 apply[i] |= IsolProperty|MediProperty|FinaProperty;
1028
1029 item->attributes[i].justification = properties[i].justification;
1030 }
1031
1032 HB_FREE_STACKARRAY(props);
1033
1034 shaped = HB_OpenTypeShape(item, apply);
1035
1036 HB_FREE_STACKARRAY(apply);
1037
1038 if (!shaped) {
1039 *ot_ok = FALSE;
1040 return FALSE;
1041 }
1042 return HB_OpenTypePosition(item, nglyphs, /*doLogClusters*/TRUE);
1043 }
1044
1045 #endif
1046
/* #### still missing: identify invalid character combinations */
HB_ArabicShape(HB_ShaperItem * item)1048 HB_Bool HB_ArabicShape(HB_ShaperItem *item)
1049 {
1050 int slen;
1051 HB_Bool haveGlyphs;
1052 HB_STACKARRAY(HB_UChar16, shapedChars, item->item.length);
1053
1054 assert(item->item.script == HB_Script_Arabic || item->item.script == HB_Script_Syriac);
1055
1056 #ifndef NO_OPENTYPE
1057
1058 if (HB_SelectScript(item, item->item.script == HB_Script_Arabic ? arabic_features : syriac_features)) {
1059 HB_Bool ot_ok;
1060 if (arabicSyriacOpenTypeShape(item, &ot_ok))
1061 return TRUE;
1062 if (ot_ok)
1063 return FALSE;
1064 /* fall through to the non OT code*/
1065 }
1066 #endif
1067
1068 if (item->item.script == HB_Script_Syriac)
1069 return HB_BasicShape(item);
1070
1071 shapedString(item->string, item->stringLength, item->item.pos, item->item.length, shapedChars, &slen,
1072 item->item.bidiLevel % 2,
1073 item->attributes, item->log_clusters);
1074
1075 haveGlyphs = item->font->klass
1076 ->convertStringToGlyphIndices(item->font,
1077 shapedChars, slen,
1078 item->glyphs, &item->num_glyphs,
1079 item->item.bidiLevel % 2);
1080
1081 HB_FREE_STACKARRAY(shapedChars);
1082
1083 if (!haveGlyphs)
1084 return FALSE;
1085
1086 HB_HeuristicPosition(item);
1087 return TRUE;
1088 }
1089
1090
1091