1 /************************************************************************
2  *
3  * ------------
4  * Description:
5  * ------------
6  * This is an implementation of Unicode's Bidirectional Algorithm
7  * (known as UAX #9).
8  *
9  *   http://www.unicode.org/reports/tr9/
10  *
11  * Author: Ahmad Khalifa
12  *
13  * (www.arabeyes.org - under MIT license)
14  *
15  ************************************************************************/
16 
17 /*
18  * TODO:
19  * =====
20  * - Explicit marks need to be handled (they are not 100% now)
21  * - Ligatures
22  */
23 
24 #include "minibidi.h"
25 
26 #include <cstdlib>     /* definition of wchar_t*/
27 #include <cstdio>
28 
/*
 * Layout of a per-character level byte: the low six bits hold the
 * embedding level, the top two bits hold the override status.
 */
#define LMASK   0x3F    /* Embedding Level mask */
#define OMASK   0xC0    /* Override mask */
#define OISL    0x80    /* Override is L */
#define OISR    0x40    /* Override is R */

/*
 * Shaping Helpers
 *
 * STYPE yields the shaping type of xh, or SU for any code point outside
 * the shapetypes table. SISOLATED does no range check, so it must only
 * be applied to code points within SHAPE_FIRST..SHAPE_LAST. SFINAL,
 * SINITIAL and SMEDIAL add fixed offsets (+1, +2, +3) to their argument.
 */
#define STYPE(xh) ((((xh) >= SHAPE_FIRST) && ((xh) <= SHAPE_LAST)) ? shapetypes[(xh)-SHAPE_FIRST].type : SU)

#define SISOLATED(xh) (shapetypes[(xh)-SHAPE_FIRST].form_b)
#define SFINAL(xh) ((xh)+1)
#define SINITIAL(xh) ((xh)+2)
#define SMEDIAL(xh) ((xh)+3)

/* Smallest odd / even value strictly greater than x. */
#define leastGreaterOdd(x) ( ((x)+1) | 1 )
#define leastGreaterEven(x) ( ((x)+2) &~ 1 )

/*
 * Number of elements in a true array (not a pointer). The expansion is
 * fully parenthesised so it stays one operand inside larger expressions
 * such as `i % lenof(a)`.
 */
#define lenof(x) (sizeof(x) / sizeof((x)[0]))
45 
46 
47 /* function declarations */
48 static void flipThisRun(
49     bidi_char *from, unsigned char *level, int max, int count);
50 static int findIndexOfRun(
51     unsigned char *level, int start, int count, int tlevel);
52 static unsigned char getType(int ch);
53 static unsigned char setOverrideBits(
54     unsigned char level, unsigned char override);
55 static int getPreviousLevel(unsigned char *level, int from);
56 static void doMirror(bidi_char *ch);
57 
/*
 * Bidirectional character classes, named as in UAX #9.
 *
 * The numeric values are used as bit positions (1 << class), so they
 * must stay small and distinct.
 */
enum {
    L   = 0,    /* Left-to-Right */
    LRE = 1,    /* Left-to-Right Embedding */
    LRO = 2,    /* Left-to-Right Override */
    R   = 3,    /* Right-to-Left */
    AL  = 4,    /* Right-to-Left Arabic */
    RLE = 5,    /* Right-to-Left Embedding */
    RLO = 6,    /* Right-to-Left Override */
    PDF = 7,    /* Pop Directional Format */
    EN  = 8,    /* European Number */
    ES  = 9,    /* European Number Separator */
    ET  = 10,   /* European Number Terminator */
    AN  = 11,   /* Arabic Number */
    CS  = 12,   /* Common Number Separator */
    NSM = 13,   /* Nonspacing Mark */
    BN  = 14,   /* Boundary Neutral */
    B   = 15,   /* Paragraph Separator */
    S   = 16,   /* Segment Separator */
    WS  = 17,   /* Whitespace */
    ON  = 18    /* Other Neutrals */
};
80 
/* Joining behaviour of a character, as used by the shaping code. */
enum {
    SL = 0, /* Left-Joining; no such character in U+0600 - U+06FF */
    SR = 1, /* Right-Joining: has Isolated and Final forms */
    SD = 2, /* Dual-Joining: has Isolated, Final, Initial and Medial forms */
    SU = 3, /* Non-Joining */
    SC = 4  /* Join-Causing, e.g. U+0640 (TATWEEL) */
};
89 
/* One row of the shaping table. */
typedef struct shape_node {
    char type;          /* shaping type: one of SL, SR, SD, SU, SC */
    wchar_t form_b;     /* value handed back by SISOLATED() */
} shape_node;
94 
95 /* Kept near the actual table, for verification. */
96 #define SHAPE_FIRST 0x621
97 #define SHAPE_LAST (SHAPE_FIRST + lenof(shapetypes) - 1)
98 
99 static const shape_node shapetypes[] = {
100     /* index, Typ, Iso, Ligature Index*/
101     /* 621 */ {SU, 0xFE80},
102     /* 622 */ {SR, 0xFE81},
103     /* 623 */ {SR, 0xFE83},
104     /* 624 */ {SR, 0xFE85},
105     /* 625 */ {SR, 0xFE87},
106     /* 626 */ {SD, 0xFE89},
107     /* 627 */ {SR, 0xFE8D},
108     /* 628 */ {SD, 0xFE8F},
109     /* 629 */ {SR, 0xFE93},
110     /* 62A */ {SD, 0xFE95},
111     /* 62B */ {SD, 0xFE99},
112     /* 62C */ {SD, 0xFE9D},
113     /* 62D */ {SD, 0xFEA1},
114     /* 62E */ {SD, 0xFEA5},
115     /* 62F */ {SR, 0xFEA9},
116     /* 630 */ {SR, 0xFEAB},
117     /* 631 */ {SR, 0xFEAD},
118     /* 632 */ {SR, 0xFEAF},
119     /* 633 */ {SD, 0xFEB1},
120     /* 634 */ {SD, 0xFEB5},
121     /* 635 */ {SD, 0xFEB9},
122     /* 636 */ {SD, 0xFEBD},
123     /* 637 */ {SD, 0xFEC1},
124     /* 638 */ {SD, 0xFEC5},
125     /* 639 */ {SD, 0xFEC9},
126     /* 63A */ {SD, 0xFECD},
127     /* 63B */ {SU, 0x0},
128     /* 63C */ {SU, 0x0},
129     /* 63D */ {SU, 0x0},
130     /* 63E */ {SU, 0x0},
131     /* 63F */ {SU, 0x0},
132     /* 640 */ {SC, 0x0},
133     /* 641 */ {SD, 0xFED1},
134     /* 642 */ {SD, 0xFED5},
135     /* 643 */ {SD, 0xFED9},
136     /* 644 */ {SD, 0xFEDD},
137     /* 645 */ {SD, 0xFEE1},
138     /* 646 */ {SD, 0xFEE5},
139     /* 647 */ {SD, 0xFEE9},
140     /* 648 */ {SR, 0xFEED},
141     /* 649 */ {SR, 0xFEEF}, /* SD */
142     /* 64A */ {SD, 0xFEF1},
143     /* 64B */ {SU, 0x0},
144     /* 64C */ {SU, 0x0},
145     /* 64D */ {SU, 0x0},
146     /* 64E */ {SU, 0x0},
147     /* 64F */ {SU, 0x0},
148     /* 650 */ {SU, 0x0},
149     /* 651 */ {SU, 0x0},
150     /* 652 */ {SU, 0x0},
151     /* 653 */ {SU, 0x0},
152     /* 654 */ {SU, 0x0},
153     /* 655 */ {SU, 0x0},
154     /* 656 */ {SU, 0x0},
155     /* 657 */ {SU, 0x0},
156     /* 658 */ {SU, 0x0},
157     /* 659 */ {SU, 0x0},
158     /* 65A */ {SU, 0x0},
159     /* 65B */ {SU, 0x0},
160     /* 65C */ {SU, 0x0},
161     /* 65D */ {SU, 0x0},
162     /* 65E */ {SU, 0x0},
163     /* 65F */ {SU, 0x0},
164     /* 660 */ {SU, 0x0},
165     /* 661 */ {SU, 0x0},
166     /* 662 */ {SU, 0x0},
167     /* 663 */ {SU, 0x0},
168     /* 664 */ {SU, 0x0},
169     /* 665 */ {SU, 0x0},
170     /* 666 */ {SU, 0x0},
171     /* 667 */ {SU, 0x0},
172     /* 668 */ {SU, 0x0},
173     /* 669 */ {SU, 0x0},
174     /* 66A */ {SU, 0x0},
175     /* 66B */ {SU, 0x0},
176     /* 66C */ {SU, 0x0},
177     /* 66D */ {SU, 0x0},
178     /* 66E */ {SU, 0x0},
179     /* 66F */ {SU, 0x0},
180     /* 670 */ {SU, 0x0},
181     /* 671 */ {SR, 0xFB50},
182     /* 672 */ {SU, 0x0},
183     /* 673 */ {SU, 0x0},
184     /* 674 */ {SU, 0x0},
185     /* 675 */ {SU, 0x0},
186     /* 676 */ {SU, 0x0},
187     /* 677 */ {SU, 0x0},
188     /* 678 */ {SU, 0x0},
189     /* 679 */ {SD, 0xFB66},
190     /* 67A */ {SD, 0xFB5E},
191     /* 67B */ {SD, 0xFB52},
192     /* 67C */ {SU, 0x0},
193     /* 67D */ {SU, 0x0},
194     /* 67E */ {SD, 0xFB56},
195     /* 67F */ {SD, 0xFB62},
196     /* 680 */ {SD, 0xFB5A},
197     /* 681 */ {SU, 0x0},
198     /* 682 */ {SU, 0x0},
199     /* 683 */ {SD, 0xFB76},
200     /* 684 */ {SD, 0xFB72},
201     /* 685 */ {SU, 0x0},
202     /* 686 */ {SD, 0xFB7A},
203     /* 687 */ {SD, 0xFB7E},
204     /* 688 */ {SR, 0xFB88},
205     /* 689 */ {SU, 0x0},
206     /* 68A */ {SU, 0x0},
207     /* 68B */ {SU, 0x0},
208     /* 68C */ {SR, 0xFB84},
209     /* 68D */ {SR, 0xFB82},
210     /* 68E */ {SR, 0xFB86},
211     /* 68F */ {SU, 0x0},
212     /* 690 */ {SU, 0x0},
213     /* 691 */ {SR, 0xFB8C},
214     /* 692 */ {SU, 0x0},
215     /* 693 */ {SU, 0x0},
216     /* 694 */ {SU, 0x0},
217     /* 695 */ {SU, 0x0},
218     /* 696 */ {SU, 0x0},
219     /* 697 */ {SU, 0x0},
220     /* 698 */ {SR, 0xFB8A},
221     /* 699 */ {SU, 0x0},
222     /* 69A */ {SU, 0x0},
223     /* 69B */ {SU, 0x0},
224     /* 69C */ {SU, 0x0},
225     /* 69D */ {SU, 0x0},
226     /* 69E */ {SU, 0x0},
227     /* 69F */ {SU, 0x0},
228     /* 6A0 */ {SU, 0x0},
229     /* 6A1 */ {SU, 0x0},
230     /* 6A2 */ {SU, 0x0},
231     /* 6A3 */ {SU, 0x0},
232     /* 6A4 */ {SD, 0xFB6A},
233     /* 6A5 */ {SU, 0x0},
234     /* 6A6 */ {SD, 0xFB6E},
235     /* 6A7 */ {SU, 0x0},
236     /* 6A8 */ {SU, 0x0},
237     /* 6A9 */ {SD, 0xFB8E},
238     /* 6AA */ {SU, 0x0},
239     /* 6AB */ {SU, 0x0},
240     /* 6AC */ {SU, 0x0},
241     /* 6AD */ {SD, 0xFBD3},
242     /* 6AE */ {SU, 0x0},
243     /* 6AF */ {SD, 0xFB92},
244     /* 6B0 */ {SU, 0x0},
245     /* 6B1 */ {SD, 0xFB9A},
246     /* 6B2 */ {SU, 0x0},
247     /* 6B3 */ {SD, 0xFB96},
248     /* 6B4 */ {SU, 0x0},
249     /* 6B5 */ {SU, 0x0},
250     /* 6B6 */ {SU, 0x0},
251     /* 6B7 */ {SU, 0x0},
252     /* 6B8 */ {SU, 0x0},
253     /* 6B9 */ {SU, 0x0},
254     /* 6BA */ {SR, 0xFB9E},
255     /* 6BB */ {SD, 0xFBA0},
256     /* 6BC */ {SU, 0x0},
257     /* 6BD */ {SU, 0x0},
258     /* 6BE */ {SD, 0xFBAA},
259     /* 6BF */ {SU, 0x0},
260     /* 6C0 */ {SR, 0xFBA4},
261     /* 6C1 */ {SD, 0xFBA6},
262     /* 6C2 */ {SU, 0x0},
263     /* 6C3 */ {SU, 0x0},
264     /* 6C4 */ {SU, 0x0},
265     /* 6C5 */ {SR, 0xFBE0},
266     /* 6C6 */ {SR, 0xFBD9},
267     /* 6C7 */ {SR, 0xFBD7},
268     /* 6C8 */ {SR, 0xFBDB},
269     /* 6C9 */ {SR, 0xFBE2},
270     /* 6CA */ {SU, 0x0},
271     /* 6CB */ {SR, 0xFBDE},
272     /* 6CC */ {SD, 0xFBFC},
273     /* 6CD */ {SU, 0x0},
274     /* 6CE */ {SU, 0x0},
275     /* 6CF */ {SU, 0x0},
276     /* 6D0 */ {SU, 0x0},
277     /* 6D1 */ {SU, 0x0},
278     /* 6D2 */ {SR, 0xFBAE},
279 };
280 
281 /*
282  * Flips the text buffer, according to max level, and
283  * all higher levels
284  *
285  * Input:
286  * from: text buffer, on which to apply flipping
287  * level: resolved levels buffer
288  * max: the maximum level found in this line (should be unsigned char)
289  * count: line size in bidi_char
290  */
flipThisRun(bidi_char * from,unsigned char * level,int max,int count)291 static void flipThisRun(
292     bidi_char *from, unsigned char *level, int max, int count)
293 {
294     int i, j, k, tlevel;
295     bidi_char temp;
296 
297     j = i = 0;
298     while (i<count && j<count) {
299 
300         /* find the start of the run of level=max */
301         tlevel = max;
302         i = j = findIndexOfRun(level, i, count, max);
303         /* find the end of the run */
304         while (i<count && tlevel <= level[i]) {
305             i++;
306         }
307         for (k = i - 1; k > j; k--, j++) {
308             temp = from[k];
309             from[k] = from[j];
310             from[j] = temp;
311         }
312     }
313 }
314 
/*
 * Scans level[start] .. level[count-1] for the first entry equal to
 * tlevel. Returns the index of that entry, or count if there is none.
 */
static int findIndexOfRun(
    unsigned char *level , int start, int count, int tlevel)
{
    int pos = start;

    while (pos < count && level[pos] != tlevel)
        pos++;

    /* also covers start > count, where nothing was examined */
    return (pos < count) ? pos : count;
}
329 
330 /*
331  * Returns the bidi character type of ch.
332  *
333  * The data table in this function is constructed from the Unicode
334  * Character Database, downloadable from unicode.org at the URL
335  *
336  *     http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
337  *
338  * by the following fragment of Perl:
339 
340 perl -ne 'split ";"; $num = hex $_[0]; $type = $_[4];' \
341       -e '$fl = ($_[1] =~ /First/ ? 1 : $_[1] =~ /Last/ ? 2 : 0);' \
342       -e 'if ($type eq $runtype and ($runend == $num-1 or ' \
343       -e '    ($fl==2 and $pfl==1))) {$runend = $num;} else { &reset; }' \
344       -e '$pfl=$fl; END { &reset }; sub reset {' \
345       -e 'printf"        {0x%04x, 0x%04x, %s},\n",$runstart,$runend,$runtype' \
346       -e '  if defined $runstart and $runtype ne "ON";' \
347       -e '$runstart=$runend=$num; $runtype=$type;}' \
348     UnicodeData.txt
349 
350  */
getType(int ch)351 static unsigned char getType(int ch)
352 {
353     static const struct {
354         int first, last, type;
355     } lookup[] = {
356         {0x0000, 0x0008, BN},
357         {0x0009, 0x0009, S},
358         {0x000a, 0x000a, B},
359         {0x000b, 0x000b, S},
360         {0x000c, 0x000c, WS},
361         {0x000d, 0x000d, B},
362         {0x000e, 0x001b, BN},
363         {0x001c, 0x001e, B},
364         {0x001f, 0x001f, S},
365         {0x0020, 0x0020, WS},
366         {0x0023, 0x0025, ET},
367         {0x002b, 0x002b, ES},
368         {0x002c, 0x002c, CS},
369         {0x002d, 0x002d, ES},
370         {0x002e, 0x002f, CS},
371         {0x0030, 0x0039, EN},
372         {0x003a, 0x003a, CS},
373         {0x0041, 0x005a, L},
374         {0x0061, 0x007a, L},
375         {0x007f, 0x0084, BN},
376         {0x0085, 0x0085, B},
377         {0x0086, 0x009f, BN},
378         {0x00a0, 0x00a0, CS},
379         {0x00a2, 0x00a5, ET},
380         {0x00aa, 0x00aa, L},
381         {0x00ad, 0x00ad, BN},
382         {0x00b0, 0x00b1, ET},
383         {0x00b2, 0x00b3, EN},
384         {0x00b5, 0x00b5, L},
385         {0x00b9, 0x00b9, EN},
386         {0x00ba, 0x00ba, L},
387         {0x00c0, 0x00d6, L},
388         {0x00d8, 0x00f6, L},
389         {0x00f8, 0x0236, L},
390         {0x0250, 0x02b8, L},
391         {0x02bb, 0x02c1, L},
392         {0x02d0, 0x02d1, L},
393         {0x02e0, 0x02e4, L},
394         {0x02ee, 0x02ee, L},
395         {0x0300, 0x0357, NSM},
396         {0x035d, 0x036f, NSM},
397         {0x037a, 0x037a, L},
398         {0x0386, 0x0386, L},
399         {0x0388, 0x038a, L},
400         {0x038c, 0x038c, L},
401         {0x038e, 0x03a1, L},
402         {0x03a3, 0x03ce, L},
403         {0x03d0, 0x03f5, L},
404         {0x03f7, 0x03fb, L},
405         {0x0400, 0x0482, L},
406         {0x0483, 0x0486, NSM},
407         {0x0488, 0x0489, NSM},
408         {0x048a, 0x04ce, L},
409         {0x04d0, 0x04f5, L},
410         {0x04f8, 0x04f9, L},
411         {0x0500, 0x050f, L},
412         {0x0531, 0x0556, L},
413         {0x0559, 0x055f, L},
414         {0x0561, 0x0587, L},
415         {0x0589, 0x0589, L},
416         {0x0591, 0x05a1, NSM},
417         {0x05a3, 0x05b9, NSM},
418         {0x05bb, 0x05bd, NSM},
419         {0x05be, 0x05be, R},
420         {0x05bf, 0x05bf, NSM},
421         {0x05c0, 0x05c0, R},
422         {0x05c1, 0x05c2, NSM},
423         {0x05c3, 0x05c3, R},
424         {0x05c4, 0x05c4, NSM},
425         {0x05d0, 0x05ea, R},
426         {0x05f0, 0x05f4, R},
427         {0x0600, 0x0603, AL},
428         {0x060c, 0x060c, CS},
429         {0x060d, 0x060d, AL},
430         {0x0610, 0x0615, NSM},
431         {0x061b, 0x061b, AL},
432         {0x061f, 0x061f, AL},
433         {0x0621, 0x063a, AL},
434         {0x0640, 0x064a, AL},
435         {0x064b, 0x0658, NSM},
436         {0x0660, 0x0669, AN},
437         {0x066a, 0x066a, ET},
438         {0x066b, 0x066c, AN},
439         {0x066d, 0x066f, AL},
440         {0x0670, 0x0670, NSM},
441         {0x0671, 0x06d5, AL},
442         {0x06d6, 0x06dc, NSM},
443         {0x06dd, 0x06dd, AL},
444         {0x06de, 0x06e4, NSM},
445         {0x06e5, 0x06e6, AL},
446         {0x06e7, 0x06e8, NSM},
447         {0x06ea, 0x06ed, NSM},
448         {0x06ee, 0x06ef, AL},
449         {0x06f0, 0x06f9, EN},
450         {0x06fa, 0x070d, AL},
451         {0x070f, 0x070f, BN},
452         {0x0710, 0x0710, AL},
453         {0x0711, 0x0711, NSM},
454         {0x0712, 0x072f, AL},
455         {0x0730, 0x074a, NSM},
456         {0x074d, 0x074f, AL},
457         {0x0780, 0x07a5, AL},
458         {0x07a6, 0x07b0, NSM},
459         {0x07b1, 0x07b1, AL},
460         {0x0901, 0x0902, NSM},
461         {0x0903, 0x0939, L},
462         {0x093c, 0x093c, NSM},
463         {0x093d, 0x0940, L},
464         {0x0941, 0x0948, NSM},
465         {0x0949, 0x094c, L},
466         {0x094d, 0x094d, NSM},
467         {0x0950, 0x0950, L},
468         {0x0951, 0x0954, NSM},
469         {0x0958, 0x0961, L},
470         {0x0962, 0x0963, NSM},
471         {0x0964, 0x0970, L},
472         {0x0981, 0x0981, NSM},
473         {0x0982, 0x0983, L},
474         {0x0985, 0x098c, L},
475         {0x098f, 0x0990, L},
476         {0x0993, 0x09a8, L},
477         {0x09aa, 0x09b0, L},
478         {0x09b2, 0x09b2, L},
479         {0x09b6, 0x09b9, L},
480         {0x09bc, 0x09bc, NSM},
481         {0x09bd, 0x09c0, L},
482         {0x09c1, 0x09c4, NSM},
483         {0x09c7, 0x09c8, L},
484         {0x09cb, 0x09cc, L},
485         {0x09cd, 0x09cd, NSM},
486         {0x09d7, 0x09d7, L},
487         {0x09dc, 0x09dd, L},
488         {0x09df, 0x09e1, L},
489         {0x09e2, 0x09e3, NSM},
490         {0x09e6, 0x09f1, L},
491         {0x09f2, 0x09f3, ET},
492         {0x09f4, 0x09fa, L},
493         {0x0a01, 0x0a02, NSM},
494         {0x0a03, 0x0a03, L},
495         {0x0a05, 0x0a0a, L},
496         {0x0a0f, 0x0a10, L},
497         {0x0a13, 0x0a28, L},
498         {0x0a2a, 0x0a30, L},
499         {0x0a32, 0x0a33, L},
500         {0x0a35, 0x0a36, L},
501         {0x0a38, 0x0a39, L},
502         {0x0a3c, 0x0a3c, NSM},
503         {0x0a3e, 0x0a40, L},
504         {0x0a41, 0x0a42, NSM},
505         {0x0a47, 0x0a48, NSM},
506         {0x0a4b, 0x0a4d, NSM},
507         {0x0a59, 0x0a5c, L},
508         {0x0a5e, 0x0a5e, L},
509         {0x0a66, 0x0a6f, L},
510         {0x0a70, 0x0a71, NSM},
511         {0x0a72, 0x0a74, L},
512         {0x0a81, 0x0a82, NSM},
513         {0x0a83, 0x0a83, L},
514         {0x0a85, 0x0a8d, L},
515         {0x0a8f, 0x0a91, L},
516         {0x0a93, 0x0aa8, L},
517         {0x0aaa, 0x0ab0, L},
518         {0x0ab2, 0x0ab3, L},
519         {0x0ab5, 0x0ab9, L},
520         {0x0abc, 0x0abc, NSM},
521         {0x0abd, 0x0ac0, L},
522         {0x0ac1, 0x0ac5, NSM},
523         {0x0ac7, 0x0ac8, NSM},
524         {0x0ac9, 0x0ac9, L},
525         {0x0acb, 0x0acc, L},
526         {0x0acd, 0x0acd, NSM},
527         {0x0ad0, 0x0ad0, L},
528         {0x0ae0, 0x0ae1, L},
529         {0x0ae2, 0x0ae3, NSM},
530         {0x0ae6, 0x0aef, L},
531         {0x0af1, 0x0af1, ET},
532         {0x0b01, 0x0b01, NSM},
533         {0x0b02, 0x0b03, L},
534         {0x0b05, 0x0b0c, L},
535         {0x0b0f, 0x0b10, L},
536         {0x0b13, 0x0b28, L},
537         {0x0b2a, 0x0b30, L},
538         {0x0b32, 0x0b33, L},
539         {0x0b35, 0x0b39, L},
540         {0x0b3c, 0x0b3c, NSM},
541         {0x0b3d, 0x0b3e, L},
542         {0x0b3f, 0x0b3f, NSM},
543         {0x0b40, 0x0b40, L},
544         {0x0b41, 0x0b43, NSM},
545         {0x0b47, 0x0b48, L},
546         {0x0b4b, 0x0b4c, L},
547         {0x0b4d, 0x0b4d, NSM},
548         {0x0b56, 0x0b56, NSM},
549         {0x0b57, 0x0b57, L},
550         {0x0b5c, 0x0b5d, L},
551         {0x0b5f, 0x0b61, L},
552         {0x0b66, 0x0b71, L},
553         {0x0b82, 0x0b82, NSM},
554         {0x0b83, 0x0b83, L},
555         {0x0b85, 0x0b8a, L},
556         {0x0b8e, 0x0b90, L},
557         {0x0b92, 0x0b95, L},
558         {0x0b99, 0x0b9a, L},
559         {0x0b9c, 0x0b9c, L},
560         {0x0b9e, 0x0b9f, L},
561         {0x0ba3, 0x0ba4, L},
562         {0x0ba8, 0x0baa, L},
563         {0x0bae, 0x0bb5, L},
564         {0x0bb7, 0x0bb9, L},
565         {0x0bbe, 0x0bbf, L},
566         {0x0bc0, 0x0bc0, NSM},
567         {0x0bc1, 0x0bc2, L},
568         {0x0bc6, 0x0bc8, L},
569         {0x0bca, 0x0bcc, L},
570         {0x0bcd, 0x0bcd, NSM},
571         {0x0bd7, 0x0bd7, L},
572         {0x0be7, 0x0bf2, L},
573         {0x0bf9, 0x0bf9, ET},
574         {0x0c01, 0x0c03, L},
575         {0x0c05, 0x0c0c, L},
576         {0x0c0e, 0x0c10, L},
577         {0x0c12, 0x0c28, L},
578         {0x0c2a, 0x0c33, L},
579         {0x0c35, 0x0c39, L},
580         {0x0c3e, 0x0c40, NSM},
581         {0x0c41, 0x0c44, L},
582         {0x0c46, 0x0c48, NSM},
583         {0x0c4a, 0x0c4d, NSM},
584         {0x0c55, 0x0c56, NSM},
585         {0x0c60, 0x0c61, L},
586         {0x0c66, 0x0c6f, L},
587         {0x0c82, 0x0c83, L},
588         {0x0c85, 0x0c8c, L},
589         {0x0c8e, 0x0c90, L},
590         {0x0c92, 0x0ca8, L},
591         {0x0caa, 0x0cb3, L},
592         {0x0cb5, 0x0cb9, L},
593         {0x0cbc, 0x0cbc, NSM},
594         {0x0cbd, 0x0cc4, L},
595         {0x0cc6, 0x0cc8, L},
596         {0x0cca, 0x0ccb, L},
597         {0x0ccc, 0x0ccd, NSM},
598         {0x0cd5, 0x0cd6, L},
599         {0x0cde, 0x0cde, L},
600         {0x0ce0, 0x0ce1, L},
601         {0x0ce6, 0x0cef, L},
602         {0x0d02, 0x0d03, L},
603         {0x0d05, 0x0d0c, L},
604         {0x0d0e, 0x0d10, L},
605         {0x0d12, 0x0d28, L},
606         {0x0d2a, 0x0d39, L},
607         {0x0d3e, 0x0d40, L},
608         {0x0d41, 0x0d43, NSM},
609         {0x0d46, 0x0d48, L},
610         {0x0d4a, 0x0d4c, L},
611         {0x0d4d, 0x0d4d, NSM},
612         {0x0d57, 0x0d57, L},
613         {0x0d60, 0x0d61, L},
614         {0x0d66, 0x0d6f, L},
615         {0x0d82, 0x0d83, L},
616         {0x0d85, 0x0d96, L},
617         {0x0d9a, 0x0db1, L},
618         {0x0db3, 0x0dbb, L},
619         {0x0dbd, 0x0dbd, L},
620         {0x0dc0, 0x0dc6, L},
621         {0x0dca, 0x0dca, NSM},
622         {0x0dcf, 0x0dd1, L},
623         {0x0dd2, 0x0dd4, NSM},
624         {0x0dd6, 0x0dd6, NSM},
625         {0x0dd8, 0x0ddf, L},
626         {0x0df2, 0x0df4, L},
627         {0x0e01, 0x0e30, L},
628         {0x0e31, 0x0e31, NSM},
629         {0x0e32, 0x0e33, L},
630         {0x0e34, 0x0e3a, NSM},
631         {0x0e3f, 0x0e3f, ET},
632         {0x0e40, 0x0e46, L},
633         {0x0e47, 0x0e4e, NSM},
634         {0x0e4f, 0x0e5b, L},
635         {0x0e81, 0x0e82, L},
636         {0x0e84, 0x0e84, L},
637         {0x0e87, 0x0e88, L},
638         {0x0e8a, 0x0e8a, L},
639         {0x0e8d, 0x0e8d, L},
640         {0x0e94, 0x0e97, L},
641         {0x0e99, 0x0e9f, L},
642         {0x0ea1, 0x0ea3, L},
643         {0x0ea5, 0x0ea5, L},
644         {0x0ea7, 0x0ea7, L},
645         {0x0eaa, 0x0eab, L},
646         {0x0ead, 0x0eb0, L},
647         {0x0eb1, 0x0eb1, NSM},
648         {0x0eb2, 0x0eb3, L},
649         {0x0eb4, 0x0eb9, NSM},
650         {0x0ebb, 0x0ebc, NSM},
651         {0x0ebd, 0x0ebd, L},
652         {0x0ec0, 0x0ec4, L},
653         {0x0ec6, 0x0ec6, L},
654         {0x0ec8, 0x0ecd, NSM},
655         {0x0ed0, 0x0ed9, L},
656         {0x0edc, 0x0edd, L},
657         {0x0f00, 0x0f17, L},
658         {0x0f18, 0x0f19, NSM},
659         {0x0f1a, 0x0f34, L},
660         {0x0f35, 0x0f35, NSM},
661         {0x0f36, 0x0f36, L},
662         {0x0f37, 0x0f37, NSM},
663         {0x0f38, 0x0f38, L},
664         {0x0f39, 0x0f39, NSM},
665         {0x0f3e, 0x0f47, L},
666         {0x0f49, 0x0f6a, L},
667         {0x0f71, 0x0f7e, NSM},
668         {0x0f7f, 0x0f7f, L},
669         {0x0f80, 0x0f84, NSM},
670         {0x0f85, 0x0f85, L},
671         {0x0f86, 0x0f87, NSM},
672         {0x0f88, 0x0f8b, L},
673         {0x0f90, 0x0f97, NSM},
674         {0x0f99, 0x0fbc, NSM},
675         {0x0fbe, 0x0fc5, L},
676         {0x0fc6, 0x0fc6, NSM},
677         {0x0fc7, 0x0fcc, L},
678         {0x0fcf, 0x0fcf, L},
679         {0x1000, 0x1021, L},
680         {0x1023, 0x1027, L},
681         {0x1029, 0x102a, L},
682         {0x102c, 0x102c, L},
683         {0x102d, 0x1030, NSM},
684         {0x1031, 0x1031, L},
685         {0x1032, 0x1032, NSM},
686         {0x1036, 0x1037, NSM},
687         {0x1038, 0x1038, L},
688         {0x1039, 0x1039, NSM},
689         {0x1040, 0x1057, L},
690         {0x1058, 0x1059, NSM},
691         {0x10a0, 0x10c5, L},
692         {0x10d0, 0x10f8, L},
693         {0x10fb, 0x10fb, L},
694         {0x1100, 0x1159, L},
695         {0x115f, 0x11a2, L},
696         {0x11a8, 0x11f9, L},
697         {0x1200, 0x1206, L},
698         {0x1208, 0x1246, L},
699         {0x1248, 0x1248, L},
700         {0x124a, 0x124d, L},
701         {0x1250, 0x1256, L},
702         {0x1258, 0x1258, L},
703         {0x125a, 0x125d, L},
704         {0x1260, 0x1286, L},
705         {0x1288, 0x1288, L},
706         {0x128a, 0x128d, L},
707         {0x1290, 0x12ae, L},
708         {0x12b0, 0x12b0, L},
709         {0x12b2, 0x12b5, L},
710         {0x12b8, 0x12be, L},
711         {0x12c0, 0x12c0, L},
712         {0x12c2, 0x12c5, L},
713         {0x12c8, 0x12ce, L},
714         {0x12d0, 0x12d6, L},
715         {0x12d8, 0x12ee, L},
716         {0x12f0, 0x130e, L},
717         {0x1310, 0x1310, L},
718         {0x1312, 0x1315, L},
719         {0x1318, 0x131e, L},
720         {0x1320, 0x1346, L},
721         {0x1348, 0x135a, L},
722         {0x1361, 0x137c, L},
723         {0x13a0, 0x13f4, L},
724         {0x1401, 0x1676, L},
725         {0x1680, 0x1680, WS},
726         {0x1681, 0x169a, L},
727         {0x16a0, 0x16f0, L},
728         {0x1700, 0x170c, L},
729         {0x170e, 0x1711, L},
730         {0x1712, 0x1714, NSM},
731         {0x1720, 0x1731, L},
732         {0x1732, 0x1734, NSM},
733         {0x1735, 0x1736, L},
734         {0x1740, 0x1751, L},
735         {0x1752, 0x1753, NSM},
736         {0x1760, 0x176c, L},
737         {0x176e, 0x1770, L},
738         {0x1772, 0x1773, NSM},
739         {0x1780, 0x17b6, L},
740         {0x17b7, 0x17bd, NSM},
741         {0x17be, 0x17c5, L},
742         {0x17c6, 0x17c6, NSM},
743         {0x17c7, 0x17c8, L},
744         {0x17c9, 0x17d3, NSM},
745         {0x17d4, 0x17da, L},
746         {0x17db, 0x17db, ET},
747         {0x17dc, 0x17dc, L},
748         {0x17dd, 0x17dd, NSM},
749         {0x17e0, 0x17e9, L},
750         {0x180b, 0x180d, NSM},
751         {0x180e, 0x180e, WS},
752         {0x1810, 0x1819, L},
753         {0x1820, 0x1877, L},
754         {0x1880, 0x18a8, L},
755         {0x18a9, 0x18a9, NSM},
756         {0x1900, 0x191c, L},
757         {0x1920, 0x1922, NSM},
758         {0x1923, 0x1926, L},
759         {0x1927, 0x192b, NSM},
760         {0x1930, 0x1931, L},
761         {0x1932, 0x1932, NSM},
762         {0x1933, 0x1938, L},
763         {0x1939, 0x193b, NSM},
764         {0x1946, 0x196d, L},
765         {0x1970, 0x1974, L},
766         {0x1d00, 0x1d6b, L},
767         {0x1e00, 0x1e9b, L},
768         {0x1ea0, 0x1ef9, L},
769         {0x1f00, 0x1f15, L},
770         {0x1f18, 0x1f1d, L},
771         {0x1f20, 0x1f45, L},
772         {0x1f48, 0x1f4d, L},
773         {0x1f50, 0x1f57, L},
774         {0x1f59, 0x1f59, L},
775         {0x1f5b, 0x1f5b, L},
776         {0x1f5d, 0x1f5d, L},
777         {0x1f5f, 0x1f7d, L},
778         {0x1f80, 0x1fb4, L},
779         {0x1fb6, 0x1fbc, L},
780         {0x1fbe, 0x1fbe, L},
781         {0x1fc2, 0x1fc4, L},
782         {0x1fc6, 0x1fcc, L},
783         {0x1fd0, 0x1fd3, L},
784         {0x1fd6, 0x1fdb, L},
785         {0x1fe0, 0x1fec, L},
786         {0x1ff2, 0x1ff4, L},
787         {0x1ff6, 0x1ffc, L},
788         {0x2000, 0x200a, WS},
789         {0x200b, 0x200d, BN},
790         {0x200e, 0x200e, L},
791         {0x200f, 0x200f, R},
792         {0x2028, 0x2028, WS},
793         {0x2029, 0x2029, B},
794         {0x202a, 0x202a, LRE},
795         {0x202b, 0x202b, RLE},
796         {0x202c, 0x202c, PDF},
797         {0x202d, 0x202d, LRO},
798         {0x202e, 0x202e, RLO},
799         {0x202f, 0x202f, WS},
800         {0x2030, 0x2034, ET},
801         {0x2044, 0x2044, CS},
802         {0x205f, 0x205f, WS},
803         {0x2060, 0x2063, BN},
804         {0x206a, 0x206f, BN},
805         {0x2070, 0x2070, EN},
806         {0x2071, 0x2071, L},
807         {0x2074, 0x2079, EN},
808         {0x207a, 0x207b, ET},
809         {0x207f, 0x207f, L},
810         {0x2080, 0x2089, EN},
811         {0x208a, 0x208b, ET},
812         {0x20a0, 0x20b1, ET},
813         {0x20d0, 0x20ea, NSM},
814         {0x2102, 0x2102, L},
815         {0x2107, 0x2107, L},
816         {0x210a, 0x2113, L},
817         {0x2115, 0x2115, L},
818         {0x2119, 0x211d, L},
819         {0x2124, 0x2124, L},
820         {0x2126, 0x2126, L},
821         {0x2128, 0x2128, L},
822         {0x212a, 0x212d, L},
823         {0x212e, 0x212e, ET},
824         {0x212f, 0x2131, L},
825         {0x2133, 0x2139, L},
826         {0x213d, 0x213f, L},
827         {0x2145, 0x2149, L},
828         {0x2160, 0x2183, L},
829         {0x2212, 0x2213, ET},
830         {0x2336, 0x237a, L},
831         {0x2395, 0x2395, L},
832         {0x2488, 0x249b, EN},
833         {0x249c, 0x24e9, L},
834         {0x2800, 0x28ff, L},
835         {0x3000, 0x3000, WS},
836         {0x3005, 0x3007, L},
837         {0x3021, 0x3029, L},
838         {0x302a, 0x302f, NSM},
839         {0x3031, 0x3035, L},
840         {0x3038, 0x303c, L},
841         {0x3041, 0x3096, L},
842         {0x3099, 0x309a, NSM},
843         {0x309d, 0x309f, L},
844         {0x30a1, 0x30fa, L},
845         {0x30fc, 0x30ff, L},
846         {0x3105, 0x312c, L},
847         {0x3131, 0x318e, L},
848         {0x3190, 0x31b7, L},
849         {0x31f0, 0x321c, L},
850         {0x3220, 0x3243, L},
851         {0x3260, 0x327b, L},
852         {0x327f, 0x32b0, L},
853         {0x32c0, 0x32cb, L},
854         {0x32d0, 0x32fe, L},
855         {0x3300, 0x3376, L},
856         {0x337b, 0x33dd, L},
857         {0x33e0, 0x33fe, L},
858         {0x3400, 0x4db5, L},
859         {0x4e00, 0x9fa5, L},
860         {0xa000, 0xa48c, L},
861         {0xac00, 0xd7a3, L},
862         {0xd800, 0xdff7, L},
863         {0xe000, 0xfa2d, L},
864         {0xfa30, 0xfa6a, L},
865         {0xfb00, 0xfb06, L},
866         {0xfb13, 0xfb17, L},
867         {0xfb1d, 0xfb1d, R},
868         {0xfb1e, 0xfb1e, NSM},
869         {0xfb1f, 0xfb28, R},
870         {0xfb29, 0xfb29, ET},
871         {0xfb2a, 0xfb36, R},
872         {0xfb38, 0xfb3c, R},
873         {0xfb3e, 0xfb3e, R},
874         {0xfb40, 0xfb41, R},
875         {0xfb43, 0xfb44, R},
876         {0xfb46, 0xfb4f, R},
877         {0xfb50, 0xfbb1, AL},
878         {0xfbd3, 0xfd3d, AL},
879         {0xfd50, 0xfd8f, AL},
880         {0xfd92, 0xfdc7, AL},
881         {0xfdf0, 0xfdfc, AL},
882         {0xfe00, 0xfe0f, NSM},
883         {0xfe20, 0xfe23, NSM},
884         {0xfe50, 0xfe50, CS},
885         {0xfe52, 0xfe52, CS},
886         {0xfe55, 0xfe55, CS},
887         {0xfe5f, 0xfe5f, ET},
888         {0xfe62, 0xfe63, ET},
889         {0xfe69, 0xfe6a, ET},
890         {0xfe70, 0xfe74, AL},
891         {0xfe76, 0xfefc, AL},
892         {0xfeff, 0xfeff, BN},
893         {0xff03, 0xff05, ET},
894         {0xff0b, 0xff0b, ET},
895         {0xff0c, 0xff0c, CS},
896         {0xff0d, 0xff0d, ET},
897         {0xff0e, 0xff0e, CS},
898         {0xff0f, 0xff0f, ES},
899         {0xff10, 0xff19, EN},
900         {0xff1a, 0xff1a, CS},
901         {0xff21, 0xff3a, L},
902         {0xff41, 0xff5a, L},
903         {0xff66, 0xffbe, L},
904         {0xffc2, 0xffc7, L},
905         {0xffca, 0xffcf, L},
906         {0xffd2, 0xffd7, L},
907         {0xffda, 0xffdc, L},
908         {0xffe0, 0xffe1, ET},
909         {0xffe5, 0xffe6, ET},
910         {0x10000, 0x1000b, L},
911         {0x1000d, 0x10026, L},
912         {0x10028, 0x1003a, L},
913         {0x1003c, 0x1003d, L},
914         {0x1003f, 0x1004d, L},
915         {0x10050, 0x1005d, L},
916         {0x10080, 0x100fa, L},
917         {0x10100, 0x10100, L},
918         {0x10102, 0x10102, L},
919         {0x10107, 0x10133, L},
920         {0x10137, 0x1013f, L},
921         {0x10300, 0x1031e, L},
922         {0x10320, 0x10323, L},
923         {0x10330, 0x1034a, L},
924         {0x10380, 0x1039d, L},
925         {0x1039f, 0x1039f, L},
926         {0x10400, 0x1049d, L},
927         {0x104a0, 0x104a9, L},
928         {0x10800, 0x10805, R},
929         {0x10808, 0x10808, R},
930         {0x1080a, 0x10835, R},
931         {0x10837, 0x10838, R},
932         {0x1083c, 0x1083c, R},
933         {0x1083f, 0x1083f, R},
934         {0x1d000, 0x1d0f5, L},
935         {0x1d100, 0x1d126, L},
936         {0x1d12a, 0x1d166, L},
937         {0x1d167, 0x1d169, NSM},
938         {0x1d16a, 0x1d172, L},
939         {0x1d173, 0x1d17a, BN},
940         {0x1d17b, 0x1d182, NSM},
941         {0x1d183, 0x1d184, L},
942         {0x1d185, 0x1d18b, NSM},
943         {0x1d18c, 0x1d1a9, L},
944         {0x1d1aa, 0x1d1ad, NSM},
945         {0x1d1ae, 0x1d1dd, L},
946         {0x1d400, 0x1d454, L},
947         {0x1d456, 0x1d49c, L},
948         {0x1d49e, 0x1d49f, L},
949         {0x1d4a2, 0x1d4a2, L},
950         {0x1d4a5, 0x1d4a6, L},
951         {0x1d4a9, 0x1d4ac, L},
952         {0x1d4ae, 0x1d4b9, L},
953         {0x1d4bb, 0x1d4bb, L},
954         {0x1d4bd, 0x1d4c3, L},
955         {0x1d4c5, 0x1d505, L},
956         {0x1d507, 0x1d50a, L},
957         {0x1d50d, 0x1d514, L},
958         {0x1d516, 0x1d51c, L},
959         {0x1d51e, 0x1d539, L},
960         {0x1d53b, 0x1d53e, L},
961         {0x1d540, 0x1d544, L},
962         {0x1d546, 0x1d546, L},
963         {0x1d54a, 0x1d550, L},
964         {0x1d552, 0x1d6a3, L},
965         {0x1d6a8, 0x1d7c9, L},
966         {0x1d7ce, 0x1d7ff, EN},
967         {0x20000, 0x2a6d6, L},
968         {0x2f800, 0x2fa1d, L},
969         {0xe0001, 0xe0001, BN},
970         {0xe0020, 0xe007f, BN},
971         {0xe0100, 0xe01ef, NSM},
972         {0xf0000, 0xffffd, L},
973         {0x100000, 0x10fffd, L}
974     };
975 
976     int i, j, k;
977 
978     i = -1;
979     j = lenof(lookup);
980 
981     while (j - i > 1) {
982         k = (i + j) / 2;
983         if (ch < lookup[k].first)
984             j = k;
985         else if (ch > lookup[k].last)
986             i = k;
987         else
988             return lookup[k].type;
989     }
990 
991     /*
992      * If we reach here, the character was not in any of the
993      * intervals listed in the lookup table. This means we return
994      * ON (`Other Neutrals'). This is the appropriate code for any
995      * character genuinely not listed in the Unicode table, and
996      * also the table above has deliberately left out any
997      * characters _explicitly_ listed as ON (to save space!).
998      */
999     return ON;
1000 }
1001 
1002 /*
1003  * Function exported to front ends to allow them to identify
1004  * bidi-active characters (in case, for example, the platform's
1005  * text display function can't conveniently be prevented from doing
1006  * its own bidi and so special treatment is required for characters
1007  * that would cause the bidi algorithm to activate).
1008  *
1009  * This function is passed a single Unicode code point, and returns
1010  * nonzero if the presence of this code point can possibly cause
1011  * the bidi algorithm to do any reordering. Thus, any string
1012  * composed entirely of characters for which is_rtl() returns zero
1013  * should be safe to pass to a bidi-active platform display
1014  * function without fear.
1015  *
1016  * (is_rtl() must therefore also return true for any character
1017  * which would be affected by Arabic shaping, but this isn't
1018  * important because all such characters are right-to-left so it
1019  * would have flagged them anyway.)
1020  */
is_rtl(int c)1021 bool is_rtl(int c)
1022 {
1023     /*
1024      * After careful reading of the Unicode bidi algorithm (URL as
1025      * given at the top of this file) I believe that the only
1026      * character classes which can possibly cause trouble are R,
1027      * AL, RLE and RLO. I think that any string containing no
1028      * character in any of those classes will be displayed
1029      * uniformly left-to-right by the Unicode bidi algorithm.
1030      */
1031     const int mask = (1<<R) | (1<<AL) | (1<<RLE) | (1<<RLO);
1032 
1033     return mask & (1 << (getType(c)));
1034 }
1035 
1036 /*
1037  * The most significant 2 bits of each level are used to store
1038  * Override status of each character
1039  * This function sets the override bits of level according
1040  * to the value in override, and reurns the new byte.
1041  */
setOverrideBits(unsigned char level,unsigned char override)1042 static unsigned char setOverrideBits(
1043     unsigned char level, unsigned char override)
1044 {
1045     if (override == ON)
1046         return level;
1047     else if (override == R)
1048         return level | OISR;
1049     else if (override == L)
1050         return level | OISL;
1051     return level;
1052 }
1053 
/*
 * Locate the run of identical values in `level' that ends just
 * before index `from', and return the value stored immediately
 * _before_ that run. Used to process U+202C POP DIRECTIONAL
 * FORMATTING.
 *
 * level: array of level bytes; only indices below `from' are read.
 * from: index one past the end of the run to inspect.
 *
 * Returns the level preceding the run, or -1 if `from' is not
 * positive or the run extends all the way back to index 0.
 */
static int getPreviousLevel(unsigned char *level, int from)
{
    int idx;
    unsigned char runValue;

    if (from <= 0)
        return -1;

    idx = from - 1;
    runValue = level[idx];

    /* walk backwards until a value different from the run shows up */
    for (; idx >= 0; idx--) {
        if (level[idx] != runValue)
            return level[idx];
    }

    return -1;
}
1074 
1075 /* The Main shaping function, and the only one to be used
1076  * by the outside world.
1077  *
1078  * line: buffer to apply shaping to. this must be passed by doBidi() first
1079  * to: output buffer for the shaped data
1080  * count: number of characters in line
1081  */
do_shape(bidi_char * line,bidi_char * to,int count)1082 int do_shape(bidi_char *line, bidi_char *to, int count)
1083 {
1084     int i, tempShape;
1085     bool ligFlag = false;
1086 
1087     for (i=0; i<count; i++) {
1088         to[i] = line[i];
1089         tempShape = STYPE(line[i]);
1090         switch (tempShape) {
1091           case SC:
1092             break;
1093 
1094           case SU:
1095             break;
1096 
1097           case SR:
1098             tempShape = (i+1 < count ? STYPE(line[i+1]) : SU);
1099             if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1100                 to[i] = SFINAL((SISOLATED(line[i])));
1101             else
1102                 to[i] = SISOLATED(line[i]);
1103             break;
1104 
1105 
1106           case SD:
1107             /* Make Ligatures */
1108             tempShape = (i+1 < count ? STYPE(line[i+1]) : SU);
1109             if (line[i] == 0x644) {
1110                 if (i > 0) switch (line[i-1]) {
1111                   case 0x622:
1112                     ligFlag = true;
1113                     if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1114                         to[i] = 0xFEF6;
1115                     else
1116                         to[i] = 0xFEF5;
1117                     break;
1118                   case 0x623:
1119                     ligFlag = true;
1120                     if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1121                         to[i] = 0xFEF8;
1122                     else
1123                         to[i] = 0xFEF7;
1124                     break;
1125                   case 0x625:
1126                     ligFlag = true;
1127                     if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1128                         to[i] = 0xFEFA;
1129                     else
1130                         to[i] = 0xFEF9;
1131                     break;
1132                   case 0x627:
1133                     ligFlag = true;
1134                     if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC))
1135                         to[i] = 0xFEFC;
1136                     else
1137                         to[i] = 0xFEFB;
1138                     break;
1139                 }
1140                 if (ligFlag) {
1141                     to[i-1] = 0x20;
1142                     ligFlag = false;
1143                     break;
1144                 }
1145             }
1146 
1147             if ((tempShape == SL) || (tempShape == SD) || (tempShape == SC)) {
1148                 tempShape = (i > 0 ? STYPE(line[i-1]) : SU);
1149                 if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1150                     to[i] = SMEDIAL((SISOLATED(line[i])));
1151                 else
1152                     to[i] = SFINAL((SISOLATED(line[i])));
1153                 break;
1154             }
1155 
1156             tempShape = (i > 0 ? STYPE(line[i-1]) : SU);
1157             if ((tempShape == SR) || (tempShape == SD) || (tempShape == SC))
1158                 to[i] = SINITIAL((SISOLATED(line[i])));
1159             else
1160                 to[i] = SISOLATED(line[i]);
1161             break;
1162 
1163 
1164         }
1165     }
1166     return 1;
1167 }
1168 
1169 /*
1170  * The Main Bidi Function, and the only function that should
1171  * be used by the outside world.
1172  *
1173  * line: a buffer of size count containing text to apply
1174  * the Bidirectional algorithm to.
1175  */
1176 
doBidi(bidi_char * line,int count,bool applyShape,bool unused2)1177 int doBidi(bidi_char *line, int count, bool applyShape, bool unused2)
1178 {
1179     unsigned char* types;
1180     unsigned char* levels;
1181     unsigned char paragraphLevel;
1182     unsigned char currentEmbedding;
1183     unsigned char currentOverride;
1184     unsigned char tempType;
1185     int i, j;
1186     bool yes, bover;
1187     bidi_char* shapeTo;
1188 
1189     /* Check the presence of R or AL types as optimization */
1190     yes = false;
1191     for (i=0; i<count; i++) {
1192         int type = getType(line[i]);
1193         if (type == R || type == AL) {
1194             yes = true;
1195             break;
1196         }
1197     }
1198     if (!yes)
1199         return L;
1200 
1201     /* Initialize types, levels */
1202     types = (unsigned char*)calloc(count, sizeof(unsigned char));
1203     levels = (unsigned char*)calloc(count, sizeof(unsigned char));
1204 
1205     if(applyShape)
1206     {
1207       shapeTo = (bidi_char*)calloc(count, sizeof(bidi_char));
1208       if (shapeTo == NULL)
1209       {
1210         exit(-1);
1211       }
1212     }
1213 
1214 
1215     /* Rule (P1)  NOT IMPLEMENTED
1216      * P1. Split the text into separate paragraphs. A paragraph separator is
1217      * kept with the previous paragraph. Within each paragraph, apply all the
1218      * other rules of this algorithm.
1219      */
1220 
1221     /* Rule (P2), (P3)
1222      * P2. In each paragraph, find the first character of type L, AL, or R.
1223      * P3. If a character is found in P2 and it is of type AL or R, then set
1224      * the paragraph embedding level to one; otherwise, set it to zero.
1225      */
1226     paragraphLevel = 0;
1227     for (i=0; i<count ; i++) {
1228         int type = getType(line[i]);
1229         if (type == R || type == AL) {
1230             paragraphLevel = 1;
1231             break;
1232         } else if (type == L)
1233             break;
1234     }
1235 
1236     /* Rule (X1)
1237      * X1. Begin by setting the current embedding level to the paragraph
1238      * embedding level. Set the directional override status to neutral.
1239      */
1240     currentEmbedding = paragraphLevel;
1241     currentOverride = ON;
1242 
1243     /* Rule (X2), (X3), (X4), (X5), (X6), (X7), (X8)
1244      * X2. With each RLE, compute the least greater odd embedding level.
1245      * X3. With each LRE, compute the least greater even embedding level.
1246      * X4. With each RLO, compute the least greater odd embedding level.
1247      * X5. With each LRO, compute the least greater even embedding level.
1248      * X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
1249      *          a. Set the level of the current character to the current
1250      *              embedding level.
1251      *          b.  Whenever the directional override status is not neutral,
1252      *               reset the current character type to the directional
1253      *               override status.
1254      * X7. With each PDF, determine the matching embedding or override code.
1255      * If there was a valid matching code, restore (pop) the last
1256      * remembered (pushed) embedding level and directional override.
1257      * X8. All explicit directional embeddings and overrides are completely
1258      * terminated at the end of each paragraph. Paragraph separators are not
1259      * included in the embedding. (Useless here) NOT IMPLEMENTED
1260      */
1261     bover = false;
1262     for (i=0; i<count; i++) {
1263         tempType = getType(line[i]);
1264         switch (tempType) {
1265           case RLE:
1266             currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1267             levels[i] = setOverrideBits(levels[i], currentOverride);
1268             currentOverride = ON;
1269             break;
1270 
1271           case LRE:
1272             currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1273             levels[i] = setOverrideBits(levels[i], currentOverride);
1274             currentOverride = ON;
1275             break;
1276 
1277           case RLO:
1278             currentEmbedding = levels[i] = leastGreaterOdd(currentEmbedding);
1279             tempType = currentOverride = R;
1280             bover = true;
1281             break;
1282 
1283           case LRO:
1284             currentEmbedding = levels[i] = leastGreaterEven(currentEmbedding);
1285             tempType = currentOverride = L;
1286             bover = true;
1287             break;
1288 
1289           case PDF: {
1290             int prevlevel = getPreviousLevel(levels, i);
1291 
1292             if (prevlevel == -1) {
1293               currentEmbedding = paragraphLevel;
1294               currentOverride = ON;
1295             } else {
1296               currentOverride = currentEmbedding & OMASK;
1297               currentEmbedding = currentEmbedding & ~OMASK;
1298             }
1299             levels[i] = currentEmbedding;
1300             break;
1301           }
1302 
1303             /* Whitespace is treated as neutral for now */
1304           case WS:
1305           case S:
1306             levels[i] = currentEmbedding;
1307             tempType = ON;
1308             if (currentOverride != ON)
1309                 tempType = currentOverride;
1310             break;
1311 
1312           default:
1313             levels[i] = currentEmbedding;
1314             if (currentOverride != ON)
1315                 tempType = currentOverride;
1316             break;
1317 
1318         }
1319         types[i] = tempType;
1320     }
1321     /* this clears out all overrides, so we can use levels safely... */
1322     /* checks bover first */
1323     if (bover)
1324         for (i=0; i<count; i++)
1325             levels[i] = levels[i] & LMASK;
1326 
1327     /* Rule (X9)
1328      * X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
1329      * Here, they're converted to BN.
1330      */
1331     for (i=0; i<count; i++) {
1332         switch (types[i]) {
1333           case RLE:
1334           case LRE:
1335           case RLO:
1336           case LRO:
1337           case PDF:
1338             types[i] = BN;
1339             break;
1340         }
1341     }
1342 
1343     /* Rule (W1)
1344      * W1. Examine each non-spacing mark (NSM) in the level run, and change
1345      * the type of the NSM to the type of the previous character. If the NSM
1346      * is at the start of the level run, it will get the type of sor.
1347      */
1348     if (types[0] == NSM)
1349         types[0] = paragraphLevel;
1350 
1351     for (i=1; i<count; i++) {
1352         if (types[i] == NSM)
1353             types[i] = types[i-1];
1354         /* Is this a safe assumption?
1355          * I assumed the previous, IS a character.
1356          */
1357     }
1358 
1359     /* Rule (W2)
1360      * W2. Search backwards from each instance of a European number until the
1361      * first strong type (R, L, AL, or sor) is found.  If an AL is found,
1362      * change the type of the European number to Arabic number.
1363      */
1364     for (i=0; i<count; i++) {
1365         if (types[i] == EN) {
1366             j=i;
1367             while (j >= 0) {
1368                 if (types[j] == AL) {
1369                     types[i] = AN;
1370                     break;
1371                 } else if (types[j] == R || types[j] == L) {
1372                     break;
1373                 }
1374                 j--;
1375             }
1376         }
1377     }
1378 
1379     /* Rule (W3)
1380      * W3. Change all ALs to R.
1381      *
1382      * Optimization: on Rule Xn, we might set a flag on AL type
1383      * to prevent this loop in L R lines only...
1384      */
1385     for (i=0; i<count; i++) {
1386         if (types[i] == AL)
1387             types[i] = R;
1388     }
1389 
1390     /* Rule (W4)
1391      * W4. A single European separator between two European numbers changes
1392      * to a European number. A single common separator between two numbers
1393      * of the same type changes to that type.
1394      */
1395     for (i=1; i<(count-1); i++) {
1396         if (types[i] == ES) {
1397             if (types[i-1] == EN && types[i+1] == EN)
1398                 types[i] = EN;
1399         } else if (types[i] == CS) {
1400             if (types[i-1] == EN && types[i+1] == EN)
1401                 types[i] = EN;
1402             else if (types[i-1] == AN && types[i+1] == AN)
1403                 types[i] = AN;
1404         }
1405     }
1406 
1407     /* Rule (W5)
1408      * W5. A sequence of European terminators adjacent to European numbers
1409      * changes to all European numbers.
1410      *
1411      * Optimization: lots here... else ifs need rearrangement
1412      */
1413     for (i=0; i<count; i++) {
1414         if (types[i] == ET) {
1415             if (i > 0 && types[i-1] == EN) {
1416                 types[i] = EN;
1417                 continue;
1418             } else if (i < count-1 && types[i+1] == EN) {
1419                 types[i] = EN;
1420                 continue;
1421             } else if (i < count-1 && types[i+1] == ET) {
1422                 j=i;
1423                 while (j < count-1 && types[j] == ET) {
1424                     j++;
1425                 }
1426                 if (types[j] == EN)
1427                     types[i] = EN;
1428             }
1429         }
1430     }
1431 
1432     /* Rule (W6)
1433      * W6. Otherwise, separators and terminators change to Other Neutral:
1434      */
1435     for (i=0; i<count; i++) {
1436         switch (types[i]) {
1437           case ES:
1438           case ET:
1439           case CS:
1440             types[i] = ON;
1441             break;
1442         }
1443     }
1444 
1445     /* Rule (W7)
1446      * W7. Search backwards from each instance of a European number until
1447      * the first strong type (R, L, or sor) is found. If an L is found,
1448      * then change the type of the European number to L.
1449      */
1450     for (i=0; i<count; i++) {
1451         if (types[i] == EN) {
1452             j=i;
1453             while (j >= 0) {
1454                 if (types[j] == L) {
1455                     types[i] = L;
1456                     break;
1457                 } else if (types[j] == R || types[j] == AL) {
1458                     break;
1459                 }
1460                 j--;
1461             }
1462         }
1463     }
1464 
1465     /* Rule (N1)
1466      * N1. A sequence of neutrals takes the direction of the surrounding
1467      * strong text if the text on both sides has the same direction. European
1468      * and Arabic numbers are treated as though they were R.
1469      */
1470     if (count >= 2 && types[0] == ON) {
1471         if ((types[1] == R) || (types[1] == EN) || (types[1] == AN))
1472             types[0] = R;
1473         else if (types[1] == L)
1474             types[0] = L;
1475     }
1476     for (i=1; i<(count-1); i++) {
1477         if (types[i] == ON) {
1478             if (types[i-1] == L) {
1479                 j=i;
1480                 while (j<(count-1) && types[j] == ON) {
1481                     j++;
1482                 }
1483                 if (types[j] == L) {
1484                     while (i<j) {
1485                         types[i] = L;
1486                         i++;
1487                     }
1488                 }
1489 
1490             } else if ((types[i-1] == R)  ||
1491                        (types[i-1] == EN) ||
1492                        (types[i-1] == AN)) {
1493                 j=i;
1494                 while (j<(count-1) && types[j] == ON) {
1495                     j++;
1496                 }
1497                 if ((types[j] == R)  ||
1498                     (types[j] == EN) ||
1499                     (types[j] == AN)) {
1500                     while (i<j) {
1501                         types[i] = R;
1502                         i++;
1503                     }
1504                 }
1505             }
1506         }
1507     }
1508     if (count >= 2 && types[count-1] == ON) {
1509         if (types[count-2] == R || types[count-2] == EN || types[count-2] == AN)
1510             types[count-1] = R;
1511         else if (types[count-2] == L)
1512             types[count-1] = L;
1513     }
1514 
1515     /* Rule (N2)
1516      * N2. Any remaining neutrals take the embedding direction.
1517      */
1518     for (i=0; i<count; i++) {
1519         if (types[i] == ON) {
1520             if ((levels[i] % 2) == 0)
1521                 types[i] = L;
1522             else
1523                 types[i] = R;
1524         }
1525     }
1526 
1527     /* Rule (I1)
1528      * I1. For all characters with an even (left-to-right) embedding
1529      * direction, those of type R go up one level and those of type AN or
1530      * EN go up two levels.
1531      */
1532     for (i=0; i<count; i++) {
1533         if ((levels[i] % 2) == 0) {
1534             if (types[i] == R)
1535                 levels[i] += 1;
1536             else if (types[i] == AN || types[i] == EN)
1537                 levels[i] += 2;
1538         }
1539     }
1540 
1541     /* Rule (I2)
1542      * I2. For all characters with an odd (right-to-left) embedding direction,
1543      * those of type L, EN or AN go up one level.
1544      */
1545     for (i=0; i<count; i++) {
1546         if ((levels[i] % 2) == 1) {
1547             if (types[i] == L || types[i] == EN || types[i] == AN)
1548                 levels[i] += 1;
1549         }
1550     }
1551 
1552     /* Rule (L1)
1553      * L1. On each line, reset the embedding level of the following characters
1554      * to the paragraph embedding level:
1555      *          (1)segment separators, (2)paragraph separators,
1556      *           (3)any sequence of whitespace characters preceding
1557      *           a segment separator or paragraph separator,
1558      *           (4)and any sequence of white space characters
1559      *           at the end of the line.
1560      * The types of characters used here are the original types, not those
1561      * modified by the previous phase.
1562      */
1563     j=count-1;
1564     while (j>0 && (getType(line[j]) == WS)) {
1565         j--;
1566     }
1567     if (j < (count-1)) {
1568         for (j++; j<count; j++)
1569             levels[j] = paragraphLevel;
1570     }
1571     for (i=0; i<count; i++) {
1572         tempType = getType(line[i]);
1573         if (tempType == WS) {
1574             j=i;
1575             while (j<count && (getType(line[j]) == WS)) {
1576                 j++;
1577             }
1578             if (j==count || getType(line[j]) == B ||
1579                 getType(line[j]) == S) {
1580                 for (j--; j>=i ; j--) {
1581                     levels[j] = paragraphLevel;
1582                 }
1583             }
1584         } else if (tempType == B || tempType == S) {
1585             levels[i] = paragraphLevel;
1586         }
1587     }
1588 
1589     /* Rule (L4) NOT IMPLEMENTED
1590      * L4. A character that possesses the mirrored property as specified by
1591      * Section 4.7, Mirrored, must be depicted by a mirrored glyph if the
1592      * resolved directionality of that character is R.
1593      */
1594     /* Note: this is implemented before L2 for efficiency */
1595     for (i=0; i<count; i++)
1596         if ((levels[i] % 2) == 1)
1597             doMirror(&line[i]);
1598 
1599     /* Rule (L3) NOT IMPLEMENTED
1600      * L3. Combining marks applied to a right-to-left base character will at
1601      * this point precede their base character. If the rendering engine
1602      * expects them to follow the base characters in the final display
1603      * process, then the ordering of the marks and the base character must
1604      * be reversed.
1605      */
1606 
1607 
1608     /* Rule (L2)
1609      * L2. From the highest level found in the text to the lowest odd level on
1610      * each line, including intermediate levels not actually present in the
1611      * text, reverse any contiguous sequence of characters that are at that
1612      * level or higher
1613      */
1614     /* we flip the character string and leave the level array */
1615     i=0;
1616     tempType = levels[0];
1617     while (i < count) {
1618         if (levels[i] > tempType)
1619             tempType = levels[i];
1620         i++;
1621     }
1622     /* maximum level in tempType. */
1623     while (tempType > 0) {     /* loop from highest level to the least odd, */
1624         /* which i assume is 1 */
1625         flipThisRun(line, levels, tempType, count);
1626         tempType--;
1627     }
1628 
1629     /* Shaping
1630      */
1631 
1632     if(applyShape)
1633     {
1634       do_shape(line, shapeTo, count);
1635 
1636       for(i=0; i<count; i++)
1637       {
1638         line[i] = shapeTo[i];
1639       }
1640       free(shapeTo);
1641     }
1642 
1643     free(types);
1644     free(levels);
1645     return R;
1646 }
1647 
1648 
1649 /*
1650  * Bad, Horrible function
1651  * takes a pointer to a character that is checked for
1652  * having a mirror glyph.
1653  */
doMirror(bidi_char * ch)1654 static void doMirror(bidi_char *ch)
1655 {
1656     if ((*ch & 0xFF00) == 0) {
1657         switch (*ch) {
1658           case 0x0028: *ch = 0x0029; break;
1659           case 0x0029: *ch = 0x0028; break;
1660           case 0x003C: *ch = 0x003E; break;
1661           case 0x003E: *ch = 0x003C; break;
1662           case 0x005B: *ch = 0x005D; break;
1663           case 0x005D: *ch = 0x005B; break;
1664           case 0x007B: *ch = 0x007D; break;
1665           case 0x007D: *ch = 0x007B; break;
1666           case 0x00AB: *ch = 0x00BB; break;
1667           case 0x00BB: *ch = 0x00AB; break;
1668         }
1669     } else if ((*ch & 0xFF00) == 0x2000) {
1670         switch (*ch) {
1671           case 0x2039: *ch = 0x203A; break;
1672           case 0x203A: *ch = 0x2039; break;
1673           case 0x2045: *ch = 0x2046; break;
1674           case 0x2046: *ch = 0x2045; break;
1675           case 0x207D: *ch = 0x207E; break;
1676           case 0x207E: *ch = 0x207D; break;
1677           case 0x208D: *ch = 0x208E; break;
1678           case 0x208E: *ch = 0x208D; break;
1679         }
1680     } else if ((*ch & 0xFF00) == 0x2200) {
1681         switch (*ch) {
1682           case 0x2208: *ch = 0x220B; break;
1683           case 0x2209: *ch = 0x220C; break;
1684           case 0x220A: *ch = 0x220D; break;
1685           case 0x220B: *ch = 0x2208; break;
1686           case 0x220C: *ch = 0x2209; break;
1687           case 0x220D: *ch = 0x220A; break;
1688           case 0x2215: *ch = 0x29F5; break;
1689           case 0x223C: *ch = 0x223D; break;
1690           case 0x223D: *ch = 0x223C; break;
1691           case 0x2243: *ch = 0x22CD; break;
1692           case 0x2252: *ch = 0x2253; break;
1693           case 0x2253: *ch = 0x2252; break;
1694           case 0x2254: *ch = 0x2255; break;
1695           case 0x2255: *ch = 0x2254; break;
1696           case 0x2264: *ch = 0x2265; break;
1697           case 0x2265: *ch = 0x2264; break;
1698           case 0x2266: *ch = 0x2267; break;
1699           case 0x2267: *ch = 0x2266; break;
1700           case 0x2268: *ch = 0x2269; break;
1701           case 0x2269: *ch = 0x2268; break;
1702           case 0x226A: *ch = 0x226B; break;
1703           case 0x226B: *ch = 0x226A; break;
1704           case 0x226E: *ch = 0x226F; break;
1705           case 0x226F: *ch = 0x226E; break;
1706           case 0x2270: *ch = 0x2271; break;
1707           case 0x2271: *ch = 0x2270; break;
1708           case 0x2272: *ch = 0x2273; break;
1709           case 0x2273: *ch = 0x2272; break;
1710           case 0x2274: *ch = 0x2275; break;
1711           case 0x2275: *ch = 0x2274; break;
1712           case 0x2276: *ch = 0x2277; break;
1713           case 0x2277: *ch = 0x2276; break;
1714           case 0x2278: *ch = 0x2279; break;
1715           case 0x2279: *ch = 0x2278; break;
1716           case 0x227A: *ch = 0x227B; break;
1717           case 0x227B: *ch = 0x227A; break;
1718           case 0x227C: *ch = 0x227D; break;
1719           case 0x227D: *ch = 0x227C; break;
1720           case 0x227E: *ch = 0x227F; break;
1721           case 0x227F: *ch = 0x227E; break;
1722           case 0x2280: *ch = 0x2281; break;
1723           case 0x2281: *ch = 0x2280; break;
1724           case 0x2282: *ch = 0x2283; break;
1725           case 0x2283: *ch = 0x2282; break;
1726           case 0x2284: *ch = 0x2285; break;
1727           case 0x2285: *ch = 0x2284; break;
1728           case 0x2286: *ch = 0x2287; break;
1729           case 0x2287: *ch = 0x2286; break;
1730           case 0x2288: *ch = 0x2289; break;
1731           case 0x2289: *ch = 0x2288; break;
1732           case 0x228A: *ch = 0x228B; break;
1733           case 0x228B: *ch = 0x228A; break;
1734           case 0x228F: *ch = 0x2290; break;
1735           case 0x2290: *ch = 0x228F; break;
1736           case 0x2291: *ch = 0x2292; break;
1737           case 0x2292: *ch = 0x2291; break;
1738           case 0x2298: *ch = 0x29B8; break;
1739           case 0x22A2: *ch = 0x22A3; break;
1740           case 0x22A3: *ch = 0x22A2; break;
1741           case 0x22A6: *ch = 0x2ADE; break;
1742           case 0x22A8: *ch = 0x2AE4; break;
1743           case 0x22A9: *ch = 0x2AE3; break;
1744           case 0x22AB: *ch = 0x2AE5; break;
1745           case 0x22B0: *ch = 0x22B1; break;
1746           case 0x22B1: *ch = 0x22B0; break;
1747           case 0x22B2: *ch = 0x22B3; break;
1748           case 0x22B3: *ch = 0x22B2; break;
1749           case 0x22B4: *ch = 0x22B5; break;
1750           case 0x22B5: *ch = 0x22B4; break;
1751           case 0x22B6: *ch = 0x22B7; break;
1752           case 0x22B7: *ch = 0x22B6; break;
1753           case 0x22C9: *ch = 0x22CA; break;
1754           case 0x22CA: *ch = 0x22C9; break;
1755           case 0x22CB: *ch = 0x22CC; break;
1756           case 0x22CC: *ch = 0x22CB; break;
1757           case 0x22CD: *ch = 0x2243; break;
1758           case 0x22D0: *ch = 0x22D1; break;
1759           case 0x22D1: *ch = 0x22D0; break;
1760           case 0x22D6: *ch = 0x22D7; break;
1761           case 0x22D7: *ch = 0x22D6; break;
1762           case 0x22D8: *ch = 0x22D9; break;
1763           case 0x22D9: *ch = 0x22D8; break;
1764           case 0x22DA: *ch = 0x22DB; break;
1765           case 0x22DB: *ch = 0x22DA; break;
1766           case 0x22DC: *ch = 0x22DD; break;
1767           case 0x22DD: *ch = 0x22DC; break;
1768           case 0x22DE: *ch = 0x22DF; break;
1769           case 0x22DF: *ch = 0x22DE; break;
1770           case 0x22E0: *ch = 0x22E1; break;
1771           case 0x22E1: *ch = 0x22E0; break;
1772           case 0x22E2: *ch = 0x22E3; break;
1773           case 0x22E3: *ch = 0x22E2; break;
1774           case 0x22E4: *ch = 0x22E5; break;
1775           case 0x22E5: *ch = 0x22E4; break;
1776           case 0x22E6: *ch = 0x22E7; break;
1777           case 0x22E7: *ch = 0x22E6; break;
1778           case 0x22E8: *ch = 0x22E9; break;
1779           case 0x22E9: *ch = 0x22E8; break;
1780           case 0x22EA: *ch = 0x22EB; break;
1781           case 0x22EB: *ch = 0x22EA; break;
1782           case 0x22EC: *ch = 0x22ED; break;
1783           case 0x22ED: *ch = 0x22EC; break;
1784           case 0x22F0: *ch = 0x22F1; break;
1785           case 0x22F1: *ch = 0x22F0; break;
1786           case 0x22F2: *ch = 0x22FA; break;
1787           case 0x22F3: *ch = 0x22FB; break;
1788           case 0x22F4: *ch = 0x22FC; break;
1789           case 0x22F6: *ch = 0x22FD; break;
1790           case 0x22F7: *ch = 0x22FE; break;
1791           case 0x22FA: *ch = 0x22F2; break;
1792           case 0x22FB: *ch = 0x22F3; break;
1793           case 0x22FC: *ch = 0x22F4; break;
1794           case 0x22FD: *ch = 0x22F6; break;
1795           case 0x22FE: *ch = 0x22F7; break;
1796         }
1797     } else if ((*ch & 0xFF00) == 0x2300) {
1798         switch (*ch) {
1799           case 0x2308: *ch = 0x2309; break;
1800           case 0x2309: *ch = 0x2308; break;
1801           case 0x230A: *ch = 0x230B; break;
1802           case 0x230B: *ch = 0x230A; break;
1803           case 0x2329: *ch = 0x232A; break;
1804           case 0x232A: *ch = 0x2329; break;
1805         }
1806     } else if ((*ch & 0xFF00) == 0x2700) {
1807         switch (*ch) {
1808           case 0x2768: *ch = 0x2769; break;
1809           case 0x2769: *ch = 0x2768; break;
1810           case 0x276A: *ch = 0x276B; break;
1811           case 0x276B: *ch = 0x276A; break;
1812           case 0x276C: *ch = 0x276D; break;
1813           case 0x276D: *ch = 0x276C; break;
1814           case 0x276E: *ch = 0x276F; break;
1815           case 0x276F: *ch = 0x276E; break;
1816           case 0x2770: *ch = 0x2771; break;
1817           case 0x2771: *ch = 0x2770; break;
1818           case 0x2772: *ch = 0x2773; break;
1819           case 0x2773: *ch = 0x2772; break;
1820           case 0x2774: *ch = 0x2775; break;
1821           case 0x2775: *ch = 0x2774; break;
1822           case 0x27D5: *ch = 0x27D6; break;
1823           case 0x27D6: *ch = 0x27D5; break;
1824           case 0x27DD: *ch = 0x27DE; break;
1825           case 0x27DE: *ch = 0x27DD; break;
1826           case 0x27E2: *ch = 0x27E3; break;
1827           case 0x27E3: *ch = 0x27E2; break;
1828           case 0x27E4: *ch = 0x27E5; break;
1829           case 0x27E5: *ch = 0x27E4; break;
1830           case 0x27E6: *ch = 0x27E7; break;
1831           case 0x27E7: *ch = 0x27E6; break;
1832           case 0x27E8: *ch = 0x27E9; break;
1833           case 0x27E9: *ch = 0x27E8; break;
1834           case 0x27EA: *ch = 0x27EB; break;
1835           case 0x27EB: *ch = 0x27EA; break;
1836         }
1837     } else if ((*ch & 0xFF00) == 0x2900) {
1838         switch (*ch) {
1839           case 0x2983: *ch = 0x2984; break;
1840           case 0x2984: *ch = 0x2983; break;
1841           case 0x2985: *ch = 0x2986; break;
1842           case 0x2986: *ch = 0x2985; break;
1843           case 0x2987: *ch = 0x2988; break;
1844           case 0x2988: *ch = 0x2987; break;
1845           case 0x2989: *ch = 0x298A; break;
1846           case 0x298A: *ch = 0x2989; break;
1847           case 0x298B: *ch = 0x298C; break;
1848           case 0x298C: *ch = 0x298B; break;
1849           case 0x298D: *ch = 0x2990; break;
1850           case 0x298E: *ch = 0x298F; break;
1851           case 0x298F: *ch = 0x298E; break;
1852           case 0x2990: *ch = 0x298D; break;
1853           case 0x2991: *ch = 0x2992; break;
1854           case 0x2992: *ch = 0x2991; break;
1855           case 0x2993: *ch = 0x2994; break;
1856           case 0x2994: *ch = 0x2993; break;
1857           case 0x2995: *ch = 0x2996; break;
1858           case 0x2996: *ch = 0x2995; break;
1859           case 0x2997: *ch = 0x2998; break;
1860           case 0x2998: *ch = 0x2997; break;
1861           case 0x29B8: *ch = 0x2298; break;
1862           case 0x29C0: *ch = 0x29C1; break;
1863           case 0x29C1: *ch = 0x29C0; break;
1864           case 0x29C4: *ch = 0x29C5; break;
1865           case 0x29C5: *ch = 0x29C4; break;
1866           case 0x29CF: *ch = 0x29D0; break;
1867           case 0x29D0: *ch = 0x29CF; break;
1868           case 0x29D1: *ch = 0x29D2; break;
1869           case 0x29D2: *ch = 0x29D1; break;
1870           case 0x29D4: *ch = 0x29D5; break;
1871           case 0x29D5: *ch = 0x29D4; break;
1872           case 0x29D8: *ch = 0x29D9; break;
1873           case 0x29D9: *ch = 0x29D8; break;
1874           case 0x29DA: *ch = 0x29DB; break;
1875           case 0x29DB: *ch = 0x29DA; break;
1876           case 0x29F5: *ch = 0x2215; break;
1877           case 0x29F8: *ch = 0x29F9; break;
1878           case 0x29F9: *ch = 0x29F8; break;
1879           case 0x29FC: *ch = 0x29FD; break;
1880           case 0x29FD: *ch = 0x29FC; break;
1881         }
1882     } else if ((*ch & 0xFF00) == 0x2A00) {
1883         switch (*ch) {
1884           case 0x2A2B: *ch = 0x2A2C; break;
1885           case 0x2A2C: *ch = 0x2A2B; break;
1886           case 0x2A2D: *ch = 0x2A2C; break;
1887           case 0x2A2E: *ch = 0x2A2D; break;
1888           case 0x2A34: *ch = 0x2A35; break;
1889           case 0x2A35: *ch = 0x2A34; break;
1890           case 0x2A3C: *ch = 0x2A3D; break;
1891           case 0x2A3D: *ch = 0x2A3C; break;
1892           case 0x2A64: *ch = 0x2A65; break;
1893           case 0x2A65: *ch = 0x2A64; break;
1894           case 0x2A79: *ch = 0x2A7A; break;
1895           case 0x2A7A: *ch = 0x2A79; break;
1896           case 0x2A7D: *ch = 0x2A7E; break;
1897           case 0x2A7E: *ch = 0x2A7D; break;
1898           case 0x2A7F: *ch = 0x2A80; break;
1899           case 0x2A80: *ch = 0x2A7F; break;
1900           case 0x2A81: *ch = 0x2A82; break;
1901           case 0x2A82: *ch = 0x2A81; break;
1902           case 0x2A83: *ch = 0x2A84; break;
1903           case 0x2A84: *ch = 0x2A83; break;
1904           case 0x2A8B: *ch = 0x2A8C; break;
1905           case 0x2A8C: *ch = 0x2A8B; break;
1906           case 0x2A91: *ch = 0x2A92; break;
1907           case 0x2A92: *ch = 0x2A91; break;
1908           case 0x2A93: *ch = 0x2A94; break;
1909           case 0x2A94: *ch = 0x2A93; break;
1910           case 0x2A95: *ch = 0x2A96; break;
1911           case 0x2A96: *ch = 0x2A95; break;
1912           case 0x2A97: *ch = 0x2A98; break;
1913           case 0x2A98: *ch = 0x2A97; break;
1914           case 0x2A99: *ch = 0x2A9A; break;
1915           case 0x2A9A: *ch = 0x2A99; break;
1916           case 0x2A9B: *ch = 0x2A9C; break;
1917           case 0x2A9C: *ch = 0x2A9B; break;
1918           case 0x2AA1: *ch = 0x2AA2; break;
1919           case 0x2AA2: *ch = 0x2AA1; break;
1920           case 0x2AA6: *ch = 0x2AA7; break;
1921           case 0x2AA7: *ch = 0x2AA6; break;
1922           case 0x2AA8: *ch = 0x2AA9; break;
1923           case 0x2AA9: *ch = 0x2AA8; break;
1924           case 0x2AAA: *ch = 0x2AAB; break;
1925           case 0x2AAB: *ch = 0x2AAA; break;
1926           case 0x2AAC: *ch = 0x2AAD; break;
1927           case 0x2AAD: *ch = 0x2AAC; break;
1928           case 0x2AAF: *ch = 0x2AB0; break;
1929           case 0x2AB0: *ch = 0x2AAF; break;
1930           case 0x2AB3: *ch = 0x2AB4; break;
1931           case 0x2AB4: *ch = 0x2AB3; break;
1932           case 0x2ABB: *ch = 0x2ABC; break;
1933           case 0x2ABC: *ch = 0x2ABB; break;
1934           case 0x2ABD: *ch = 0x2ABE; break;
1935           case 0x2ABE: *ch = 0x2ABD; break;
1936           case 0x2ABF: *ch = 0x2AC0; break;
1937           case 0x2AC0: *ch = 0x2ABF; break;
1938           case 0x2AC1: *ch = 0x2AC2; break;
1939           case 0x2AC2: *ch = 0x2AC1; break;
1940           case 0x2AC3: *ch = 0x2AC4; break;
1941           case 0x2AC4: *ch = 0x2AC3; break;
1942           case 0x2AC5: *ch = 0x2AC6; break;
1943           case 0x2AC6: *ch = 0x2AC5; break;
1944           case 0x2ACD: *ch = 0x2ACE; break;
1945           case 0x2ACE: *ch = 0x2ACD; break;
1946           case 0x2ACF: *ch = 0x2AD0; break;
1947           case 0x2AD0: *ch = 0x2ACF; break;
1948           case 0x2AD1: *ch = 0x2AD2; break;
1949           case 0x2AD2: *ch = 0x2AD1; break;
1950           case 0x2AD3: *ch = 0x2AD4; break;
1951           case 0x2AD4: *ch = 0x2AD3; break;
1952           case 0x2AD5: *ch = 0x2AD6; break;
1953           case 0x2AD6: *ch = 0x2AD5; break;
1954           case 0x2ADE: *ch = 0x22A6; break;
1955           case 0x2AE3: *ch = 0x22A9; break;
1956           case 0x2AE4: *ch = 0x22A8; break;
1957           case 0x2AE5: *ch = 0x22AB; break;
1958           case 0x2AEC: *ch = 0x2AED; break;
1959           case 0x2AED: *ch = 0x2AEC; break;
1960           case 0x2AF7: *ch = 0x2AF8; break;
1961           case 0x2AF8: *ch = 0x2AF7; break;
1962           case 0x2AF9: *ch = 0x2AFA; break;
1963           case 0x2AFA: *ch = 0x2AF9; break;
1964         }
1965     } else if ((*ch & 0xFF00) == 0x3000) {
1966         switch (*ch) {
1967           case 0x3008: *ch = 0x3009; break;
1968           case 0x3009: *ch = 0x3008; break;
1969           case 0x300A: *ch = 0x300B; break;
1970           case 0x300B: *ch = 0x300A; break;
1971           case 0x300C: *ch = 0x300D; break;
1972           case 0x300D: *ch = 0x300C; break;
1973           case 0x300E: *ch = 0x300F; break;
1974           case 0x300F: *ch = 0x300E; break;
1975           case 0x3010: *ch = 0x3011; break;
1976           case 0x3011: *ch = 0x3010; break;
1977           case 0x3014: *ch = 0x3015; break;
1978           case 0x3015: *ch = 0x3014; break;
1979           case 0x3016: *ch = 0x3017; break;
1980           case 0x3017: *ch = 0x3016; break;
1981           case 0x3018: *ch = 0x3019; break;
1982           case 0x3019: *ch = 0x3018; break;
1983           case 0x301A: *ch = 0x301B; break;
1984           case 0x301B: *ch = 0x301A; break;
1985         }
1986     } else if ((*ch & 0xFF00) == 0xFF00) {
1987         switch (*ch) {
1988           case 0xFF08: *ch = 0xFF09; break;
1989           case 0xFF09: *ch = 0xFF08; break;
1990           case 0xFF1C: *ch = 0xFF1E; break;
1991           case 0xFF1E: *ch = 0xFF1C; break;
1992           case 0xFF3B: *ch = 0xFF3D; break;
1993           case 0xFF3D: *ch = 0xFF3B; break;
1994           case 0xFF5B: *ch = 0xFF5D; break;
1995           case 0xFF5D: *ch = 0xFF5B; break;
1996           case 0xFF5F: *ch = 0xFF60; break;
1997           case 0xFF60: *ch = 0xFF5F; break;
1998           case 0xFF62: *ch = 0xFF63; break;
1999           case 0xFF63: *ch = 0xFF62; break;
2000         }
2001     }
2002 }
2003 
2004