xref: /reactos/dll/win32/usp10/bidi.c (revision c2c66aff)
1 /*
2  * Uniscribe BiDirectional handling
3  *
4  * Copyright 2003 Shachar Shemesh
5  * Copyright 2007 Maarten Lankhorst
6  * Copyright 2010 CodeWeavers, Aric Stewart
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21  *
22  * Code derived from the modified reference implementation
23  * that was found in revision 17 of http://unicode.org/reports/tr9/
24  * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25  *
26  * -- Copyright (C) 1999-2005, ASMUS, Inc.
27  *
28  * Permission is hereby granted, free of charge, to any person obtaining a
29  * copy of the Unicode data files and any associated documentation (the
30  * "Data Files") or Unicode software and any associated documentation (the
31  * "Software") to deal in the Data Files or Software without restriction,
32  * including without limitation the rights to use, copy, modify, merge,
33  * publish, distribute, and/or sell copies of the Data Files or Software,
34  * and to permit persons to whom the Data Files or Software are furnished
35  * to do so, provided that (a) the above copyright notice(s) and this
36  * permission notice appear with all copies of the Data Files or Software,
37  * (b) both the above copyright notice(s) and this permission notice appear
38  * in associated documentation, and (c) there is clear notice in each
39  * modified Data File or in the Software as well as in the documentation
40  * associated with the Data File(s) or Software that the data or software
41  * has been modified.
42  */
43 
44 #include <windef.h>
45 
46 #include <wine/list.h>
47 
48 #include "usp10_internal.h"
49 
50 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
51 
52 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
53 
54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
55 #define MAX_DEPTH 125
56 
57 /* HELPER FUNCTIONS AND DECLARATIONS */
58 
59 /*------------------------------------------------------------------------
60     Bidirectional Character Types
61 
62     as defined by the Unicode Bidirectional Algorithm Table 3-7.
63 
64     Note:
65 
66       The list of bidirectional character types here is not grouped the
67       same way as the table 3-7, since the numberic values for the types
68       are chosen to keep the state and action tables compact.
69 ------------------------------------------------------------------------*/
70 enum directions
71 {
72     /* input types */
73              /* ON MUST be zero, code relies on ON = NI = 0 */
74     ON = 0,  /* Other Neutral */
75     L,       /* Left Letter */
76     R,       /* Right Letter */
77     AN,      /* Arabic Number */
78     EN,      /* European Number */
79     AL,      /* Arabic Letter (Right-to-left) */
80     NSM,     /* Non-spacing Mark */
81     CS,      /* Common Separator */
82     ES,      /* European Separator */
83     ET,      /* European Terminator (post/prefix e.g. $ and %) */
84 
85     /* resolved types */
86     BN,      /* Boundary neutral (type of RLE etc after explicit levels) */
87 
88     /* input types, */
89     S,       /* Segment Separator (TAB)        // used only in L1 */
90     WS,      /* White space                    // used only in L1 */
91     B,       /* Paragraph Separator (aka as PS) */
92 
93     /* types for explicit controls */
94     RLO,     /* these are used only in X1-X9 */
95     RLE,
96     LRO,
97     LRE,
98     PDF,
99 
100     LRI, /* Isolate formatting characters new with 6.3 */
101     RLI,
102     FSI,
103     PDI,
104 
105     /* resolved types, also resolved directions */
106     NI = ON,  /* alias, where ON, WS, S  and Isolates are treated the same */
107 };
108 
109 static const char debug_type[][4] =
110 {
111     "ON",      /* Other Neutral */
112     "L",       /* Left Letter */
113     "R",       /* Right Letter */
114     "AN",      /* Arabic Number */
115     "EN",      /* European Number */
116     "AL",      /* Arabic Letter (Right-to-left) */
117     "NSM",     /* Non-spacing Mark */
118     "CS",      /* Common Separator */
119     "ES",      /* European Separator */
120     "ET",      /* European Terminator (post/prefix e.g. $ and %) */
121     "BN",      /* Boundary neutral (type of RLE etc after explicit levels) */
122     "S",       /* Segment Separator (TAB)        // used only in L1 */
123     "WS",      /* White space                    // used only in L1 */
124     "B",       /* Paragraph Separator (aka as PS) */
125     "RLO",     /* these are used only in X1-X9 */
126     "RLE",
127     "LRO",
128     "LRE",
129     "PDF",
130     "LRI",     /* Isolate formatting characters new with 6.3 */
131     "RLI",
132     "FSI",
133     "PDI",
134 };
135 
136 /* HELPER FUNCTIONS */
137 
138 static inline void dump_types(const char* header, WORD *types, int start, int end)
139 {
140     int i, len = 0;
141     TRACE("%s:",header);
142     for (i = start; i < end && len < 200; i++)
143     {
144         TRACE(" %s",debug_type[types[i]]);
145         len += strlen(debug_type[types[i]])+1;
146     }
147     if (i != end)
148         TRACE("...");
149     TRACE("\n");
150 }
151 
152 /* Convert the libwine information to the direction enum */
153 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
154 {
155     static const enum directions dir_map[16] =
156     {
157         L,  /* unassigned defaults to L */
158         L,
159         R,
160         EN,
161         ES,
162         ET,
163         AN,
164         CS,
165         B,
166         S,
167         WS,
168         ON,
169         AL,
170         NSM,
171         BN,
172         PDF  /* also LRE, LRO, RLE, RLO */
173     };
174 
175     unsigned i;
176 
177     for (i = 0; i < count; ++i)
178     {
179         chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
180         switch (chartype[i])
181         {
182         case ES:
183             if (!c->fLegacyBidiClass) break;
184             switch (string[i])
185             {
186             case '-':
187             case '+': chartype[i] = NI; break;
188             case '/': chartype[i] = CS; break;
189             }
190             break;
191         case PDF:
192             switch (string[i])
193             {
194             case 0x202A: chartype[i] = LRE; break;
195             case 0x202B: chartype[i] = RLE; break;
196             case 0x202C: chartype[i] = PDF; break;
197             case 0x202D: chartype[i] = LRO; break;
198             case 0x202E: chartype[i] = RLO; break;
199             case 0x2066: chartype[i] = LRI; break;
200             case 0x2067: chartype[i] = RLI; break;
201             case 0x2068: chartype[i] = FSI; break;
202             case 0x2069: chartype[i] = PDI; break;
203             }
204             break;
205         }
206     }
207 }
208 
209 /* RESOLVE EXPLICIT */
210 
211 static WORD GreaterEven(int i)
212 {
213     return odd(i) ? i + 1 : i + 2;
214 }
215 
216 static WORD GreaterOdd(int i)
217 {
218     return odd(i) ? i + 2 : i + 1;
219 }
220 
221 static WORD EmbeddingDirection(int level)
222 {
223     return odd(level) ? R : L;
224 }
225 
226 /*------------------------------------------------------------------------
227     Function: resolveExplicit
228 
229     Recursively resolves explicit embedding levels and overrides.
230     Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
231 
232     Input: Base embedding level and direction
233            Character count
234 
235     Output: Array of embedding levels
236 
237     In/Out: Array of direction classes
238 
239 
240     Note: The function uses two simple counters to keep track of
241           matching explicit codes and PDF. Use the default argument for
242           the outermost call. The nesting counter counts the recursion
243           depth and not the embedding level.
244 ------------------------------------------------------------------------*/
245 typedef struct tagStackItem {
246     int level;
247     int override;
248     BOOL isolate;
249 } StackItem;
250 
251 #define push_stack(l,o,i)  \
252   do { stack_top--; \
253   stack[stack_top].level = l; \
254   stack[stack_top].override = o; \
255   stack[stack_top].isolate = i;} while(0)
256 
257 #define pop_stack() do { stack_top++; } while(0)
258 
259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
260 
261 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
262 {
263     /* X1 */
264     int overflow_isolate_count = 0;
265     int overflow_embedding_count = 0;
266     int valid_isolate_count = 0;
267     int i;
268 
269     StackItem stack[MAX_DEPTH+2];
270     int stack_top = MAX_DEPTH+1;
271 
272     stack[stack_top].level = level;
273     stack[stack_top].override = NI;
274     stack[stack_top].isolate = FALSE;
275 
276     if (initialOverride)
277     {
278         if (odd(level))
279             push_stack(level, R, FALSE);
280         else
281             push_stack(level, L, FALSE);
282     }
283 
284     for (i = 0; i < count; i++)
285     {
286         poutOverrides[i] = stack[stack_top].override;
287 
288         /* X2 */
289         if (pclass[i] == RLE)
290         {
291             int least_odd = GreaterOdd(stack[stack_top].level);
292             poutLevel[i] = stack[stack_top].level;
293             if (valid_level(least_odd))
294                 push_stack(least_odd, NI, FALSE);
295             else if (overflow_isolate_count == 0)
296                 overflow_embedding_count++;
297         }
298         /* X3 */
299         else if (pclass[i] == LRE)
300         {
301             int least_even = GreaterEven(stack[stack_top].level);
302             poutLevel[i] = stack[stack_top].level;
303             if (valid_level(least_even))
304                 push_stack(least_even, NI, FALSE);
305             else if (overflow_isolate_count == 0)
306                 overflow_embedding_count++;
307         }
308         /* X4 */
309         else if (pclass[i] == RLO)
310         {
311             int least_odd = GreaterOdd(stack[stack_top].level);
312             poutLevel[i] = stack[stack_top].level;
313             if (valid_level(least_odd))
314                 push_stack(least_odd, R, FALSE);
315             else if (overflow_isolate_count == 0)
316                 overflow_embedding_count++;
317         }
318         /* X5 */
319         else if (pclass[i] == LRO)
320         {
321             int least_even = GreaterEven(stack[stack_top].level);
322             poutLevel[i] = stack[stack_top].level;
323             if (valid_level(least_even))
324                 push_stack(least_even, L, FALSE);
325             else if (overflow_isolate_count == 0)
326                 overflow_embedding_count++;
327         }
328         /* X5a */
329         else if (pclass[i] == RLI)
330         {
331             int least_odd = GreaterOdd(stack[stack_top].level);
332             poutLevel[i] = stack[stack_top].level;
333             if (valid_level(least_odd))
334             {
335                 valid_isolate_count++;
336                 push_stack(least_odd, NI, TRUE);
337             }
338             else
339                 overflow_isolate_count++;
340         }
341         /* X5b */
342         else if (pclass[i] == LRI)
343         {
344             int least_even = GreaterEven(stack[stack_top].level);
345             poutLevel[i] = stack[stack_top].level;
346             if (valid_level(least_even))
347             {
348                 valid_isolate_count++;
349                 push_stack(least_even, NI, TRUE);
350             }
351             else
352                 overflow_isolate_count++;
353         }
354         /* X5c */
355         else if (pclass[i] == FSI)
356         {
357             int j;
358             int new_level = 0;
359             int skipping = 0;
360             poutLevel[i] = stack[stack_top].level;
361             for (j = i+1; j < count; j++)
362             {
363                 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
364                 {
365                     skipping++;
366                     continue;
367                 }
368                 else if (pclass[j] == PDI)
369                 {
370                     if (skipping)
371                         skipping --;
372                     else
373                         break;
374                     continue;
375                 }
376 
377                 if (skipping) continue;
378 
379                 if (pclass[j] == L)
380                 {
381                     new_level = 0;
382                     break;
383                 }
384                 else if (pclass[j] == R || pclass[j] == AL)
385                 {
386                     new_level = 1;
387                     break;
388                 }
389             }
390             if (odd(new_level))
391             {
392                 int least_odd = GreaterOdd(stack[stack_top].level);
393                 if (valid_level(least_odd))
394                 {
395                     valid_isolate_count++;
396                     push_stack(least_odd, NI, TRUE);
397                 }
398                 else
399                     overflow_isolate_count++;
400             }
401             else
402             {
403                 int least_even = GreaterEven(stack[stack_top].level);
404                 if (valid_level(least_even))
405                 {
406                     valid_isolate_count++;
407                     push_stack(least_even, NI, TRUE);
408                 }
409                 else
410                     overflow_isolate_count++;
411             }
412         }
413         /* X6 */
414         else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
415         {
416             poutLevel[i] = stack[stack_top].level;
417             if (stack[stack_top].override != NI)
418                 pclass[i] = stack[stack_top].override;
419         }
420         /* X6a */
421         else if (pclass[i] == PDI)
422         {
423             if (overflow_isolate_count) overflow_isolate_count--;
424             else if (!valid_isolate_count) {/* do nothing */}
425             else
426             {
427                 overflow_embedding_count = 0;
428                 while (!stack[stack_top].isolate) pop_stack();
429                 pop_stack();
430                 valid_isolate_count --;
431             }
432             poutLevel[i] = stack[stack_top].level;
433         }
434         /* X7 */
435         else if (pclass[i] == PDF)
436         {
437             poutLevel[i] = stack[stack_top].level;
438             if (overflow_isolate_count) {/* do nothing */}
439             else if (overflow_embedding_count) overflow_embedding_count--;
440             else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
441                 pop_stack();
442         }
443         /* X8: Nothing */
444     }
445     /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
446     for (i = 0; i < count ; i++)
447         if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
448             pclass[i] = BN;
449 }
450 
451 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
452 {
453     if (index == -1 || index == back_fence) return index;
454     index --;
455     while (index > back_fence && pcls[index] == BN) index --;
456     return index;
457 }
458 
459 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
460 {
461     if (index == front_fence) return index;
462     index ++;
463     while (index < front_fence && pcls[index] == BN) index ++;
464     return index;
465 }
466 
467 typedef struct tagRun
468 {
469     int start;
470     int end;
471     WORD e;
472 } Run;
473 
474 typedef struct tagRunChar
475 {
476     WCHAR ch;
477     WORD *pcls;
478 } RunChar;
479 
480 typedef struct tagIsolatedRun
481 {
482     struct list entry;
483     int length;
484     WORD sos;
485     WORD eos;
486     WORD e;
487 
488     RunChar item[1];
489 } IsolatedRun;
490 
491 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
492 {
493     if (index >= (iso_run->length-1)) return -1;
494     index ++;
495     while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
496     if (index == iso_run->length) return -1;
497     return index;
498 }
499 
500 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
501 {
502 
503     if (index <= 0) return -1;
504     index --;
505     while (index > -1 && *iso_run->item[index].pcls == BN) index--;
506     return index;
507 }
508 
509 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
510 {
511     int i, len = 0;
512     TRACE("%s:",header);
513     TRACE("[ ");
514     for (i = 0; i < iso_run->length && len < 200; i++)
515     {
516         TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
517         len += strlen(debug_type[*iso_run->item[i].pcls])+1;
518     }
519     if (i != iso_run->length)
520         TRACE("...");
521     TRACE(" ]\n");
522 }
523 
524 /*------------------------------------------------------------------------
525     Function: resolveWeak
526 
527     Resolves the directionality of numeric and other weak character types
528 
529     Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
530 
531     Input: Array of embedding levels
532            Character count
533 
534     In/Out: Array of directional classes
535 
536     Note: On input only these directional classes are expected
537           AL, HL, R, L,  ON, BN, NSM, AN, EN, ES, ET, CS,
538 ------------------------------------------------------------------------*/
539 
540 static void resolveWeak(IsolatedRun * iso_run)
541 {
542     int i;
543 
544     /* W1 */
545     for (i=0; i < iso_run->length; i++)
546     {
547         if (*iso_run->item[i].pcls == NSM)
548         {
549             int j = iso_previousValidChar(iso_run, i);
550             if (j == -1)
551                 *iso_run->item[i].pcls = iso_run->sos;
552             else if (*iso_run->item[j].pcls >= LRI)
553                 *iso_run->item[i].pcls = ON;
554             else
555                 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
556         }
557     }
558 
559     /* W2 */
560     for (i = 0; i < iso_run->length; i++)
561     {
562         if (*iso_run->item[i].pcls == EN)
563         {
564             int j = iso_previousValidChar(iso_run, i);
565             while (j > -1)
566             {
567                 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
568                 {
569                     if (*iso_run->item[j].pcls == AL)
570                         *iso_run->item[i].pcls = AN;
571                     break;
572                 }
573                 j = iso_previousValidChar(iso_run, j);
574             }
575         }
576     }
577 
578     /* W3 */
579     for (i = 0; i < iso_run->length; i++)
580     {
581         if (*iso_run->item[i].pcls == AL)
582             *iso_run->item[i].pcls = R;
583     }
584 
585     /* W4 */
586     for (i = 0; i < iso_run->length; i++)
587     {
588         if (*iso_run->item[i].pcls == ES)
589         {
590             int b = iso_previousValidChar(iso_run, i);
591             int f = iso_nextValidChar(iso_run, i);
592 
593             if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
594                 *iso_run->item[i].pcls = EN;
595         }
596         else if (*iso_run->item[i].pcls == CS)
597         {
598             int b = iso_previousValidChar(iso_run, i);
599             int f = iso_nextValidChar(iso_run, i);
600 
601             if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
602                 *iso_run->item[i].pcls = EN;
603             else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
604                 *iso_run->item[i].pcls = AN;
605         }
606     }
607 
608     /* W5 */
609     for (i = 0; i < iso_run->length; i++)
610     {
611         if (*iso_run->item[i].pcls == ET)
612         {
613             int j;
614             for (j = i-1 ; j > -1; j--)
615             {
616                 if (*iso_run->item[j].pcls == BN) continue;
617                 if (*iso_run->item[j].pcls == ET) continue;
618                 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
619                 else break;
620             }
621             if (*iso_run->item[i].pcls == ET)
622             {
623                 for (j = i+1; j < iso_run->length; j++)
624                 {
625                     if (*iso_run->item[j].pcls == BN) continue;
626                     if (*iso_run->item[j].pcls == ET) continue;
627                     else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
628                     else break;
629                 }
630             }
631         }
632     }
633 
634     /* W6 */
635     for (i = 0; i < iso_run->length; i++)
636     {
637         if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
638         {
639             int b = i-1;
640             int f = i+1;
641             if (b > -1 && *iso_run->item[b].pcls == BN)
642                 *iso_run->item[b].pcls = ON;
643             if (f < iso_run->length && *iso_run->item[f].pcls == BN)
644                 *iso_run->item[f].pcls = ON;
645 
646             *iso_run->item[i].pcls = ON;
647         }
648     }
649 
650     /* W7 */
651     for (i = 0; i < iso_run->length; i++)
652     {
653         if (*iso_run->item[i].pcls == EN)
654         {
655             int j;
656             for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
657                 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
658                 {
659                     if (*iso_run->item[j].pcls == L)
660                         *iso_run->item[i].pcls = L;
661                     break;
662                 }
663             if (iso_run->sos == L &&  j == -1)
664                 *iso_run->item[i].pcls = L;
665         }
666     }
667 }
668 
669 typedef struct tagBracketPair
670 {
671     int start;
672     int end;
673 } BracketPair;
674 
675 static int compr(const void *a, const void* b)
676 {
677     return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
678 }
679 
680 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
681 {
682     WCHAR *open_stack;
683     int *stack_index;
684     int stack_top = iso_run->length;
685     BracketPair *out = NULL;
686     int pair_count = 0;
687     int i;
688 
689     open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
690     stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
691 
692     for (i = 0; i < iso_run->length; i++)
693     {
694         unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
695         if (ubv)
696         {
697             if (!out)
698             {
699                 out = heap_alloc(sizeof(*out));
700                 out[0].start = -1;
701             }
702 
703             if ((ubv >> 8) == 0)
704             {
705                 stack_top --;
706                 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
707                 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */
708                 if (open_stack[stack_top] == 0x232A)
709                     open_stack[stack_top] = 0x3009;
710                 stack_index[stack_top] = i;
711             }
712             else if ((ubv >> 8) == 1)
713             {
714                 int j;
715                 if (stack_top == iso_run->length) continue;
716                 for (j = stack_top; j < iso_run->length; j++)
717                 {
718                     WCHAR c = iso_run->item[i].ch;
719                     if (c == 0x232A) c = 0x3009;
720                     if (c == open_stack[j])
721                     {
722                         out[pair_count].start = stack_index[j];
723                         out[pair_count].end = i;
724                         pair_count++;
725                         out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1));
726                         out[pair_count].start = -1;
727                         stack_top = j+1;
728                         break;
729                     }
730                 }
731             }
732         }
733     }
734     if (pair_count == 0)
735     {
736         heap_free(out);
737         out = NULL;
738     }
739     else if (pair_count > 1)
740         qsort(out, pair_count, sizeof(BracketPair), compr);
741 
742     heap_free(open_stack);
743     heap_free(stack_index);
744     return out;
745 }
746 
747 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
748 
749 /*------------------------------------------------------------------------
750     Function: resolveNeutrals
751 
752     Resolves the directionality of neutral character types.
753 
754     Implements rules N1 and N2 of the Unicode Bidi Algorithm.
755 
756     Input: Array of embedding levels
757            Character count
758            Baselevel
759 
760     In/Out: Array of directional classes
761 
762     Note: On input only these directional classes are expected
763           R,  L,  NI, AN, EN and BN
764 
765           W8 resolves a number of ENs to L
766 ------------------------------------------------------------------------*/
767 static void resolveNeutrals(IsolatedRun *iso_run)
768 {
769     int i;
770     BracketPair *pairs = NULL;
771 
772     /* Translate isolates into NI */
773     for (i = 0; i < iso_run->length; i++)
774     {
775         if (*iso_run->item[i].pcls >= LRI)
776             *iso_run->item[i].pcls = NI;
777 
778         switch(*iso_run->item[i].pcls)
779         {
780             case B:
781             case S:
782             case WS: *iso_run->item[i].pcls = NI;
783         }
784 
785         ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R,  AN, EN and BN are allowed" */
786     }
787 
788     /* N0: Skipping bracketed pairs for now */
789     pairs = computeBracketPairs(iso_run);
790     if (pairs)
791     {
792         BracketPair *p = &pairs[0];
793         int i = 0;
794         while (p->start >= 0)
795         {
796             int j;
797             int e = EmbeddingDirection(iso_run->e);
798             int o = EmbeddingDirection(iso_run->e+1);
799             BOOL flag_o = FALSE;
800             TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
801 
802             /* N0.b */
803             for (j = p->start+1; j < p->end; j++)
804             {
805                 if (N0_TYPE(*iso_run->item[j].pcls) == e)
806                 {
807                     *iso_run->item[p->start].pcls = e;
808                     *iso_run->item[p->end].pcls = e;
809                     break;
810                 }
811                 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
812                     flag_o = TRUE;
813             }
814             /* N0.c */
815             if (j == p->end && flag_o)
816             {
817                 for (j = p->start; j >= 0; j--)
818                 {
819                     if (N0_TYPE(*iso_run->item[j].pcls) == o)
820                     {
821                         *iso_run->item[p->start].pcls = o;
822                         *iso_run->item[p->end].pcls = o;
823                         break;
824                     }
825                     else if (N0_TYPE(*iso_run->item[j].pcls) == e)
826                     {
827                         *iso_run->item[p->start].pcls = e;
828                         *iso_run->item[p->end].pcls = e;
829                         break;
830                     }
831                 }
832                 if ( j < 0 )
833                 {
834                     *iso_run->item[p->start].pcls = iso_run->sos;
835                     *iso_run->item[p->end].pcls = iso_run->sos;
836                 }
837             }
838 
839             i++;
840             p = &pairs[i];
841         }
842         heap_free(pairs);
843     }
844 
845     /* N1 */
846     for (i = 0; i < iso_run->length; i++)
847     {
848         WORD l,r;
849 
850         if (*iso_run->item[i].pcls == NI)
851         {
852             int j;
853             int b = iso_previousValidChar(iso_run, i);
854 
855             if (b == -1)
856             {
857                 l = iso_run->sos;
858                 b = 0;
859             }
860             else
861             {
862                 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
863                     l = R;
864                 else if (*iso_run->item[b].pcls == L)
865                     l = L;
866                 else /* No string type */
867                     continue;
868             }
869             j = iso_nextValidChar(iso_run, i);
870             while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
871 
872             if (j == -1)
873             {
874                 r = iso_run->eos;
875                 j = iso_run->length;
876             }
877             else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
878                 r = R;
879             else if (*iso_run->item[j].pcls == L)
880                 r = L;
881             else /* No string type */
882                 continue;
883 
884             if (r == l)
885             {
886                 for (b = i; b < j && b < iso_run->length; b++)
887                     *iso_run->item[b].pcls = r;
888             }
889         }
890     }
891 
892     /* N2 */
893     for (i = 0; i < iso_run->length; i++)
894     {
895         if (*iso_run->item[i].pcls == NI)
896         {
897             int b = i-1;
898             int f = i+1;
899             *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
900             if (b > -1 && *iso_run->item[b].pcls == BN)
901                 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
902             if (f < iso_run->length && *iso_run->item[f].pcls == BN)
903                 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
904         }
905     }
906 }
907 
908 /*------------------------------------------------------------------------
909     Function: resolveImplicit
910 
911     Recursively resolves implicit embedding levels.
912     Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
913 
914     Input: Array of direction classes
915            Character count
916            Base level
917 
918     In/Out: Array of embedding levels
919 
920     Note: levels may exceed 15 on output.
921           Accepted subset of direction classes
922           R, L, AN, EN
923 ------------------------------------------------------------------------*/
924 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
925 {
926     int i;
927 
928     /* I1/2 */
929     for (i = sos; i <= eos; i++)
930     {
931         if (pcls[i] == BN)
932             continue;
933 
934         ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
935         ASSERT(pcls[i] < 5); /* "Out of range." */
936 
937         if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
938             plevel[i]++;
939         else if (!odd(plevel[i]) && pcls[i] == R)
940             plevel[i]++;
941         else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
942             plevel[i]+=2;
943     }
944 }
945 
946 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
947 {
948     int i;
949 
950     /* L1 */
951     for (i = sos; i <= eos; i++)
952     {
953         if (pcls[i] == B || pcls[i] == S)
954         {
955             int j = i -1;
956             while (i > sos  && j >= sos &&
957                    (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
958                     pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
959                     pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
960                 plevel[j--] = baselevel;
961             plevel[i] = baselevel;
962         }
963         else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
964                  pcls[i] == PDF || pcls[i] == BN)
965         {
966             plevel[i] = i ? plevel[i - 1] : baselevel;
967         }
968         if (i == eos &&
969             (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
970              pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
971              pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
972         {
973             int j = i;
974             while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
975                                 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
976                                 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
977                 plevel[j--] = baselevel;
978         }
979     }
980 }
981 
982 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
983         const WCHAR *string, unsigned int uCount, struct list *set)
984 {
985     int run_start, run_end, i;
986     int run_count = 0;
987     Run *runs;
988     IsolatedRun *current_isolated;
989 
990     if (!(runs = heap_alloc(uCount * sizeof(*runs))))
991         return;
992 
993     list_init(set);
994 
995     /* Build Runs */
996     run_start = 0;
997     while (run_start < uCount)
998     {
999         run_end = nextValidChar(pcls, run_start, uCount);
1000         while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1001         run_end --;
1002         runs[run_count].start = run_start;
1003         runs[run_count].end = run_end;
1004         runs[run_count].e = pLevel[run_start];
1005         run_start = nextValidChar(pcls, run_end, uCount);
1006         run_count++;
1007     }
1008 
1009     /* Build Isolating Runs */
1010     i = 0;
1011     while (i < run_count)
1012     {
1013         int k = i;
1014         if (runs[k].start >= 0)
1015         {
1016             int type_fence, real_end;
1017             int j;
1018 
1019             if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
1020                 break;
1021 
1022             run_start = runs[k].start;
1023             current_isolated->e = runs[k].e;
1024             current_isolated->length = (runs[k].end - runs[k].start)+1;
1025 
1026             for (j = 0; j < current_isolated->length;  j++)
1027             {
1028                 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1029                 current_isolated->item[j].ch = string[runs[k].start + j];
1030             }
1031 
1032             run_end = runs[k].end;
1033 
1034             TRACE("{ [%i -- %i]",run_start, run_end);
1035 
1036             if (pcls[run_end] == BN)
1037                 run_end = previousValidChar(pcls, run_end, runs[k].start);
1038 
1039             while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1040             {
1041                 j = k+1;
1042 search:
1043                 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1044                 if (j < run_count && runs[i].e != runs[j].e)
1045                 {
1046                     j++;
1047                     goto search;
1048                 }
1049 
1050                 if (j != run_count)
1051                 {
1052                     int m;
1053                     int l = current_isolated->length;
1054 
1055                     current_isolated->length += (runs[j].end - runs[j].start)+1;
1056                     for (m = 0; l < current_isolated->length; l++, m++)
1057                     {
1058                         current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1059                         current_isolated->item[l].ch = string[runs[j].start + m];
1060                     }
1061 
1062                     TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1063 
1064                     run_end = runs[j].end;
1065                     if (pcls[run_end] == BN)
1066                         run_end = previousValidChar(pcls, run_end, runs[i].start);
1067                     runs[j].start = -1;
1068                     k = j;
1069                 }
1070                 else
1071                 {
1072                     run_end = uCount;
1073                     break;
1074                 }
1075             }
1076 
1077             type_fence = previousValidChar(pcls, run_start, -1);
1078 
1079             if (type_fence == -1)
1080                 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1081             else
1082                 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1083 
1084             current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1085 
1086             if (run_end == uCount)
1087                 current_isolated->eos = current_isolated->sos;
1088             else
1089             {
1090                 /* eos could be an BN */
1091                 if ( pcls[run_end] == BN )
1092                 {
1093                     real_end = previousValidChar(pcls, run_end, run_start-1);
1094                     if (real_end < run_start)
1095                         real_end = run_start;
1096                 }
1097                 else
1098                     real_end = run_end;
1099 
1100                 type_fence = nextValidChar(pcls, run_end, uCount);
1101                 if (type_fence == uCount)
1102                     current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1103                 else
1104                     current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1105 
1106                 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1107             }
1108 
1109             list_add_tail(set, &current_isolated->entry);
1110             TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1111         }
1112         i++;
1113     }
1114 
1115     heap_free(runs);
1116 }
1117 
1118 /*************************************************************
1119  *    BIDI_DeterminLevels
1120  */
1121 BOOL BIDI_DetermineLevels(
1122                 const WCHAR *lpString,  /* [in] The string for which information is to be returned */
1123                 unsigned int uCount,    /* [in] Number of WCHARs in string. */
1124                 const SCRIPT_STATE *s,
1125                 const SCRIPT_CONTROL *c,
1126                 WORD *lpOutLevels, /* [out] final string levels */
1127                 WORD *lpOutOverrides /* [out] final string overrides */
1128     )
1129 {
1130     WORD *chartype;
1131     unsigned baselevel = 0;
1132     struct list IsolatingRuns;
1133     IsolatedRun *iso_run, *next;
1134 
1135     TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1136 
1137     if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1138     {
1139         WARN("Out of memory\n");
1140         return FALSE;
1141     }
1142 
1143     baselevel = s->uBidiLevel;
1144 
1145     classify(lpString, chartype, uCount, c);
1146     if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1147 
1148     memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1149 
1150     /* resolve explicit */
1151     resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1152     if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1153 
1154     /* X10/BD13: Computer Isolating runs */
1155     computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1156 
1157     LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1158     {
1159         if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1160 
1161         /* resolve weak */
1162         resolveWeak(iso_run);
1163         if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1164 
1165         /* resolve neutrals */
1166         resolveNeutrals(iso_run);
1167         if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1168 
1169         list_remove(&iso_run->entry);
1170         heap_free(iso_run);
1171     }
1172 
1173     if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1174     /* resolveImplicit */
1175     resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1176 
1177     /* resolveResolvedLevels*/
1178     classify(lpString, chartype, uCount, c);
1179     resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1180 
1181     heap_free(chartype);
1182     return TRUE;
1183 }
1184 
1185 /* reverse cch indexes */
1186 static void reverse(int *pidx, int cch)
1187 {
1188     int temp;
1189     int ich = 0;
1190     for (; ich < --cch; ich++)
1191     {
1192         temp = pidx[ich];
1193         pidx[ich] = pidx[cch];
1194         pidx[cch] = temp;
1195     }
1196 }
1197 
1198 
1199 /*------------------------------------------------------------------------
1200     Functions: reorder/reorderLevel
1201 
1202     Recursively reorders the display string
1203     "From the highest level down, reverse all characters at that level and
1204     higher, down to the lowest odd level"
1205 
1206     Implements rule L2 of the Unicode bidi Algorithm.
1207 
1208     Input: Array of embedding levels
1209            Character count
1210            Flag enabling reversal (set to false by initial caller)
1211 
1212     In/Out: Text to reorder
1213 
1214     Note: levels may exceed 15 resp. 61 on input.
1215 
1216     Rule L3 - reorder combining marks is not implemented here
1217     Rule L4 - glyph mirroring is implemented as a display option below
1218 
1219     Note: this should be applied a line at a time
1220 -------------------------------------------------------------------------*/
1221 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1222 {
1223     int ich = 0;
1224 
1225     /* true as soon as first odd level encountered */
1226     fReverse = fReverse || odd(level);
1227 
1228     for (; ich < cch; ich++)
1229     {
1230         if (plevel[ich] < level)
1231         {
1232             break;
1233         }
1234         else if (plevel[ich] > level)
1235         {
1236             ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1237                 cch - ich, fReverse) - 1;
1238         }
1239     }
1240     if (fReverse)
1241     {
1242         reverse(pIndexs, ich);
1243     }
1244     return ich;
1245 }
1246 
1247 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1248 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1249 {
1250     int ich = 0;
1251     int newlevel = -1;
1252 
1253     /* true as soon as first odd level encountered */
1254     fReverse = fReverse || odd(level);
1255 
1256     for (; ich < cch; ich++)
1257     {
1258         if (plevel[ich] < level)
1259             break;
1260         else if (plevel[ich] > level)
1261             newlevel = ich;
1262     }
1263     if (fReverse)
1264     {
1265         reverse(pIndexs, ich);
1266     }
1267 
1268     if (newlevel >= 0)
1269     {
1270         ich = 0;
1271         for (; ich < cch; ich++)
1272             if (plevel[ich] < level)
1273                 break;
1274             else if (plevel[ich] > level)
1275                 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1276                 cch - ich, fReverse) - 1;
1277     }
1278 
1279     return ich;
1280 }
1281 
1282 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1283 {
1284     unsigned int i;
1285 
1286     classify(string, strength, count, c);
1287     for (i = 0; i < count; i++)
1288     {
1289         switch (strength[i])
1290         {
1291             case L:
1292             case LRE:
1293             case LRO:
1294             case R:
1295             case AL:
1296             case RLE:
1297             case RLO:
1298                 strength[i] = BIDI_STRONG;
1299                 break;
1300             case PDF:
1301             case EN:
1302             case ES:
1303             case ET:
1304             case AN:
1305             case CS:
1306             case BN:
1307                 strength[i] = BIDI_WEAK;
1308                 break;
1309             case B:
1310             case S:
1311             case WS:
1312             case ON:
1313             default: /* Neutrals and NSM */
1314                 strength[i] = BIDI_NEUTRAL;
1315         }
1316     }
1317     return TRUE;
1318 }
1319