xref: /reactos/dll/win32/usp10/bidi.c (revision 02e84521)
1 /*
2  * Uniscribe BiDirectional handling
3  *
4  * Copyright 2003 Shachar Shemesh
5  * Copyright 2007 Maarten Lankhorst
6  * Copyright 2010 CodeWeavers, Aric Stewart
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21  *
22  * Code derived from the modified reference implementation
23  * that was found in revision 17 of http://unicode.org/reports/tr9/
24  * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25  *
26  * -- Copyright (C) 1999-2005, ASMUS, Inc.
27  *
28  * Permission is hereby granted, free of charge, to any person obtaining a
29  * copy of the Unicode data files and any associated documentation (the
30  * "Data Files") or Unicode software and any associated documentation (the
31  * "Software") to deal in the Data Files or Software without restriction,
32  * including without limitation the rights to use, copy, modify, merge,
33  * publish, distribute, and/or sell copies of the Data Files or Software,
34  * and to permit persons to whom the Data Files or Software are furnished
35  * to do so, provided that (a) the above copyright notice(s) and this
36  * permission notice appear with all copies of the Data Files or Software,
37  * (b) both the above copyright notice(s) and this permission notice appear
38  * in associated documentation, and (c) there is clear notice in each
39  * modified Data File or in the Software as well as in the documentation
40  * associated with the Data File(s) or Software that the data or software
41  * has been modified.
42  */
43 
44 #include "config.h"
45 
46 #include <stdarg.h>
47 #include <stdlib.h>
48 #include "windef.h"
49 #include "winbase.h"
50 #include "wingdi.h"
51 #include "winnls.h"
52 #include "usp10.h"
53 #include "wine/unicode.h"
54 #include "wine/debug.h"
55 #include "wine/heap.h"
56 #include "wine/list.h"
57 
58 #include "usp10_internal.h"
59 
60 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
61 
62 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
63 
64 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
65 #define MAX_DEPTH 125
66 
67 /* HELPER FUNCTIONS AND DECLARATIONS */
68 
69 /*------------------------------------------------------------------------
70     Bidirectional Character Types
71 
72     as defined by the Unicode Bidirectional Algorithm Table 3-7.
73 
74     Note:
75 
76       The list of bidirectional character types here is not grouped the
77       same way as the table 3-7, since the numberic values for the types
78       are chosen to keep the state and action tables compact.
79 ------------------------------------------------------------------------*/
80 enum directions
81 {
82     /* input types */
83              /* ON MUST be zero, code relies on ON = NI = 0 */
84     ON = 0,  /* Other Neutral */
85     L,       /* Left Letter */
86     R,       /* Right Letter */
87     AN,      /* Arabic Number */
88     EN,      /* European Number */
89     AL,      /* Arabic Letter (Right-to-left) */
90     NSM,     /* Non-spacing Mark */
91     CS,      /* Common Separator */
92     ES,      /* European Separator */
93     ET,      /* European Terminator (post/prefix e.g. $ and %) */
94 
95     /* resolved types */
96     BN,      /* Boundary neutral (type of RLE etc after explicit levels) */
97 
98     /* input types, */
99     S,       /* Segment Separator (TAB)        // used only in L1 */
100     WS,      /* White space                    // used only in L1 */
101     B,       /* Paragraph Separator (aka as PS) */
102 
103     /* types for explicit controls */
104     RLO,     /* these are used only in X1-X9 */
105     RLE,
106     LRO,
107     LRE,
108     PDF,
109 
110     LRI, /* Isolate formatting characters new with 6.3 */
111     RLI,
112     FSI,
113     PDI,
114 
115     /* resolved types, also resolved directions */
116     NI = ON,  /* alias, where ON, WS, S  and Isolates are treated the same */
117 };
118 
119 static const char debug_type[][4] =
120 {
121     "ON",      /* Other Neutral */
122     "L",       /* Left Letter */
123     "R",       /* Right Letter */
124     "AN",      /* Arabic Number */
125     "EN",      /* European Number */
126     "AL",      /* Arabic Letter (Right-to-left) */
127     "NSM",     /* Non-spacing Mark */
128     "CS",      /* Common Separator */
129     "ES",      /* European Separator */
130     "ET",      /* European Terminator (post/prefix e.g. $ and %) */
131     "BN",      /* Boundary neutral (type of RLE etc after explicit levels) */
132     "S",       /* Segment Separator (TAB)        // used only in L1 */
133     "WS",      /* White space                    // used only in L1 */
134     "B",       /* Paragraph Separator (aka as PS) */
135     "RLO",     /* these are used only in X1-X9 */
136     "RLE",
137     "LRO",
138     "LRE",
139     "PDF",
140     "LRI",     /* Isolate formatting characters new with 6.3 */
141     "RLI",
142     "FSI",
143     "PDI",
144 };
145 
146 /* HELPER FUNCTIONS */
147 
148 static inline void dump_types(const char* header, WORD *types, int start, int end)
149 {
150     int i, len = 0;
151     TRACE("%s:",header);
152     for (i = start; i < end && len < 200; i++)
153     {
154         TRACE(" %s",debug_type[types[i]]);
155         len += strlen(debug_type[types[i]])+1;
156     }
157     if (i != end)
158         TRACE("...");
159     TRACE("\n");
160 }
161 
162 /* Convert the libwine information to the direction enum */
163 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
164 {
165     static const enum directions dir_map[16] =
166     {
167         L,  /* unassigned defaults to L */
168         L,
169         R,
170         EN,
171         ES,
172         ET,
173         AN,
174         CS,
175         B,
176         S,
177         WS,
178         ON,
179         AL,
180         NSM,
181         BN,
182         PDF  /* also LRE, LRO, RLE, RLO */
183     };
184 
185     unsigned i;
186 
187     for (i = 0; i < count; ++i)
188     {
189         chartype[i] = dir_map[get_char_typeW(string[i]) >> 12];
190         switch (chartype[i])
191         {
192         case ES:
193             if (!c->fLegacyBidiClass) break;
194             switch (string[i])
195             {
196             case '-':
197             case '+': chartype[i] = NI; break;
198             case '/': chartype[i] = CS; break;
199             }
200             break;
201         case PDF:
202             switch (string[i])
203             {
204             case 0x202A: chartype[i] = LRE; break;
205             case 0x202B: chartype[i] = RLE; break;
206             case 0x202C: chartype[i] = PDF; break;
207             case 0x202D: chartype[i] = LRO; break;
208             case 0x202E: chartype[i] = RLO; break;
209             case 0x2066: chartype[i] = LRI; break;
210             case 0x2067: chartype[i] = RLI; break;
211             case 0x2068: chartype[i] = FSI; break;
212             case 0x2069: chartype[i] = PDI; break;
213             }
214             break;
215         }
216     }
217 }
218 
219 /* RESOLVE EXPLICIT */
220 
221 static WORD GreaterEven(int i)
222 {
223     return odd(i) ? i + 1 : i + 2;
224 }
225 
226 static WORD GreaterOdd(int i)
227 {
228     return odd(i) ? i + 2 : i + 1;
229 }
230 
231 static WORD EmbeddingDirection(int level)
232 {
233     return odd(level) ? R : L;
234 }
235 
236 /*------------------------------------------------------------------------
237     Function: resolveExplicit
238 
239     Recursively resolves explicit embedding levels and overrides.
240     Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
241 
242     Input: Base embedding level and direction
243            Character count
244 
245     Output: Array of embedding levels
246 
247     In/Out: Array of direction classes
248 
249 
250     Note: The function uses two simple counters to keep track of
251           matching explicit codes and PDF. Use the default argument for
252           the outermost call. The nesting counter counts the recursion
253           depth and not the embedding level.
254 ------------------------------------------------------------------------*/
255 typedef struct tagStackItem {
256     int level;
257     int override;
258     BOOL isolate;
259 } StackItem;
260 
261 #define push_stack(l,o,i)  \
262   do { stack_top--; \
263   stack[stack_top].level = l; \
264   stack[stack_top].override = o; \
265   stack[stack_top].isolate = i;} while(0)
266 
267 #define pop_stack() do { stack_top++; } while(0)
268 
269 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
270 
271 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
272 {
273     /* X1 */
274     int overflow_isolate_count = 0;
275     int overflow_embedding_count = 0;
276     int valid_isolate_count = 0;
277     int i;
278 
279     StackItem stack[MAX_DEPTH+2];
280     int stack_top = MAX_DEPTH+1;
281 
282     stack[stack_top].level = level;
283     stack[stack_top].override = NI;
284     stack[stack_top].isolate = FALSE;
285 
286     if (initialOverride)
287     {
288         if (odd(level))
289             push_stack(level, R, FALSE);
290         else
291             push_stack(level, L, FALSE);
292     }
293 
294     for (i = 0; i < count; i++)
295     {
296         poutOverrides[i] = stack[stack_top].override;
297 
298         /* X2 */
299         if (pclass[i] == RLE)
300         {
301             int least_odd = GreaterOdd(stack[stack_top].level);
302             poutLevel[i] = stack[stack_top].level;
303             if (valid_level(least_odd))
304                 push_stack(least_odd, NI, FALSE);
305             else if (overflow_isolate_count == 0)
306                 overflow_embedding_count++;
307         }
308         /* X3 */
309         else if (pclass[i] == LRE)
310         {
311             int least_even = GreaterEven(stack[stack_top].level);
312             poutLevel[i] = stack[stack_top].level;
313             if (valid_level(least_even))
314                 push_stack(least_even, NI, FALSE);
315             else if (overflow_isolate_count == 0)
316                 overflow_embedding_count++;
317         }
318         /* X4 */
319         else if (pclass[i] == RLO)
320         {
321             int least_odd = GreaterOdd(stack[stack_top].level);
322             poutLevel[i] = stack[stack_top].level;
323             if (valid_level(least_odd))
324                 push_stack(least_odd, R, FALSE);
325             else if (overflow_isolate_count == 0)
326                 overflow_embedding_count++;
327         }
328         /* X5 */
329         else if (pclass[i] == LRO)
330         {
331             int least_even = GreaterEven(stack[stack_top].level);
332             poutLevel[i] = stack[stack_top].level;
333             if (valid_level(least_even))
334                 push_stack(least_even, L, FALSE);
335             else if (overflow_isolate_count == 0)
336                 overflow_embedding_count++;
337         }
338         /* X5a */
339         else if (pclass[i] == RLI)
340         {
341             int least_odd = GreaterOdd(stack[stack_top].level);
342             poutLevel[i] = stack[stack_top].level;
343             if (valid_level(least_odd))
344             {
345                 valid_isolate_count++;
346                 push_stack(least_odd, NI, TRUE);
347             }
348             else
349                 overflow_isolate_count++;
350         }
351         /* X5b */
352         else if (pclass[i] == LRI)
353         {
354             int least_even = GreaterEven(stack[stack_top].level);
355             poutLevel[i] = stack[stack_top].level;
356             if (valid_level(least_even))
357             {
358                 valid_isolate_count++;
359                 push_stack(least_even, NI, TRUE);
360             }
361             else
362                 overflow_isolate_count++;
363         }
364         /* X5c */
365         else if (pclass[i] == FSI)
366         {
367             int j;
368             int new_level = 0;
369             int skipping = 0;
370             poutLevel[i] = stack[stack_top].level;
371             for (j = i+1; j < count; j++)
372             {
373                 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
374                 {
375                     skipping++;
376                     continue;
377                 }
378                 else if (pclass[j] == PDI)
379                 {
380                     if (skipping)
381                         skipping --;
382                     else
383                         break;
384                     continue;
385                 }
386 
387                 if (skipping) continue;
388 
389                 if (pclass[j] == L)
390                 {
391                     new_level = 0;
392                     break;
393                 }
394                 else if (pclass[j] == R || pclass[j] == AL)
395                 {
396                     new_level = 1;
397                     break;
398                 }
399             }
400             if (odd(new_level))
401             {
402                 int least_odd = GreaterOdd(stack[stack_top].level);
403                 if (valid_level(least_odd))
404                 {
405                     valid_isolate_count++;
406                     push_stack(least_odd, NI, TRUE);
407                 }
408                 else
409                     overflow_isolate_count++;
410             }
411             else
412             {
413                 int least_even = GreaterEven(stack[stack_top].level);
414                 if (valid_level(least_even))
415                 {
416                     valid_isolate_count++;
417                     push_stack(least_even, NI, TRUE);
418                 }
419                 else
420                     overflow_isolate_count++;
421             }
422         }
423         /* X6 */
424         else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
425         {
426             poutLevel[i] = stack[stack_top].level;
427             if (stack[stack_top].override != NI)
428                 pclass[i] = stack[stack_top].override;
429         }
430         /* X6a */
431         else if (pclass[i] == PDI)
432         {
433             if (overflow_isolate_count) overflow_isolate_count--;
434             else if (!valid_isolate_count) {/* do nothing */}
435             else
436             {
437                 overflow_embedding_count = 0;
438                 while (!stack[stack_top].isolate) pop_stack();
439                 pop_stack();
440                 valid_isolate_count --;
441             }
442             poutLevel[i] = stack[stack_top].level;
443         }
444         /* X7 */
445         else if (pclass[i] == PDF)
446         {
447             poutLevel[i] = stack[stack_top].level;
448             if (overflow_isolate_count) {/* do nothing */}
449             else if (overflow_embedding_count) overflow_embedding_count--;
450             else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
451                 pop_stack();
452         }
453         /* X8: Nothing */
454     }
455     /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
456     for (i = 0; i < count ; i++)
457         if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
458             pclass[i] = BN;
459 }
460 
461 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
462 {
463     if (index == -1 || index == back_fence) return index;
464     index --;
465     while (index > back_fence && pcls[index] == BN) index --;
466     return index;
467 }
468 
469 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
470 {
471     if (index == front_fence) return index;
472     index ++;
473     while (index < front_fence && pcls[index] == BN) index ++;
474     return index;
475 }
476 
477 typedef struct tagRun
478 {
479     int start;
480     int end;
481     WORD e;
482 } Run;
483 
484 typedef struct tagRunChar
485 {
486     WCHAR ch;
487     WORD *pcls;
488 } RunChar;
489 
490 typedef struct tagIsolatedRun
491 {
492     struct list entry;
493     int length;
494     WORD sos;
495     WORD eos;
496     WORD e;
497 
498     RunChar item[1];
499 } IsolatedRun;
500 
501 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
502 {
503     if (index >= (iso_run->length-1)) return -1;
504     index ++;
505     while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
506     if (index == iso_run->length) return -1;
507     return index;
508 }
509 
510 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
511 {
512 
513     if (index <= 0) return -1;
514     index --;
515     while (index > -1 && *iso_run->item[index].pcls == BN) index--;
516     return index;
517 }
518 
519 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
520 {
521     int i, len = 0;
522     TRACE("%s:",header);
523     TRACE("[ ");
524     for (i = 0; i < iso_run->length && len < 200; i++)
525     {
526         TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
527         len += strlen(debug_type[*iso_run->item[i].pcls])+1;
528     }
529     if (i != iso_run->length)
530         TRACE("...");
531     TRACE(" ]\n");
532 }
533 
534 /*------------------------------------------------------------------------
535     Function: resolveWeak
536 
537     Resolves the directionality of numeric and other weak character types
538 
539     Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
540 
541     Input: Array of embedding levels
542            Character count
543 
544     In/Out: Array of directional classes
545 
546     Note: On input only these directional classes are expected
547           AL, HL, R, L,  ON, BN, NSM, AN, EN, ES, ET, CS,
548 ------------------------------------------------------------------------*/
549 
550 static void resolveWeak(IsolatedRun * iso_run)
551 {
552     int i;
553 
554     /* W1 */
555     for (i=0; i < iso_run->length; i++)
556     {
557         if (*iso_run->item[i].pcls == NSM)
558         {
559             int j = iso_previousValidChar(iso_run, i);
560             if (j == -1)
561                 *iso_run->item[i].pcls = iso_run->sos;
562             else if (*iso_run->item[j].pcls >= LRI)
563                 *iso_run->item[i].pcls = ON;
564             else
565                 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
566         }
567     }
568 
569     /* W2 */
570     for (i = 0; i < iso_run->length; i++)
571     {
572         if (*iso_run->item[i].pcls == EN)
573         {
574             int j = iso_previousValidChar(iso_run, i);
575             while (j > -1)
576             {
577                 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
578                 {
579                     if (*iso_run->item[j].pcls == AL)
580                         *iso_run->item[i].pcls = AN;
581                     break;
582                 }
583                 j = iso_previousValidChar(iso_run, j);
584             }
585         }
586     }
587 
588     /* W3 */
589     for (i = 0; i < iso_run->length; i++)
590     {
591         if (*iso_run->item[i].pcls == AL)
592             *iso_run->item[i].pcls = R;
593     }
594 
595     /* W4 */
596     for (i = 0; i < iso_run->length; i++)
597     {
598         if (*iso_run->item[i].pcls == ES)
599         {
600             int b = iso_previousValidChar(iso_run, i);
601             int f = iso_nextValidChar(iso_run, i);
602 
603             if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
604                 *iso_run->item[i].pcls = EN;
605         }
606         else if (*iso_run->item[i].pcls == CS)
607         {
608             int b = iso_previousValidChar(iso_run, i);
609             int f = iso_nextValidChar(iso_run, i);
610 
611             if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
612                 *iso_run->item[i].pcls = EN;
613             else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
614                 *iso_run->item[i].pcls = AN;
615         }
616     }
617 
618     /* W5 */
619     for (i = 0; i < iso_run->length; i++)
620     {
621         if (*iso_run->item[i].pcls == ET)
622         {
623             int j;
624             for (j = i-1 ; j > -1; j--)
625             {
626                 if (*iso_run->item[j].pcls == BN) continue;
627                 if (*iso_run->item[j].pcls == ET) continue;
628                 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
629                 else break;
630             }
631             if (*iso_run->item[i].pcls == ET)
632             {
633                 for (j = i+1; j < iso_run->length; j++)
634                 {
635                     if (*iso_run->item[j].pcls == BN) continue;
636                     if (*iso_run->item[j].pcls == ET) continue;
637                     else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
638                     else break;
639                 }
640             }
641         }
642     }
643 
644     /* W6 */
645     for (i = 0; i < iso_run->length; i++)
646     {
647         if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
648         {
649             int b = i-1;
650             int f = i+1;
651             if (b > -1 && *iso_run->item[b].pcls == BN)
652                 *iso_run->item[b].pcls = ON;
653             if (f < iso_run->length && *iso_run->item[f].pcls == BN)
654                 *iso_run->item[f].pcls = ON;
655 
656             *iso_run->item[i].pcls = ON;
657         }
658     }
659 
660     /* W7 */
661     for (i = 0; i < iso_run->length; i++)
662     {
663         if (*iso_run->item[i].pcls == EN)
664         {
665             int j;
666             for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
667                 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
668                 {
669                     if (*iso_run->item[j].pcls == L)
670                         *iso_run->item[i].pcls = L;
671                     break;
672                 }
673             if (iso_run->sos == L &&  j == -1)
674                 *iso_run->item[i].pcls = L;
675         }
676     }
677 }
678 
679 typedef struct tagBracketPair
680 {
681     int start;
682     int end;
683 } BracketPair;
684 
685 static int compr(const void *a, const void* b)
686 {
687     return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
688 }
689 
690 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
691 {
692     WCHAR *open_stack;
693     int *stack_index;
694     int stack_top = iso_run->length;
695     unsigned int pair_count = 0;
696     BracketPair *out = NULL;
697     SIZE_T out_size = 0;
698     int i;
699 
700     open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
701     stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
702 
703     for (i = 0; i < iso_run->length; i++)
704     {
705         unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
706 
707         if (!ubv)
708             continue;
709 
710         if ((ubv >> 8) == 0)
711         {
712             --stack_top;
713             open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
714             /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
715             if (open_stack[stack_top] == 0x232a)
716                 open_stack[stack_top] = 0x3009;
717             stack_index[stack_top] = i;
718         }
719         else if ((ubv >> 8) == 1)
720         {
721             unsigned int j;
722 
723             for (j = stack_top; j < iso_run->length; ++j)
724             {
725                 WCHAR c = iso_run->item[i].ch;
726 
727                 if (c == 0x232a)
728                     c = 0x3009;
729 
730                 if (c != open_stack[j])
731                     continue;
732 
733                 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out))))
734                     ERR("Failed to grow output array.\n");
735 
736                 out[pair_count].start = stack_index[j];
737                 out[pair_count].end = i;
738                 ++pair_count;
739 
740                 out[pair_count].start = -1;
741                 stack_top = j + 1;
742                 break;
743             }
744         }
745     }
746 
747     heap_free(open_stack);
748     heap_free(stack_index);
749 
750     if (!pair_count)
751         return NULL;
752 
753     qsort(out, pair_count, sizeof(*out), compr);
754 
755     return out;
756 }
757 
758 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
759 
760 /*------------------------------------------------------------------------
761     Function: resolveNeutrals
762 
763     Resolves the directionality of neutral character types.
764 
765     Implements rules N1 and N2 of the Unicode Bidi Algorithm.
766 
767     Input: Array of embedding levels
768            Character count
769            Baselevel
770 
771     In/Out: Array of directional classes
772 
773     Note: On input only these directional classes are expected
774           R,  L,  NI, AN, EN and BN
775 
776           W8 resolves a number of ENs to L
777 ------------------------------------------------------------------------*/
778 static void resolveNeutrals(IsolatedRun *iso_run)
779 {
780     int i;
781     BracketPair *pairs = NULL;
782 
783     /* Translate isolates into NI */
784     for (i = 0; i < iso_run->length; i++)
785     {
786         if (*iso_run->item[i].pcls >= LRI)
787             *iso_run->item[i].pcls = NI;
788 
789         switch(*iso_run->item[i].pcls)
790         {
791             case B:
792             case S:
793             case WS: *iso_run->item[i].pcls = NI;
794         }
795 
796         ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R,  AN, EN and BN are allowed" */
797     }
798 
799     /* N0: Skipping bracketed pairs for now */
800     pairs = computeBracketPairs(iso_run);
801     if (pairs)
802     {
803         BracketPair *p = &pairs[0];
804         int i = 0;
805         while (p->start >= 0)
806         {
807             int j;
808             int e = EmbeddingDirection(iso_run->e);
809             int o = EmbeddingDirection(iso_run->e+1);
810             BOOL flag_o = FALSE;
811             TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
812 
813             /* N0.b */
814             for (j = p->start+1; j < p->end; j++)
815             {
816                 if (N0_TYPE(*iso_run->item[j].pcls) == e)
817                 {
818                     *iso_run->item[p->start].pcls = e;
819                     *iso_run->item[p->end].pcls = e;
820                     break;
821                 }
822                 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
823                     flag_o = TRUE;
824             }
825             /* N0.c */
826             if (j == p->end && flag_o)
827             {
828                 for (j = p->start; j >= 0; j--)
829                 {
830                     if (N0_TYPE(*iso_run->item[j].pcls) == o)
831                     {
832                         *iso_run->item[p->start].pcls = o;
833                         *iso_run->item[p->end].pcls = o;
834                         break;
835                     }
836                     else if (N0_TYPE(*iso_run->item[j].pcls) == e)
837                     {
838                         *iso_run->item[p->start].pcls = e;
839                         *iso_run->item[p->end].pcls = e;
840                         break;
841                     }
842                 }
843                 if ( j < 0 )
844                 {
845                     *iso_run->item[p->start].pcls = iso_run->sos;
846                     *iso_run->item[p->end].pcls = iso_run->sos;
847                 }
848             }
849 
850             i++;
851             p = &pairs[i];
852         }
853         heap_free(pairs);
854     }
855 
856     /* N1 */
857     for (i = 0; i < iso_run->length; i++)
858     {
859         WORD l,r;
860 
861         if (*iso_run->item[i].pcls == NI)
862         {
863             int j;
864             int b = iso_previousValidChar(iso_run, i);
865 
866             if (b == -1)
867             {
868                 l = iso_run->sos;
869                 b = 0;
870             }
871             else
872             {
873                 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
874                     l = R;
875                 else if (*iso_run->item[b].pcls == L)
876                     l = L;
877                 else /* No string type */
878                     continue;
879             }
880             j = iso_nextValidChar(iso_run, i);
881             while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
882 
883             if (j == -1)
884             {
885                 r = iso_run->eos;
886                 j = iso_run->length;
887             }
888             else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
889                 r = R;
890             else if (*iso_run->item[j].pcls == L)
891                 r = L;
892             else /* No string type */
893                 continue;
894 
895             if (r == l)
896             {
897                 for (b = i; b < j && b < iso_run->length; b++)
898                     *iso_run->item[b].pcls = r;
899             }
900         }
901     }
902 
903     /* N2 */
904     for (i = 0; i < iso_run->length; i++)
905     {
906         if (*iso_run->item[i].pcls == NI)
907         {
908             int b = i-1;
909             int f = i+1;
910             *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
911             if (b > -1 && *iso_run->item[b].pcls == BN)
912                 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
913             if (f < iso_run->length && *iso_run->item[f].pcls == BN)
914                 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
915         }
916     }
917 }
918 
919 /*------------------------------------------------------------------------
920     Function: resolveImplicit
921 
922     Recursively resolves implicit embedding levels.
923     Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
924 
925     Input: Array of direction classes
926            Character count
927            Base level
928 
929     In/Out: Array of embedding levels
930 
931     Note: levels may exceed 15 on output.
932           Accepted subset of direction classes
933           R, L, AN, EN
934 ------------------------------------------------------------------------*/
935 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
936 {
937     int i;
938 
939     /* I1/2 */
940     for (i = sos; i <= eos; i++)
941     {
942         if (pcls[i] == BN)
943             continue;
944 
945         ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
946         ASSERT(pcls[i] < 5); /* "Out of range." */
947 
948         if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
949             plevel[i]++;
950         else if (!odd(plevel[i]) && pcls[i] == R)
951             plevel[i]++;
952         else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
953             plevel[i]+=2;
954     }
955 }
956 
957 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
958 {
959     int i;
960 
961     /* L1 */
962     for (i = sos; i <= eos; i++)
963     {
964         if (pcls[i] == B || pcls[i] == S)
965         {
966             int j = i -1;
967             while (i > sos  && j >= sos &&
968                    (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
969                     pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
970                     pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
971                 plevel[j--] = baselevel;
972             plevel[i] = baselevel;
973         }
974         else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
975                  pcls[i] == PDF || pcls[i] == BN)
976         {
977             plevel[i] = i ? plevel[i - 1] : baselevel;
978         }
979         if (i == eos &&
980             (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
981              pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
982              pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
983         {
984             int j = i;
985             while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
986                                 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
987                                 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
988                 plevel[j--] = baselevel;
989         }
990     }
991 }
992 
993 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
994         const WCHAR *string, unsigned int uCount, struct list *set)
995 {
996     int run_start, run_end, i;
997     int run_count = 0;
998     Run *runs;
999     IsolatedRun *current_isolated;
1000 
1001     if (!(runs = heap_calloc(uCount, sizeof(*runs))))
1002         return;
1003 
1004     list_init(set);
1005 
1006     /* Build Runs */
1007     run_start = 0;
1008     while (run_start < uCount)
1009     {
1010         run_end = nextValidChar(pcls, run_start, uCount);
1011         while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
1012         run_end --;
1013         runs[run_count].start = run_start;
1014         runs[run_count].end = run_end;
1015         runs[run_count].e = pLevel[run_start];
1016         run_start = nextValidChar(pcls, run_end, uCount);
1017         run_count++;
1018     }
1019 
1020     /* Build Isolating Runs */
1021     i = 0;
1022     while (i < run_count)
1023     {
1024         int k = i;
1025         if (runs[k].start >= 0)
1026         {
1027             int type_fence, real_end;
1028             int j;
1029 
1030             if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
1031                 break;
1032 
1033             run_start = runs[k].start;
1034             current_isolated->e = runs[k].e;
1035             current_isolated->length = (runs[k].end - runs[k].start)+1;
1036 
1037             for (j = 0; j < current_isolated->length;  j++)
1038             {
1039                 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
1040                 current_isolated->item[j].ch = string[runs[k].start + j];
1041             }
1042 
1043             run_end = runs[k].end;
1044 
1045             TRACE("{ [%i -- %i]",run_start, run_end);
1046 
1047             if (pcls[run_end] == BN)
1048                 run_end = previousValidChar(pcls, run_end, runs[k].start);
1049 
1050             while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1051             {
1052                 j = k+1;
1053 search:
1054                 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1055                 if (j < run_count && runs[i].e != runs[j].e)
1056                 {
1057                     j++;
1058                     goto search;
1059                 }
1060 
1061                 if (j != run_count)
1062                 {
1063                     int m;
1064                     int l = current_isolated->length;
1065 
1066                     current_isolated->length += (runs[j].end - runs[j].start)+1;
1067                     for (m = 0; l < current_isolated->length; l++, m++)
1068                     {
1069                         current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1070                         current_isolated->item[l].ch = string[runs[j].start + m];
1071                     }
1072 
1073                     TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1074 
1075                     run_end = runs[j].end;
1076                     if (pcls[run_end] == BN)
1077                         run_end = previousValidChar(pcls, run_end, runs[i].start);
1078                     runs[j].start = -1;
1079                     k = j;
1080                 }
1081                 else
1082                 {
1083                     run_end = uCount;
1084                     break;
1085                 }
1086             }
1087 
1088             type_fence = previousValidChar(pcls, run_start, -1);
1089 
1090             if (type_fence == -1)
1091                 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1092             else
1093                 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1094 
1095             current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1096 
1097             if (run_end == uCount)
1098                 current_isolated->eos = current_isolated->sos;
1099             else
1100             {
1101                 /* eos could be an BN */
1102                 if ( pcls[run_end] == BN )
1103                 {
1104                     real_end = previousValidChar(pcls, run_end, run_start-1);
1105                     if (real_end < run_start)
1106                         real_end = run_start;
1107                 }
1108                 else
1109                     real_end = run_end;
1110 
1111                 type_fence = nextValidChar(pcls, run_end, uCount);
1112                 if (type_fence == uCount)
1113                     current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1114                 else
1115                     current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1116 
1117                 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1118             }
1119 
1120             list_add_tail(set, &current_isolated->entry);
1121             TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1122         }
1123         i++;
1124     }
1125 
1126     heap_free(runs);
1127 }
1128 
1129 /*************************************************************
1130  *    BIDI_DeterminLevels
1131  */
1132 BOOL BIDI_DetermineLevels(
1133                 const WCHAR *lpString,  /* [in] The string for which information is to be returned */
1134                 unsigned int uCount,    /* [in] Number of WCHARs in string. */
1135                 const SCRIPT_STATE *s,
1136                 const SCRIPT_CONTROL *c,
1137                 WORD *lpOutLevels, /* [out] final string levels */
1138                 WORD *lpOutOverrides /* [out] final string overrides */
1139     )
1140 {
1141     WORD *chartype;
1142     unsigned baselevel = 0;
1143     struct list IsolatingRuns;
1144     IsolatedRun *iso_run, *next;
1145 
1146     TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1147 
1148     if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1149     {
1150         WARN("Out of memory\n");
1151         return FALSE;
1152     }
1153 
1154     baselevel = s->uBidiLevel;
1155 
1156     classify(lpString, chartype, uCount, c);
1157     if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1158 
1159     memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1160 
1161     /* resolve explicit */
1162     resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1163     if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1164 
1165     /* X10/BD13: Computer Isolating runs */
1166     computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1167 
1168     LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1169     {
1170         if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1171 
1172         /* resolve weak */
1173         resolveWeak(iso_run);
1174         if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1175 
1176         /* resolve neutrals */
1177         resolveNeutrals(iso_run);
1178         if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1179 
1180         list_remove(&iso_run->entry);
1181         heap_free(iso_run);
1182     }
1183 
1184     if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1185     /* resolveImplicit */
1186     resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1187 
1188     /* resolveResolvedLevels*/
1189     classify(lpString, chartype, uCount, c);
1190     resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1191 
1192     heap_free(chartype);
1193     return TRUE;
1194 }
1195 
1196 /* reverse cch indexes */
1197 static void reverse(int *pidx, int cch)
1198 {
1199     int temp;
1200     int ich = 0;
1201     for (; ich < --cch; ich++)
1202     {
1203         temp = pidx[ich];
1204         pidx[ich] = pidx[cch];
1205         pidx[cch] = temp;
1206     }
1207 }
1208 
1209 
1210 /*------------------------------------------------------------------------
1211     Functions: reorder/reorderLevel
1212 
1213     Recursively reorders the display string
1214     "From the highest level down, reverse all characters at that level and
1215     higher, down to the lowest odd level"
1216 
1217     Implements rule L2 of the Unicode bidi Algorithm.
1218 
1219     Input: Array of embedding levels
1220            Character count
1221            Flag enabling reversal (set to false by initial caller)
1222 
1223     In/Out: Text to reorder
1224 
1225     Note: levels may exceed 15 resp. 61 on input.
1226 
1227     Rule L3 - reorder combining marks is not implemented here
1228     Rule L4 - glyph mirroring is implemented as a display option below
1229 
1230     Note: this should be applied a line at a time
1231 -------------------------------------------------------------------------*/
1232 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1233 {
1234     int ich = 0;
1235 
1236     /* true as soon as first odd level encountered */
1237     fReverse = fReverse || odd(level);
1238 
1239     for (; ich < cch; ich++)
1240     {
1241         if (plevel[ich] < level)
1242         {
1243             break;
1244         }
1245         else if (plevel[ich] > level)
1246         {
1247             ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1248                 cch - ich, fReverse) - 1;
1249         }
1250     }
1251     if (fReverse)
1252     {
1253         reverse(pIndexs, ich);
1254     }
1255     return ich;
1256 }
1257 
1258 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
1259 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1260 {
1261     int ich = 0;
1262     int newlevel = -1;
1263 
1264     /* true as soon as first odd level encountered */
1265     fReverse = fReverse || odd(level);
1266 
1267     for (; ich < cch; ich++)
1268     {
1269         if (plevel[ich] < level)
1270             break;
1271         else if (plevel[ich] > level)
1272             newlevel = ich;
1273     }
1274     if (fReverse)
1275     {
1276         reverse(pIndexs, ich);
1277     }
1278 
1279     if (newlevel >= 0)
1280     {
1281         ich = 0;
1282         for (; ich < cch; ich++)
1283             if (plevel[ich] < level)
1284                 break;
1285             else if (plevel[ich] > level)
1286                 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1287                 cch - ich, fReverse) - 1;
1288     }
1289 
1290     return ich;
1291 }
1292 
1293 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1294 {
1295     unsigned int i;
1296 
1297     classify(string, strength, count, c);
1298     for (i = 0; i < count; i++)
1299     {
1300         switch (strength[i])
1301         {
1302             case L:
1303             case LRE:
1304             case LRO:
1305             case R:
1306             case AL:
1307             case RLE:
1308             case RLO:
1309                 strength[i] = BIDI_STRONG;
1310                 break;
1311             case PDF:
1312             case EN:
1313             case ES:
1314             case ET:
1315             case AN:
1316             case CS:
1317             case BN:
1318                 strength[i] = BIDI_WEAK;
1319                 break;
1320             case B:
1321             case S:
1322             case WS:
1323             case ON:
1324             default: /* Neutrals and NSM */
1325                 strength[i] = BIDI_NEUTRAL;
1326         }
1327     }
1328     return TRUE;
1329 }
1330