xref: /reactos/dll/win32/usp10/bidi.c (revision 29ff85ba)
1 /*
2  * Uniscribe BiDirectional handling
3  *
4  * Copyright 2003 Shachar Shemesh
5  * Copyright 2007 Maarten Lankhorst
6  * Copyright 2010 CodeWeavers, Aric Stewart
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21  *
22  * Code derived from the modified reference implementation
23  * that was found in revision 17 of http://unicode.org/reports/tr9/
24  * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25  *
26  * -- Copyright (C) 1999-2005, ASMUS, Inc.
27  *
28  * Permission is hereby granted, free of charge, to any person obtaining a
29  * copy of the Unicode data files and any associated documentation (the
30  * "Data Files") or Unicode software and any associated documentation (the
31  * "Software") to deal in the Data Files or Software without restriction,
32  * including without limitation the rights to use, copy, modify, merge,
33  * publish, distribute, and/or sell copies of the Data Files or Software,
34  * and to permit persons to whom the Data Files or Software are furnished
35  * to do so, provided that (a) the above copyright notice(s) and this
36  * permission notice appear with all copies of the Data Files or Software,
37  * (b) both the above copyright notice(s) and this permission notice appear
38  * in associated documentation, and (c) there is clear notice in each
39  * modified Data File or in the Software as well as in the documentation
40  * associated with the Data File(s) or Software that the data or software
41  * has been modified.
42  */
43 
44 #include <stdarg.h>
45 #include <stdlib.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wingdi.h"
49 #include "winnls.h"
50 #include "usp10.h"
51 #include "wine/debug.h"
52 #include "wine/heap.h"
53 #include "wine/list.h"
54 
55 #include "usp10_internal.h"
56 
57 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
58 extern const unsigned short bidi_direction_table[] DECLSPEC_HIDDEN;
59 
60 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
61 
/* Soft assertion: when the condition is false, log a FIXME containing the
 * text of the failed expression and carry on; execution is never aborted. */
#define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
/* Upper bound on explicit embedding levels (checked by valid_level()); the
 * directional status stack in resolveExplicit() is sized MAX_DEPTH+2. */
#define MAX_DEPTH 125
64 
65 /* HELPER FUNCTIONS AND DECLARATIONS */
66 
/*------------------------------------------------------------------------
    Bidirectional Character Types

    as defined by the Unicode Bidirectional Algorithm Table 3-7.

    Note:

      The bidirectional character types are not grouped the same way as
      in table 3-7: the numeric values are chosen to keep the state and
      action tables compact. Code in this file depends on the numeric
      order (ON == NI == 0, "< 5" for the resolved types, ">= LRI" for
      the isolate controls), so the values are spelled out explicitly.
------------------------------------------------------------------------*/
enum directions
{
    /* input types; ON MUST be zero, code relies on ON = NI = 0 */
    ON  = 0,    /* Other Neutral */
    L   = 1,    /* Left Letter */
    R   = 2,    /* Right Letter */
    AN  = 3,    /* Arabic Number */
    EN  = 4,    /* European Number */
    AL  = 5,    /* Arabic Letter (Right-to-left) */
    NSM = 6,    /* Non-spacing Mark */
    CS  = 7,    /* Common Separator */
    ES  = 8,    /* European Separator */
    ET  = 9,    /* European Terminator (post/prefix e.g. $ and %) */

    /* resolved type */
    BN  = 10,   /* Boundary neutral (type of RLE etc after explicit levels) */

    /* more input types */
    S   = 11,   /* Segment Separator (TAB), used only in L1 */
    WS  = 12,   /* White space, used only in L1 */
    B   = 13,   /* Paragraph Separator (also known as PS) */

    /* explicit embedding/override controls, used only in X1-X9 */
    RLO = 14,
    RLE = 15,
    LRO = 16,
    LRE = 17,
    PDF = 18,

    /* isolate formatting characters, new with Unicode 6.3 */
    LRI = 19,
    RLI = 20,
    FSI = 21,
    PDI = 22,

    /* resolved type, also a resolved direction: alias used where ON, WS, S
       and the isolates are all treated the same */
    NI  = ON
};
116 
/* Printable names of the direction classes, indexed by the class value
 * (same order as enum directions, ON through PDI). The longest name has
 * three characters, so each entry fits in four bytes with its terminator. */
static const char debug_type[][4] =
{
    /* strong, weak and neutral input types */
    "ON", "L", "R", "AN", "EN", "AL", "NSM", "CS", "ES", "ET",
    /* boundary neutral */
    "BN",
    /* separators and white space */
    "S", "WS", "B",
    /* explicit embedding and override controls */
    "RLO", "RLE", "LRO", "LRE", "PDF",
    /* isolate formatting characters */
    "LRI", "RLI", "FSI", "PDI",
};
143 
144 /* HELPER FUNCTIONS */
145 
/* Trace "header:" followed by the class names of types[start] up to (but not
 * including) types[end]. Once about 200 characters of names have been
 * written, the rest is replaced by "...". */
static inline void dump_types(const char* header, WORD *types, int start, int end)
{
    int pos = start;
    int printed = 0;

    TRACE("%s:",header);
    while (pos < end && printed < 200)
    {
        const char *name = debug_type[types[pos]];

        TRACE(" %s",name);
        printed += strlen(name)+1;
        pos++;
    }
    if (pos != end)
        TRACE("...");
    TRACE("\n");
}
159 
160 /* Convert the libwine information to the direction enum */
classify(const WCHAR * string,WORD * chartype,DWORD count,const SCRIPT_CONTROL * c)161 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
162 {
163     unsigned i;
164 
165     for (i = 0; i < count; ++i)
166     {
167         chartype[i] = get_table_entry( bidi_direction_table, string[i] );
168         if (c->fLegacyBidiClass && chartype[i] == ES)
169         {
170             if (string[i] == '+' || string[i] == '-') chartype[i] = NI;
171         }
172     }
173 }
174 
175 /* RESOLVE EXPLICIT */
176 
GreaterEven(int i)177 static WORD GreaterEven(int i)
178 {
179     return odd(i) ? i + 1 : i + 2;
180 }
181 
GreaterOdd(int i)182 static WORD GreaterOdd(int i)
183 {
184     return odd(i) ? i + 2 : i + 1;
185 }
186 
EmbeddingDirection(int level)187 static WORD EmbeddingDirection(int level)
188 {
189     return odd(level) ? R : L;
190 }
191 
/*------------------------------------------------------------------------
    Function: resolveExplicit

    Resolves explicit embedding levels, overrides and isolates.
    Implements rules X1-X9 of the Unicode Bidirectional Algorithm.

    Input: Base embedding level
           Character count
           Initial override flag

    Output: Array of embedding levels
            Array of directional overrides

    In/Out: Array of direction classes


    Note: The function is iterative. It keeps an explicit directional
          status stack and three counters (overflow isolates, overflow
          embeddings and valid isolates) to match the explicit codes
          with their PDF and PDI terminators.
------------------------------------------------------------------------*/
211 typedef struct tagStackItem {
212     int level;
213     int override;
214     BOOL isolate;
215 } StackItem;
216 
217 #define push_stack(l,o,i)  \
218   do { stack_top--; \
219   stack[stack_top].level = l; \
220   stack[stack_top].override = o; \
221   stack[stack_top].isolate = i;} while(0)
222 
223 #define pop_stack() do { stack_top++; } while(0)
224 
225 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
226 
/* Single pass over pclass[0..count): maintains the directional status stack,
 * stores the level and the active override of every position in poutLevel and
 * poutOverrides, and rewrites pclass where an override is in force (X6).
 * poutLevel is left untouched for characters of class B and BN. Finally the
 * embedding/override controls and PDF are turned into BN (X9). */
static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
{
    StackItem stack[MAX_DEPTH+2];
    int stack_top = MAX_DEPTH+1;
    int overflow_isolate_count = 0;
    int overflow_embedding_count = 0;
    int valid_isolate_count = 0;
    int i;

    /* X1: the bottom entry carries the base level and no override */
    stack[stack_top].level = level;
    stack[stack_top].override = NI;
    stack[stack_top].isolate = FALSE;

    if (initialOverride)
    {
        /* start with an override matching the direction of the base level */
        const int forced = odd(level) ? R : L;

        push_stack(level, forced, FALSE);
    }

    for (i = 0; i < count; i++)
    {
        const WORD cls = pclass[i];

        poutOverrides[i] = stack[stack_top].override;

        switch (cls)
        {
        /* X2-X5: explicit embeddings (RLE, LRE) and overrides (RLO, LRO) */
        case RLE:
        case LRE:
        case RLO:
        case LRO:
        {
            const BOOL rtl = (cls == RLE || cls == RLO);
            const int next_level = rtl ? GreaterOdd(stack[stack_top].level)
                                       : GreaterEven(stack[stack_top].level);
            int next_override = NI;

            if (cls == RLO)
                next_override = R;
            else if (cls == LRO)
                next_override = L;

            poutLevel[i] = stack[stack_top].level;
            if (valid_level(next_level))
                push_stack(next_level, next_override, FALSE);
            else if (overflow_isolate_count == 0)
                overflow_embedding_count++;
            break;
        }

        /* X5a-X5c: isolate initiators */
        case RLI:
        case LRI:
        case FSI:
        {
            BOOL rtl = (cls == RLI);
            int next_level;

            poutLevel[i] = stack[stack_top].level;

            if (cls == FSI)
            {
                /* The direction comes from the first L, R or AL found before
                   the matching PDI, ignoring the content of nested isolates;
                   without such a character the isolate is left-to-right. */
                int depth = 0;
                int j;

                for (j = i+1; j < count; j++)
                {
                    const WORD ahead = pclass[j];

                    if (ahead == LRI || ahead == RLI || ahead == FSI)
                    {
                        depth++;
                    }
                    else if (ahead == PDI)
                    {
                        if (!depth)
                            break;
                        depth--;
                    }
                    else if (!depth)
                    {
                        if (ahead == L)
                            break;
                        if (ahead == R || ahead == AL)
                        {
                            rtl = TRUE;
                            break;
                        }
                    }
                }
            }

            next_level = rtl ? GreaterOdd(stack[stack_top].level)
                             : GreaterEven(stack[stack_top].level);
            if (valid_level(next_level))
            {
                valid_isolate_count++;
                push_stack(next_level, NI, TRUE);
            }
            else
                overflow_isolate_count++;
            break;
        }

        /* B and BN are not handled here (X8: nothing) */
        case B:
        case BN:
            break;

        /* X6a */
        case PDI:
            if (overflow_isolate_count)
            {
                overflow_isolate_count--;
            }
            else if (valid_isolate_count)
            {
                /* drop embeddings opened inside the isolate, then the
                   isolate entry itself */
                overflow_embedding_count = 0;
                while (!stack[stack_top].isolate)
                    pop_stack();
                pop_stack();
                valid_isolate_count--;
            }
            poutLevel[i] = stack[stack_top].level;
            break;

        /* X7 */
        case PDF:
            poutLevel[i] = stack[stack_top].level;
            if (!overflow_isolate_count)
            {
                if (overflow_embedding_count)
                    overflow_embedding_count--;
                else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
                    pop_stack();
            }
            break;

        /* X6: every other character takes the current level and, when an
           override is active, the class of that override */
        default:
            poutLevel[i] = stack[stack_top].level;
            if (stack[stack_top].override != NI)
                pclass[i] = stack[stack_top].override;
            break;
        }
    }

    /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
    for (i = 0; i < count; i++)
    {
        switch (pclass[i])
        {
        case RLE:
        case LRE:
        case RLO:
        case LRO:
        case PDF:
            pclass[i] = BN;
            break;
        default:
            break;
        }
    }
}
416 
previousValidChar(const WORD * pcls,int index,int back_fence)417 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
418 {
419     if (index == -1 || index == back_fence) return index;
420     index --;
421     while (index > back_fence && pcls[index] == BN) index --;
422     return index;
423 }
424 
nextValidChar(const WORD * pcls,int index,int front_fence)425 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
426 {
427     if (index == front_fence) return index;
428     index ++;
429     while (index < front_fence && pcls[index] == BN) index ++;
430     return index;
431 }
432 
/* An index range with an associated WORD value.
 * NOTE(review): no user of this type is visible in this part of the file;
 * presumably start/end delimit a level run and e is its embedding level -
 * confirm against the code that builds the isolating run set. */
typedef struct tagRun
{
    int start;
    int end;
    WORD e;
} Run;
439 
/* One character of an isolating run. */
typedef struct tagRunChar
{
    WCHAR ch;    /* the character itself; used for the bracket table lookup */
    WORD *pcls;  /* points at the character's direction class, which the
                    resolve* functions read and update in place */
} RunChar;
445 
/* A sequence of characters that is resolved as a unit by resolveWeak() and
 * resolveNeutrals(). */
typedef struct tagIsolatedRun
{
    struct list entry;  /* presumably links the run into the caller's list of runs - confirm */
    int length;         /* number of valid entries in item[] */
    WORD sos;           /* class used in place of a missing preceding character */
    WORD eos;           /* class used in place of a missing following character */
    WORD e;             /* level passed to EmbeddingDirection() for this run */

    /* Indexed up to length-1 by the code in this file, so the structure is
       presumably allocated with extra room after it - confirm at the
       allocation site. */
    RunChar item[1];
} IsolatedRun;
456 
/* Index of the first item after index whose class is not BN, or -1 when the
 * run has no such item. */
static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
{
    int pos;

    if (index >= (iso_run->length-1))
        return -1;

    for (pos = index + 1; pos < iso_run->length; pos++)
    {
        if (*iso_run->item[pos].pcls != BN)
            return pos;
    }
    return -1;
}
465 
/* Index of the last item before index whose class is not BN, or -1 when the
 * run has no such item. */
static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
{
    int pos;

    if (index <= 0)
        return -1;

    for (pos = index - 1; pos > -1; pos--)
    {
        if (*iso_run->item[pos].pcls != BN)
            return pos;
    }
    return -1;
}
474 
/* Trace "header:[ " followed by the class names of the run's items and a
 * closing " ]". Once about 200 characters of names have been written, the
 * rest is replaced by "...". */
static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
{
    int pos = 0;
    int printed = 0;

    TRACE("%s:",header);
    TRACE("[ ");
    while (pos < iso_run->length && printed < 200)
    {
        const char *name = debug_type[*iso_run->item[pos].pcls];

        TRACE(" %s",name);
        printed += strlen(name)+1;
        pos++;
    }
    if (pos != iso_run->length)
        TRACE("...");
    TRACE(" ]\n");
}
489 
490 /*------------------------------------------------------------------------
491     Function: resolveWeak
492 
493     Resolves the directionality of numeric and other weak character types
494 
495     Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
496 
497     Input: Array of embedding levels
498            Character count
499 
500     In/Out: Array of directional classes
501 
502     Note: On input only these directional classes are expected
503           AL, HL, R, L,  ON, BN, NSM, AN, EN, ES, ET, CS,
504 ------------------------------------------------------------------------*/
505 
resolveWeak(IsolatedRun * iso_run)506 static void resolveWeak(IsolatedRun * iso_run)
507 {
508     int i;
509 
510     /* W1 */
511     for (i=0; i < iso_run->length; i++)
512     {
513         if (*iso_run->item[i].pcls == NSM)
514         {
515             int j = iso_previousValidChar(iso_run, i);
516             if (j == -1)
517                 *iso_run->item[i].pcls = iso_run->sos;
518             else if (*iso_run->item[j].pcls >= LRI)
519                 *iso_run->item[i].pcls = ON;
520             else
521                 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
522         }
523     }
524 
525     /* W2 */
526     for (i = 0; i < iso_run->length; i++)
527     {
528         if (*iso_run->item[i].pcls == EN)
529         {
530             int j = iso_previousValidChar(iso_run, i);
531             while (j > -1)
532             {
533                 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
534                 {
535                     if (*iso_run->item[j].pcls == AL)
536                         *iso_run->item[i].pcls = AN;
537                     break;
538                 }
539                 j = iso_previousValidChar(iso_run, j);
540             }
541         }
542     }
543 
544     /* W3 */
545     for (i = 0; i < iso_run->length; i++)
546     {
547         if (*iso_run->item[i].pcls == AL)
548             *iso_run->item[i].pcls = R;
549     }
550 
551     /* W4 */
552     for (i = 0; i < iso_run->length; i++)
553     {
554         if (*iso_run->item[i].pcls == ES)
555         {
556             int b = iso_previousValidChar(iso_run, i);
557             int f = iso_nextValidChar(iso_run, i);
558 
559             if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
560                 *iso_run->item[i].pcls = EN;
561         }
562         else if (*iso_run->item[i].pcls == CS)
563         {
564             int b = iso_previousValidChar(iso_run, i);
565             int f = iso_nextValidChar(iso_run, i);
566 
567             if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
568                 *iso_run->item[i].pcls = EN;
569             else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
570                 *iso_run->item[i].pcls = AN;
571         }
572     }
573 
574     /* W5 */
575     for (i = 0; i < iso_run->length; i++)
576     {
577         if (*iso_run->item[i].pcls == ET)
578         {
579             int j;
580             for (j = i-1 ; j > -1; j--)
581             {
582                 if (*iso_run->item[j].pcls == BN) continue;
583                 if (*iso_run->item[j].pcls == ET) continue;
584                 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
585                 else break;
586             }
587             if (*iso_run->item[i].pcls == ET)
588             {
589                 for (j = i+1; j < iso_run->length; j++)
590                 {
591                     if (*iso_run->item[j].pcls == BN) continue;
592                     if (*iso_run->item[j].pcls == ET) continue;
593                     else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
594                     else break;
595                 }
596             }
597         }
598     }
599 
600     /* W6 */
601     for (i = 0; i < iso_run->length; i++)
602     {
603         if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
604         {
605             int b = i-1;
606             int f = i+1;
607             if (b > -1 && *iso_run->item[b].pcls == BN)
608                 *iso_run->item[b].pcls = ON;
609             if (f < iso_run->length && *iso_run->item[f].pcls == BN)
610                 *iso_run->item[f].pcls = ON;
611 
612             *iso_run->item[i].pcls = ON;
613         }
614     }
615 
616     /* W7 */
617     for (i = 0; i < iso_run->length; i++)
618     {
619         if (*iso_run->item[i].pcls == EN)
620         {
621             int j;
622             for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
623                 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
624                 {
625                     if (*iso_run->item[j].pcls == L)
626                         *iso_run->item[i].pcls = L;
627                     break;
628                 }
629             if (iso_run->sos == L &&  j == -1)
630                 *iso_run->item[i].pcls = L;
631         }
632     }
633 }
634 
/* A matched pair of brackets inside an isolating run. The array returned by
 * computeBracketPairs() ends with an entry whose start is -1. */
typedef struct tagBracketPair
{
    int start;  /* item index of the opening bracket */
    int end;    /* item index of the closing bracket */
} BracketPair;
640 
compr(const void * a,const void * b)641 static int __cdecl compr(const void *a, const void* b)
642 {
643     return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
644 }
645 
computeBracketPairs(IsolatedRun * iso_run)646 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
647 {
648     WCHAR *open_stack;
649     int *stack_index;
650     int stack_top = iso_run->length;
651     unsigned int pair_count = 0;
652     BracketPair *out = NULL;
653     SIZE_T out_size = 0;
654     int i;
655 
656     open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
657     stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
658 
659     for (i = 0; i < iso_run->length; i++)
660     {
661         unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
662 
663         if (!ubv)
664             continue;
665 
666         if ((ubv >> 8) == 0)
667         {
668             --stack_top;
669             open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
670             /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
671             if (open_stack[stack_top] == 0x232a)
672                 open_stack[stack_top] = 0x3009;
673             stack_index[stack_top] = i;
674         }
675         else if ((ubv >> 8) == 1)
676         {
677             unsigned int j;
678 
679             for (j = stack_top; j < iso_run->length; ++j)
680             {
681                 WCHAR c = iso_run->item[i].ch;
682 
683                 if (c == 0x232a)
684                     c = 0x3009;
685 
686                 if (c != open_stack[j])
687                     continue;
688 
689                 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out))))
690                     ERR("Failed to grow output array.\n");
691 
692                 out[pair_count].start = stack_index[j];
693                 out[pair_count].end = i;
694                 ++pair_count;
695 
696                 out[pair_count].start = -1;
697                 stack_top = j + 1;
698                 break;
699             }
700         }
701     }
702 
703     heap_free(open_stack);
704     heap_free(stack_index);
705 
706     if (!pair_count)
707         return NULL;
708 
709     qsort(out, pair_count, sizeof(*out), compr);
710 
711     return out;
712 }
713 
/* Class of a character as seen by rule N0: AN and EN count as R, every other
 * class is returned unchanged. The argument is evaluated more than once, so
 * it must not have side effects. */
#define N0_TYPE(a) (((a) == AN || (a) == EN) ? R : (a))
715 
716 /*------------------------------------------------------------------------
717     Function: resolveNeutrals
718 
719     Resolves the directionality of neutral character types.
720 
721     Implements rules N1 and N2 of the Unicode Bidi Algorithm.
722 
723     Input: Array of embedding levels
724            Character count
725            Baselevel
726 
727     In/Out: Array of directional classes
728 
729     Note: On input only these directional classes are expected
730           R,  L,  NI, AN, EN and BN
731 
732           W8 resolves a number of ENs to L
733 ------------------------------------------------------------------------*/
resolveNeutrals(IsolatedRun * iso_run)734 static void resolveNeutrals(IsolatedRun *iso_run)
735 {
736     int i;
737     BracketPair *pairs = NULL;
738 
739     /* Translate isolates into NI */
740     for (i = 0; i < iso_run->length; i++)
741     {
742         if (*iso_run->item[i].pcls >= LRI)
743             *iso_run->item[i].pcls = NI;
744 
745         switch(*iso_run->item[i].pcls)
746         {
747             case B:
748             case S:
749             case WS: *iso_run->item[i].pcls = NI;
750         }
751 
752         ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R,  AN, EN and BN are allowed" */
753     }
754 
755     /* N0: Skipping bracketed pairs for now */
756     pairs = computeBracketPairs(iso_run);
757     if (pairs)
758     {
759         BracketPair *p = &pairs[0];
760         int i = 0;
761         while (p->start >= 0)
762         {
763             int j;
764             int e = EmbeddingDirection(iso_run->e);
765             int o = EmbeddingDirection(iso_run->e+1);
766             BOOL flag_o = FALSE;
767             TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
768 
769             /* N0.b */
770             for (j = p->start+1; j < p->end; j++)
771             {
772                 if (N0_TYPE(*iso_run->item[j].pcls) == e)
773                 {
774                     *iso_run->item[p->start].pcls = e;
775                     *iso_run->item[p->end].pcls = e;
776                     break;
777                 }
778                 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
779                     flag_o = TRUE;
780             }
781             /* N0.c */
782             if (j == p->end && flag_o)
783             {
784                 for (j = p->start; j >= 0; j--)
785                 {
786                     if (N0_TYPE(*iso_run->item[j].pcls) == o)
787                     {
788                         *iso_run->item[p->start].pcls = o;
789                         *iso_run->item[p->end].pcls = o;
790                         break;
791                     }
792                     else if (N0_TYPE(*iso_run->item[j].pcls) == e)
793                     {
794                         *iso_run->item[p->start].pcls = e;
795                         *iso_run->item[p->end].pcls = e;
796                         break;
797                     }
798                 }
799                 if ( j < 0 )
800                 {
801                     *iso_run->item[p->start].pcls = iso_run->sos;
802                     *iso_run->item[p->end].pcls = iso_run->sos;
803                 }
804             }
805 
806             i++;
807             p = &pairs[i];
808         }
809         heap_free(pairs);
810     }
811 
812     /* N1 */
813     for (i = 0; i < iso_run->length; i++)
814     {
815         WORD l,r;
816 
817         if (*iso_run->item[i].pcls == NI)
818         {
819             int j;
820             int b = iso_previousValidChar(iso_run, i);
821 
822             if (b == -1)
823             {
824                 l = iso_run->sos;
825                 b = 0;
826             }
827             else
828             {
829                 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
830                     l = R;
831                 else if (*iso_run->item[b].pcls == L)
832                     l = L;
833                 else /* No string type */
834                     continue;
835             }
836             j = iso_nextValidChar(iso_run, i);
837             while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
838 
839             if (j == -1)
840             {
841                 r = iso_run->eos;
842                 j = iso_run->length;
843             }
844             else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
845                 r = R;
846             else if (*iso_run->item[j].pcls == L)
847                 r = L;
848             else /* No string type */
849                 continue;
850 
851             if (r == l)
852             {
853                 for (b = i; b < j && b < iso_run->length; b++)
854                     *iso_run->item[b].pcls = r;
855             }
856         }
857     }
858 
859     /* N2 */
860     for (i = 0; i < iso_run->length; i++)
861     {
862         if (*iso_run->item[i].pcls == NI)
863         {
864             int b = i-1;
865             int f = i+1;
866             *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
867             if (b > -1 && *iso_run->item[b].pcls == BN)
868                 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
869             if (f < iso_run->length && *iso_run->item[f].pcls == BN)
870                 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
871         }
872     }
873 }
874 
875 /*------------------------------------------------------------------------
876     Function: resolveImplicit
877 
878     Recursively resolves implicit embedding levels.
879     Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
880 
881     Input: Array of direction classes
882            Character count
883            Base level
884 
885     In/Out: Array of embedding levels
886 
887     Note: levels may exceed 15 on output.
888           Accepted subset of direction classes
889           R, L, AN, EN
890 ------------------------------------------------------------------------*/
resolveImplicit(const WORD * pcls,WORD * plevel,int sos,int eos)891 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
892 {
893     int i;
894 
895     /* I1/2 */
896     for (i = sos; i <= eos; i++)
897     {
898         if (pcls[i] == BN)
899             continue;
900 
901         ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
902         ASSERT(pcls[i] < 5); /* "Out of range." */
903 
904         if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
905             plevel[i]++;
906         else if (!odd(plevel[i]) && pcls[i] == R)
907             plevel[i]++;
908         else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
909             plevel[i]+=2;
910     }
911 }
912 
/* Returns non-zero when cls is one of the classes whose level rule L1
 * resets to the base level when it precedes a separator or ends the range:
 * whitespace, isolate formatting characters, explicit embedding/override
 * formatting characters and BN. */
static int is_L1_resettable_class(WORD cls)
{
    return cls == WS  || cls == FSI || cls == LRI || cls == RLI ||
           cls == PDI || cls == LRE || cls == RLE || cls == LRO ||
           cls == RLO || cls == PDF || cls == BN;
}

/*------------------------------------------------------------------------
    Function: resolveResolved

    Applies rule L1 over the inclusive index range [sos, eos]:
      - a B or S element, and any run of resettable classes immediately
        before it, is set to the base level;
      - an explicit embedding/override formatting character or BN takes
        the level of the element before it, or the base level when it is
        the first element of the range;
      - a run of resettable classes at the end of the range is set to the
        base level.

    Input: baselevel - base embedding level
           pcls      - original (unresolved) direction classes
           sos, eos  - first and last index to process (inclusive)

    In/Out: plevel   - embedding levels, adjusted in place
------------------------------------------------------------------------*/
static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
{
    int i;

    /* L1 */
    for (i = sos; i <= eos; i++)
    {
        if (pcls[i] == B || pcls[i] == S)
        {
            /* Walk back over the whitespace/formatting run in front of the separator. */
            int j = i - 1;
            while (j >= sos && is_L1_resettable_class(pcls[j]))
                plevel[j--] = baselevel;
            plevel[i] = baselevel;
        }
        else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
                 pcls[i] == PDF || pcls[i] == BN)
        {
            /* Compare against sos, not 0, so that nothing in front of the
             * requested range is ever read. */
            plevel[i] = (i > sos) ? plevel[i - 1] : baselevel;
        }
        if (i == eos && is_L1_resettable_class(pcls[i]))
        {
            /* Trailing run at the end of the range. */
            int j = i;
            while (j >= sos && is_L1_resettable_class(pcls[j]))
                plevel[j--] = baselevel;
        }
    }
}
948 
/*------------------------------------------------------------------------
    Function: computeIsolatingRunsSet

    Splits the text into level runs and joins them into isolating run
    sequences (X10/BD13).  Each sequence is a heap-allocated IsolatedRun
    appended to 'set'; its items point back into pcls so that later
    resolution steps update the class array in place.

    Input: baselevel - base embedding level
           pcls      - direction classes (items reference this array)
           pLevel    - embedding levels from explicit resolution
           string    - the characters being analysed
           uCount    - number of elements in the arrays above

    Output: set      - always initialised; receives the IsolatedRun entries.
                       The caller removes and frees them.

    Note: if the temporary run table cannot be allocated the set is left
          empty; if an IsolatedRun cannot be allocated the set contains
          only the sequences built so far.
------------------------------------------------------------------------*/
static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
        const WCHAR *string, unsigned int uCount, struct list *set)
{
    int run_start, run_end, i;
    int run_count = 0;
    Run *runs;
    IsolatedRun *current_isolated;

    /* Initialise the output list before anything can fail, so the caller
     * always gets a valid (possibly empty) list to iterate. */
    list_init(set);

    if (!(runs = heap_calloc(uCount, sizeof(*runs))))
        return;

    /* Build Runs */
    run_start = 0;
    while (run_start < uCount)
    {
        run_end = nextValidChar(pcls, run_start, uCount);
        while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
        run_end --;
        runs[run_count].start = run_start;
        runs[run_count].end = run_end;
        runs[run_count].e = pLevel[run_start];
        run_start = nextValidChar(pcls, run_end, uCount);
        run_count++;
    }

    /* Build Isolating Runs */
    i = 0;
    while (i < run_count)
    {
        int k = i;
        /* A start of -1 marks a run already absorbed into an earlier sequence. */
        if (runs[k].start >= 0)
        {
            int type_fence, real_end;
            int j;

            if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
                break;

            run_start = runs[k].start;
            current_isolated->e = runs[k].e;
            current_isolated->length = (runs[k].end - runs[k].start)+1;

            for (j = 0; j < current_isolated->length;  j++)
            {
                current_isolated->item[j].pcls = &pcls[runs[k].start+j];
                current_isolated->item[j].ch = string[runs[k].start + j];
            }

            run_end = runs[k].end;

            TRACE("{ [%i -- %i]",run_start, run_end);

            if (pcls[run_end] == BN)
                run_end = previousValidChar(pcls, run_end, runs[k].start);

            /* While the sequence ends in an isolate initiator, look for a
             * later run at the same level that starts with PDI and append it. */
            while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
            {
                j = k+1;
search:
                /* Skip runs that were already absorbed (start == -1) instead
                 * of indexing pcls with -1. */
                while (j < run_count && (runs[j].start < 0 || pcls[runs[j].start] != PDI)) j++;
                if (j < run_count && runs[i].e != runs[j].e)
                {
                    j++;
                    goto search;
                }

                if (j != run_count)
                {
                    int m;
                    int l = current_isolated->length;

                    current_isolated->length += (runs[j].end - runs[j].start)+1;
                    for (m = 0; l < current_isolated->length; l++, m++)
                    {
                        current_isolated->item[l].pcls = &pcls[runs[j].start+m];
                        current_isolated->item[l].ch = string[runs[j].start + m];
                    }

                    TRACE("[%i -- %i]",runs[j].start, runs[j].end);

                    run_end = runs[j].end;
                    if (pcls[run_end] == BN)
                        run_end = previousValidChar(pcls, run_end, runs[i].start);
                    runs[j].start = -1;
                    k = j;
                }
                else
                {
                    /* No matching PDI run: treat the sequence as reaching the end of the text. */
                    run_end = uCount;
                    break;
                }
            }

            /* sos: direction of the higher of the level before the sequence
             * (or the base level at the very start) and the sequence's own level. */
            type_fence = previousValidChar(pcls, run_start, -1);

            if (type_fence == -1)
                current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
            else
                current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];

            current_isolated->sos = EmbeddingDirection(current_isolated->sos);

            if (run_end == uCount)
                current_isolated->eos = current_isolated->sos;
            else
            {
                /* eos could be an BN */
                if ( pcls[run_end] == BN )
                {
                    real_end = previousValidChar(pcls, run_end, run_start-1);
                    if (real_end < run_start)
                        real_end = run_start;
                }
                else
                    real_end = run_end;

                /* eos: same comparison against the level following the sequence. */
                type_fence = nextValidChar(pcls, run_end, uCount);
                if (type_fence == uCount)
                    current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
                else
                    current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];

                current_isolated->eos = EmbeddingDirection(current_isolated->eos);
            }

            list_add_tail(set, &current_isolated->entry);
            TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
        }
        i++;
    }

    heap_free(runs);
}
1084 
1085 /*************************************************************
1086  *    BIDI_DeterminLevels
1087  */
BIDI_DetermineLevels(const WCHAR * lpString,unsigned int uCount,const SCRIPT_STATE * s,const SCRIPT_CONTROL * c,WORD * lpOutLevels,WORD * lpOutOverrides)1088 BOOL BIDI_DetermineLevels(
1089                 const WCHAR *lpString,  /* [in] The string for which information is to be returned */
1090                 unsigned int uCount,    /* [in] Number of WCHARs in string. */
1091                 const SCRIPT_STATE *s,
1092                 const SCRIPT_CONTROL *c,
1093                 WORD *lpOutLevels, /* [out] final string levels */
1094                 WORD *lpOutOverrides /* [out] final string overrides */
1095     )
1096 {
1097     WORD *chartype;
1098     unsigned baselevel = 0;
1099     struct list IsolatingRuns;
1100     IsolatedRun *iso_run, *next;
1101 
1102     TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1103 
1104     if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1105     {
1106         WARN("Out of memory\n");
1107         return FALSE;
1108     }
1109 
1110     baselevel = s->uBidiLevel;
1111 
1112     classify(lpString, chartype, uCount, c);
1113     if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1114 
1115     memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1116 
1117     /* resolve explicit */
1118     resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1119     if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1120 
1121     /* X10/BD13: Computer Isolating runs */
1122     computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1123 
1124     LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1125     {
1126         if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1127 
1128         /* resolve weak */
1129         resolveWeak(iso_run);
1130         if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1131 
1132         /* resolve neutrals */
1133         resolveNeutrals(iso_run);
1134         if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1135 
1136         list_remove(&iso_run->entry);
1137         heap_free(iso_run);
1138     }
1139 
1140     if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1141     /* resolveImplicit */
1142     resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1143 
1144     /* resolveResolvedLevels*/
1145     classify(lpString, chartype, uCount, c);
1146     resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1147 
1148     heap_free(chartype);
1149     return TRUE;
1150 }
1151 
/* Reverses the first cch entries of pidx in place.
 * Nothing is touched when cch is 0 or 1. */
static void reverse(int *pidx, int cch)
{
    int lo = 0;
    int hi = cch - 1;

    /* Swap from both ends towards the middle. */
    while (lo < hi)
    {
        int swap = pidx[lo];

        pidx[lo] = pidx[hi];
        pidx[hi] = swap;
        lo++;
        hi--;
    }
}
1164 
1165 
1166 /*------------------------------------------------------------------------
1167     Functions: reorder/reorderLevel
1168 
1169     Recursively reorders the display string
1170     "From the highest level down, reverse all characters at that level and
1171     higher, down to the lowest odd level"
1172 
1173     Implements rule L2 of the Unicode bidi Algorithm.
1174 
1175     Input: Array of embedding levels
1176            Character count
1177            Flag enabling reversal (set to false by initial caller)
1178 
1179     In/Out: Text to reorder
1180 
1181     Note: levels may exceed 15 resp. 61 on input.
1182 
1183     Rule L3 - reorder combining marks is not implemented here
1184     Rule L4 - glyph mirroring is implemented as a display option below
1185 
1186     Note: this should be applied a line at a time
1187 -------------------------------------------------------------------------*/
BIDI_ReorderV2lLevel(int level,int * pIndexs,const BYTE * plevel,int cch,BOOL fReverse)1188 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1189 {
1190     int ich = 0;
1191 
1192     /* true as soon as first odd level encountered */
1193     fReverse = fReverse || odd(level);
1194 
1195     for (; ich < cch; ich++)
1196     {
1197         if (plevel[ich] < level)
1198         {
1199             break;
1200         }
1201         else if (plevel[ich] > level)
1202         {
1203             ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1204                 cch - ich, fReverse) - 1;
1205         }
1206     }
1207     if (fReverse)
1208     {
1209         reverse(pIndexs, ich);
1210     }
1211     return ich;
1212 }
1213 
1214 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
BIDI_ReorderL2vLevel(int level,int * pIndexs,const BYTE * plevel,int cch,BOOL fReverse)1215 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1216 {
1217     int ich = 0;
1218     int newlevel = -1;
1219 
1220     /* true as soon as first odd level encountered */
1221     fReverse = fReverse || odd(level);
1222 
1223     for (; ich < cch; ich++)
1224     {
1225         if (plevel[ich] < level)
1226             break;
1227         else if (plevel[ich] > level)
1228             newlevel = ich;
1229     }
1230     if (fReverse)
1231     {
1232         reverse(pIndexs, ich);
1233     }
1234 
1235     if (newlevel >= 0)
1236     {
1237         ich = 0;
1238         for (; ich < cch; ich++)
1239             if (plevel[ich] < level)
1240                 break;
1241             else if (plevel[ich] > level)
1242                 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1243                 cch - ich, fReverse) - 1;
1244     }
1245 
1246     return ich;
1247 }
1248 
BIDI_GetStrengths(const WCHAR * string,unsigned int count,const SCRIPT_CONTROL * c,WORD * strength)1249 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1250 {
1251     unsigned int i;
1252 
1253     classify(string, strength, count, c);
1254     for (i = 0; i < count; i++)
1255     {
1256         switch (strength[i])
1257         {
1258             case L:
1259             case LRE:
1260             case LRO:
1261             case R:
1262             case AL:
1263             case RLE:
1264             case RLO:
1265                 strength[i] = BIDI_STRONG;
1266                 break;
1267             case PDF:
1268             case EN:
1269             case ES:
1270             case ET:
1271             case AN:
1272             case CS:
1273             case BN:
1274                 strength[i] = BIDI_WEAK;
1275                 break;
1276             case B:
1277             case S:
1278             case WS:
1279             case ON:
1280             default: /* Neutrals and NSM */
1281                 strength[i] = BIDI_NEUTRAL;
1282         }
1283     }
1284     return TRUE;
1285 }
1286