1 /*
2 * Uniscribe BiDirectional handling
3 *
4 * Copyright 2003 Shachar Shemesh
5 * Copyright 2007 Maarten Lankhorst
6 * Copyright 2010 CodeWeavers, Aric Stewart
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 *
22 * Code derived from the modified reference implementation
23 * that was found in revision 17 of http://unicode.org/reports/tr9/
24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM"
25 *
26 * -- Copyright (C) 1999-2005, ASMUS, Inc.
27 *
28 * Permission is hereby granted, free of charge, to any person obtaining a
29 * copy of the Unicode data files and any associated documentation (the
30 * "Data Files") or Unicode software and any associated documentation (the
31 * "Software") to deal in the Data Files or Software without restriction,
32 * including without limitation the rights to use, copy, modify, merge,
33 * publish, distribute, and/or sell copies of the Data Files or Software,
34 * and to permit persons to whom the Data Files or Software are furnished
35 * to do so, provided that (a) the above copyright notice(s) and this
36 * permission notice appear with all copies of the Data Files or Software,
37 * (b) both the above copyright notice(s) and this permission notice appear
38 * in associated documentation, and (c) there is clear notice in each
39 * modified Data File or in the Software as well as in the documentation
40 * associated with the Data File(s) or Software that the data or software
41 * has been modified.
42 */
43
44 #include <stdarg.h>
45 #include <stdlib.h>
46 #include "windef.h"
47 #include "winbase.h"
48 #include "wingdi.h"
49 #include "winnls.h"
50 #include "usp10.h"
51 #include "wine/debug.h"
52 #include "wine/heap.h"
53 #include "wine/list.h"
54
55 #include "usp10_internal.h"
56
57 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN;
58 extern const unsigned short bidi_direction_table[] DECLSPEC_HIDDEN;
59
60 WINE_DEFAULT_DEBUG_CHANNEL(bidi);
61
62 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0)
63 #define MAX_DEPTH 125
64
65 /* HELPER FUNCTIONS AND DECLARATIONS */
66
67 /*------------------------------------------------------------------------
68 Bidirectional Character Types
69
70 as defined by the Unicode Bidirectional Algorithm Table 3-7.
71
72 Note:
73
74 The list of bidirectional character types here is not grouped the
75 same way as the table 3-7, since the numberic values for the types
76 are chosen to keep the state and action tables compact.
77 ------------------------------------------------------------------------*/
78 enum directions
79 {
80 /* input types */
81 /* ON MUST be zero, code relies on ON = NI = 0 */
82 ON = 0, /* Other Neutral */
83 L, /* Left Letter */
84 R, /* Right Letter */
85 AN, /* Arabic Number */
86 EN, /* European Number */
87 AL, /* Arabic Letter (Right-to-left) */
88 NSM, /* Non-spacing Mark */
89 CS, /* Common Separator */
90 ES, /* European Separator */
91 ET, /* European Terminator (post/prefix e.g. $ and %) */
92
93 /* resolved types */
94 BN, /* Boundary neutral (type of RLE etc after explicit levels) */
95
96 /* input types, */
97 S, /* Segment Separator (TAB) // used only in L1 */
98 WS, /* White space // used only in L1 */
99 B, /* Paragraph Separator (aka as PS) */
100
101 /* types for explicit controls */
102 RLO, /* these are used only in X1-X9 */
103 RLE,
104 LRO,
105 LRE,
106 PDF,
107
108 LRI, /* Isolate formatting characters new with 6.3 */
109 RLI,
110 FSI,
111 PDI,
112
113 /* resolved types, also resolved directions */
114 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */
115 };
116
117 static const char debug_type[][4] =
118 {
119 "ON", /* Other Neutral */
120 "L", /* Left Letter */
121 "R", /* Right Letter */
122 "AN", /* Arabic Number */
123 "EN", /* European Number */
124 "AL", /* Arabic Letter (Right-to-left) */
125 "NSM", /* Non-spacing Mark */
126 "CS", /* Common Separator */
127 "ES", /* European Separator */
128 "ET", /* European Terminator (post/prefix e.g. $ and %) */
129 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */
130 "S", /* Segment Separator (TAB) // used only in L1 */
131 "WS", /* White space // used only in L1 */
132 "B", /* Paragraph Separator (aka as PS) */
133 "RLO", /* these are used only in X1-X9 */
134 "RLE",
135 "LRO",
136 "LRE",
137 "PDF",
138 "LRI", /* Isolate formatting characters new with 6.3 */
139 "RLI",
140 "FSI",
141 "PDI",
142 };
143
144 /* HELPER FUNCTIONS */
145
dump_types(const char * header,WORD * types,int start,int end)146 static inline void dump_types(const char* header, WORD *types, int start, int end)
147 {
148 int i, len = 0;
149 TRACE("%s:",header);
150 for (i = start; i < end && len < 200; i++)
151 {
152 TRACE(" %s",debug_type[types[i]]);
153 len += strlen(debug_type[types[i]])+1;
154 }
155 if (i != end)
156 TRACE("...");
157 TRACE("\n");
158 }
159
160 /* Convert the libwine information to the direction enum */
classify(const WCHAR * string,WORD * chartype,DWORD count,const SCRIPT_CONTROL * c)161 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c)
162 {
163 unsigned i;
164
165 for (i = 0; i < count; ++i)
166 {
167 chartype[i] = get_table_entry( bidi_direction_table, string[i] );
168 if (c->fLegacyBidiClass && chartype[i] == ES)
169 {
170 if (string[i] == '+' || string[i] == '-') chartype[i] = NI;
171 }
172 }
173 }
174
175 /* RESOLVE EXPLICIT */
176
GreaterEven(int i)177 static WORD GreaterEven(int i)
178 {
179 return odd(i) ? i + 1 : i + 2;
180 }
181
GreaterOdd(int i)182 static WORD GreaterOdd(int i)
183 {
184 return odd(i) ? i + 2 : i + 1;
185 }
186
EmbeddingDirection(int level)187 static WORD EmbeddingDirection(int level)
188 {
189 return odd(level) ? R : L;
190 }
191
192 /*------------------------------------------------------------------------
193 Function: resolveExplicit
194
195 Recursively resolves explicit embedding levels and overrides.
196 Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
197
198 Input: Base embedding level and direction
199 Character count
200
201 Output: Array of embedding levels
202
203 In/Out: Array of direction classes
204
205
206 Note: The function uses two simple counters to keep track of
207 matching explicit codes and PDF. Use the default argument for
208 the outermost call. The nesting counter counts the recursion
209 depth and not the embedding level.
210 ------------------------------------------------------------------------*/
211 typedef struct tagStackItem {
212 int level;
213 int override;
214 BOOL isolate;
215 } StackItem;
216
217 #define push_stack(l,o,i) \
218 do { stack_top--; \
219 stack[stack_top].level = l; \
220 stack[stack_top].override = o; \
221 stack[stack_top].isolate = i;} while(0)
222
223 #define pop_stack() do { stack_top++; } while(0)
224
225 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0)
226
resolveExplicit(int level,WORD * pclass,WORD * poutLevel,WORD * poutOverrides,int count,BOOL initialOverride)227 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride)
228 {
229 /* X1 */
230 int overflow_isolate_count = 0;
231 int overflow_embedding_count = 0;
232 int valid_isolate_count = 0;
233 int i;
234
235 StackItem stack[MAX_DEPTH+2];
236 int stack_top = MAX_DEPTH+1;
237
238 stack[stack_top].level = level;
239 stack[stack_top].override = NI;
240 stack[stack_top].isolate = FALSE;
241
242 if (initialOverride)
243 {
244 if (odd(level))
245 push_stack(level, R, FALSE);
246 else
247 push_stack(level, L, FALSE);
248 }
249
250 for (i = 0; i < count; i++)
251 {
252 poutOverrides[i] = stack[stack_top].override;
253
254 /* X2 */
255 if (pclass[i] == RLE)
256 {
257 int least_odd = GreaterOdd(stack[stack_top].level);
258 poutLevel[i] = stack[stack_top].level;
259 if (valid_level(least_odd))
260 push_stack(least_odd, NI, FALSE);
261 else if (overflow_isolate_count == 0)
262 overflow_embedding_count++;
263 }
264 /* X3 */
265 else if (pclass[i] == LRE)
266 {
267 int least_even = GreaterEven(stack[stack_top].level);
268 poutLevel[i] = stack[stack_top].level;
269 if (valid_level(least_even))
270 push_stack(least_even, NI, FALSE);
271 else if (overflow_isolate_count == 0)
272 overflow_embedding_count++;
273 }
274 /* X4 */
275 else if (pclass[i] == RLO)
276 {
277 int least_odd = GreaterOdd(stack[stack_top].level);
278 poutLevel[i] = stack[stack_top].level;
279 if (valid_level(least_odd))
280 push_stack(least_odd, R, FALSE);
281 else if (overflow_isolate_count == 0)
282 overflow_embedding_count++;
283 }
284 /* X5 */
285 else if (pclass[i] == LRO)
286 {
287 int least_even = GreaterEven(stack[stack_top].level);
288 poutLevel[i] = stack[stack_top].level;
289 if (valid_level(least_even))
290 push_stack(least_even, L, FALSE);
291 else if (overflow_isolate_count == 0)
292 overflow_embedding_count++;
293 }
294 /* X5a */
295 else if (pclass[i] == RLI)
296 {
297 int least_odd = GreaterOdd(stack[stack_top].level);
298 poutLevel[i] = stack[stack_top].level;
299 if (valid_level(least_odd))
300 {
301 valid_isolate_count++;
302 push_stack(least_odd, NI, TRUE);
303 }
304 else
305 overflow_isolate_count++;
306 }
307 /* X5b */
308 else if (pclass[i] == LRI)
309 {
310 int least_even = GreaterEven(stack[stack_top].level);
311 poutLevel[i] = stack[stack_top].level;
312 if (valid_level(least_even))
313 {
314 valid_isolate_count++;
315 push_stack(least_even, NI, TRUE);
316 }
317 else
318 overflow_isolate_count++;
319 }
320 /* X5c */
321 else if (pclass[i] == FSI)
322 {
323 int j;
324 int new_level = 0;
325 int skipping = 0;
326 poutLevel[i] = stack[stack_top].level;
327 for (j = i+1; j < count; j++)
328 {
329 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI)
330 {
331 skipping++;
332 continue;
333 }
334 else if (pclass[j] == PDI)
335 {
336 if (skipping)
337 skipping --;
338 else
339 break;
340 continue;
341 }
342
343 if (skipping) continue;
344
345 if (pclass[j] == L)
346 {
347 new_level = 0;
348 break;
349 }
350 else if (pclass[j] == R || pclass[j] == AL)
351 {
352 new_level = 1;
353 break;
354 }
355 }
356 if (odd(new_level))
357 {
358 int least_odd = GreaterOdd(stack[stack_top].level);
359 if (valid_level(least_odd))
360 {
361 valid_isolate_count++;
362 push_stack(least_odd, NI, TRUE);
363 }
364 else
365 overflow_isolate_count++;
366 }
367 else
368 {
369 int least_even = GreaterEven(stack[stack_top].level);
370 if (valid_level(least_even))
371 {
372 valid_isolate_count++;
373 push_stack(least_even, NI, TRUE);
374 }
375 else
376 overflow_isolate_count++;
377 }
378 }
379 /* X6 */
380 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF)
381 {
382 poutLevel[i] = stack[stack_top].level;
383 if (stack[stack_top].override != NI)
384 pclass[i] = stack[stack_top].override;
385 }
386 /* X6a */
387 else if (pclass[i] == PDI)
388 {
389 if (overflow_isolate_count) overflow_isolate_count--;
390 else if (!valid_isolate_count) {/* do nothing */}
391 else
392 {
393 overflow_embedding_count = 0;
394 while (!stack[stack_top].isolate) pop_stack();
395 pop_stack();
396 valid_isolate_count --;
397 }
398 poutLevel[i] = stack[stack_top].level;
399 }
400 /* X7 */
401 else if (pclass[i] == PDF)
402 {
403 poutLevel[i] = stack[stack_top].level;
404 if (overflow_isolate_count) {/* do nothing */}
405 else if (overflow_embedding_count) overflow_embedding_count--;
406 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1))
407 pop_stack();
408 }
409 /* X8: Nothing */
410 }
411 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */
412 for (i = 0; i < count ; i++)
413 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF)
414 pclass[i] = BN;
415 }
416
previousValidChar(const WORD * pcls,int index,int back_fence)417 static inline int previousValidChar(const WORD *pcls, int index, int back_fence)
418 {
419 if (index == -1 || index == back_fence) return index;
420 index --;
421 while (index > back_fence && pcls[index] == BN) index --;
422 return index;
423 }
424
nextValidChar(const WORD * pcls,int index,int front_fence)425 static inline int nextValidChar(const WORD *pcls, int index, int front_fence)
426 {
427 if (index == front_fence) return index;
428 index ++;
429 while (index < front_fence && pcls[index] == BN) index ++;
430 return index;
431 }
432
433 typedef struct tagRun
434 {
435 int start;
436 int end;
437 WORD e;
438 } Run;
439
440 typedef struct tagRunChar
441 {
442 WCHAR ch;
443 WORD *pcls;
444 } RunChar;
445
446 typedef struct tagIsolatedRun
447 {
448 struct list entry;
449 int length;
450 WORD sos;
451 WORD eos;
452 WORD e;
453
454 RunChar item[1];
455 } IsolatedRun;
456
iso_nextValidChar(IsolatedRun * iso_run,int index)457 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index)
458 {
459 if (index >= (iso_run->length-1)) return -1;
460 index ++;
461 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++;
462 if (index == iso_run->length) return -1;
463 return index;
464 }
465
iso_previousValidChar(IsolatedRun * iso_run,int index)466 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index)
467 {
468
469 if (index <= 0) return -1;
470 index --;
471 while (index > -1 && *iso_run->item[index].pcls == BN) index--;
472 return index;
473 }
474
iso_dump_types(const char * header,IsolatedRun * iso_run)475 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run)
476 {
477 int i, len = 0;
478 TRACE("%s:",header);
479 TRACE("[ ");
480 for (i = 0; i < iso_run->length && len < 200; i++)
481 {
482 TRACE(" %s",debug_type[*iso_run->item[i].pcls]);
483 len += strlen(debug_type[*iso_run->item[i].pcls])+1;
484 }
485 if (i != iso_run->length)
486 TRACE("...");
487 TRACE(" ]\n");
488 }
489
490 /*------------------------------------------------------------------------
491 Function: resolveWeak
492
493 Resolves the directionality of numeric and other weak character types
494
495 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm.
496
497 Input: Array of embedding levels
498 Character count
499
500 In/Out: Array of directional classes
501
502 Note: On input only these directional classes are expected
503 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
504 ------------------------------------------------------------------------*/
505
resolveWeak(IsolatedRun * iso_run)506 static void resolveWeak(IsolatedRun * iso_run)
507 {
508 int i;
509
510 /* W1 */
511 for (i=0; i < iso_run->length; i++)
512 {
513 if (*iso_run->item[i].pcls == NSM)
514 {
515 int j = iso_previousValidChar(iso_run, i);
516 if (j == -1)
517 *iso_run->item[i].pcls = iso_run->sos;
518 else if (*iso_run->item[j].pcls >= LRI)
519 *iso_run->item[i].pcls = ON;
520 else
521 *iso_run->item[i].pcls = *iso_run->item[j].pcls;
522 }
523 }
524
525 /* W2 */
526 for (i = 0; i < iso_run->length; i++)
527 {
528 if (*iso_run->item[i].pcls == EN)
529 {
530 int j = iso_previousValidChar(iso_run, i);
531 while (j > -1)
532 {
533 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL)
534 {
535 if (*iso_run->item[j].pcls == AL)
536 *iso_run->item[i].pcls = AN;
537 break;
538 }
539 j = iso_previousValidChar(iso_run, j);
540 }
541 }
542 }
543
544 /* W3 */
545 for (i = 0; i < iso_run->length; i++)
546 {
547 if (*iso_run->item[i].pcls == AL)
548 *iso_run->item[i].pcls = R;
549 }
550
551 /* W4 */
552 for (i = 0; i < iso_run->length; i++)
553 {
554 if (*iso_run->item[i].pcls == ES)
555 {
556 int b = iso_previousValidChar(iso_run, i);
557 int f = iso_nextValidChar(iso_run, i);
558
559 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
560 *iso_run->item[i].pcls = EN;
561 }
562 else if (*iso_run->item[i].pcls == CS)
563 {
564 int b = iso_previousValidChar(iso_run, i);
565 int f = iso_nextValidChar(iso_run, i);
566
567 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN)
568 *iso_run->item[i].pcls = EN;
569 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN)
570 *iso_run->item[i].pcls = AN;
571 }
572 }
573
574 /* W5 */
575 for (i = 0; i < iso_run->length; i++)
576 {
577 if (*iso_run->item[i].pcls == ET)
578 {
579 int j;
580 for (j = i-1 ; j > -1; j--)
581 {
582 if (*iso_run->item[j].pcls == BN) continue;
583 if (*iso_run->item[j].pcls == ET) continue;
584 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
585 else break;
586 }
587 if (*iso_run->item[i].pcls == ET)
588 {
589 for (j = i+1; j < iso_run->length; j++)
590 {
591 if (*iso_run->item[j].pcls == BN) continue;
592 if (*iso_run->item[j].pcls == ET) continue;
593 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN;
594 else break;
595 }
596 }
597 }
598 }
599
600 /* W6 */
601 for (i = 0; i < iso_run->length; i++)
602 {
603 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON)
604 {
605 int b = i-1;
606 int f = i+1;
607 if (b > -1 && *iso_run->item[b].pcls == BN)
608 *iso_run->item[b].pcls = ON;
609 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
610 *iso_run->item[f].pcls = ON;
611
612 *iso_run->item[i].pcls = ON;
613 }
614 }
615
616 /* W7 */
617 for (i = 0; i < iso_run->length; i++)
618 {
619 if (*iso_run->item[i].pcls == EN)
620 {
621 int j;
622 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j))
623 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L)
624 {
625 if (*iso_run->item[j].pcls == L)
626 *iso_run->item[i].pcls = L;
627 break;
628 }
629 if (iso_run->sos == L && j == -1)
630 *iso_run->item[i].pcls = L;
631 }
632 }
633 }
634
635 typedef struct tagBracketPair
636 {
637 int start;
638 int end;
639 } BracketPair;
640
compr(const void * a,const void * b)641 static int __cdecl compr(const void *a, const void* b)
642 {
643 return ((BracketPair*)a)->start - ((BracketPair*)b)->start;
644 }
645
computeBracketPairs(IsolatedRun * iso_run)646 static BracketPair *computeBracketPairs(IsolatedRun *iso_run)
647 {
648 WCHAR *open_stack;
649 int *stack_index;
650 int stack_top = iso_run->length;
651 unsigned int pair_count = 0;
652 BracketPair *out = NULL;
653 SIZE_T out_size = 0;
654 int i;
655
656 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack));
657 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index));
658
659 for (i = 0; i < iso_run->length; i++)
660 {
661 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch);
662
663 if (!ubv)
664 continue;
665
666 if ((ubv >> 8) == 0)
667 {
668 --stack_top;
669 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff);
670 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */
671 if (open_stack[stack_top] == 0x232a)
672 open_stack[stack_top] = 0x3009;
673 stack_index[stack_top] = i;
674 }
675 else if ((ubv >> 8) == 1)
676 {
677 unsigned int j;
678
679 for (j = stack_top; j < iso_run->length; ++j)
680 {
681 WCHAR c = iso_run->item[i].ch;
682
683 if (c == 0x232a)
684 c = 0x3009;
685
686 if (c != open_stack[j])
687 continue;
688
689 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out))))
690 ERR("Failed to grow output array.\n");
691
692 out[pair_count].start = stack_index[j];
693 out[pair_count].end = i;
694 ++pair_count;
695
696 out[pair_count].start = -1;
697 stack_top = j + 1;
698 break;
699 }
700 }
701 }
702
703 heap_free(open_stack);
704 heap_free(stack_index);
705
706 if (!pair_count)
707 return NULL;
708
709 qsort(out, pair_count, sizeof(*out), compr);
710
711 return out;
712 }
713
714 #define N0_TYPE(a) ((a == AN || a == EN)?R:a)
715
716 /*------------------------------------------------------------------------
717 Function: resolveNeutrals
718
719 Resolves the directionality of neutral character types.
720
721 Implements rules N1 and N2 of the Unicode Bidi Algorithm.
722
723 Input: Array of embedding levels
724 Character count
725 Baselevel
726
727 In/Out: Array of directional classes
728
729 Note: On input only these directional classes are expected
730 R, L, NI, AN, EN and BN
731
732 W8 resolves a number of ENs to L
733 ------------------------------------------------------------------------*/
resolveNeutrals(IsolatedRun * iso_run)734 static void resolveNeutrals(IsolatedRun *iso_run)
735 {
736 int i;
737 BracketPair *pairs = NULL;
738
739 /* Translate isolates into NI */
740 for (i = 0; i < iso_run->length; i++)
741 {
742 if (*iso_run->item[i].pcls >= LRI)
743 *iso_run->item[i].pcls = NI;
744
745 switch(*iso_run->item[i].pcls)
746 {
747 case B:
748 case S:
749 case WS: *iso_run->item[i].pcls = NI;
750 }
751
752 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */
753 }
754
755 /* N0: Skipping bracketed pairs for now */
756 pairs = computeBracketPairs(iso_run);
757 if (pairs)
758 {
759 BracketPair *p = &pairs[0];
760 int i = 0;
761 while (p->start >= 0)
762 {
763 int j;
764 int e = EmbeddingDirection(iso_run->e);
765 int o = EmbeddingDirection(iso_run->e+1);
766 BOOL flag_o = FALSE;
767 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end);
768
769 /* N0.b */
770 for (j = p->start+1; j < p->end; j++)
771 {
772 if (N0_TYPE(*iso_run->item[j].pcls) == e)
773 {
774 *iso_run->item[p->start].pcls = e;
775 *iso_run->item[p->end].pcls = e;
776 break;
777 }
778 else if (N0_TYPE(*iso_run->item[j].pcls) == o)
779 flag_o = TRUE;
780 }
781 /* N0.c */
782 if (j == p->end && flag_o)
783 {
784 for (j = p->start; j >= 0; j--)
785 {
786 if (N0_TYPE(*iso_run->item[j].pcls) == o)
787 {
788 *iso_run->item[p->start].pcls = o;
789 *iso_run->item[p->end].pcls = o;
790 break;
791 }
792 else if (N0_TYPE(*iso_run->item[j].pcls) == e)
793 {
794 *iso_run->item[p->start].pcls = e;
795 *iso_run->item[p->end].pcls = e;
796 break;
797 }
798 }
799 if ( j < 0 )
800 {
801 *iso_run->item[p->start].pcls = iso_run->sos;
802 *iso_run->item[p->end].pcls = iso_run->sos;
803 }
804 }
805
806 i++;
807 p = &pairs[i];
808 }
809 heap_free(pairs);
810 }
811
812 /* N1 */
813 for (i = 0; i < iso_run->length; i++)
814 {
815 WORD l,r;
816
817 if (*iso_run->item[i].pcls == NI)
818 {
819 int j;
820 int b = iso_previousValidChar(iso_run, i);
821
822 if (b == -1)
823 {
824 l = iso_run->sos;
825 b = 0;
826 }
827 else
828 {
829 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN)
830 l = R;
831 else if (*iso_run->item[b].pcls == L)
832 l = L;
833 else /* No string type */
834 continue;
835 }
836 j = iso_nextValidChar(iso_run, i);
837 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j);
838
839 if (j == -1)
840 {
841 r = iso_run->eos;
842 j = iso_run->length;
843 }
844 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN)
845 r = R;
846 else if (*iso_run->item[j].pcls == L)
847 r = L;
848 else /* No string type */
849 continue;
850
851 if (r == l)
852 {
853 for (b = i; b < j && b < iso_run->length; b++)
854 *iso_run->item[b].pcls = r;
855 }
856 }
857 }
858
859 /* N2 */
860 for (i = 0; i < iso_run->length; i++)
861 {
862 if (*iso_run->item[i].pcls == NI)
863 {
864 int b = i-1;
865 int f = i+1;
866 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e);
867 if (b > -1 && *iso_run->item[b].pcls == BN)
868 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e);
869 if (f < iso_run->length && *iso_run->item[f].pcls == BN)
870 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e);
871 }
872 }
873 }
874
875 /*------------------------------------------------------------------------
876 Function: resolveImplicit
877
878 Recursively resolves implicit embedding levels.
879 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
880
881 Input: Array of direction classes
882 Character count
883 Base level
884
885 In/Out: Array of embedding levels
886
887 Note: levels may exceed 15 on output.
888 Accepted subset of direction classes
889 R, L, AN, EN
890 ------------------------------------------------------------------------*/
resolveImplicit(const WORD * pcls,WORD * plevel,int sos,int eos)891 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos)
892 {
893 int i;
894
895 /* I1/2 */
896 for (i = sos; i <= eos; i++)
897 {
898 if (pcls[i] == BN)
899 continue;
900
901 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */
902 ASSERT(pcls[i] < 5); /* "Out of range." */
903
904 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN))
905 plevel[i]++;
906 else if (!odd(plevel[i]) && pcls[i] == R)
907 plevel[i]++;
908 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN))
909 plevel[i]+=2;
910 }
911 }
912
resolveResolved(unsigned baselevel,const WORD * pcls,WORD * plevel,int sos,int eos)913 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos)
914 {
915 int i;
916
917 /* L1 */
918 for (i = sos; i <= eos; i++)
919 {
920 if (pcls[i] == B || pcls[i] == S)
921 {
922 int j = i -1;
923 while (i > sos && j >= sos &&
924 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
925 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
926 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
927 plevel[j--] = baselevel;
928 plevel[i] = baselevel;
929 }
930 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO ||
931 pcls[i] == PDF || pcls[i] == BN)
932 {
933 plevel[i] = i ? plevel[i - 1] : baselevel;
934 }
935 if (i == eos &&
936 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI ||
937 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO ||
938 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN ))
939 {
940 int j = i;
941 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI ||
942 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO ||
943 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN))
944 plevel[j--] = baselevel;
945 }
946 }
947 }
948
computeIsolatingRunsSet(unsigned baselevel,WORD * pcls,const WORD * pLevel,const WCHAR * string,unsigned int uCount,struct list * set)949 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel,
950 const WCHAR *string, unsigned int uCount, struct list *set)
951 {
952 int run_start, run_end, i;
953 int run_count = 0;
954 Run *runs;
955 IsolatedRun *current_isolated;
956
957 if (!(runs = heap_calloc(uCount, sizeof(*runs))))
958 return;
959
960 list_init(set);
961
962 /* Build Runs */
963 run_start = 0;
964 while (run_start < uCount)
965 {
966 run_end = nextValidChar(pcls, run_start, uCount);
967 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount);
968 run_end --;
969 runs[run_count].start = run_start;
970 runs[run_count].end = run_end;
971 runs[run_count].e = pLevel[run_start];
972 run_start = nextValidChar(pcls, run_end, uCount);
973 run_count++;
974 }
975
976 /* Build Isolating Runs */
977 i = 0;
978 while (i < run_count)
979 {
980 int k = i;
981 if (runs[k].start >= 0)
982 {
983 int type_fence, real_end;
984 int j;
985
986 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount]))))
987 break;
988
989 run_start = runs[k].start;
990 current_isolated->e = runs[k].e;
991 current_isolated->length = (runs[k].end - runs[k].start)+1;
992
993 for (j = 0; j < current_isolated->length; j++)
994 {
995 current_isolated->item[j].pcls = &pcls[runs[k].start+j];
996 current_isolated->item[j].ch = string[runs[k].start + j];
997 }
998
999 run_end = runs[k].end;
1000
1001 TRACE("{ [%i -- %i]",run_start, run_end);
1002
1003 if (pcls[run_end] == BN)
1004 run_end = previousValidChar(pcls, run_end, runs[k].start);
1005
1006 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI))
1007 {
1008 j = k+1;
1009 search:
1010 while (j < run_count && pcls[runs[j].start] != PDI) j++;
1011 if (j < run_count && runs[i].e != runs[j].e)
1012 {
1013 j++;
1014 goto search;
1015 }
1016
1017 if (j != run_count)
1018 {
1019 int m;
1020 int l = current_isolated->length;
1021
1022 current_isolated->length += (runs[j].end - runs[j].start)+1;
1023 for (m = 0; l < current_isolated->length; l++, m++)
1024 {
1025 current_isolated->item[l].pcls = &pcls[runs[j].start+m];
1026 current_isolated->item[l].ch = string[runs[j].start + m];
1027 }
1028
1029 TRACE("[%i -- %i]",runs[j].start, runs[j].end);
1030
1031 run_end = runs[j].end;
1032 if (pcls[run_end] == BN)
1033 run_end = previousValidChar(pcls, run_end, runs[i].start);
1034 runs[j].start = -1;
1035 k = j;
1036 }
1037 else
1038 {
1039 run_end = uCount;
1040 break;
1041 }
1042 }
1043
1044 type_fence = previousValidChar(pcls, run_start, -1);
1045
1046 if (type_fence == -1)
1047 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start];
1048 else
1049 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start];
1050
1051 current_isolated->sos = EmbeddingDirection(current_isolated->sos);
1052
1053 if (run_end == uCount)
1054 current_isolated->eos = current_isolated->sos;
1055 else
1056 {
1057 /* eos could be an BN */
1058 if ( pcls[run_end] == BN )
1059 {
1060 real_end = previousValidChar(pcls, run_end, run_start-1);
1061 if (real_end < run_start)
1062 real_end = run_start;
1063 }
1064 else
1065 real_end = run_end;
1066
1067 type_fence = nextValidChar(pcls, run_end, uCount);
1068 if (type_fence == uCount)
1069 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end];
1070 else
1071 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end];
1072
1073 current_isolated->eos = EmbeddingDirection(current_isolated->eos);
1074 }
1075
1076 list_add_tail(set, ¤t_isolated->entry);
1077 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]);
1078 }
1079 i++;
1080 }
1081
1082 heap_free(runs);
1083 }
1084
1085 /*************************************************************
1086 * BIDI_DeterminLevels
1087 */
BIDI_DetermineLevels(const WCHAR * lpString,unsigned int uCount,const SCRIPT_STATE * s,const SCRIPT_CONTROL * c,WORD * lpOutLevels,WORD * lpOutOverrides)1088 BOOL BIDI_DetermineLevels(
1089 const WCHAR *lpString, /* [in] The string for which information is to be returned */
1090 unsigned int uCount, /* [in] Number of WCHARs in string. */
1091 const SCRIPT_STATE *s,
1092 const SCRIPT_CONTROL *c,
1093 WORD *lpOutLevels, /* [out] final string levels */
1094 WORD *lpOutOverrides /* [out] final string overrides */
1095 )
1096 {
1097 WORD *chartype;
1098 unsigned baselevel = 0;
1099 struct list IsolatingRuns;
1100 IsolatedRun *iso_run, *next;
1101
1102 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount);
1103
1104 if (!(chartype = heap_alloc(uCount * sizeof(*chartype))))
1105 {
1106 WARN("Out of memory\n");
1107 return FALSE;
1108 }
1109
1110 baselevel = s->uBidiLevel;
1111
1112 classify(lpString, chartype, uCount, c);
1113 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount);
1114
1115 memset(lpOutOverrides, 0, sizeof(WORD) * uCount);
1116
1117 /* resolve explicit */
1118 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection);
1119 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount);
1120
1121 /* X10/BD13: Computer Isolating runs */
1122 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns);
1123
1124 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry)
1125 {
1126 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run);
1127
1128 /* resolve weak */
1129 resolveWeak(iso_run);
1130 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run);
1131
1132 /* resolve neutrals */
1133 resolveNeutrals(iso_run);
1134 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run);
1135
1136 list_remove(&iso_run->entry);
1137 heap_free(iso_run);
1138 }
1139
1140 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount);
1141 /* resolveImplicit */
1142 resolveImplicit(chartype, lpOutLevels, 0, uCount-1);
1143
1144 /* resolveResolvedLevels*/
1145 classify(lpString, chartype, uCount, c);
1146 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1);
1147
1148 heap_free(chartype);
1149 return TRUE;
1150 }
1151
1152 /* reverse cch indexes */
reverse(int * pidx,int cch)1153 static void reverse(int *pidx, int cch)
1154 {
1155 int temp;
1156 int ich = 0;
1157 for (; ich < --cch; ich++)
1158 {
1159 temp = pidx[ich];
1160 pidx[ich] = pidx[cch];
1161 pidx[cch] = temp;
1162 }
1163 }
1164
1165
1166 /*------------------------------------------------------------------------
1167 Functions: reorder/reorderLevel
1168
1169 Recursively reorders the display string
1170 "From the highest level down, reverse all characters at that level and
1171 higher, down to the lowest odd level"
1172
1173 Implements rule L2 of the Unicode bidi Algorithm.
1174
1175 Input: Array of embedding levels
1176 Character count
1177 Flag enabling reversal (set to false by initial caller)
1178
1179 In/Out: Text to reorder
1180
1181 Note: levels may exceed 15 resp. 61 on input.
1182
1183 Rule L3 - reorder combining marks is not implemented here
1184 Rule L4 - glyph mirroring is implemented as a display option below
1185
1186 Note: this should be applied a line at a time
1187 -------------------------------------------------------------------------*/
BIDI_ReorderV2lLevel(int level,int * pIndexs,const BYTE * plevel,int cch,BOOL fReverse)1188 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1189 {
1190 int ich = 0;
1191
1192 /* true as soon as first odd level encountered */
1193 fReverse = fReverse || odd(level);
1194
1195 for (; ich < cch; ich++)
1196 {
1197 if (plevel[ich] < level)
1198 {
1199 break;
1200 }
1201 else if (plevel[ich] > level)
1202 {
1203 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich,
1204 cch - ich, fReverse) - 1;
1205 }
1206 }
1207 if (fReverse)
1208 {
1209 reverse(pIndexs, ich);
1210 }
1211 return ich;
1212 }
1213
1214 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */
BIDI_ReorderL2vLevel(int level,int * pIndexs,const BYTE * plevel,int cch,BOOL fReverse)1215 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse)
1216 {
1217 int ich = 0;
1218 int newlevel = -1;
1219
1220 /* true as soon as first odd level encountered */
1221 fReverse = fReverse || odd(level);
1222
1223 for (; ich < cch; ich++)
1224 {
1225 if (plevel[ich] < level)
1226 break;
1227 else if (plevel[ich] > level)
1228 newlevel = ich;
1229 }
1230 if (fReverse)
1231 {
1232 reverse(pIndexs, ich);
1233 }
1234
1235 if (newlevel >= 0)
1236 {
1237 ich = 0;
1238 for (; ich < cch; ich++)
1239 if (plevel[ich] < level)
1240 break;
1241 else if (plevel[ich] > level)
1242 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich,
1243 cch - ich, fReverse) - 1;
1244 }
1245
1246 return ich;
1247 }
1248
BIDI_GetStrengths(const WCHAR * string,unsigned int count,const SCRIPT_CONTROL * c,WORD * strength)1249 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength)
1250 {
1251 unsigned int i;
1252
1253 classify(string, strength, count, c);
1254 for (i = 0; i < count; i++)
1255 {
1256 switch (strength[i])
1257 {
1258 case L:
1259 case LRE:
1260 case LRO:
1261 case R:
1262 case AL:
1263 case RLE:
1264 case RLO:
1265 strength[i] = BIDI_STRONG;
1266 break;
1267 case PDF:
1268 case EN:
1269 case ES:
1270 case ET:
1271 case AN:
1272 case CS:
1273 case BN:
1274 strength[i] = BIDI_WEAK;
1275 break;
1276 case B:
1277 case S:
1278 case WS:
1279 case ON:
1280 default: /* Neutrals and NSM */
1281 strength[i] = BIDI_NEUTRAL;
1282 }
1283 }
1284 return TRUE;
1285 }
1286