1 /* 2 * Uniscribe BiDirectional handling 3 * 4 * Copyright 2003 Shachar Shemesh 5 * Copyright 2007 Maarten Lankhorst 6 * Copyright 2010 CodeWeavers, Aric Stewart 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with this library; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 21 * 22 * Code derived from the modified reference implementation 23 * that was found in revision 17 of http://unicode.org/reports/tr9/ 24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM" 25 * 26 * -- Copyright (C) 1999-2005, ASMUS, Inc. 27 * 28 * Permission is hereby granted, free of charge, to any person obtaining a 29 * copy of the Unicode data files and any associated documentation (the 30 * "Data Files") or Unicode software and any associated documentation (the 31 * "Software") to deal in the Data Files or Software without restriction, 32 * including without limitation the rights to use, copy, modify, merge, 33 * publish, distribute, and/or sell copies of the Data Files or Software, 34 * and to permit persons to whom the Data Files or Software are furnished 35 * to do so, provided that (a) the above copyright notice(s) and this 36 * permission notice appear with all copies of the Data Files or Software, 37 * (b) both the above copyright notice(s) and this permission notice appear 38 * in associated documentation, and (c) there is clear notice in each 39 * modified Data File or in the Software as well as in the documentation 40 * associated with the Data File(s) or Software that the data or software 41 * has been modified. 42 */ 43 44 #include <stdarg.h> 45 #include <stdlib.h> 46 #include "windef.h" 47 #include "winbase.h" 48 #include "wingdi.h" 49 #include "winnls.h" 50 #include "usp10.h" 51 #include "wine/debug.h" 52 #include "wine/heap.h" 53 #include "wine/list.h" 54 55 #include "usp10_internal.h" 56 57 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN; 58 extern const unsigned short bidi_direction_table[] DECLSPEC_HIDDEN; 59 60 WINE_DEFAULT_DEBUG_CHANNEL(bidi); 61 62 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0) 63 #define MAX_DEPTH 125 64 65 /* HELPER FUNCTIONS AND DECLARATIONS */ 66 67 /*------------------------------------------------------------------------ 68 Bidirectional Character Types 69 70 as defined by the Unicode Bidirectional Algorithm Table 3-7. 71 72 Note: 73 74 The list of bidirectional character types here is not grouped the 75 same way as the table 3-7, since the numberic values for the types 76 are chosen to keep the state and action tables compact. 77 ------------------------------------------------------------------------*/ 78 enum directions 79 { 80 /* input types */ 81 /* ON MUST be zero, code relies on ON = NI = 0 */ 82 ON = 0, /* Other Neutral */ 83 L, /* Left Letter */ 84 R, /* Right Letter */ 85 AN, /* Arabic Number */ 86 EN, /* European Number */ 87 AL, /* Arabic Letter (Right-to-left) */ 88 NSM, /* Non-spacing Mark */ 89 CS, /* Common Separator */ 90 ES, /* European Separator */ 91 ET, /* European Terminator (post/prefix e.g. $ and %) */ 92 93 /* resolved types */ 94 BN, /* Boundary neutral (type of RLE etc after explicit levels) */ 95 96 /* input types, */ 97 S, /* Segment Separator (TAB) // used only in L1 */ 98 WS, /* White space // used only in L1 */ 99 B, /* Paragraph Separator (aka as PS) */ 100 101 /* types for explicit controls */ 102 RLO, /* these are used only in X1-X9 */ 103 RLE, 104 LRO, 105 LRE, 106 PDF, 107 108 LRI, /* Isolate formatting characters new with 6.3 */ 109 RLI, 110 FSI, 111 PDI, 112 113 /* resolved types, also resolved directions */ 114 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */ 115 }; 116 117 static const char debug_type[][4] = 118 { 119 "ON", /* Other Neutral */ 120 "L", /* Left Letter */ 121 "R", /* Right Letter */ 122 "AN", /* Arabic Number */ 123 "EN", /* European Number */ 124 "AL", /* Arabic Letter (Right-to-left) */ 125 "NSM", /* Non-spacing Mark */ 126 "CS", /* Common Separator */ 127 "ES", /* European Separator */ 128 "ET", /* European Terminator (post/prefix e.g. $ and %) */ 129 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */ 130 "S", /* Segment Separator (TAB) // used only in L1 */ 131 "WS", /* White space // used only in L1 */ 132 "B", /* Paragraph Separator (aka as PS) */ 133 "RLO", /* these are used only in X1-X9 */ 134 "RLE", 135 "LRO", 136 "LRE", 137 "PDF", 138 "LRI", /* Isolate formatting characters new with 6.3 */ 139 "RLI", 140 "FSI", 141 "PDI", 142 }; 143 144 /* HELPER FUNCTIONS */ 145 146 static inline void dump_types(const char* header, WORD *types, int start, int end) 147 { 148 int i, len = 0; 149 TRACE("%s:",header); 150 for (i = start; i < end && len < 200; i++) 151 { 152 TRACE(" %s",debug_type[types[i]]); 153 len += strlen(debug_type[types[i]])+1; 154 } 155 if (i != end) 156 TRACE("..."); 157 TRACE("\n"); 158 } 159 160 /* Convert the libwine information to the direction enum */ 161 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c) 162 { 163 unsigned i; 164 165 for (i = 0; i < count; ++i) 166 { 167 chartype[i] = get_table_entry( bidi_direction_table, string[i] ); 168 if (c->fLegacyBidiClass && chartype[i] == ES) 169 { 170 if (string[i] == '+' || string[i] == '-') chartype[i] = NI; 171 } 172 } 173 } 174 175 /* RESOLVE EXPLICIT */ 176 177 static WORD GreaterEven(int i) 178 { 179 return odd(i) ? i + 1 : i + 2; 180 } 181 182 static WORD GreaterOdd(int i) 183 { 184 return odd(i) ? i + 2 : i + 1; 185 } 186 187 static WORD EmbeddingDirection(int level) 188 { 189 return odd(level) ? R : L; 190 } 191 192 /*------------------------------------------------------------------------ 193 Function: resolveExplicit 194 195 Recursively resolves explicit embedding levels and overrides. 196 Implements rules X1-X9, of the Unicode Bidirectional Algorithm. 197 198 Input: Base embedding level and direction 199 Character count 200 201 Output: Array of embedding levels 202 203 In/Out: Array of direction classes 204 205 206 Note: The function uses two simple counters to keep track of 207 matching explicit codes and PDF. Use the default argument for 208 the outermost call. The nesting counter counts the recursion 209 depth and not the embedding level. 210 ------------------------------------------------------------------------*/ 211 typedef struct tagStackItem { 212 int level; 213 int override; 214 BOOL isolate; 215 } StackItem; 216 217 #define push_stack(l,o,i) \ 218 do { stack_top--; \ 219 stack[stack_top].level = l; \ 220 stack[stack_top].override = o; \ 221 stack[stack_top].isolate = i;} while(0) 222 223 #define pop_stack() do { stack_top++; } while(0) 224 225 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) 226 227 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride) 228 { 229 /* X1 */ 230 int overflow_isolate_count = 0; 231 int overflow_embedding_count = 0; 232 int valid_isolate_count = 0; 233 int i; 234 235 StackItem stack[MAX_DEPTH+2]; 236 int stack_top = MAX_DEPTH+1; 237 238 stack[stack_top].level = level; 239 stack[stack_top].override = NI; 240 stack[stack_top].isolate = FALSE; 241 242 if (initialOverride) 243 { 244 if (odd(level)) 245 push_stack(level, R, FALSE); 246 else 247 push_stack(level, L, FALSE); 248 } 249 250 for (i = 0; i < count; i++) 251 { 252 poutOverrides[i] = stack[stack_top].override; 253 254 /* X2 */ 255 if (pclass[i] == RLE) 256 { 257 int least_odd = GreaterOdd(stack[stack_top].level); 258 poutLevel[i] = stack[stack_top].level; 259 if (valid_level(least_odd)) 260 push_stack(least_odd, NI, FALSE); 261 else if (overflow_isolate_count == 0) 262 overflow_embedding_count++; 263 } 264 /* X3 */ 265 else if (pclass[i] == LRE) 266 { 267 int least_even = GreaterEven(stack[stack_top].level); 268 poutLevel[i] = stack[stack_top].level; 269 if (valid_level(least_even)) 270 push_stack(least_even, NI, FALSE); 271 else if (overflow_isolate_count == 0) 272 overflow_embedding_count++; 273 } 274 /* X4 */ 275 else if (pclass[i] == RLO) 276 { 277 int least_odd = GreaterOdd(stack[stack_top].level); 278 poutLevel[i] = stack[stack_top].level; 279 if (valid_level(least_odd)) 280 push_stack(least_odd, R, FALSE); 281 else if (overflow_isolate_count == 0) 282 overflow_embedding_count++; 283 } 284 /* X5 */ 285 else if (pclass[i] == LRO) 286 { 287 int least_even = GreaterEven(stack[stack_top].level); 288 poutLevel[i] = stack[stack_top].level; 289 if (valid_level(least_even)) 290 push_stack(least_even, L, FALSE); 291 else if (overflow_isolate_count == 0) 292 overflow_embedding_count++; 293 } 294 /* X5a */ 295 else if (pclass[i] == RLI) 296 { 297 int least_odd = GreaterOdd(stack[stack_top].level); 298 poutLevel[i] = stack[stack_top].level; 299 if (valid_level(least_odd)) 300 { 301 valid_isolate_count++; 302 push_stack(least_odd, NI, TRUE); 303 } 304 else 305 overflow_isolate_count++; 306 } 307 /* X5b */ 308 else if (pclass[i] == LRI) 309 { 310 int least_even = GreaterEven(stack[stack_top].level); 311 poutLevel[i] = stack[stack_top].level; 312 if (valid_level(least_even)) 313 { 314 valid_isolate_count++; 315 push_stack(least_even, NI, TRUE); 316 } 317 else 318 overflow_isolate_count++; 319 } 320 /* X5c */ 321 else if (pclass[i] == FSI) 322 { 323 int j; 324 int new_level = 0; 325 int skipping = 0; 326 poutLevel[i] = stack[stack_top].level; 327 for (j = i+1; j < count; j++) 328 { 329 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI) 330 { 331 skipping++; 332 continue; 333 } 334 else if (pclass[j] == PDI) 335 { 336 if (skipping) 337 skipping --; 338 else 339 break; 340 continue; 341 } 342 343 if (skipping) continue; 344 345 if (pclass[j] == L) 346 { 347 new_level = 0; 348 break; 349 } 350 else if (pclass[j] == R || pclass[j] == AL) 351 { 352 new_level = 1; 353 break; 354 } 355 } 356 if (odd(new_level)) 357 { 358 int least_odd = GreaterOdd(stack[stack_top].level); 359 if (valid_level(least_odd)) 360 { 361 valid_isolate_count++; 362 push_stack(least_odd, NI, TRUE); 363 } 364 else 365 overflow_isolate_count++; 366 } 367 else 368 { 369 int least_even = GreaterEven(stack[stack_top].level); 370 if (valid_level(least_even)) 371 { 372 valid_isolate_count++; 373 push_stack(least_even, NI, TRUE); 374 } 375 else 376 overflow_isolate_count++; 377 } 378 } 379 /* X6 */ 380 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF) 381 { 382 poutLevel[i] = stack[stack_top].level; 383 if (stack[stack_top].override != NI) 384 pclass[i] = stack[stack_top].override; 385 } 386 /* X6a */ 387 else if (pclass[i] == PDI) 388 { 389 if (overflow_isolate_count) overflow_isolate_count--; 390 else if (!valid_isolate_count) {/* do nothing */} 391 else 392 { 393 overflow_embedding_count = 0; 394 while (!stack[stack_top].isolate) pop_stack(); 395 pop_stack(); 396 valid_isolate_count --; 397 } 398 poutLevel[i] = stack[stack_top].level; 399 } 400 /* X7 */ 401 else if (pclass[i] == PDF) 402 { 403 poutLevel[i] = stack[stack_top].level; 404 if (overflow_isolate_count) {/* do nothing */} 405 else if (overflow_embedding_count) overflow_embedding_count--; 406 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1)) 407 pop_stack(); 408 } 409 /* X8: Nothing */ 410 } 411 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */ 412 for (i = 0; i < count ; i++) 413 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF) 414 pclass[i] = BN; 415 } 416 417 static inline int previousValidChar(const WORD *pcls, int index, int back_fence) 418 { 419 if (index == -1 || index == back_fence) return index; 420 index --; 421 while (index > back_fence && pcls[index] == BN) index --; 422 return index; 423 } 424 425 static inline int nextValidChar(const WORD *pcls, int index, int front_fence) 426 { 427 if (index == front_fence) return index; 428 index ++; 429 while (index < front_fence && pcls[index] == BN) index ++; 430 return index; 431 } 432 433 typedef struct tagRun 434 { 435 int start; 436 int end; 437 WORD e; 438 } Run; 439 440 typedef struct tagRunChar 441 { 442 WCHAR ch; 443 WORD *pcls; 444 } RunChar; 445 446 typedef struct tagIsolatedRun 447 { 448 struct list entry; 449 int length; 450 WORD sos; 451 WORD eos; 452 WORD e; 453 454 RunChar item[1]; 455 } IsolatedRun; 456 457 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index) 458 { 459 if (index >= (iso_run->length-1)) return -1; 460 index ++; 461 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++; 462 if (index == iso_run->length) return -1; 463 return index; 464 } 465 466 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index) 467 { 468 469 if (index <= 0) return -1; 470 index --; 471 while (index > -1 && *iso_run->item[index].pcls == BN) index--; 472 return index; 473 } 474 475 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run) 476 { 477 int i, len = 0; 478 TRACE("%s:",header); 479 TRACE("[ "); 480 for (i = 0; i < iso_run->length && len < 200; i++) 481 { 482 TRACE(" %s",debug_type[*iso_run->item[i].pcls]); 483 len += strlen(debug_type[*iso_run->item[i].pcls])+1; 484 } 485 if (i != iso_run->length) 486 TRACE("..."); 487 TRACE(" ]\n"); 488 } 489 490 /*------------------------------------------------------------------------ 491 Function: resolveWeak 492 493 Resolves the directionality of numeric and other weak character types 494 495 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. 496 497 Input: Array of embedding levels 498 Character count 499 500 In/Out: Array of directional classes 501 502 Note: On input only these directional classes are expected 503 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS, 504 ------------------------------------------------------------------------*/ 505 506 static void resolveWeak(IsolatedRun * iso_run) 507 { 508 int i; 509 510 /* W1 */ 511 for (i=0; i < iso_run->length; i++) 512 { 513 if (*iso_run->item[i].pcls == NSM) 514 { 515 int j = iso_previousValidChar(iso_run, i); 516 if (j == -1) 517 *iso_run->item[i].pcls = iso_run->sos; 518 else if (*iso_run->item[j].pcls >= LRI) 519 *iso_run->item[i].pcls = ON; 520 else 521 *iso_run->item[i].pcls = *iso_run->item[j].pcls; 522 } 523 } 524 525 /* W2 */ 526 for (i = 0; i < iso_run->length; i++) 527 { 528 if (*iso_run->item[i].pcls == EN) 529 { 530 int j = iso_previousValidChar(iso_run, i); 531 while (j > -1) 532 { 533 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL) 534 { 535 if (*iso_run->item[j].pcls == AL) 536 *iso_run->item[i].pcls = AN; 537 break; 538 } 539 j = iso_previousValidChar(iso_run, j); 540 } 541 } 542 } 543 544 /* W3 */ 545 for (i = 0; i < iso_run->length; i++) 546 { 547 if (*iso_run->item[i].pcls == AL) 548 *iso_run->item[i].pcls = R; 549 } 550 551 /* W4 */ 552 for (i = 0; i < iso_run->length; i++) 553 { 554 if (*iso_run->item[i].pcls == ES) 555 { 556 int b = iso_previousValidChar(iso_run, i); 557 int f = iso_nextValidChar(iso_run, i); 558 559 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN) 560 *iso_run->item[i].pcls = EN; 561 } 562 else if (*iso_run->item[i].pcls == CS) 563 { 564 int b = iso_previousValidChar(iso_run, i); 565 int f = iso_nextValidChar(iso_run, i); 566 567 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN) 568 *iso_run->item[i].pcls = EN; 569 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN) 570 *iso_run->item[i].pcls = AN; 571 } 572 } 573 574 /* W5 */ 575 for (i = 0; i < iso_run->length; i++) 576 { 577 if (*iso_run->item[i].pcls == ET) 578 { 579 int j; 580 for (j = i-1 ; j > -1; j--) 581 { 582 if (*iso_run->item[j].pcls == BN) continue; 583 if (*iso_run->item[j].pcls == ET) continue; 584 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN; 585 else break; 586 } 587 if (*iso_run->item[i].pcls == ET) 588 { 589 for (j = i+1; j < iso_run->length; j++) 590 { 591 if (*iso_run->item[j].pcls == BN) continue; 592 if (*iso_run->item[j].pcls == ET) continue; 593 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN; 594 else break; 595 } 596 } 597 } 598 } 599 600 /* W6 */ 601 for (i = 0; i < iso_run->length; i++) 602 { 603 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON) 604 { 605 int b = i-1; 606 int f = i+1; 607 if (b > -1 && *iso_run->item[b].pcls == BN) 608 *iso_run->item[b].pcls = ON; 609 if (f < iso_run->length && *iso_run->item[f].pcls == BN) 610 *iso_run->item[f].pcls = ON; 611 612 *iso_run->item[i].pcls = ON; 613 } 614 } 615 616 /* W7 */ 617 for (i = 0; i < iso_run->length; i++) 618 { 619 if (*iso_run->item[i].pcls == EN) 620 { 621 int j; 622 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j)) 623 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L) 624 { 625 if (*iso_run->item[j].pcls == L) 626 *iso_run->item[i].pcls = L; 627 break; 628 } 629 if (iso_run->sos == L && j == -1) 630 *iso_run->item[i].pcls = L; 631 } 632 } 633 } 634 635 typedef struct tagBracketPair 636 { 637 int start; 638 int end; 639 } BracketPair; 640 641 static int __cdecl compr(const void *a, const void* b) 642 { 643 return ((BracketPair*)a)->start - ((BracketPair*)b)->start; 644 } 645 646 static BracketPair *computeBracketPairs(IsolatedRun *iso_run) 647 { 648 WCHAR *open_stack; 649 int *stack_index; 650 int stack_top = iso_run->length; 651 unsigned int pair_count = 0; 652 BracketPair *out = NULL; 653 SIZE_T out_size = 0; 654 int i; 655 656 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack)); 657 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index)); 658 659 for (i = 0; i < iso_run->length; i++) 660 { 661 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch); 662 663 if (!ubv) 664 continue; 665 666 if ((ubv >> 8) == 0) 667 { 668 --stack_top; 669 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff); 670 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */ 671 if (open_stack[stack_top] == 0x232a) 672 open_stack[stack_top] = 0x3009; 673 stack_index[stack_top] = i; 674 } 675 else if ((ubv >> 8) == 1) 676 { 677 unsigned int j; 678 679 for (j = stack_top; j < iso_run->length; ++j) 680 { 681 WCHAR c = iso_run->item[i].ch; 682 683 if (c == 0x232a) 684 c = 0x3009; 685 686 if (c != open_stack[j]) 687 continue; 688 689 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out)))) 690 ERR("Failed to grow output array.\n"); 691 692 out[pair_count].start = stack_index[j]; 693 out[pair_count].end = i; 694 ++pair_count; 695 696 out[pair_count].start = -1; 697 stack_top = j + 1; 698 break; 699 } 700 } 701 } 702 703 heap_free(open_stack); 704 heap_free(stack_index); 705 706 if (!pair_count) 707 return NULL; 708 709 qsort(out, pair_count, sizeof(*out), compr); 710 711 return out; 712 } 713 714 #define N0_TYPE(a) ((a == AN || a == EN)?R:a) 715 716 /*------------------------------------------------------------------------ 717 Function: resolveNeutrals 718 719 Resolves the directionality of neutral character types. 720 721 Implements rules N1 and N2 of the Unicode Bidi Algorithm. 722 723 Input: Array of embedding levels 724 Character count 725 Baselevel 726 727 In/Out: Array of directional classes 728 729 Note: On input only these directional classes are expected 730 R, L, NI, AN, EN and BN 731 732 W8 resolves a number of ENs to L 733 ------------------------------------------------------------------------*/ 734 static void resolveNeutrals(IsolatedRun *iso_run) 735 { 736 int i; 737 BracketPair *pairs = NULL; 738 739 /* Translate isolates into NI */ 740 for (i = 0; i < iso_run->length; i++) 741 { 742 if (*iso_run->item[i].pcls >= LRI) 743 *iso_run->item[i].pcls = NI; 744 745 switch(*iso_run->item[i].pcls) 746 { 747 case B: 748 case S: 749 case WS: *iso_run->item[i].pcls = NI; 750 } 751 752 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */ 753 } 754 755 /* N0: Skipping bracketed pairs for now */ 756 pairs = computeBracketPairs(iso_run); 757 if (pairs) 758 { 759 BracketPair *p = &pairs[0]; 760 int i = 0; 761 while (p->start >= 0) 762 { 763 int j; 764 int e = EmbeddingDirection(iso_run->e); 765 int o = EmbeddingDirection(iso_run->e+1); 766 BOOL flag_o = FALSE; 767 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end); 768 769 /* N0.b */ 770 for (j = p->start+1; j < p->end; j++) 771 { 772 if (N0_TYPE(*iso_run->item[j].pcls) == e) 773 { 774 *iso_run->item[p->start].pcls = e; 775 *iso_run->item[p->end].pcls = e; 776 break; 777 } 778 else if (N0_TYPE(*iso_run->item[j].pcls) == o) 779 flag_o = TRUE; 780 } 781 /* N0.c */ 782 if (j == p->end && flag_o) 783 { 784 for (j = p->start; j >= 0; j--) 785 { 786 if (N0_TYPE(*iso_run->item[j].pcls) == o) 787 { 788 *iso_run->item[p->start].pcls = o; 789 *iso_run->item[p->end].pcls = o; 790 break; 791 } 792 else if (N0_TYPE(*iso_run->item[j].pcls) == e) 793 { 794 *iso_run->item[p->start].pcls = e; 795 *iso_run->item[p->end].pcls = e; 796 break; 797 } 798 } 799 if ( j < 0 ) 800 { 801 *iso_run->item[p->start].pcls = iso_run->sos; 802 *iso_run->item[p->end].pcls = iso_run->sos; 803 } 804 } 805 806 i++; 807 p = &pairs[i]; 808 } 809 heap_free(pairs); 810 } 811 812 /* N1 */ 813 for (i = 0; i < iso_run->length; i++) 814 { 815 WORD l,r; 816 817 if (*iso_run->item[i].pcls == NI) 818 { 819 int j; 820 int b = iso_previousValidChar(iso_run, i); 821 822 if (b == -1) 823 { 824 l = iso_run->sos; 825 b = 0; 826 } 827 else 828 { 829 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN) 830 l = R; 831 else if (*iso_run->item[b].pcls == L) 832 l = L; 833 else /* No string type */ 834 continue; 835 } 836 j = iso_nextValidChar(iso_run, i); 837 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j); 838 839 if (j == -1) 840 { 841 r = iso_run->eos; 842 j = iso_run->length; 843 } 844 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN) 845 r = R; 846 else if (*iso_run->item[j].pcls == L) 847 r = L; 848 else /* No string type */ 849 continue; 850 851 if (r == l) 852 { 853 for (b = i; b < j && b < iso_run->length; b++) 854 *iso_run->item[b].pcls = r; 855 } 856 } 857 } 858 859 /* N2 */ 860 for (i = 0; i < iso_run->length; i++) 861 { 862 if (*iso_run->item[i].pcls == NI) 863 { 864 int b = i-1; 865 int f = i+1; 866 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e); 867 if (b > -1 && *iso_run->item[b].pcls == BN) 868 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e); 869 if (f < iso_run->length && *iso_run->item[f].pcls == BN) 870 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e); 871 } 872 } 873 } 874 875 /*------------------------------------------------------------------------ 876 Function: resolveImplicit 877 878 Recursively resolves implicit embedding levels. 879 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. 880 881 Input: Array of direction classes 882 Character count 883 Base level 884 885 In/Out: Array of embedding levels 886 887 Note: levels may exceed 15 on output. 888 Accepted subset of direction classes 889 R, L, AN, EN 890 ------------------------------------------------------------------------*/ 891 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos) 892 { 893 int i; 894 895 /* I1/2 */ 896 for (i = sos; i <= eos; i++) 897 { 898 if (pcls[i] == BN) 899 continue; 900 901 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */ 902 ASSERT(pcls[i] < 5); /* "Out of range." */ 903 904 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN)) 905 plevel[i]++; 906 else if (!odd(plevel[i]) && pcls[i] == R) 907 plevel[i]++; 908 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN)) 909 plevel[i]+=2; 910 } 911 } 912 913 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos) 914 { 915 int i; 916 917 /* L1 */ 918 for (i = sos; i <= eos; i++) 919 { 920 if (pcls[i] == B || pcls[i] == S) 921 { 922 int j = i -1; 923 while (i > sos && j >= sos && 924 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI || 925 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO || 926 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN)) 927 plevel[j--] = baselevel; 928 plevel[i] = baselevel; 929 } 930 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO || 931 pcls[i] == PDF || pcls[i] == BN) 932 { 933 plevel[i] = i ? plevel[i - 1] : baselevel; 934 } 935 if (i == eos && 936 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI || 937 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || 938 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN )) 939 { 940 int j = i; 941 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI || 942 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO || 943 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN)) 944 plevel[j--] = baselevel; 945 } 946 } 947 } 948 949 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel, 950 const WCHAR *string, unsigned int uCount, struct list *set) 951 { 952 int run_start, run_end, i; 953 int run_count = 0; 954 Run *runs; 955 IsolatedRun *current_isolated; 956 957 if (!(runs = heap_calloc(uCount, sizeof(*runs)))) 958 return; 959 960 list_init(set); 961 962 /* Build Runs */ 963 run_start = 0; 964 while (run_start < uCount) 965 { 966 run_end = nextValidChar(pcls, run_start, uCount); 967 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount); 968 run_end --; 969 runs[run_count].start = run_start; 970 runs[run_count].end = run_end; 971 runs[run_count].e = pLevel[run_start]; 972 run_start = nextValidChar(pcls, run_end, uCount); 973 run_count++; 974 } 975 976 /* Build Isolating Runs */ 977 i = 0; 978 while (i < run_count) 979 { 980 int k = i; 981 if (runs[k].start >= 0) 982 { 983 int type_fence, real_end; 984 int j; 985 986 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount])))) 987 break; 988 989 run_start = runs[k].start; 990 current_isolated->e = runs[k].e; 991 current_isolated->length = (runs[k].end - runs[k].start)+1; 992 993 for (j = 0; j < current_isolated->length; j++) 994 { 995 current_isolated->item[j].pcls = &pcls[runs[k].start+j]; 996 current_isolated->item[j].ch = string[runs[k].start + j]; 997 } 998 999 run_end = runs[k].end; 1000 1001 TRACE("{ [%i -- %i]",run_start, run_end); 1002 1003 if (pcls[run_end] == BN) 1004 run_end = previousValidChar(pcls, run_end, runs[k].start); 1005 1006 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI)) 1007 { 1008 j = k+1; 1009 search: 1010 while (j < run_count && pcls[runs[j].start] != PDI) j++; 1011 if (j < run_count && runs[i].e != runs[j].e) 1012 { 1013 j++; 1014 goto search; 1015 } 1016 1017 if (j != run_count) 1018 { 1019 int m; 1020 int l = current_isolated->length; 1021 1022 current_isolated->length += (runs[j].end - runs[j].start)+1; 1023 for (m = 0; l < current_isolated->length; l++, m++) 1024 { 1025 current_isolated->item[l].pcls = &pcls[runs[j].start+m]; 1026 current_isolated->item[l].ch = string[runs[j].start + m]; 1027 } 1028 1029 TRACE("[%i -- %i]",runs[j].start, runs[j].end); 1030 1031 run_end = runs[j].end; 1032 if (pcls[run_end] == BN) 1033 run_end = previousValidChar(pcls, run_end, runs[i].start); 1034 runs[j].start = -1; 1035 k = j; 1036 } 1037 else 1038 { 1039 run_end = uCount; 1040 break; 1041 } 1042 } 1043 1044 type_fence = previousValidChar(pcls, run_start, -1); 1045 1046 if (type_fence == -1) 1047 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start]; 1048 else 1049 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start]; 1050 1051 current_isolated->sos = EmbeddingDirection(current_isolated->sos); 1052 1053 if (run_end == uCount) 1054 current_isolated->eos = current_isolated->sos; 1055 else 1056 { 1057 /* eos could be an BN */ 1058 if ( pcls[run_end] == BN ) 1059 { 1060 real_end = previousValidChar(pcls, run_end, run_start-1); 1061 if (real_end < run_start) 1062 real_end = run_start; 1063 } 1064 else 1065 real_end = run_end; 1066 1067 type_fence = nextValidChar(pcls, run_end, uCount); 1068 if (type_fence == uCount) 1069 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end]; 1070 else 1071 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end]; 1072 1073 current_isolated->eos = EmbeddingDirection(current_isolated->eos); 1074 } 1075 1076 list_add_tail(set, ¤t_isolated->entry); 1077 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]); 1078 } 1079 i++; 1080 } 1081 1082 heap_free(runs); 1083 } 1084 1085 /************************************************************* 1086 * BIDI_DeterminLevels 1087 */ 1088 BOOL BIDI_DetermineLevels( 1089 const WCHAR *lpString, /* [in] The string for which information is to be returned */ 1090 unsigned int uCount, /* [in] Number of WCHARs in string. */ 1091 const SCRIPT_STATE *s, 1092 const SCRIPT_CONTROL *c, 1093 WORD *lpOutLevels, /* [out] final string levels */ 1094 WORD *lpOutOverrides /* [out] final string overrides */ 1095 ) 1096 { 1097 WORD *chartype; 1098 unsigned baselevel = 0; 1099 struct list IsolatingRuns; 1100 IsolatedRun *iso_run, *next; 1101 1102 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount); 1103 1104 if (!(chartype = heap_alloc(uCount * sizeof(*chartype)))) 1105 { 1106 WARN("Out of memory\n"); 1107 return FALSE; 1108 } 1109 1110 baselevel = s->uBidiLevel; 1111 1112 classify(lpString, chartype, uCount, c); 1113 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount); 1114 1115 memset(lpOutOverrides, 0, sizeof(WORD) * uCount); 1116 1117 /* resolve explicit */ 1118 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection); 1119 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount); 1120 1121 /* X10/BD13: Computer Isolating runs */ 1122 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns); 1123 1124 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry) 1125 { 1126 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run); 1127 1128 /* resolve weak */ 1129 resolveWeak(iso_run); 1130 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run); 1131 1132 /* resolve neutrals */ 1133 resolveNeutrals(iso_run); 1134 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run); 1135 1136 list_remove(&iso_run->entry); 1137 heap_free(iso_run); 1138 } 1139 1140 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount); 1141 /* resolveImplicit */ 1142 resolveImplicit(chartype, lpOutLevels, 0, uCount-1); 1143 1144 /* resolveResolvedLevels*/ 1145 classify(lpString, chartype, uCount, c); 1146 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1); 1147 1148 heap_free(chartype); 1149 return TRUE; 1150 } 1151 1152 /* reverse cch indexes */ 1153 static void reverse(int *pidx, int cch) 1154 { 1155 int temp; 1156 int ich = 0; 1157 for (; ich < --cch; ich++) 1158 { 1159 temp = pidx[ich]; 1160 pidx[ich] = pidx[cch]; 1161 pidx[cch] = temp; 1162 } 1163 } 1164 1165 1166 /*------------------------------------------------------------------------ 1167 Functions: reorder/reorderLevel 1168 1169 Recursively reorders the display string 1170 "From the highest level down, reverse all characters at that level and 1171 higher, down to the lowest odd level" 1172 1173 Implements rule L2 of the Unicode bidi Algorithm. 1174 1175 Input: Array of embedding levels 1176 Character count 1177 Flag enabling reversal (set to false by initial caller) 1178 1179 In/Out: Text to reorder 1180 1181 Note: levels may exceed 15 resp. 61 on input. 1182 1183 Rule L3 - reorder combining marks is not implemented here 1184 Rule L4 - glyph mirroring is implemented as a display option below 1185 1186 Note: this should be applied a line at a time 1187 -------------------------------------------------------------------------*/ 1188 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 1189 { 1190 int ich = 0; 1191 1192 /* true as soon as first odd level encountered */ 1193 fReverse = fReverse || odd(level); 1194 1195 for (; ich < cch; ich++) 1196 { 1197 if (plevel[ich] < level) 1198 { 1199 break; 1200 } 1201 else if (plevel[ich] > level) 1202 { 1203 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich, 1204 cch - ich, fReverse) - 1; 1205 } 1206 } 1207 if (fReverse) 1208 { 1209 reverse(pIndexs, ich); 1210 } 1211 return ich; 1212 } 1213 1214 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */ 1215 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 1216 { 1217 int ich = 0; 1218 int newlevel = -1; 1219 1220 /* true as soon as first odd level encountered */ 1221 fReverse = fReverse || odd(level); 1222 1223 for (; ich < cch; ich++) 1224 { 1225 if (plevel[ich] < level) 1226 break; 1227 else if (plevel[ich] > level) 1228 newlevel = ich; 1229 } 1230 if (fReverse) 1231 { 1232 reverse(pIndexs, ich); 1233 } 1234 1235 if (newlevel >= 0) 1236 { 1237 ich = 0; 1238 for (; ich < cch; ich++) 1239 if (plevel[ich] < level) 1240 break; 1241 else if (plevel[ich] > level) 1242 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich, 1243 cch - ich, fReverse) - 1; 1244 } 1245 1246 return ich; 1247 } 1248 1249 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength) 1250 { 1251 unsigned int i; 1252 1253 classify(string, strength, count, c); 1254 for (i = 0; i < count; i++) 1255 { 1256 switch (strength[i]) 1257 { 1258 case L: 1259 case LRE: 1260 case LRO: 1261 case R: 1262 case AL: 1263 case RLE: 1264 case RLO: 1265 strength[i] = BIDI_STRONG; 1266 break; 1267 case PDF: 1268 case EN: 1269 case ES: 1270 case ET: 1271 case AN: 1272 case CS: 1273 case BN: 1274 strength[i] = BIDI_WEAK; 1275 break; 1276 case B: 1277 case S: 1278 case WS: 1279 case ON: 1280 default: /* Neutrals and NSM */ 1281 strength[i] = BIDI_NEUTRAL; 1282 } 1283 } 1284 return TRUE; 1285 } 1286