1 /* 2 * Uniscribe BiDirectional handling 3 * 4 * Copyright 2003 Shachar Shemesh 5 * Copyright 2007 Maarten Lankhorst 6 * Copyright 2010 CodeWeavers, Aric Stewart 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with this library; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 21 * 22 * Code derived from the modified reference implementation 23 * that was found in revision 17 of http://unicode.org/reports/tr9/ 24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM" 25 * 26 * -- Copyright (C) 1999-2005, ASMUS, Inc. 27 * 28 * Permission is hereby granted, free of charge, to any person obtaining a 29 * copy of the Unicode data files and any associated documentation (the 30 * "Data Files") or Unicode software and any associated documentation (the 31 * "Software") to deal in the Data Files or Software without restriction, 32 * including without limitation the rights to use, copy, modify, merge, 33 * publish, distribute, and/or sell copies of the Data Files or Software, 34 * and to permit persons to whom the Data Files or Software are furnished 35 * to do so, provided that (a) the above copyright notice(s) and this 36 * permission notice appear with all copies of the Data Files or Software, 37 * (b) both the above copyright notice(s) and this permission notice appear 38 * in associated documentation, and (c) there is clear notice in each 39 * modified Data File or in the Software as well as in the documentation 40 * associated with the Data File(s) or Software that the data or software 41 * has been modified. 42 */ 43 44 #include "config.h" 45 46 #include <stdarg.h> 47 #include <stdlib.h> 48 #include "windef.h" 49 #include "winbase.h" 50 #include "wingdi.h" 51 #include "winnls.h" 52 #include "usp10.h" 53 #include "wine/unicode.h" 54 #include "wine/debug.h" 55 #include "wine/heap.h" 56 #include "wine/list.h" 57 58 #include "usp10_internal.h" 59 60 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN; 61 62 WINE_DEFAULT_DEBUG_CHANNEL(bidi); 63 64 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0) 65 #define MAX_DEPTH 125 66 67 /* HELPER FUNCTIONS AND DECLARATIONS */ 68 69 /*------------------------------------------------------------------------ 70 Bidirectional Character Types 71 72 as defined by the Unicode Bidirectional Algorithm Table 3-7. 73 74 Note: 75 76 The list of bidirectional character types here is not grouped the 77 same way as the table 3-7, since the numberic values for the types 78 are chosen to keep the state and action tables compact. 79 ------------------------------------------------------------------------*/ 80 enum directions 81 { 82 /* input types */ 83 /* ON MUST be zero, code relies on ON = NI = 0 */ 84 ON = 0, /* Other Neutral */ 85 L, /* Left Letter */ 86 R, /* Right Letter */ 87 AN, /* Arabic Number */ 88 EN, /* European Number */ 89 AL, /* Arabic Letter (Right-to-left) */ 90 NSM, /* Non-spacing Mark */ 91 CS, /* Common Separator */ 92 ES, /* European Separator */ 93 ET, /* European Terminator (post/prefix e.g. $ and %) */ 94 95 /* resolved types */ 96 BN, /* Boundary neutral (type of RLE etc after explicit levels) */ 97 98 /* input types, */ 99 S, /* Segment Separator (TAB) // used only in L1 */ 100 WS, /* White space // used only in L1 */ 101 B, /* Paragraph Separator (aka as PS) */ 102 103 /* types for explicit controls */ 104 RLO, /* these are used only in X1-X9 */ 105 RLE, 106 LRO, 107 LRE, 108 PDF, 109 110 LRI, /* Isolate formatting characters new with 6.3 */ 111 RLI, 112 FSI, 113 PDI, 114 115 /* resolved types, also resolved directions */ 116 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */ 117 }; 118 119 static const char debug_type[][4] = 120 { 121 "ON", /* Other Neutral */ 122 "L", /* Left Letter */ 123 "R", /* Right Letter */ 124 "AN", /* Arabic Number */ 125 "EN", /* European Number */ 126 "AL", /* Arabic Letter (Right-to-left) */ 127 "NSM", /* Non-spacing Mark */ 128 "CS", /* Common Separator */ 129 "ES", /* European Separator */ 130 "ET", /* European Terminator (post/prefix e.g. $ and %) */ 131 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */ 132 "S", /* Segment Separator (TAB) // used only in L1 */ 133 "WS", /* White space // used only in L1 */ 134 "B", /* Paragraph Separator (aka as PS) */ 135 "RLO", /* these are used only in X1-X9 */ 136 "RLE", 137 "LRO", 138 "LRE", 139 "PDF", 140 "LRI", /* Isolate formatting characters new with 6.3 */ 141 "RLI", 142 "FSI", 143 "PDI", 144 }; 145 146 /* HELPER FUNCTIONS */ 147 148 static inline void dump_types(const char* header, WORD *types, int start, int end) 149 { 150 int i, len = 0; 151 TRACE("%s:",header); 152 for (i = start; i < end && len < 200; i++) 153 { 154 TRACE(" %s",debug_type[types[i]]); 155 len += strlen(debug_type[types[i]])+1; 156 } 157 if (i != end) 158 TRACE("..."); 159 TRACE("\n"); 160 } 161 162 /* Convert the libwine information to the direction enum */ 163 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c) 164 { 165 static const enum directions dir_map[16] = 166 { 167 L, /* unassigned defaults to L */ 168 L, 169 R, 170 EN, 171 ES, 172 ET, 173 AN, 174 CS, 175 B, 176 S, 177 WS, 178 ON, 179 AL, 180 NSM, 181 BN, 182 PDF /* also LRE, LRO, RLE, RLO */ 183 }; 184 185 unsigned i; 186 187 for (i = 0; i < count; ++i) 188 { 189 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12]; 190 switch (chartype[i]) 191 { 192 case ES: 193 if (!c->fLegacyBidiClass) break; 194 switch (string[i]) 195 { 196 case '-': 197 case '+': chartype[i] = NI; break; 198 case '/': chartype[i] = CS; break; 199 } 200 break; 201 case PDF: 202 switch (string[i]) 203 { 204 case 0x202A: chartype[i] = LRE; break; 205 case 0x202B: chartype[i] = RLE; break; 206 case 0x202C: chartype[i] = PDF; break; 207 case 0x202D: chartype[i] = LRO; break; 208 case 0x202E: chartype[i] = RLO; break; 209 case 0x2066: chartype[i] = LRI; break; 210 case 0x2067: chartype[i] = RLI; break; 211 case 0x2068: chartype[i] = FSI; break; 212 case 0x2069: chartype[i] = PDI; break; 213 } 214 break; 215 } 216 } 217 } 218 219 /* RESOLVE EXPLICIT */ 220 221 static WORD GreaterEven(int i) 222 { 223 return odd(i) ? i + 1 : i + 2; 224 } 225 226 static WORD GreaterOdd(int i) 227 { 228 return odd(i) ? i + 2 : i + 1; 229 } 230 231 static WORD EmbeddingDirection(int level) 232 { 233 return odd(level) ? R : L; 234 } 235 236 /*------------------------------------------------------------------------ 237 Function: resolveExplicit 238 239 Recursively resolves explicit embedding levels and overrides. 240 Implements rules X1-X9, of the Unicode Bidirectional Algorithm. 241 242 Input: Base embedding level and direction 243 Character count 244 245 Output: Array of embedding levels 246 247 In/Out: Array of direction classes 248 249 250 Note: The function uses two simple counters to keep track of 251 matching explicit codes and PDF. Use the default argument for 252 the outermost call. The nesting counter counts the recursion 253 depth and not the embedding level. 254 ------------------------------------------------------------------------*/ 255 typedef struct tagStackItem { 256 int level; 257 int override; 258 BOOL isolate; 259 } StackItem; 260 261 #define push_stack(l,o,i) \ 262 do { stack_top--; \ 263 stack[stack_top].level = l; \ 264 stack[stack_top].override = o; \ 265 stack[stack_top].isolate = i;} while(0) 266 267 #define pop_stack() do { stack_top++; } while(0) 268 269 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) 270 271 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride) 272 { 273 /* X1 */ 274 int overflow_isolate_count = 0; 275 int overflow_embedding_count = 0; 276 int valid_isolate_count = 0; 277 int i; 278 279 StackItem stack[MAX_DEPTH+2]; 280 int stack_top = MAX_DEPTH+1; 281 282 stack[stack_top].level = level; 283 stack[stack_top].override = NI; 284 stack[stack_top].isolate = FALSE; 285 286 if (initialOverride) 287 { 288 if (odd(level)) 289 push_stack(level, R, FALSE); 290 else 291 push_stack(level, L, FALSE); 292 } 293 294 for (i = 0; i < count; i++) 295 { 296 poutOverrides[i] = stack[stack_top].override; 297 298 /* X2 */ 299 if (pclass[i] == RLE) 300 { 301 int least_odd = GreaterOdd(stack[stack_top].level); 302 poutLevel[i] = stack[stack_top].level; 303 if (valid_level(least_odd)) 304 push_stack(least_odd, NI, FALSE); 305 else if (overflow_isolate_count == 0) 306 overflow_embedding_count++; 307 } 308 /* X3 */ 309 else if (pclass[i] == LRE) 310 { 311 int least_even = GreaterEven(stack[stack_top].level); 312 poutLevel[i] = stack[stack_top].level; 313 if (valid_level(least_even)) 314 push_stack(least_even, NI, FALSE); 315 else if (overflow_isolate_count == 0) 316 overflow_embedding_count++; 317 } 318 /* X4 */ 319 else if (pclass[i] == RLO) 320 { 321 int least_odd = GreaterOdd(stack[stack_top].level); 322 poutLevel[i] = stack[stack_top].level; 323 if (valid_level(least_odd)) 324 push_stack(least_odd, R, FALSE); 325 else if (overflow_isolate_count == 0) 326 overflow_embedding_count++; 327 } 328 /* X5 */ 329 else if (pclass[i] == LRO) 330 { 331 int least_even = GreaterEven(stack[stack_top].level); 332 poutLevel[i] = stack[stack_top].level; 333 if (valid_level(least_even)) 334 push_stack(least_even, L, FALSE); 335 else if (overflow_isolate_count == 0) 336 overflow_embedding_count++; 337 } 338 /* X5a */ 339 else if (pclass[i] == RLI) 340 { 341 int least_odd = GreaterOdd(stack[stack_top].level); 342 poutLevel[i] = stack[stack_top].level; 343 if (valid_level(least_odd)) 344 { 345 valid_isolate_count++; 346 push_stack(least_odd, NI, TRUE); 347 } 348 else 349 overflow_isolate_count++; 350 } 351 /* X5b */ 352 else if (pclass[i] == LRI) 353 { 354 int least_even = GreaterEven(stack[stack_top].level); 355 poutLevel[i] = stack[stack_top].level; 356 if (valid_level(least_even)) 357 { 358 valid_isolate_count++; 359 push_stack(least_even, NI, TRUE); 360 } 361 else 362 overflow_isolate_count++; 363 } 364 /* X5c */ 365 else if (pclass[i] == FSI) 366 { 367 int j; 368 int new_level = 0; 369 int skipping = 0; 370 poutLevel[i] = stack[stack_top].level; 371 for (j = i+1; j < count; j++) 372 { 373 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI) 374 { 375 skipping++; 376 continue; 377 } 378 else if (pclass[j] == PDI) 379 { 380 if (skipping) 381 skipping --; 382 else 383 break; 384 continue; 385 } 386 387 if (skipping) continue; 388 389 if (pclass[j] == L) 390 { 391 new_level = 0; 392 break; 393 } 394 else if (pclass[j] == R || pclass[j] == AL) 395 { 396 new_level = 1; 397 break; 398 } 399 } 400 if (odd(new_level)) 401 { 402 int least_odd = GreaterOdd(stack[stack_top].level); 403 if (valid_level(least_odd)) 404 { 405 valid_isolate_count++; 406 push_stack(least_odd, NI, TRUE); 407 } 408 else 409 overflow_isolate_count++; 410 } 411 else 412 { 413 int least_even = GreaterEven(stack[stack_top].level); 414 if (valid_level(least_even)) 415 { 416 valid_isolate_count++; 417 push_stack(least_even, NI, TRUE); 418 } 419 else 420 overflow_isolate_count++; 421 } 422 } 423 /* X6 */ 424 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF) 425 { 426 poutLevel[i] = stack[stack_top].level; 427 if (stack[stack_top].override != NI) 428 pclass[i] = stack[stack_top].override; 429 } 430 /* X6a */ 431 else if (pclass[i] == PDI) 432 { 433 if (overflow_isolate_count) overflow_isolate_count--; 434 else if (!valid_isolate_count) {/* do nothing */} 435 else 436 { 437 overflow_embedding_count = 0; 438 while (!stack[stack_top].isolate) pop_stack(); 439 pop_stack(); 440 valid_isolate_count --; 441 } 442 poutLevel[i] = stack[stack_top].level; 443 } 444 /* X7 */ 445 else if (pclass[i] == PDF) 446 { 447 poutLevel[i] = stack[stack_top].level; 448 if (overflow_isolate_count) {/* do nothing */} 449 else if (overflow_embedding_count) overflow_embedding_count--; 450 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1)) 451 pop_stack(); 452 } 453 /* X8: Nothing */ 454 } 455 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */ 456 for (i = 0; i < count ; i++) 457 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF) 458 pclass[i] = BN; 459 } 460 461 static inline int previousValidChar(const WORD *pcls, int index, int back_fence) 462 { 463 if (index == -1 || index == back_fence) return index; 464 index --; 465 while (index > back_fence && pcls[index] == BN) index --; 466 return index; 467 } 468 469 static inline int nextValidChar(const WORD *pcls, int index, int front_fence) 470 { 471 if (index == front_fence) return index; 472 index ++; 473 while (index < front_fence && pcls[index] == BN) index ++; 474 return index; 475 } 476 477 typedef struct tagRun 478 { 479 int start; 480 int end; 481 WORD e; 482 } Run; 483 484 typedef struct tagRunChar 485 { 486 WCHAR ch; 487 WORD *pcls; 488 } RunChar; 489 490 typedef struct tagIsolatedRun 491 { 492 struct list entry; 493 int length; 494 WORD sos; 495 WORD eos; 496 WORD e; 497 498 RunChar item[1]; 499 } IsolatedRun; 500 501 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index) 502 { 503 if (index >= (iso_run->length-1)) return -1; 504 index ++; 505 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++; 506 if (index == iso_run->length) return -1; 507 return index; 508 } 509 510 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index) 511 { 512 513 if (index <= 0) return -1; 514 index --; 515 while (index > -1 && *iso_run->item[index].pcls == BN) index--; 516 return index; 517 } 518 519 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run) 520 { 521 int i, len = 0; 522 TRACE("%s:",header); 523 TRACE("[ "); 524 for (i = 0; i < iso_run->length && len < 200; i++) 525 { 526 TRACE(" %s",debug_type[*iso_run->item[i].pcls]); 527 len += strlen(debug_type[*iso_run->item[i].pcls])+1; 528 } 529 if (i != iso_run->length) 530 TRACE("..."); 531 TRACE(" ]\n"); 532 } 533 534 /*------------------------------------------------------------------------ 535 Function: resolveWeak 536 537 Resolves the directionality of numeric and other weak character types 538 539 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. 540 541 Input: Array of embedding levels 542 Character count 543 544 In/Out: Array of directional classes 545 546 Note: On input only these directional classes are expected 547 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS, 548 ------------------------------------------------------------------------*/ 549 550 static void resolveWeak(IsolatedRun * iso_run) 551 { 552 int i; 553 554 /* W1 */ 555 for (i=0; i < iso_run->length; i++) 556 { 557 if (*iso_run->item[i].pcls == NSM) 558 { 559 int j = iso_previousValidChar(iso_run, i); 560 if (j == -1) 561 *iso_run->item[i].pcls = iso_run->sos; 562 else if (*iso_run->item[j].pcls >= LRI) 563 *iso_run->item[i].pcls = ON; 564 else 565 *iso_run->item[i].pcls = *iso_run->item[j].pcls; 566 } 567 } 568 569 /* W2 */ 570 for (i = 0; i < iso_run->length; i++) 571 { 572 if (*iso_run->item[i].pcls == EN) 573 { 574 int j = iso_previousValidChar(iso_run, i); 575 while (j > -1) 576 { 577 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL) 578 { 579 if (*iso_run->item[j].pcls == AL) 580 *iso_run->item[i].pcls = AN; 581 break; 582 } 583 j = iso_previousValidChar(iso_run, j); 584 } 585 } 586 } 587 588 /* W3 */ 589 for (i = 0; i < iso_run->length; i++) 590 { 591 if (*iso_run->item[i].pcls == AL) 592 *iso_run->item[i].pcls = R; 593 } 594 595 /* W4 */ 596 for (i = 0; i < iso_run->length; i++) 597 { 598 if (*iso_run->item[i].pcls == ES) 599 { 600 int b = iso_previousValidChar(iso_run, i); 601 int f = iso_nextValidChar(iso_run, i); 602 603 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN) 604 *iso_run->item[i].pcls = EN; 605 } 606 else if (*iso_run->item[i].pcls == CS) 607 { 608 int b = iso_previousValidChar(iso_run, i); 609 int f = iso_nextValidChar(iso_run, i); 610 611 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN) 612 *iso_run->item[i].pcls = EN; 613 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN) 614 *iso_run->item[i].pcls = AN; 615 } 616 } 617 618 /* W5 */ 619 for (i = 0; i < iso_run->length; i++) 620 { 621 if (*iso_run->item[i].pcls == ET) 622 { 623 int j; 624 for (j = i-1 ; j > -1; j--) 625 { 626 if (*iso_run->item[j].pcls == BN) continue; 627 if (*iso_run->item[j].pcls == ET) continue; 628 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN; 629 else break; 630 } 631 if (*iso_run->item[i].pcls == ET) 632 { 633 for (j = i+1; j < iso_run->length; j++) 634 { 635 if (*iso_run->item[j].pcls == BN) continue; 636 if (*iso_run->item[j].pcls == ET) continue; 637 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN; 638 else break; 639 } 640 } 641 } 642 } 643 644 /* W6 */ 645 for (i = 0; i < iso_run->length; i++) 646 { 647 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON) 648 { 649 int b = i-1; 650 int f = i+1; 651 if (b > -1 && *iso_run->item[b].pcls == BN) 652 *iso_run->item[b].pcls = ON; 653 if (f < iso_run->length && *iso_run->item[f].pcls == BN) 654 *iso_run->item[f].pcls = ON; 655 656 *iso_run->item[i].pcls = ON; 657 } 658 } 659 660 /* W7 */ 661 for (i = 0; i < iso_run->length; i++) 662 { 663 if (*iso_run->item[i].pcls == EN) 664 { 665 int j; 666 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j)) 667 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L) 668 { 669 if (*iso_run->item[j].pcls == L) 670 *iso_run->item[i].pcls = L; 671 break; 672 } 673 if (iso_run->sos == L && j == -1) 674 *iso_run->item[i].pcls = L; 675 } 676 } 677 } 678 679 typedef struct tagBracketPair 680 { 681 int start; 682 int end; 683 } BracketPair; 684 685 static int compr(const void *a, const void* b) 686 { 687 return ((BracketPair*)a)->start - ((BracketPair*)b)->start; 688 } 689 690 static BracketPair *computeBracketPairs(IsolatedRun *iso_run) 691 { 692 WCHAR *open_stack; 693 int *stack_index; 694 int stack_top = iso_run->length; 695 unsigned int pair_count = 0; 696 BracketPair *out = NULL; 697 SIZE_T out_size = 0; 698 int i; 699 700 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack)); 701 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index)); 702 703 for (i = 0; i < iso_run->length; i++) 704 { 705 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch); 706 707 if (!ubv) 708 continue; 709 710 if ((ubv >> 8) == 0) 711 { 712 --stack_top; 713 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff); 714 /* Deal with canonical equivalent U+2329/232A and U+3008/3009. */ 715 if (open_stack[stack_top] == 0x232a) 716 open_stack[stack_top] = 0x3009; 717 stack_index[stack_top] = i; 718 } 719 else if ((ubv >> 8) == 1) 720 { 721 unsigned int j; 722 723 for (j = stack_top; j < iso_run->length; ++j) 724 { 725 WCHAR c = iso_run->item[i].ch; 726 727 if (c == 0x232a) 728 c = 0x3009; 729 730 if (c != open_stack[j]) 731 continue; 732 733 if (!(usp10_array_reserve((void **)&out, &out_size, pair_count + 2, sizeof(*out)))) 734 ERR("Failed to grow output array.\n"); 735 736 out[pair_count].start = stack_index[j]; 737 out[pair_count].end = i; 738 ++pair_count; 739 740 out[pair_count].start = -1; 741 stack_top = j + 1; 742 break; 743 } 744 } 745 } 746 747 heap_free(open_stack); 748 heap_free(stack_index); 749 750 if (!pair_count) 751 return NULL; 752 753 qsort(out, pair_count, sizeof(*out), compr); 754 755 return out; 756 } 757 758 #define N0_TYPE(a) ((a == AN || a == EN)?R:a) 759 760 /*------------------------------------------------------------------------ 761 Function: resolveNeutrals 762 763 Resolves the directionality of neutral character types. 764 765 Implements rules N1 and N2 of the Unicode Bidi Algorithm. 766 767 Input: Array of embedding levels 768 Character count 769 Baselevel 770 771 In/Out: Array of directional classes 772 773 Note: On input only these directional classes are expected 774 R, L, NI, AN, EN and BN 775 776 W8 resolves a number of ENs to L 777 ------------------------------------------------------------------------*/ 778 static void resolveNeutrals(IsolatedRun *iso_run) 779 { 780 int i; 781 BracketPair *pairs = NULL; 782 783 /* Translate isolates into NI */ 784 for (i = 0; i < iso_run->length; i++) 785 { 786 if (*iso_run->item[i].pcls >= LRI) 787 *iso_run->item[i].pcls = NI; 788 789 switch(*iso_run->item[i].pcls) 790 { 791 case B: 792 case S: 793 case WS: *iso_run->item[i].pcls = NI; 794 } 795 796 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */ 797 } 798 799 /* N0: Skipping bracketed pairs for now */ 800 pairs = computeBracketPairs(iso_run); 801 if (pairs) 802 { 803 BracketPair *p = &pairs[0]; 804 int i = 0; 805 while (p->start >= 0) 806 { 807 int j; 808 int e = EmbeddingDirection(iso_run->e); 809 int o = EmbeddingDirection(iso_run->e+1); 810 BOOL flag_o = FALSE; 811 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end); 812 813 /* N0.b */ 814 for (j = p->start+1; j < p->end; j++) 815 { 816 if (N0_TYPE(*iso_run->item[j].pcls) == e) 817 { 818 *iso_run->item[p->start].pcls = e; 819 *iso_run->item[p->end].pcls = e; 820 break; 821 } 822 else if (N0_TYPE(*iso_run->item[j].pcls) == o) 823 flag_o = TRUE; 824 } 825 /* N0.c */ 826 if (j == p->end && flag_o) 827 { 828 for (j = p->start; j >= 0; j--) 829 { 830 if (N0_TYPE(*iso_run->item[j].pcls) == o) 831 { 832 *iso_run->item[p->start].pcls = o; 833 *iso_run->item[p->end].pcls = o; 834 break; 835 } 836 else if (N0_TYPE(*iso_run->item[j].pcls) == e) 837 { 838 *iso_run->item[p->start].pcls = e; 839 *iso_run->item[p->end].pcls = e; 840 break; 841 } 842 } 843 if ( j < 0 ) 844 { 845 *iso_run->item[p->start].pcls = iso_run->sos; 846 *iso_run->item[p->end].pcls = iso_run->sos; 847 } 848 } 849 850 i++; 851 p = &pairs[i]; 852 } 853 heap_free(pairs); 854 } 855 856 /* N1 */ 857 for (i = 0; i < iso_run->length; i++) 858 { 859 WORD l,r; 860 861 if (*iso_run->item[i].pcls == NI) 862 { 863 int j; 864 int b = iso_previousValidChar(iso_run, i); 865 866 if (b == -1) 867 { 868 l = iso_run->sos; 869 b = 0; 870 } 871 else 872 { 873 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN) 874 l = R; 875 else if (*iso_run->item[b].pcls == L) 876 l = L; 877 else /* No string type */ 878 continue; 879 } 880 j = iso_nextValidChar(iso_run, i); 881 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j); 882 883 if (j == -1) 884 { 885 r = iso_run->eos; 886 j = iso_run->length; 887 } 888 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN) 889 r = R; 890 else if (*iso_run->item[j].pcls == L) 891 r = L; 892 else /* No string type */ 893 continue; 894 895 if (r == l) 896 { 897 for (b = i; b < j && b < iso_run->length; b++) 898 *iso_run->item[b].pcls = r; 899 } 900 } 901 } 902 903 /* N2 */ 904 for (i = 0; i < iso_run->length; i++) 905 { 906 if (*iso_run->item[i].pcls == NI) 907 { 908 int b = i-1; 909 int f = i+1; 910 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e); 911 if (b > -1 && *iso_run->item[b].pcls == BN) 912 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e); 913 if (f < iso_run->length && *iso_run->item[f].pcls == BN) 914 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e); 915 } 916 } 917 } 918 919 /*------------------------------------------------------------------------ 920 Function: resolveImplicit 921 922 Recursively resolves implicit embedding levels. 923 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. 924 925 Input: Array of direction classes 926 Character count 927 Base level 928 929 In/Out: Array of embedding levels 930 931 Note: levels may exceed 15 on output. 932 Accepted subset of direction classes 933 R, L, AN, EN 934 ------------------------------------------------------------------------*/ 935 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos) 936 { 937 int i; 938 939 /* I1/2 */ 940 for (i = sos; i <= eos; i++) 941 { 942 if (pcls[i] == BN) 943 continue; 944 945 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */ 946 ASSERT(pcls[i] < 5); /* "Out of range." */ 947 948 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN)) 949 plevel[i]++; 950 else if (!odd(plevel[i]) && pcls[i] == R) 951 plevel[i]++; 952 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN)) 953 plevel[i]+=2; 954 } 955 } 956 957 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos) 958 { 959 int i; 960 961 /* L1 */ 962 for (i = sos; i <= eos; i++) 963 { 964 if (pcls[i] == B || pcls[i] == S) 965 { 966 int j = i -1; 967 while (i > sos && j >= sos && 968 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI || 969 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO || 970 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN)) 971 plevel[j--] = baselevel; 972 plevel[i] = baselevel; 973 } 974 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO || 975 pcls[i] == PDF || pcls[i] == BN) 976 { 977 plevel[i] = i ? plevel[i - 1] : baselevel; 978 } 979 if (i == eos && 980 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI || 981 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || 982 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN )) 983 { 984 int j = i; 985 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI || 986 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO || 987 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN)) 988 plevel[j--] = baselevel; 989 } 990 } 991 } 992 993 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel, 994 const WCHAR *string, unsigned int uCount, struct list *set) 995 { 996 int run_start, run_end, i; 997 int run_count = 0; 998 Run *runs; 999 IsolatedRun *current_isolated; 1000 1001 if (!(runs = heap_calloc(uCount, sizeof(*runs)))) 1002 return; 1003 1004 list_init(set); 1005 1006 /* Build Runs */ 1007 run_start = 0; 1008 while (run_start < uCount) 1009 { 1010 run_end = nextValidChar(pcls, run_start, uCount); 1011 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount); 1012 run_end --; 1013 runs[run_count].start = run_start; 1014 runs[run_count].end = run_end; 1015 runs[run_count].e = pLevel[run_start]; 1016 run_start = nextValidChar(pcls, run_end, uCount); 1017 run_count++; 1018 } 1019 1020 /* Build Isolating Runs */ 1021 i = 0; 1022 while (i < run_count) 1023 { 1024 int k = i; 1025 if (runs[k].start >= 0) 1026 { 1027 int type_fence, real_end; 1028 int j; 1029 1030 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount])))) 1031 break; 1032 1033 run_start = runs[k].start; 1034 current_isolated->e = runs[k].e; 1035 current_isolated->length = (runs[k].end - runs[k].start)+1; 1036 1037 for (j = 0; j < current_isolated->length; j++) 1038 { 1039 current_isolated->item[j].pcls = &pcls[runs[k].start+j]; 1040 current_isolated->item[j].ch = string[runs[k].start + j]; 1041 } 1042 1043 run_end = runs[k].end; 1044 1045 TRACE("{ [%i -- %i]",run_start, run_end); 1046 1047 if (pcls[run_end] == BN) 1048 run_end = previousValidChar(pcls, run_end, runs[k].start); 1049 1050 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI)) 1051 { 1052 j = k+1; 1053 search: 1054 while (j < run_count && pcls[runs[j].start] != PDI) j++; 1055 if (j < run_count && runs[i].e != runs[j].e) 1056 { 1057 j++; 1058 goto search; 1059 } 1060 1061 if (j != run_count) 1062 { 1063 int m; 1064 int l = current_isolated->length; 1065 1066 current_isolated->length += (runs[j].end - runs[j].start)+1; 1067 for (m = 0; l < current_isolated->length; l++, m++) 1068 { 1069 current_isolated->item[l].pcls = &pcls[runs[j].start+m]; 1070 current_isolated->item[l].ch = string[runs[j].start + m]; 1071 } 1072 1073 TRACE("[%i -- %i]",runs[j].start, runs[j].end); 1074 1075 run_end = runs[j].end; 1076 if (pcls[run_end] == BN) 1077 run_end = previousValidChar(pcls, run_end, runs[i].start); 1078 runs[j].start = -1; 1079 k = j; 1080 } 1081 else 1082 { 1083 run_end = uCount; 1084 break; 1085 } 1086 } 1087 1088 type_fence = previousValidChar(pcls, run_start, -1); 1089 1090 if (type_fence == -1) 1091 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start]; 1092 else 1093 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start]; 1094 1095 current_isolated->sos = EmbeddingDirection(current_isolated->sos); 1096 1097 if (run_end == uCount) 1098 current_isolated->eos = current_isolated->sos; 1099 else 1100 { 1101 /* eos could be an BN */ 1102 if ( pcls[run_end] == BN ) 1103 { 1104 real_end = previousValidChar(pcls, run_end, run_start-1); 1105 if (real_end < run_start) 1106 real_end = run_start; 1107 } 1108 else 1109 real_end = run_end; 1110 1111 type_fence = nextValidChar(pcls, run_end, uCount); 1112 if (type_fence == uCount) 1113 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end]; 1114 else 1115 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end]; 1116 1117 current_isolated->eos = EmbeddingDirection(current_isolated->eos); 1118 } 1119 1120 list_add_tail(set, ¤t_isolated->entry); 1121 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]); 1122 } 1123 i++; 1124 } 1125 1126 heap_free(runs); 1127 } 1128 1129 /************************************************************* 1130 * BIDI_DeterminLevels 1131 */ 1132 BOOL BIDI_DetermineLevels( 1133 const WCHAR *lpString, /* [in] The string for which information is to be returned */ 1134 unsigned int uCount, /* [in] Number of WCHARs in string. */ 1135 const SCRIPT_STATE *s, 1136 const SCRIPT_CONTROL *c, 1137 WORD *lpOutLevels, /* [out] final string levels */ 1138 WORD *lpOutOverrides /* [out] final string overrides */ 1139 ) 1140 { 1141 WORD *chartype; 1142 unsigned baselevel = 0; 1143 struct list IsolatingRuns; 1144 IsolatedRun *iso_run, *next; 1145 1146 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount); 1147 1148 if (!(chartype = heap_alloc(uCount * sizeof(*chartype)))) 1149 { 1150 WARN("Out of memory\n"); 1151 return FALSE; 1152 } 1153 1154 baselevel = s->uBidiLevel; 1155 1156 classify(lpString, chartype, uCount, c); 1157 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount); 1158 1159 memset(lpOutOverrides, 0, sizeof(WORD) * uCount); 1160 1161 /* resolve explicit */ 1162 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection); 1163 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount); 1164 1165 /* X10/BD13: Computer Isolating runs */ 1166 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns); 1167 1168 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry) 1169 { 1170 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run); 1171 1172 /* resolve weak */ 1173 resolveWeak(iso_run); 1174 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run); 1175 1176 /* resolve neutrals */ 1177 resolveNeutrals(iso_run); 1178 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run); 1179 1180 list_remove(&iso_run->entry); 1181 heap_free(iso_run); 1182 } 1183 1184 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount); 1185 /* resolveImplicit */ 1186 resolveImplicit(chartype, lpOutLevels, 0, uCount-1); 1187 1188 /* resolveResolvedLevels*/ 1189 classify(lpString, chartype, uCount, c); 1190 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1); 1191 1192 heap_free(chartype); 1193 return TRUE; 1194 } 1195 1196 /* reverse cch indexes */ 1197 static void reverse(int *pidx, int cch) 1198 { 1199 int temp; 1200 int ich = 0; 1201 for (; ich < --cch; ich++) 1202 { 1203 temp = pidx[ich]; 1204 pidx[ich] = pidx[cch]; 1205 pidx[cch] = temp; 1206 } 1207 } 1208 1209 1210 /*------------------------------------------------------------------------ 1211 Functions: reorder/reorderLevel 1212 1213 Recursively reorders the display string 1214 "From the highest level down, reverse all characters at that level and 1215 higher, down to the lowest odd level" 1216 1217 Implements rule L2 of the Unicode bidi Algorithm. 1218 1219 Input: Array of embedding levels 1220 Character count 1221 Flag enabling reversal (set to false by initial caller) 1222 1223 In/Out: Text to reorder 1224 1225 Note: levels may exceed 15 resp. 61 on input. 1226 1227 Rule L3 - reorder combining marks is not implemented here 1228 Rule L4 - glyph mirroring is implemented as a display option below 1229 1230 Note: this should be applied a line at a time 1231 -------------------------------------------------------------------------*/ 1232 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 1233 { 1234 int ich = 0; 1235 1236 /* true as soon as first odd level encountered */ 1237 fReverse = fReverse || odd(level); 1238 1239 for (; ich < cch; ich++) 1240 { 1241 if (plevel[ich] < level) 1242 { 1243 break; 1244 } 1245 else if (plevel[ich] > level) 1246 { 1247 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich, 1248 cch - ich, fReverse) - 1; 1249 } 1250 } 1251 if (fReverse) 1252 { 1253 reverse(pIndexs, ich); 1254 } 1255 return ich; 1256 } 1257 1258 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */ 1259 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 1260 { 1261 int ich = 0; 1262 int newlevel = -1; 1263 1264 /* true as soon as first odd level encountered */ 1265 fReverse = fReverse || odd(level); 1266 1267 for (; ich < cch; ich++) 1268 { 1269 if (plevel[ich] < level) 1270 break; 1271 else if (plevel[ich] > level) 1272 newlevel = ich; 1273 } 1274 if (fReverse) 1275 { 1276 reverse(pIndexs, ich); 1277 } 1278 1279 if (newlevel >= 0) 1280 { 1281 ich = 0; 1282 for (; ich < cch; ich++) 1283 if (plevel[ich] < level) 1284 break; 1285 else if (plevel[ich] > level) 1286 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich, 1287 cch - ich, fReverse) - 1; 1288 } 1289 1290 return ich; 1291 } 1292 1293 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength) 1294 { 1295 unsigned int i; 1296 1297 classify(string, strength, count, c); 1298 for (i = 0; i < count; i++) 1299 { 1300 switch (strength[i]) 1301 { 1302 case L: 1303 case LRE: 1304 case LRO: 1305 case R: 1306 case AL: 1307 case RLE: 1308 case RLO: 1309 strength[i] = BIDI_STRONG; 1310 break; 1311 case PDF: 1312 case EN: 1313 case ES: 1314 case ET: 1315 case AN: 1316 case CS: 1317 case BN: 1318 strength[i] = BIDI_WEAK; 1319 break; 1320 case B: 1321 case S: 1322 case WS: 1323 case ON: 1324 default: /* Neutrals and NSM */ 1325 strength[i] = BIDI_NEUTRAL; 1326 } 1327 } 1328 return TRUE; 1329 } 1330