1 /* 2 * Uniscribe BiDirectional handling 3 * 4 * Copyright 2003 Shachar Shemesh 5 * Copyright 2007 Maarten Lankhorst 6 * Copyright 2010 CodeWeavers, Aric Stewart 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with this library; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 21 * 22 * Code derived from the modified reference implementation 23 * that was found in revision 17 of http://unicode.org/reports/tr9/ 24 * "Unicode Standard Annex #9: THE BIDIRECTIONAL ALGORITHM" 25 * 26 * -- Copyright (C) 1999-2005, ASMUS, Inc. 27 * 28 * Permission is hereby granted, free of charge, to any person obtaining a 29 * copy of the Unicode data files and any associated documentation (the 30 * "Data Files") or Unicode software and any associated documentation (the 31 * "Software") to deal in the Data Files or Software without restriction, 32 * including without limitation the rights to use, copy, modify, merge, 33 * publish, distribute, and/or sell copies of the Data Files or Software, 34 * and to permit persons to whom the Data Files or Software are furnished 35 * to do so, provided that (a) the above copyright notice(s) and this 36 * permission notice appear with all copies of the Data Files or Software, 37 * (b) both the above copyright notice(s) and this permission notice appear 38 * in associated documentation, and (c) there is clear notice in each 39 * modified Data File or in the Software as well as in the documentation 40 * associated with the Data File(s) or Software that the data or software 41 * has been modified. 42 */ 43 44 #include <windef.h> 45 46 #include <wine/list.h> 47 48 #include "usp10_internal.h" 49 50 extern const unsigned short bidi_bracket_table[] DECLSPEC_HIDDEN; 51 52 WINE_DEFAULT_DEBUG_CHANNEL(bidi); 53 54 #define ASSERT(x) do { if (!(x)) FIXME("assert failed: %s\n", #x); } while(0) 55 #define MAX_DEPTH 125 56 57 /* HELPER FUNCTIONS AND DECLARATIONS */ 58 59 /*------------------------------------------------------------------------ 60 Bidirectional Character Types 61 62 as defined by the Unicode Bidirectional Algorithm Table 3-7. 63 64 Note: 65 66 The list of bidirectional character types here is not grouped the 67 same way as the table 3-7, since the numberic values for the types 68 are chosen to keep the state and action tables compact. 69 ------------------------------------------------------------------------*/ 70 enum directions 71 { 72 /* input types */ 73 /* ON MUST be zero, code relies on ON = NI = 0 */ 74 ON = 0, /* Other Neutral */ 75 L, /* Left Letter */ 76 R, /* Right Letter */ 77 AN, /* Arabic Number */ 78 EN, /* European Number */ 79 AL, /* Arabic Letter (Right-to-left) */ 80 NSM, /* Non-spacing Mark */ 81 CS, /* Common Separator */ 82 ES, /* European Separator */ 83 ET, /* European Terminator (post/prefix e.g. $ and %) */ 84 85 /* resolved types */ 86 BN, /* Boundary neutral (type of RLE etc after explicit levels) */ 87 88 /* input types, */ 89 S, /* Segment Separator (TAB) // used only in L1 */ 90 WS, /* White space // used only in L1 */ 91 B, /* Paragraph Separator (aka as PS) */ 92 93 /* types for explicit controls */ 94 RLO, /* these are used only in X1-X9 */ 95 RLE, 96 LRO, 97 LRE, 98 PDF, 99 100 LRI, /* Isolate formatting characters new with 6.3 */ 101 RLI, 102 FSI, 103 PDI, 104 105 /* resolved types, also resolved directions */ 106 NI = ON, /* alias, where ON, WS, S and Isolates are treated the same */ 107 }; 108 109 static const char debug_type[][4] = 110 { 111 "ON", /* Other Neutral */ 112 "L", /* Left Letter */ 113 "R", /* Right Letter */ 114 "AN", /* Arabic Number */ 115 "EN", /* European Number */ 116 "AL", /* Arabic Letter (Right-to-left) */ 117 "NSM", /* Non-spacing Mark */ 118 "CS", /* Common Separator */ 119 "ES", /* European Separator */ 120 "ET", /* European Terminator (post/prefix e.g. $ and %) */ 121 "BN", /* Boundary neutral (type of RLE etc after explicit levels) */ 122 "S", /* Segment Separator (TAB) // used only in L1 */ 123 "WS", /* White space // used only in L1 */ 124 "B", /* Paragraph Separator (aka as PS) */ 125 "RLO", /* these are used only in X1-X9 */ 126 "RLE", 127 "LRO", 128 "LRE", 129 "PDF", 130 "LRI", /* Isolate formatting characters new with 6.3 */ 131 "RLI", 132 "FSI", 133 "PDI", 134 }; 135 136 /* HELPER FUNCTIONS */ 137 138 static inline void dump_types(const char* header, WORD *types, int start, int end) 139 { 140 int i, len = 0; 141 TRACE("%s:",header); 142 for (i = start; i < end && len < 200; i++) 143 { 144 TRACE(" %s",debug_type[types[i]]); 145 len += strlen(debug_type[types[i]])+1; 146 } 147 if (i != end) 148 TRACE("..."); 149 TRACE("\n"); 150 } 151 152 /* Convert the libwine information to the direction enum */ 153 static void classify(const WCHAR *string, WORD *chartype, DWORD count, const SCRIPT_CONTROL *c) 154 { 155 static const enum directions dir_map[16] = 156 { 157 L, /* unassigned defaults to L */ 158 L, 159 R, 160 EN, 161 ES, 162 ET, 163 AN, 164 CS, 165 B, 166 S, 167 WS, 168 ON, 169 AL, 170 NSM, 171 BN, 172 PDF /* also LRE, LRO, RLE, RLO */ 173 }; 174 175 unsigned i; 176 177 for (i = 0; i < count; ++i) 178 { 179 chartype[i] = dir_map[get_char_typeW(string[i]) >> 12]; 180 switch (chartype[i]) 181 { 182 case ES: 183 if (!c->fLegacyBidiClass) break; 184 switch (string[i]) 185 { 186 case '-': 187 case '+': chartype[i] = NI; break; 188 case '/': chartype[i] = CS; break; 189 } 190 break; 191 case PDF: 192 switch (string[i]) 193 { 194 case 0x202A: chartype[i] = LRE; break; 195 case 0x202B: chartype[i] = RLE; break; 196 case 0x202C: chartype[i] = PDF; break; 197 case 0x202D: chartype[i] = LRO; break; 198 case 0x202E: chartype[i] = RLO; break; 199 case 0x2066: chartype[i] = LRI; break; 200 case 0x2067: chartype[i] = RLI; break; 201 case 0x2068: chartype[i] = FSI; break; 202 case 0x2069: chartype[i] = PDI; break; 203 } 204 break; 205 } 206 } 207 } 208 209 /* RESOLVE EXPLICIT */ 210 211 static WORD GreaterEven(int i) 212 { 213 return odd(i) ? i + 1 : i + 2; 214 } 215 216 static WORD GreaterOdd(int i) 217 { 218 return odd(i) ? i + 2 : i + 1; 219 } 220 221 static WORD EmbeddingDirection(int level) 222 { 223 return odd(level) ? R : L; 224 } 225 226 /*------------------------------------------------------------------------ 227 Function: resolveExplicit 228 229 Recursively resolves explicit embedding levels and overrides. 230 Implements rules X1-X9, of the Unicode Bidirectional Algorithm. 231 232 Input: Base embedding level and direction 233 Character count 234 235 Output: Array of embedding levels 236 237 In/Out: Array of direction classes 238 239 240 Note: The function uses two simple counters to keep track of 241 matching explicit codes and PDF. Use the default argument for 242 the outermost call. The nesting counter counts the recursion 243 depth and not the embedding level. 244 ------------------------------------------------------------------------*/ 245 typedef struct tagStackItem { 246 int level; 247 int override; 248 BOOL isolate; 249 } StackItem; 250 251 #define push_stack(l,o,i) \ 252 do { stack_top--; \ 253 stack[stack_top].level = l; \ 254 stack[stack_top].override = o; \ 255 stack[stack_top].isolate = i;} while(0) 256 257 #define pop_stack() do { stack_top++; } while(0) 258 259 #define valid_level(x) (x <= MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) 260 261 static void resolveExplicit(int level, WORD *pclass, WORD *poutLevel, WORD *poutOverrides, int count, BOOL initialOverride) 262 { 263 /* X1 */ 264 int overflow_isolate_count = 0; 265 int overflow_embedding_count = 0; 266 int valid_isolate_count = 0; 267 int i; 268 269 StackItem stack[MAX_DEPTH+2]; 270 int stack_top = MAX_DEPTH+1; 271 272 stack[stack_top].level = level; 273 stack[stack_top].override = NI; 274 stack[stack_top].isolate = FALSE; 275 276 if (initialOverride) 277 { 278 if (odd(level)) 279 push_stack(level, R, FALSE); 280 else 281 push_stack(level, L, FALSE); 282 } 283 284 for (i = 0; i < count; i++) 285 { 286 poutOverrides[i] = stack[stack_top].override; 287 288 /* X2 */ 289 if (pclass[i] == RLE) 290 { 291 int least_odd = GreaterOdd(stack[stack_top].level); 292 poutLevel[i] = stack[stack_top].level; 293 if (valid_level(least_odd)) 294 push_stack(least_odd, NI, FALSE); 295 else if (overflow_isolate_count == 0) 296 overflow_embedding_count++; 297 } 298 /* X3 */ 299 else if (pclass[i] == LRE) 300 { 301 int least_even = GreaterEven(stack[stack_top].level); 302 poutLevel[i] = stack[stack_top].level; 303 if (valid_level(least_even)) 304 push_stack(least_even, NI, FALSE); 305 else if (overflow_isolate_count == 0) 306 overflow_embedding_count++; 307 } 308 /* X4 */ 309 else if (pclass[i] == RLO) 310 { 311 int least_odd = GreaterOdd(stack[stack_top].level); 312 poutLevel[i] = stack[stack_top].level; 313 if (valid_level(least_odd)) 314 push_stack(least_odd, R, FALSE); 315 else if (overflow_isolate_count == 0) 316 overflow_embedding_count++; 317 } 318 /* X5 */ 319 else if (pclass[i] == LRO) 320 { 321 int least_even = GreaterEven(stack[stack_top].level); 322 poutLevel[i] = stack[stack_top].level; 323 if (valid_level(least_even)) 324 push_stack(least_even, L, FALSE); 325 else if (overflow_isolate_count == 0) 326 overflow_embedding_count++; 327 } 328 /* X5a */ 329 else if (pclass[i] == RLI) 330 { 331 int least_odd = GreaterOdd(stack[stack_top].level); 332 poutLevel[i] = stack[stack_top].level; 333 if (valid_level(least_odd)) 334 { 335 valid_isolate_count++; 336 push_stack(least_odd, NI, TRUE); 337 } 338 else 339 overflow_isolate_count++; 340 } 341 /* X5b */ 342 else if (pclass[i] == LRI) 343 { 344 int least_even = GreaterEven(stack[stack_top].level); 345 poutLevel[i] = stack[stack_top].level; 346 if (valid_level(least_even)) 347 { 348 valid_isolate_count++; 349 push_stack(least_even, NI, TRUE); 350 } 351 else 352 overflow_isolate_count++; 353 } 354 /* X5c */ 355 else if (pclass[i] == FSI) 356 { 357 int j; 358 int new_level = 0; 359 int skipping = 0; 360 poutLevel[i] = stack[stack_top].level; 361 for (j = i+1; j < count; j++) 362 { 363 if (pclass[j] == LRI || pclass[j] == RLI || pclass[j] == FSI) 364 { 365 skipping++; 366 continue; 367 } 368 else if (pclass[j] == PDI) 369 { 370 if (skipping) 371 skipping --; 372 else 373 break; 374 continue; 375 } 376 377 if (skipping) continue; 378 379 if (pclass[j] == L) 380 { 381 new_level = 0; 382 break; 383 } 384 else if (pclass[j] == R || pclass[j] == AL) 385 { 386 new_level = 1; 387 break; 388 } 389 } 390 if (odd(new_level)) 391 { 392 int least_odd = GreaterOdd(stack[stack_top].level); 393 if (valid_level(least_odd)) 394 { 395 valid_isolate_count++; 396 push_stack(least_odd, NI, TRUE); 397 } 398 else 399 overflow_isolate_count++; 400 } 401 else 402 { 403 int least_even = GreaterEven(stack[stack_top].level); 404 if (valid_level(least_even)) 405 { 406 valid_isolate_count++; 407 push_stack(least_even, NI, TRUE); 408 } 409 else 410 overflow_isolate_count++; 411 } 412 } 413 /* X6 */ 414 else if (pclass[i] != B && pclass[i] != BN && pclass[i] != PDI && pclass[i] != PDF) 415 { 416 poutLevel[i] = stack[stack_top].level; 417 if (stack[stack_top].override != NI) 418 pclass[i] = stack[stack_top].override; 419 } 420 /* X6a */ 421 else if (pclass[i] == PDI) 422 { 423 if (overflow_isolate_count) overflow_isolate_count--; 424 else if (!valid_isolate_count) {/* do nothing */} 425 else 426 { 427 overflow_embedding_count = 0; 428 while (!stack[stack_top].isolate) pop_stack(); 429 pop_stack(); 430 valid_isolate_count --; 431 } 432 poutLevel[i] = stack[stack_top].level; 433 } 434 /* X7 */ 435 else if (pclass[i] == PDF) 436 { 437 poutLevel[i] = stack[stack_top].level; 438 if (overflow_isolate_count) {/* do nothing */} 439 else if (overflow_embedding_count) overflow_embedding_count--; 440 else if (!stack[stack_top].isolate && stack_top < (MAX_DEPTH+1)) 441 pop_stack(); 442 } 443 /* X8: Nothing */ 444 } 445 /* X9: Based on 5.2 Retaining Explicit Formatting Characters */ 446 for (i = 0; i < count ; i++) 447 if (pclass[i] == RLE || pclass[i] == LRE || pclass[i] == RLO || pclass[i] == LRO || pclass[i] == PDF) 448 pclass[i] = BN; 449 } 450 451 static inline int previousValidChar(const WORD *pcls, int index, int back_fence) 452 { 453 if (index == -1 || index == back_fence) return index; 454 index --; 455 while (index > back_fence && pcls[index] == BN) index --; 456 return index; 457 } 458 459 static inline int nextValidChar(const WORD *pcls, int index, int front_fence) 460 { 461 if (index == front_fence) return index; 462 index ++; 463 while (index < front_fence && pcls[index] == BN) index ++; 464 return index; 465 } 466 467 typedef struct tagRun 468 { 469 int start; 470 int end; 471 WORD e; 472 } Run; 473 474 typedef struct tagRunChar 475 { 476 WCHAR ch; 477 WORD *pcls; 478 } RunChar; 479 480 typedef struct tagIsolatedRun 481 { 482 struct list entry; 483 int length; 484 WORD sos; 485 WORD eos; 486 WORD e; 487 488 RunChar item[1]; 489 } IsolatedRun; 490 491 static inline int iso_nextValidChar(IsolatedRun *iso_run, int index) 492 { 493 if (index >= (iso_run->length-1)) return -1; 494 index ++; 495 while (index < iso_run->length && *iso_run->item[index].pcls == BN) index++; 496 if (index == iso_run->length) return -1; 497 return index; 498 } 499 500 static inline int iso_previousValidChar(IsolatedRun *iso_run, int index) 501 { 502 503 if (index <= 0) return -1; 504 index --; 505 while (index > -1 && *iso_run->item[index].pcls == BN) index--; 506 return index; 507 } 508 509 static inline void iso_dump_types(const char* header, IsolatedRun *iso_run) 510 { 511 int i, len = 0; 512 TRACE("%s:",header); 513 TRACE("[ "); 514 for (i = 0; i < iso_run->length && len < 200; i++) 515 { 516 TRACE(" %s",debug_type[*iso_run->item[i].pcls]); 517 len += strlen(debug_type[*iso_run->item[i].pcls])+1; 518 } 519 if (i != iso_run->length) 520 TRACE("..."); 521 TRACE(" ]\n"); 522 } 523 524 /*------------------------------------------------------------------------ 525 Function: resolveWeak 526 527 Resolves the directionality of numeric and other weak character types 528 529 Implements rules X10 and W1-W6 of the Unicode Bidirectional Algorithm. 530 531 Input: Array of embedding levels 532 Character count 533 534 In/Out: Array of directional classes 535 536 Note: On input only these directional classes are expected 537 AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS, 538 ------------------------------------------------------------------------*/ 539 540 static void resolveWeak(IsolatedRun * iso_run) 541 { 542 int i; 543 544 /* W1 */ 545 for (i=0; i < iso_run->length; i++) 546 { 547 if (*iso_run->item[i].pcls == NSM) 548 { 549 int j = iso_previousValidChar(iso_run, i); 550 if (j == -1) 551 *iso_run->item[i].pcls = iso_run->sos; 552 else if (*iso_run->item[j].pcls >= LRI) 553 *iso_run->item[i].pcls = ON; 554 else 555 *iso_run->item[i].pcls = *iso_run->item[j].pcls; 556 } 557 } 558 559 /* W2 */ 560 for (i = 0; i < iso_run->length; i++) 561 { 562 if (*iso_run->item[i].pcls == EN) 563 { 564 int j = iso_previousValidChar(iso_run, i); 565 while (j > -1) 566 { 567 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L || *iso_run->item[j].pcls == AL) 568 { 569 if (*iso_run->item[j].pcls == AL) 570 *iso_run->item[i].pcls = AN; 571 break; 572 } 573 j = iso_previousValidChar(iso_run, j); 574 } 575 } 576 } 577 578 /* W3 */ 579 for (i = 0; i < iso_run->length; i++) 580 { 581 if (*iso_run->item[i].pcls == AL) 582 *iso_run->item[i].pcls = R; 583 } 584 585 /* W4 */ 586 for (i = 0; i < iso_run->length; i++) 587 { 588 if (*iso_run->item[i].pcls == ES) 589 { 590 int b = iso_previousValidChar(iso_run, i); 591 int f = iso_nextValidChar(iso_run, i); 592 593 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN) 594 *iso_run->item[i].pcls = EN; 595 } 596 else if (*iso_run->item[i].pcls == CS) 597 { 598 int b = iso_previousValidChar(iso_run, i); 599 int f = iso_nextValidChar(iso_run, i); 600 601 if (b > -1 && f > -1 && *iso_run->item[b].pcls == EN && *iso_run->item[f].pcls == EN) 602 *iso_run->item[i].pcls = EN; 603 else if (b > -1 && f > -1 && *iso_run->item[b].pcls == AN && *iso_run->item[f].pcls == AN) 604 *iso_run->item[i].pcls = AN; 605 } 606 } 607 608 /* W5 */ 609 for (i = 0; i < iso_run->length; i++) 610 { 611 if (*iso_run->item[i].pcls == ET) 612 { 613 int j; 614 for (j = i-1 ; j > -1; j--) 615 { 616 if (*iso_run->item[j].pcls == BN) continue; 617 if (*iso_run->item[j].pcls == ET) continue; 618 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN; 619 else break; 620 } 621 if (*iso_run->item[i].pcls == ET) 622 { 623 for (j = i+1; j < iso_run->length; j++) 624 { 625 if (*iso_run->item[j].pcls == BN) continue; 626 if (*iso_run->item[j].pcls == ET) continue; 627 else if (*iso_run->item[j].pcls == EN) *iso_run->item[i].pcls = EN; 628 else break; 629 } 630 } 631 } 632 } 633 634 /* W6 */ 635 for (i = 0; i < iso_run->length; i++) 636 { 637 if (*iso_run->item[i].pcls == ET || *iso_run->item[i].pcls == ES || *iso_run->item[i].pcls == CS || *iso_run->item[i].pcls == ON) 638 { 639 int b = i-1; 640 int f = i+1; 641 if (b > -1 && *iso_run->item[b].pcls == BN) 642 *iso_run->item[b].pcls = ON; 643 if (f < iso_run->length && *iso_run->item[f].pcls == BN) 644 *iso_run->item[f].pcls = ON; 645 646 *iso_run->item[i].pcls = ON; 647 } 648 } 649 650 /* W7 */ 651 for (i = 0; i < iso_run->length; i++) 652 { 653 if (*iso_run->item[i].pcls == EN) 654 { 655 int j; 656 for (j = iso_previousValidChar(iso_run, i); j > -1; j = iso_previousValidChar(iso_run, j)) 657 if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == L) 658 { 659 if (*iso_run->item[j].pcls == L) 660 *iso_run->item[i].pcls = L; 661 break; 662 } 663 if (iso_run->sos == L && j == -1) 664 *iso_run->item[i].pcls = L; 665 } 666 } 667 } 668 669 typedef struct tagBracketPair 670 { 671 int start; 672 int end; 673 } BracketPair; 674 675 static int compr(const void *a, const void* b) 676 { 677 return ((BracketPair*)a)->start - ((BracketPair*)b)->start; 678 } 679 680 static BracketPair *computeBracketPairs(IsolatedRun *iso_run) 681 { 682 WCHAR *open_stack; 683 int *stack_index; 684 int stack_top = iso_run->length; 685 BracketPair *out = NULL; 686 int pair_count = 0; 687 int i; 688 689 open_stack = heap_alloc(iso_run->length * sizeof(*open_stack)); 690 stack_index = heap_alloc(iso_run->length * sizeof(*stack_index)); 691 692 for (i = 0; i < iso_run->length; i++) 693 { 694 unsigned short ubv = get_table_entry(bidi_bracket_table, iso_run->item[i].ch); 695 if (ubv) 696 { 697 if (!out) 698 { 699 out = heap_alloc(sizeof(*out)); 700 out[0].start = -1; 701 } 702 703 if ((ubv >> 8) == 0) 704 { 705 stack_top --; 706 open_stack[stack_top] = iso_run->item[i].ch + (signed char)(ubv & 0xff); 707 /* deal with canonical equivalent U+2329/232A and U+3008/3009 */ 708 if (open_stack[stack_top] == 0x232A) 709 open_stack[stack_top] = 0x3009; 710 stack_index[stack_top] = i; 711 } 712 else if ((ubv >> 8) == 1) 713 { 714 int j; 715 if (stack_top == iso_run->length) continue; 716 for (j = stack_top; j < iso_run->length; j++) 717 { 718 WCHAR c = iso_run->item[i].ch; 719 if (c == 0x232A) c = 0x3009; 720 if (c == open_stack[j]) 721 { 722 out[pair_count].start = stack_index[j]; 723 out[pair_count].end = i; 724 pair_count++; 725 out = HeapReAlloc(GetProcessHeap(), 0, out, sizeof(BracketPair) * (pair_count+1)); 726 out[pair_count].start = -1; 727 stack_top = j+1; 728 break; 729 } 730 } 731 } 732 } 733 } 734 if (pair_count == 0) 735 { 736 heap_free(out); 737 out = NULL; 738 } 739 else if (pair_count > 1) 740 qsort(out, pair_count, sizeof(BracketPair), compr); 741 742 heap_free(open_stack); 743 heap_free(stack_index); 744 return out; 745 } 746 747 #define N0_TYPE(a) ((a == AN || a == EN)?R:a) 748 749 /*------------------------------------------------------------------------ 750 Function: resolveNeutrals 751 752 Resolves the directionality of neutral character types. 753 754 Implements rules N1 and N2 of the Unicode Bidi Algorithm. 755 756 Input: Array of embedding levels 757 Character count 758 Baselevel 759 760 In/Out: Array of directional classes 761 762 Note: On input only these directional classes are expected 763 R, L, NI, AN, EN and BN 764 765 W8 resolves a number of ENs to L 766 ------------------------------------------------------------------------*/ 767 static void resolveNeutrals(IsolatedRun *iso_run) 768 { 769 int i; 770 BracketPair *pairs = NULL; 771 772 /* Translate isolates into NI */ 773 for (i = 0; i < iso_run->length; i++) 774 { 775 if (*iso_run->item[i].pcls >= LRI) 776 *iso_run->item[i].pcls = NI; 777 778 switch(*iso_run->item[i].pcls) 779 { 780 case B: 781 case S: 782 case WS: *iso_run->item[i].pcls = NI; 783 } 784 785 ASSERT(*iso_run->item[i].pcls < 5 || *iso_run->item[i].pcls == BN); /* "Only NI, L, R, AN, EN and BN are allowed" */ 786 } 787 788 /* N0: Skipping bracketed pairs for now */ 789 pairs = computeBracketPairs(iso_run); 790 if (pairs) 791 { 792 BracketPair *p = &pairs[0]; 793 int i = 0; 794 while (p->start >= 0) 795 { 796 int j; 797 int e = EmbeddingDirection(iso_run->e); 798 int o = EmbeddingDirection(iso_run->e+1); 799 BOOL flag_o = FALSE; 800 TRACE("Bracket Pair [%i - %i]\n",p->start, p->end); 801 802 /* N0.b */ 803 for (j = p->start+1; j < p->end; j++) 804 { 805 if (N0_TYPE(*iso_run->item[j].pcls) == e) 806 { 807 *iso_run->item[p->start].pcls = e; 808 *iso_run->item[p->end].pcls = e; 809 break; 810 } 811 else if (N0_TYPE(*iso_run->item[j].pcls) == o) 812 flag_o = TRUE; 813 } 814 /* N0.c */ 815 if (j == p->end && flag_o) 816 { 817 for (j = p->start; j >= 0; j--) 818 { 819 if (N0_TYPE(*iso_run->item[j].pcls) == o) 820 { 821 *iso_run->item[p->start].pcls = o; 822 *iso_run->item[p->end].pcls = o; 823 break; 824 } 825 else if (N0_TYPE(*iso_run->item[j].pcls) == e) 826 { 827 *iso_run->item[p->start].pcls = e; 828 *iso_run->item[p->end].pcls = e; 829 break; 830 } 831 } 832 if ( j < 0 ) 833 { 834 *iso_run->item[p->start].pcls = iso_run->sos; 835 *iso_run->item[p->end].pcls = iso_run->sos; 836 } 837 } 838 839 i++; 840 p = &pairs[i]; 841 } 842 heap_free(pairs); 843 } 844 845 /* N1 */ 846 for (i = 0; i < iso_run->length; i++) 847 { 848 WORD l,r; 849 850 if (*iso_run->item[i].pcls == NI) 851 { 852 int j; 853 int b = iso_previousValidChar(iso_run, i); 854 855 if (b == -1) 856 { 857 l = iso_run->sos; 858 b = 0; 859 } 860 else 861 { 862 if (*iso_run->item[b].pcls == R || *iso_run->item[b].pcls == AN || *iso_run->item[b].pcls == EN) 863 l = R; 864 else if (*iso_run->item[b].pcls == L) 865 l = L; 866 else /* No string type */ 867 continue; 868 } 869 j = iso_nextValidChar(iso_run, i); 870 while (j > -1 && *iso_run->item[j].pcls == NI) j = iso_nextValidChar(iso_run, j); 871 872 if (j == -1) 873 { 874 r = iso_run->eos; 875 j = iso_run->length; 876 } 877 else if (*iso_run->item[j].pcls == R || *iso_run->item[j].pcls == AN || *iso_run->item[j].pcls == EN) 878 r = R; 879 else if (*iso_run->item[j].pcls == L) 880 r = L; 881 else /* No string type */ 882 continue; 883 884 if (r == l) 885 { 886 for (b = i; b < j && b < iso_run->length; b++) 887 *iso_run->item[b].pcls = r; 888 } 889 } 890 } 891 892 /* N2 */ 893 for (i = 0; i < iso_run->length; i++) 894 { 895 if (*iso_run->item[i].pcls == NI) 896 { 897 int b = i-1; 898 int f = i+1; 899 *iso_run->item[i].pcls = EmbeddingDirection(iso_run->e); 900 if (b > -1 && *iso_run->item[b].pcls == BN) 901 *iso_run->item[b].pcls = EmbeddingDirection(iso_run->e); 902 if (f < iso_run->length && *iso_run->item[f].pcls == BN) 903 *iso_run->item[f].pcls = EmbeddingDirection(iso_run->e); 904 } 905 } 906 } 907 908 /*------------------------------------------------------------------------ 909 Function: resolveImplicit 910 911 Recursively resolves implicit embedding levels. 912 Implements rules I1 and I2 of the Unicode Bidirectional Algorithm. 913 914 Input: Array of direction classes 915 Character count 916 Base level 917 918 In/Out: Array of embedding levels 919 920 Note: levels may exceed 15 on output. 921 Accepted subset of direction classes 922 R, L, AN, EN 923 ------------------------------------------------------------------------*/ 924 static void resolveImplicit(const WORD * pcls, WORD *plevel, int sos, int eos) 925 { 926 int i; 927 928 /* I1/2 */ 929 for (i = sos; i <= eos; i++) 930 { 931 if (pcls[i] == BN) 932 continue; 933 934 ASSERT(pcls[i] > 0); /* "No Neutrals allowed to survive here." */ 935 ASSERT(pcls[i] < 5); /* "Out of range." */ 936 937 if (odd(plevel[i]) && (pcls[i] == L || pcls[i] == EN || pcls [i] == AN)) 938 plevel[i]++; 939 else if (!odd(plevel[i]) && pcls[i] == R) 940 plevel[i]++; 941 else if (!odd(plevel[i]) && (pcls[i] == EN || pcls [i] == AN)) 942 plevel[i]+=2; 943 } 944 } 945 946 static void resolveResolved(unsigned baselevel, const WORD * pcls, WORD *plevel, int sos, int eos) 947 { 948 int i; 949 950 /* L1 */ 951 for (i = sos; i <= eos; i++) 952 { 953 if (pcls[i] == B || pcls[i] == S) 954 { 955 int j = i -1; 956 while (i > sos && j >= sos && 957 (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI || 958 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO || 959 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN)) 960 plevel[j--] = baselevel; 961 plevel[i] = baselevel; 962 } 963 else if (pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || pcls[i] == RLO || 964 pcls[i] == PDF || pcls[i] == BN) 965 { 966 plevel[i] = i ? plevel[i - 1] : baselevel; 967 } 968 if (i == eos && 969 (pcls[i] == WS || pcls[i] == FSI || pcls[i] == LRI || pcls[i] == RLI || 970 pcls[i] == PDI || pcls[i] == LRE || pcls[i] == RLE || pcls[i] == LRO || 971 pcls[i] == RLO || pcls[i] == PDF || pcls[i] == BN )) 972 { 973 int j = i; 974 while (j >= sos && (pcls[j] == WS || pcls[j] == FSI || pcls[j] == LRI || pcls[j] == RLI || 975 pcls[j] == PDI || pcls[j] == LRE || pcls[j] == RLE || pcls[j] == LRO || 976 pcls[j] == RLO || pcls[j] == PDF || pcls[j] == BN)) 977 plevel[j--] = baselevel; 978 } 979 } 980 } 981 982 static void computeIsolatingRunsSet(unsigned baselevel, WORD *pcls, const WORD *pLevel, 983 const WCHAR *string, unsigned int uCount, struct list *set) 984 { 985 int run_start, run_end, i; 986 int run_count = 0; 987 Run *runs; 988 IsolatedRun *current_isolated; 989 990 if (!(runs = heap_alloc(uCount * sizeof(*runs)))) 991 return; 992 993 list_init(set); 994 995 /* Build Runs */ 996 run_start = 0; 997 while (run_start < uCount) 998 { 999 run_end = nextValidChar(pcls, run_start, uCount); 1000 while (run_end < uCount && pLevel[run_end] == pLevel[run_start]) run_end = nextValidChar(pcls, run_end, uCount); 1001 run_end --; 1002 runs[run_count].start = run_start; 1003 runs[run_count].end = run_end; 1004 runs[run_count].e = pLevel[run_start]; 1005 run_start = nextValidChar(pcls, run_end, uCount); 1006 run_count++; 1007 } 1008 1009 /* Build Isolating Runs */ 1010 i = 0; 1011 while (i < run_count) 1012 { 1013 int k = i; 1014 if (runs[k].start >= 0) 1015 { 1016 int type_fence, real_end; 1017 int j; 1018 1019 if (!(current_isolated = heap_alloc(FIELD_OFFSET(IsolatedRun, item[uCount])))) 1020 break; 1021 1022 run_start = runs[k].start; 1023 current_isolated->e = runs[k].e; 1024 current_isolated->length = (runs[k].end - runs[k].start)+1; 1025 1026 for (j = 0; j < current_isolated->length; j++) 1027 { 1028 current_isolated->item[j].pcls = &pcls[runs[k].start+j]; 1029 current_isolated->item[j].ch = string[runs[k].start + j]; 1030 } 1031 1032 run_end = runs[k].end; 1033 1034 TRACE("{ [%i -- %i]",run_start, run_end); 1035 1036 if (pcls[run_end] == BN) 1037 run_end = previousValidChar(pcls, run_end, runs[k].start); 1038 1039 while (run_end < uCount && (pcls[run_end] == RLI || pcls[run_end] == LRI || pcls[run_end] == FSI)) 1040 { 1041 j = k+1; 1042 search: 1043 while (j < run_count && pcls[runs[j].start] != PDI) j++; 1044 if (j < run_count && runs[i].e != runs[j].e) 1045 { 1046 j++; 1047 goto search; 1048 } 1049 1050 if (j != run_count) 1051 { 1052 int m; 1053 int l = current_isolated->length; 1054 1055 current_isolated->length += (runs[j].end - runs[j].start)+1; 1056 for (m = 0; l < current_isolated->length; l++, m++) 1057 { 1058 current_isolated->item[l].pcls = &pcls[runs[j].start+m]; 1059 current_isolated->item[l].ch = string[runs[j].start + m]; 1060 } 1061 1062 TRACE("[%i -- %i]",runs[j].start, runs[j].end); 1063 1064 run_end = runs[j].end; 1065 if (pcls[run_end] == BN) 1066 run_end = previousValidChar(pcls, run_end, runs[i].start); 1067 runs[j].start = -1; 1068 k = j; 1069 } 1070 else 1071 { 1072 run_end = uCount; 1073 break; 1074 } 1075 } 1076 1077 type_fence = previousValidChar(pcls, run_start, -1); 1078 1079 if (type_fence == -1) 1080 current_isolated->sos = (baselevel > pLevel[run_start])?baselevel:pLevel[run_start]; 1081 else 1082 current_isolated->sos = (pLevel[type_fence] > pLevel[run_start])?pLevel[type_fence]:pLevel[run_start]; 1083 1084 current_isolated->sos = EmbeddingDirection(current_isolated->sos); 1085 1086 if (run_end == uCount) 1087 current_isolated->eos = current_isolated->sos; 1088 else 1089 { 1090 /* eos could be an BN */ 1091 if ( pcls[run_end] == BN ) 1092 { 1093 real_end = previousValidChar(pcls, run_end, run_start-1); 1094 if (real_end < run_start) 1095 real_end = run_start; 1096 } 1097 else 1098 real_end = run_end; 1099 1100 type_fence = nextValidChar(pcls, run_end, uCount); 1101 if (type_fence == uCount) 1102 current_isolated->eos = (baselevel > pLevel[real_end])?baselevel:pLevel[real_end]; 1103 else 1104 current_isolated->eos = (pLevel[type_fence] > pLevel[real_end])?pLevel[type_fence]:pLevel[real_end]; 1105 1106 current_isolated->eos = EmbeddingDirection(current_isolated->eos); 1107 } 1108 1109 list_add_tail(set, ¤t_isolated->entry); 1110 TRACE(" } level %i {%s <--> %s}\n",current_isolated->e, debug_type[current_isolated->sos], debug_type[current_isolated->eos]); 1111 } 1112 i++; 1113 } 1114 1115 heap_free(runs); 1116 } 1117 1118 /************************************************************* 1119 * BIDI_DeterminLevels 1120 */ 1121 BOOL BIDI_DetermineLevels( 1122 const WCHAR *lpString, /* [in] The string for which information is to be returned */ 1123 unsigned int uCount, /* [in] Number of WCHARs in string. */ 1124 const SCRIPT_STATE *s, 1125 const SCRIPT_CONTROL *c, 1126 WORD *lpOutLevels, /* [out] final string levels */ 1127 WORD *lpOutOverrides /* [out] final string overrides */ 1128 ) 1129 { 1130 WORD *chartype; 1131 unsigned baselevel = 0; 1132 struct list IsolatingRuns; 1133 IsolatedRun *iso_run, *next; 1134 1135 TRACE("%s, %d\n", debugstr_wn(lpString, uCount), uCount); 1136 1137 if (!(chartype = heap_alloc(uCount * sizeof(*chartype)))) 1138 { 1139 WARN("Out of memory\n"); 1140 return FALSE; 1141 } 1142 1143 baselevel = s->uBidiLevel; 1144 1145 classify(lpString, chartype, uCount, c); 1146 if (TRACE_ON(bidi)) dump_types("Start ", chartype, 0, uCount); 1147 1148 memset(lpOutOverrides, 0, sizeof(WORD) * uCount); 1149 1150 /* resolve explicit */ 1151 resolveExplicit(baselevel, chartype, lpOutLevels, lpOutOverrides, uCount, s->fOverrideDirection); 1152 if (TRACE_ON(bidi)) dump_types("After Explicit", chartype, 0, uCount); 1153 1154 /* X10/BD13: Computer Isolating runs */ 1155 computeIsolatingRunsSet(baselevel, chartype, lpOutLevels, lpString, uCount, &IsolatingRuns); 1156 1157 LIST_FOR_EACH_ENTRY_SAFE(iso_run, next, &IsolatingRuns, IsolatedRun, entry) 1158 { 1159 if (TRACE_ON(bidi)) iso_dump_types("Run", iso_run); 1160 1161 /* resolve weak */ 1162 resolveWeak(iso_run); 1163 if (TRACE_ON(bidi)) iso_dump_types("After Weak", iso_run); 1164 1165 /* resolve neutrals */ 1166 resolveNeutrals(iso_run); 1167 if (TRACE_ON(bidi)) iso_dump_types("After Neutrals", iso_run); 1168 1169 list_remove(&iso_run->entry); 1170 heap_free(iso_run); 1171 } 1172 1173 if (TRACE_ON(bidi)) dump_types("Before Implicit", chartype, 0, uCount); 1174 /* resolveImplicit */ 1175 resolveImplicit(chartype, lpOutLevels, 0, uCount-1); 1176 1177 /* resolveResolvedLevels*/ 1178 classify(lpString, chartype, uCount, c); 1179 resolveResolved(baselevel, chartype, lpOutLevels, 0, uCount-1); 1180 1181 heap_free(chartype); 1182 return TRUE; 1183 } 1184 1185 /* reverse cch indexes */ 1186 static void reverse(int *pidx, int cch) 1187 { 1188 int temp; 1189 int ich = 0; 1190 for (; ich < --cch; ich++) 1191 { 1192 temp = pidx[ich]; 1193 pidx[ich] = pidx[cch]; 1194 pidx[cch] = temp; 1195 } 1196 } 1197 1198 1199 /*------------------------------------------------------------------------ 1200 Functions: reorder/reorderLevel 1201 1202 Recursively reorders the display string 1203 "From the highest level down, reverse all characters at that level and 1204 higher, down to the lowest odd level" 1205 1206 Implements rule L2 of the Unicode bidi Algorithm. 1207 1208 Input: Array of embedding levels 1209 Character count 1210 Flag enabling reversal (set to false by initial caller) 1211 1212 In/Out: Text to reorder 1213 1214 Note: levels may exceed 15 resp. 61 on input. 1215 1216 Rule L3 - reorder combining marks is not implemented here 1217 Rule L4 - glyph mirroring is implemented as a display option below 1218 1219 Note: this should be applied a line at a time 1220 -------------------------------------------------------------------------*/ 1221 int BIDI_ReorderV2lLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 1222 { 1223 int ich = 0; 1224 1225 /* true as soon as first odd level encountered */ 1226 fReverse = fReverse || odd(level); 1227 1228 for (; ich < cch; ich++) 1229 { 1230 if (plevel[ich] < level) 1231 { 1232 break; 1233 } 1234 else if (plevel[ich] > level) 1235 { 1236 ich += BIDI_ReorderV2lLevel(level + 1, pIndexs + ich, plevel + ich, 1237 cch - ich, fReverse) - 1; 1238 } 1239 } 1240 if (fReverse) 1241 { 1242 reverse(pIndexs, ich); 1243 } 1244 return ich; 1245 } 1246 1247 /* Applies the reorder in reverse. Taking an already reordered string and returning the original */ 1248 int BIDI_ReorderL2vLevel(int level, int *pIndexs, const BYTE* plevel, int cch, BOOL fReverse) 1249 { 1250 int ich = 0; 1251 int newlevel = -1; 1252 1253 /* true as soon as first odd level encountered */ 1254 fReverse = fReverse || odd(level); 1255 1256 for (; ich < cch; ich++) 1257 { 1258 if (plevel[ich] < level) 1259 break; 1260 else if (plevel[ich] > level) 1261 newlevel = ich; 1262 } 1263 if (fReverse) 1264 { 1265 reverse(pIndexs, ich); 1266 } 1267 1268 if (newlevel >= 0) 1269 { 1270 ich = 0; 1271 for (; ich < cch; ich++) 1272 if (plevel[ich] < level) 1273 break; 1274 else if (plevel[ich] > level) 1275 ich += BIDI_ReorderL2vLevel(level + 1, pIndexs + ich, plevel + ich, 1276 cch - ich, fReverse) - 1; 1277 } 1278 1279 return ich; 1280 } 1281 1282 BOOL BIDI_GetStrengths(const WCHAR *string, unsigned int count, const SCRIPT_CONTROL *c, WORD *strength) 1283 { 1284 unsigned int i; 1285 1286 classify(string, strength, count, c); 1287 for (i = 0; i < count; i++) 1288 { 1289 switch (strength[i]) 1290 { 1291 case L: 1292 case LRE: 1293 case LRO: 1294 case R: 1295 case AL: 1296 case RLE: 1297 case RLO: 1298 strength[i] = BIDI_STRONG; 1299 break; 1300 case PDF: 1301 case EN: 1302 case ES: 1303 case ET: 1304 case AN: 1305 case CS: 1306 case BN: 1307 strength[i] = BIDI_WEAK; 1308 break; 1309 case B: 1310 case S: 1311 case WS: 1312 case ON: 1313 default: /* Neutrals and NSM */ 1314 strength[i] = BIDI_NEUTRAL; 1315 } 1316 } 1317 return TRUE; 1318 } 1319