1 /* Bidi.java -- Bidirectional Algorithm implementation 2 Copyright (C) 2005, 2006, 2012 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 39 package java.text; 40 41 import java.awt.font.NumericShaper; 42 import java.awt.font.TextAttribute; 43 import java.util.ArrayList; 44 45 46 /** 47 * Bidirectional Algorithm implementation. 48 * 49 * The full algorithm is 50 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard 51 * Annex #9: The Bidirectional Algorithm</a>. 52 * 53 * @since 1.4 54 */ 55 public final class Bidi 56 { 57 /** 58 * This indicates that a strongly directional character in the text should 59 * set the initial direction, but if no such character is found, then the 60 * initial direction will be left-to-right. 61 */ 62 public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2; 63 64 /** 65 * This indicates that a strongly directional character in the text should 66 * set the initial direction, but if no such character is found, then the 67 * initial direction will be right-to-left. 68 */ 69 public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1; 70 71 /** 72 * This indicates that the initial direction should be left-to-right. 73 */ 74 public static final int DIRECTION_LEFT_TO_RIGHT = 0; 75 76 /** 77 * This indicates that the initial direction should be right-to-left. 78 */ 79 public static final int DIRECTION_RIGHT_TO_LEFT = 1; 80 81 // Flags used when computing the result. 82 private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT; 83 private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT; 84 85 // The text we are examining, and the starting offset. 86 // If we had a better way to handle createLineBidi, we wouldn't 87 // need this at all -- which for the String case would be an 88 // efficiency win. 89 private char[] text; 90 private int textOffset; 91 // The embeddings corresponding to the text, and the starting offset. 92 private byte[] embeddings; 93 private int embeddingOffset; 94 // The length of the text (and embeddings) to use. 95 private int length; 96 // The flags. 97 private int flags; 98 99 // All instance fields following this point are initialized 100 // during analysis. Fields before this must be set by the constructor. 101 102 // The initial embedding level. 103 private int baseEmbedding; 104 // The type of each character in the text. 105 private byte[] types; 106 // The levels we compute. 107 private byte[] levels; 108 109 // A list of indices where a formatting code was found. These 110 // are indicies into the original text -- not into the text after 111 // the codes have been removed. 112 private ArrayList<Integer> formatterIndices; 113 114 // Indices of the starts of runs in the text. 115 private int[] runs; 116 117 // A convenience field where we keep track of what kinds of runs 118 // we've seen. 119 private int resultFlags; 120 121 /** 122 * Create a new Bidi object given an attributed character iterator. 123 * This constructor will examine various attributes of the text: 124 * <ul> 125 * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the 126 * paragraph's base embedding level. This constructor will recognize 127 * either {@link TextAttribute#RUN_DIRECTION_LTR} or 128 * {@link TextAttribute#RUN_DIRECTION_RTL}. If neither is given, 129 * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed. 130 * </li> 131 * 132 * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric 133 * shaping will be done before the Bidi algorithm is run. 134 * </li> 135 * 136 * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given 137 * character, then the value of this attribute will be used as an 138 * embedding level override. 139 * </li> 140 * </ul> 141 * @param iter the attributed character iterator to use 142 */ Bidi(AttributedCharacterIterator iter)143 public Bidi(AttributedCharacterIterator iter) 144 { 145 // If set, this attribute should be set on all characters. 146 // We don't check this (should we?) but we do assume that we 147 // can simply examine the first character. 148 Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION); 149 if (val == TextAttribute.RUN_DIRECTION_LTR) 150 this.flags = DIRECTION_LEFT_TO_RIGHT; 151 else if (val == TextAttribute.RUN_DIRECTION_RTL) 152 this.flags = DIRECTION_RIGHT_TO_LEFT; 153 else 154 this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT; 155 156 // Likewise this attribute should be specified on the whole text. 157 // We read it here and then, if it is set, we apply the numeric shaper 158 // to the text before processing it. 159 NumericShaper shaper = null; 160 val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING); 161 if (val instanceof NumericShaper) 162 shaper = (NumericShaper) val; 163 164 text = new char[iter.getEndIndex() - iter.getBeginIndex()]; 165 embeddings = new byte[text.length]; 166 embeddingOffset = 0; 167 length = text.length; 168 for (int i = 0; i < text.length; ++i) 169 { 170 text[i] = iter.current(); 171 172 val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING); 173 if (val instanceof Integer) 174 { 175 int ival = ((Integer) val).intValue(); 176 byte bval; 177 if (ival < -62 || ival > 62) 178 bval = 0; 179 else 180 bval = (byte) ival; 181 embeddings[i] = bval; 182 } 183 } 184 185 // Invoke the numeric shaper, if specified. 186 if (shaper != null) 187 shaper.shape(text, 0, length); 188 189 runBidi(); 190 } 191 192 /** 193 * Create a new Bidi object with the indicated text and, possibly, explicit 194 * embedding settings. 195 * 196 * If the embeddings array is null, it is ignored. Otherwise it is taken to 197 * be explicit embedding settings corresponding to the text. Positive values 198 * from 1 to 61 are embedding levels, and negative values from -1 to -61 are 199 * embedding overrides. (FIXME: not at all clear what this really means.) 200 * 201 * @param text the text to use 202 * @param offset the offset of the first character of the text 203 * @param embeddings the explicit embeddings, or null if there are none 204 * @param embedOffset the offset of the first embedding value to use 205 * @param length the length of both the text and the embeddings 206 * @param flags a flag indicating the base embedding direction 207 */ Bidi(char[] text, int offset, byte[] embeddings, int embedOffset, int length, int flags)208 public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset, 209 int length, int flags) 210 { 211 if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT 212 && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT 213 && flags != DIRECTION_LEFT_TO_RIGHT 214 && flags != DIRECTION_RIGHT_TO_LEFT) 215 throw new IllegalArgumentException("unrecognized 'flags' argument: " 216 + flags); 217 this.text = text; 218 this.textOffset = offset; 219 this.embeddings = embeddings; 220 this.embeddingOffset = embedOffset; 221 this.length = length; 222 this.flags = flags; 223 224 runBidi(); 225 } 226 227 /** 228 * Create a new Bidi object using the contents of the given String 229 * as the text. 230 * @param text the text to use 231 * @param flags a flag indicating the base embedding direction 232 */ Bidi(String text, int flags)233 public Bidi(String text, int flags) 234 { 235 if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT 236 && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT 237 && flags != DIRECTION_LEFT_TO_RIGHT 238 && flags != DIRECTION_RIGHT_TO_LEFT) 239 throw new IllegalArgumentException("unrecognized 'flags' argument: " 240 + flags); 241 242 // This is inefficient, but it isn't clear whether it matters. 243 // If it does we can change our implementation a bit to allow either 244 // a String or a char[]. 245 this.text = text.toCharArray(); 246 this.textOffset = 0; 247 this.embeddings = null; 248 this.embeddingOffset = 0; 249 this.length = text.length(); 250 this.flags = flags; 251 252 runBidi(); 253 } 254 255 /** 256 * Implementation function which computes the initial type of 257 * each character in the input. 258 */ computeTypes()259 private void computeTypes() 260 { 261 types = new byte[length]; 262 for (int i = 0; i < length; ++i) 263 types[i] = Character.getDirectionality(text[textOffset + i]); 264 } 265 266 /** 267 * An internal function which implements rules P2 and P3. 268 * This computes the base embedding level. 269 * @return the paragraph's base embedding level 270 */ computeParagraphEmbeddingLevel()271 private int computeParagraphEmbeddingLevel() 272 { 273 // First check to see if the user supplied a directionality override. 274 if (flags == DIRECTION_LEFT_TO_RIGHT 275 || flags == DIRECTION_RIGHT_TO_LEFT) 276 return flags; 277 278 // This implements rules P2 and P3. 279 // (Note that we don't need P1, as the user supplies 280 // a paragraph.) 281 for (int i = 0; i < length; ++i) 282 { 283 int dir = types[i]; 284 if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT) 285 return DIRECTION_LEFT_TO_RIGHT; 286 if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT 287 || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT) 288 return DIRECTION_RIGHT_TO_LEFT; 289 } 290 return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT 291 ? DIRECTION_LEFT_TO_RIGHT 292 : DIRECTION_RIGHT_TO_LEFT); 293 } 294 295 /** 296 * An internal function which implements rules X1 through X9. 297 * This computes the initial levels for the text, handling 298 * explicit overrides and embeddings. 299 */ computeExplicitLevels()300 private void computeExplicitLevels() 301 { 302 levels = new byte[length]; 303 byte currentEmbedding = (byte) baseEmbedding; 304 // The directional override is a Character directionality 305 // constant. -1 means there is no override. 306 byte directionalOverride = -1; 307 // The stack of pushed embeddings, and the stack pointer. 308 // Note that because the direction is inherent in the depth, 309 // and because we have a bit left over in a byte, we can encode 310 // the override, if any, directly in this value on the stack. 311 final int MAX_DEPTH = 62; 312 byte[] embeddingStack = new byte[MAX_DEPTH]; 313 int sp = 0; 314 315 for (int i = 0; i < length; ++i) 316 { 317 // If we see an explicit embedding, we use that, even if 318 // the current character is itself a directional override. 319 if (embeddings != null && embeddings[embeddingOffset + i] != 0) 320 { 321 // It isn't at all clear what we're supposed to do here. 322 // What does a negative value really mean? 323 // Should we push on the embedding stack here? 324 currentEmbedding = embeddings[embeddingOffset + i]; 325 if (currentEmbedding < 0) 326 { 327 currentEmbedding = (byte) -currentEmbedding; 328 directionalOverride 329 = (((currentEmbedding % 2) == 0) 330 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT 331 : Character.DIRECTIONALITY_RIGHT_TO_LEFT); 332 } 333 else 334 directionalOverride = -1; 335 continue; 336 } 337 // No explicit embedding. 338 boolean isLtoR = false; 339 boolean isSpecial = true; 340 switch (types[i]) 341 { 342 case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING: 343 case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE: 344 isLtoR = true; 345 // Fall through. 346 case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING: 347 case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE: 348 { 349 byte newEmbedding; 350 if (isLtoR) 351 { 352 // Least greater even. 353 newEmbedding = (byte) ((currentEmbedding & ~1) + 2); 354 } 355 else 356 { 357 // Least greater odd. 358 newEmbedding = (byte) ((currentEmbedding + 1) | 1); 359 } 360 // FIXME: we don't properly handle invalid pushes. 361 if (newEmbedding < MAX_DEPTH) 362 { 363 // The new level is valid. Push the old value. 364 // See above for a comment on the encoding here. 365 if (directionalOverride != -1) 366 currentEmbedding |= Byte.MIN_VALUE; 367 embeddingStack[sp++] = currentEmbedding; 368 currentEmbedding = newEmbedding; 369 if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE) 370 directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT; 371 else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE) 372 directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT; 373 else 374 directionalOverride = -1; 375 } 376 } 377 break; 378 case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT: 379 { 380 // FIXME: we don't properly handle a pop with a corresponding 381 // invalid push. 382 if (sp == 0) 383 { 384 // We saw a pop without a push. Just ignore it. 385 break; 386 } 387 byte newEmbedding = embeddingStack[--sp]; 388 currentEmbedding = (byte) (newEmbedding & 0x7f); 389 if (newEmbedding < 0) 390 directionalOverride 391 = (((newEmbedding & 1) == 0) 392 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT 393 : Character.DIRECTIONALITY_RIGHT_TO_LEFT); 394 else 395 directionalOverride = -1; 396 } 397 break; 398 default: 399 isSpecial = false; 400 break; 401 } 402 levels[i] = currentEmbedding; 403 if (isSpecial) 404 { 405 // Mark this character for removal. 406 if (formatterIndices == null) 407 formatterIndices = new ArrayList<Integer>(); 408 formatterIndices.add(Integer.valueOf(i)); 409 } 410 else if (directionalOverride != -1) 411 types[i] = directionalOverride; 412 } 413 414 // Remove the formatting codes and update both the arrays 415 // and 'length'. It would be more efficient not to remove 416 // these codes, but it is also more complicated. Also, the 417 // Unicode algorithm reference does not properly describe 418 // how this is to be done -- from what I can tell, their suggestions 419 // in this area will not yield the correct results. 420 if (formatterIndices == null) 421 return; 422 int output = 0, input = 0; 423 final int size = formatterIndices.size(); 424 for (int i = 0; i <= size; ++i) 425 { 426 int nextFmt; 427 if (i == size) 428 nextFmt = length; 429 else 430 nextFmt = formatterIndices.get(i).intValue(); 431 // Non-formatter codes are from 'input' to 'nextFmt'. 432 int len = nextFmt - input; 433 System.arraycopy(levels, input, levels, output, len); 434 System.arraycopy(types, input, types, output, len); 435 output += len; 436 input = nextFmt + 1; 437 } 438 length -= formatterIndices.size(); 439 } 440 441 /** 442 * An internal function to compute the boundaries of runs 443 * in the text. It isn't strictly necessary to do this, but 444 * it lets us write some following passes in a less complicated 445 * way. Also it lets us efficiently implement some of the public 446 * methods. A run is simply a sequence of characters at the 447 * same level. 448 */ computeRuns()449 private void computeRuns() 450 { 451 int runCount = 0; 452 int currentEmbedding = baseEmbedding; 453 for (int i = 0; i < length; ++i) 454 { 455 if (levels[i] != currentEmbedding) 456 { 457 currentEmbedding = levels[i]; 458 ++runCount; 459 } 460 } 461 462 // This may be called multiple times. If so, and if 463 // the number of runs has not changed, then don't bother 464 // allocating a new array. 465 if (runs == null || runs.length != runCount + 1) 466 runs = new int[runCount + 1]; 467 int where = 0; 468 int lastRunStart = 0; 469 currentEmbedding = baseEmbedding; 470 for (int i = 0; i < length; ++i) 471 { 472 if (levels[i] != currentEmbedding) 473 { 474 runs[where++] = lastRunStart; 475 lastRunStart = i; 476 currentEmbedding = levels[i]; 477 } 478 } 479 runs[where++] = lastRunStart; 480 } 481 482 /** 483 * An internal method to resolve weak types. This implements 484 * rules W1 through W7. 485 */ resolveWeakTypes()486 private void resolveWeakTypes() 487 { 488 final int runCount = getRunCount(); 489 490 int previousLevel = baseEmbedding; 491 for (int run = 0; run < runCount; ++run) 492 { 493 int start = getRunStart(run); 494 int end = getRunLimit(run); 495 int level = getRunLevel(run); 496 497 // These are the names used in the Bidi algorithm. 498 byte sor = (((Math.max(previousLevel, level) % 2) == 0) 499 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT 500 : Character.DIRECTIONALITY_RIGHT_TO_LEFT); 501 int nextLevel; 502 if (run == runCount - 1) 503 nextLevel = baseEmbedding; 504 else 505 nextLevel = getRunLevel(run + 1); 506 byte eor = (((Math.max(level, nextLevel) % 2) == 0) 507 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT 508 : Character.DIRECTIONALITY_RIGHT_TO_LEFT); 509 510 byte prevType = sor; 511 byte prevStrongType = sor; 512 for (int i = start; i < end; ++i) 513 { 514 final byte nextType = (i == end - 1) ? eor : types[i + 1]; 515 516 // Rule W1: change NSM to the prevailing direction. 517 if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK) 518 types[i] = prevType; 519 else 520 prevType = types[i]; 521 522 // Rule W2: change EN to AN in some cases. 523 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) 524 { 525 if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) 526 types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER; 527 } 528 else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT 529 || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT 530 || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) 531 prevStrongType = types[i]; 532 533 // Rule W3: change AL to R. 534 if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) 535 types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT; 536 537 // Rule W4: handle separators between two numbers. 538 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER 539 && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER) 540 { 541 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 542 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR) 543 types[i] = nextType; 544 } 545 else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER 546 && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER 547 && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR) 548 types[i] = nextType; 549 550 // Rule W5: change a sequence of european terminators to 551 // european numbers, if they are adjacent to european numbers. 552 // We also include BN characters in this. 553 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 554 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL) 555 { 556 if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER) 557 types[i] = prevType; 558 else 559 { 560 // Look ahead to see if there is an EN terminating this 561 // sequence of ETs. 562 int j = i + 1; 563 while (j < end 564 && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 565 || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)) 566 ++j; 567 if (j < end 568 && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) 569 { 570 // Change them all to EN now. 571 for (int k = i; k < j; ++k) 572 types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER; 573 } 574 } 575 } 576 577 // Rule W6: separators and terminators change to ON. 578 // Again we include BN. 579 if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 580 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 581 || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 582 || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL) 583 types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS; 584 585 // Rule W7: change european number types. 586 if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT 587 && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) 588 types[i] = prevStrongType; 589 } 590 591 previousLevel = level; 592 } 593 } 594 595 /** 596 * An internal method to resolve neutral types. This implements 597 * rules N1 and N2. 598 */ resolveNeutralTypes()599 private void resolveNeutralTypes() 600 { 601 // This implements rules N1 and N2. 602 final int runCount = getRunCount(); 603 604 int previousLevel = baseEmbedding; 605 for (int run = 0; run < runCount; ++run) 606 { 607 int start = getRunStart(run); 608 int end = getRunLimit(run); 609 int level = getRunLevel(run); 610 611 byte embeddingDirection 612 = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT 613 : Character.DIRECTIONALITY_RIGHT_TO_LEFT); 614 // These are the names used in the Bidi algorithm. 615 byte sor = (((Math.max(previousLevel, level) % 2) == 0) 616 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT 617 : Character.DIRECTIONALITY_RIGHT_TO_LEFT); 618 int nextLevel; 619 if (run == runCount - 1) 620 nextLevel = baseEmbedding; 621 else 622 nextLevel = getRunLevel(run + 1); 623 byte eor = (((Math.max(level, nextLevel) % 2) == 0) 624 ? Character.DIRECTIONALITY_LEFT_TO_RIGHT 625 : Character.DIRECTIONALITY_RIGHT_TO_LEFT); 626 627 byte prevStrong = sor; 628 int neutralStart = -1; 629 for (int i = start; i <= end; ++i) 630 { 631 byte newStrong = -1; 632 byte thisType = i == end ? eor : types[i]; 633 switch (thisType) 634 { 635 case Character.DIRECTIONALITY_LEFT_TO_RIGHT: 636 newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT; 637 break; 638 case Character.DIRECTIONALITY_RIGHT_TO_LEFT: 639 case Character.DIRECTIONALITY_ARABIC_NUMBER: 640 case Character.DIRECTIONALITY_EUROPEAN_NUMBER: 641 newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT; 642 break; 643 case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL: 644 case Character.DIRECTIONALITY_OTHER_NEUTRALS: 645 case Character.DIRECTIONALITY_SEGMENT_SEPARATOR: 646 case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR: 647 case Character.DIRECTIONALITY_WHITESPACE: 648 if (neutralStart == -1) 649 neutralStart = i; 650 break; 651 } 652 // If we see a strong character, update all the neutrals. 653 if (newStrong != -1) 654 { 655 if (neutralStart != -1) 656 { 657 byte override = (prevStrong == newStrong 658 ? prevStrong 659 : embeddingDirection); 660 for (int j = neutralStart; j < i; ++j) 661 types[j] = override; 662 } 663 prevStrong = newStrong; 664 neutralStart = -1; 665 } 666 } 667 668 previousLevel = level; 669 } 670 } 671 672 /** 673 * An internal method to resolve implicit levels. 674 * This implements rules I1 and I2. 675 */ resolveImplicitLevels()676 private void resolveImplicitLevels() 677 { 678 // This implements rules I1 and I2. 679 for (int i = 0; i < length; ++i) 680 { 681 if ((levels[i] & 1) == 0) 682 { 683 if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT) 684 ++levels[i]; 685 else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER 686 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) 687 levels[i] += 2; 688 } 689 else 690 { 691 if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT 692 || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER 693 || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER) 694 ++levels[i]; 695 } 696 697 // Update the result flags. 698 resultFlags |= 1 << (levels[i] & 1); 699 } 700 // One final update of the result flags, using the base level. 701 resultFlags |= 1 << baseEmbedding; 702 } 703 704 /** 705 * This reinserts the formatting codes that we removed early on. 706 * Actually it does not insert formatting codes per se, but rather 707 * simply inserts new levels at the appropriate locations in the 708 * 'levels' array. 709 */ reinsertFormattingCodes()710 private void reinsertFormattingCodes() 711 { 712 if (formatterIndices == null) 713 return; 714 int input = length; 715 int output = levels.length; 716 // Process from the end as we are copying the array over itself here. 717 for (int index = formatterIndices.size() - 1; index >= 0; --index) 718 { 719 int nextFmt = formatterIndices.get(index).intValue(); 720 721 // nextFmt points to a location in the original array. So, 722 // nextFmt+1 is the target of our copying. output is the location 723 // to which we last copied, thus we can derive the length of the 724 // copy from it. 725 int len = output - nextFmt - 1; 726 output = nextFmt; 727 input -= len; 728 // Note that we no longer need 'types' at this point, so we 729 // only edit 'levels'. 730 if (nextFmt + 1 < levels.length) 731 System.arraycopy(levels, input, levels, nextFmt + 1, len); 732 733 // Now set the level at the reinsertion point. 734 int rightLevel; 735 if (output == levels.length - 1) 736 rightLevel = baseEmbedding; 737 else 738 rightLevel = levels[output + 1]; 739 int leftLevel; 740 if (input == 0) 741 leftLevel = baseEmbedding; 742 else 743 leftLevel = levels[input]; 744 levels[output] = (byte) Math.max(leftLevel, rightLevel); 745 } 746 length = levels.length; 747 } 748 749 /** 750 * This is the main internal entry point. After a constructor 751 * has initialized the appropriate local state, it will call 752 * this method to do all the work. 753 */ runBidi()754 private void runBidi() 755 { 756 computeTypes(); 757 baseEmbedding = computeParagraphEmbeddingLevel(); 758 computeExplicitLevels(); 759 computeRuns(); 760 resolveWeakTypes(); 761 resolveNeutralTypes(); 762 resolveImplicitLevels(); 763 // We're done with the types. Let the GC clean up. 764 types = null; 765 reinsertFormattingCodes(); 766 // After resolving the implicit levels, the number 767 // of runs may have changed. 768 computeRuns(); 769 } 770 771 /** 772 * Return true if the paragraph base embedding is left-to-right, 773 * false otherwise. 774 */ baseIsLeftToRight()775 public boolean baseIsLeftToRight() 776 { 777 return baseEmbedding == DIRECTION_LEFT_TO_RIGHT; 778 } 779 780 /** 781 * Create a new Bidi object for a single line of text, taken 782 * from the text used when creating the current Bidi object. 783 * @param start the index of the first character of the line 784 * @param end the index of the final character of the line 785 * @return a new Bidi object for the indicated line of text 786 */ createLineBidi(int start, int end)787 public Bidi createLineBidi(int start, int end) 788 { 789 // This isn't the most efficient implementation possible. 790 // This probably does not matter, so we choose simplicity instead. 791 int level = getLevelAt(start); 792 int flag = (((level % 2) == 0) 793 ? DIRECTION_LEFT_TO_RIGHT 794 : DIRECTION_RIGHT_TO_LEFT); 795 return new Bidi(text, textOffset + start, 796 embeddings, embeddingOffset + start, 797 end - start, flag); 798 } 799 800 /** 801 * Return the base embedding level of the paragraph. 802 */ getBaseLevel()803 public int getBaseLevel() 804 { 805 return baseEmbedding; 806 } 807 808 /** 809 * Return the length of the paragraph, in characters. 810 */ getLength()811 public int getLength() 812 { 813 return length; 814 } 815 816 /** 817 * Return the level at the indicated character. If the 818 * supplied index is less than zero or greater than the length 819 * of the text, then the paragraph's base embedding level will 820 * be returned. 821 * @param offset the character to examine 822 * @return the level of that character 823 */ getLevelAt(int offset)824 public int getLevelAt(int offset) 825 { 826 if (offset < 0 || offset >= length) 827 return getBaseLevel(); 828 return levels[offset]; 829 } 830 831 /** 832 * Return the number of runs in the result. A run is 833 * a sequence of characters at the same embedding level. 834 */ getRunCount()835 public int getRunCount() 836 { 837 return runs.length; 838 } 839 840 /** 841 * Return the level of the indicated run. 842 * @param which the run to examine 843 * @return the level of that run 844 */ getRunLevel(int which)845 public int getRunLevel(int which) 846 { 847 return levels[runs[which]]; 848 } 849 850 /** 851 * Return the index of the character just following the end 852 * of the indicated run. 853 * @param which the run to examine 854 * @return the index of the character after the final character 855 * of the run 856 */ getRunLimit(int which)857 public int getRunLimit(int which) 858 { 859 if (which == runs.length - 1) 860 return length; 861 return runs[which + 1]; 862 } 863 864 /** 865 * Return the index of the first character in the indicated run. 866 * @param which the run to examine 867 * @return the index of the first character of the run 868 */ getRunStart(int which)869 public int getRunStart(int which) 870 { 871 return runs[which]; 872 } 873 874 /** 875 * Return true if the text is entirely left-to-right, and the 876 * base embedding is also left-to-right. 877 */ isLeftToRight()878 public boolean isLeftToRight() 879 { 880 return resultFlags == LTOR; 881 } 882 883 /** 884 * Return true if the text consists of mixed left-to-right and 885 * right-to-left runs, or if the text consists of one kind of run 886 * which differs from the base embedding direction. 887 */ isMixed()888 public boolean isMixed() 889 { 890 return resultFlags == (LTOR | RTOL); 891 } 892 893 /** 894 * Return true if the text is entirely right-to-left, and the 895 * base embedding is also right-to-left. 896 */ isRightToLeft()897 public boolean isRightToLeft() 898 { 899 return resultFlags == RTOL; 900 } 901 902 /** 903 * Return a String describing the internal state of this object. 904 * This is only useful for debugging. 905 */ toString()906 public String toString() 907 { 908 return "Bidi Bidi Bidi I like you, Buck!"; 909 } 910 911 /** 912 * Reorder objects according to the levels passed in. This implements 913 * reordering as defined by the Unicode bidirectional layout specification. 914 * The levels are integers from 0 to 62; even numbers represent left-to-right 915 * runs, and odd numbers represent right-to-left runs. 916 * 917 * @param levels the levels associated with each object 918 * @param levelOffset the index of the first level to use 919 * @param objs the objects to reorder according to the levels 920 * @param objOffset the index of the first object to use 921 * @param count the number of objects (and levels) to manipulate 922 */ reorderVisually(byte[] levels, int levelOffset, Object[] objs, int objOffset, int count)923 public static void reorderVisually(byte[] levels, int levelOffset, 924 Object[] objs, int objOffset, int count) 925 { 926 // We need a copy of the 'levels' array, as we are going to modify it. 927 // This is unfortunate but difficult to avoid. 928 byte[] levelCopy = new byte[count]; 929 // Do this explicitly so we can also find the maximum depth at the 930 // same time. 931 int max = 0; 932 int lowestOdd = 63; 933 for (int i = 0; i < count; ++i) 934 { 935 levelCopy[i] = levels[levelOffset + i]; 936 max = Math.max(levelCopy[i], max); 937 if (levelCopy[i] % 2 != 0) 938 lowestOdd = Math.min(lowestOdd, levelCopy[i]); 939 } 940 941 // Reverse the runs starting with the deepest. 942 for (int depth = max; depth >= lowestOdd; --depth) 943 { 944 int start = 0; 945 while (start < count) 946 { 947 // Find the start of a run >= DEPTH. 948 while (start < count && levelCopy[start] < depth) 949 ++start; 950 if (start == count) 951 break; 952 // Find the end of the run. 953 int end = start + 1; 954 while (end < count && levelCopy[end] >= depth) 955 ++end; 956 957 // Reverse this run. 958 for (int i = 0; i < (end - start) / 2; ++i) 959 { 960 byte tmpb = levelCopy[end - i - 1]; 961 levelCopy[end - i - 1] = levelCopy[start + i]; 962 levelCopy[start + i] = tmpb; 963 Object tmpo = objs[objOffset + end - i - 1]; 964 objs[objOffset + end - i - 1] = objs[objOffset + start + i]; 965 objs[objOffset + start + i] = tmpo; 966 } 967 968 // Handle the next run. 969 start = end + 1; 970 } 971 } 972 } 973 974 /** 975 * Returns false if all characters in the text between start and end 976 * are all left-to-right text. This implementation is just calls 977 * <code>Character.getDirectionality(char)</code> on all characters 978 * and makes sure all characters are either explicitly left-to-right 979 * or neutral in directionality (character types L, EN, ES, ET, AN, 980 * CS, S and WS). 981 */ requiresBidi(char[] text, int start, int end)982 public static boolean requiresBidi(char[] text, int start, int end) 983 { 984 for (int i = start; i < end; i++) 985 { 986 byte dir = Character.getDirectionality(text[i]); 987 if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT 988 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER 989 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 990 && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 991 && dir != Character.DIRECTIONALITY_ARABIC_NUMBER 992 && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 993 && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR 994 && dir != Character.DIRECTIONALITY_WHITESPACE 995 && dir != Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR) 996 return true; 997 } 998 999 return false; 1000 } 1001 } 1002