1 /* 2 * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved 28 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved 29 * 30 * The original version of this source code and documentation is copyrighted 31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These 32 * materials are provided under terms of a License Agreement between Taligent 33 * and Sun. This technology is protected by multiple US and International 34 * patents. This notice and attribution to Taligent may not be removed. 35 * Taligent is a registered trademark of Taligent, Inc. 36 * 37 */ 38 39 package java.text; 40 41 import java.lang.Character; 42 import java.util.Vector; 43 import sun.text.CollatorUtilities; 44 import jdk.internal.icu.text.NormalizerBase; 45 46 /** 47 * The {@code CollationElementIterator} class is used as an iterator 48 * to walk through each character of an international string. Use the iterator 49 * to return the ordering priority of the positioned character. The ordering 50 * priority of a character, which we refer to as a key, defines how a character 51 * is collated in the given collation object. 52 * 53 * <p> 54 * For example, consider the following in Spanish: 55 * <blockquote> 56 * <pre> 57 * "ca" → the first key is key('c') and second key is key('a'). 58 * "cha" → the first key is key('ch') and second key is key('a'). 59 * </pre> 60 * </blockquote> 61 * And in German, 62 * <blockquote> 63 * <pre> 64 * "\u00e4b" → the first key is key('a'), the second key is key('e'), and 65 * the third key is key('b'). 66 * </pre> 67 * </blockquote> 68 * The key of a character is an integer composed of primary order(short), 69 * secondary order(byte), and tertiary order(byte). Java strictly defines 70 * the size and signedness of its primitive data types. Therefore, the static 71 * functions {@code primaryOrder}, {@code secondaryOrder}, and 72 * {@code tertiaryOrder} return {@code int}, {@code short}, 73 * and {@code short} respectively to ensure the correctness of the key 74 * value. 75 * 76 * <p> 77 * Example of the iterator usage, 78 * <blockquote> 79 * <pre> 80 * 81 * String testString = "This is a test"; 82 * Collator col = Collator.getInstance(); 83 * if (col instanceof RuleBasedCollator) { 84 * RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col; 85 * CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString); 86 * int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next()); 87 * : 88 * } 89 * </pre> 90 * </blockquote> 91 * 92 * <p> 93 * {@code CollationElementIterator.next} returns the collation order 94 * of the next character. A collation order consists of primary order, 95 * secondary order and tertiary order. The data type of the collation 96 * order is <strong>int</strong>. The first 16 bits of a collation order 97 * is its primary order; the next 8 bits is the secondary order and the 98 * last 8 bits is the tertiary order. 99 * 100 * <p><b>Note:</b> {@code CollationElementIterator} is a part of 101 * {@code RuleBasedCollator} implementation. It is only usable 102 * with {@code RuleBasedCollator} instances. 103 * 104 * @see Collator 105 * @see RuleBasedCollator 106 * @author Helena Shih, Laura Werner, Richard Gillam 107 * @since 1.1 108 */ 109 public final class CollationElementIterator 110 { 111 /** 112 * Null order which indicates the end of string is reached by the 113 * cursor. 114 */ 115 public static final int NULLORDER = 0xffffffff; 116 117 /** 118 * CollationElementIterator constructor. This takes the source string and 119 * the collation object. The cursor will walk thru the source string based 120 * on the predefined collation rules. If the source string is empty, 121 * NULLORDER will be returned on the calls to next(). 122 * @param sourceText the source string. 123 * @param owner the collation object. 124 */ CollationElementIterator(String sourceText, RuleBasedCollator owner)125 CollationElementIterator(String sourceText, RuleBasedCollator owner) { 126 this.owner = owner; 127 ordering = owner.getTables(); 128 if (!sourceText.isEmpty()) { 129 NormalizerBase.Mode mode = 130 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 131 text = new NormalizerBase(sourceText, mode); 132 } 133 } 134 135 /** 136 * CollationElementIterator constructor. This takes the source string and 137 * the collation object. The cursor will walk thru the source string based 138 * on the predefined collation rules. If the source string is empty, 139 * NULLORDER will be returned on the calls to next(). 140 * @param sourceText the source string. 141 * @param owner the collation object. 142 */ CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner)143 CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) { 144 this.owner = owner; 145 ordering = owner.getTables(); 146 NormalizerBase.Mode mode = 147 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 148 text = new NormalizerBase(sourceText, mode); 149 } 150 151 /** 152 * Resets the cursor to the beginning of the string. The next call 153 * to next() will return the first collation element in the string. 154 */ reset()155 public void reset() 156 { 157 if (text != null) { 158 text.reset(); 159 NormalizerBase.Mode mode = 160 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 161 text.setMode(mode); 162 } 163 buffer = null; 164 expIndex = 0; 165 swapOrder = 0; 166 } 167 168 /** 169 * Get the next collation element in the string. <p>This iterator iterates 170 * over a sequence of collation elements that were built from the string. 171 * Because there isn't necessarily a one-to-one mapping from characters to 172 * collation elements, this doesn't mean the same thing as "return the 173 * collation element [or ordering priority] of the next character in the 174 * string".</p> 175 * <p>This function returns the collation element that the iterator is currently 176 * pointing to and then updates the internal pointer to point to the next element. 177 * previous() updates the pointer first and then returns the element. This 178 * means that when you change direction while iterating (i.e., call next() and 179 * then call previous(), or call previous() and then call next()), you'll get 180 * back the same element twice.</p> 181 * 182 * @return the next collation element 183 */ next()184 public int next() 185 { 186 if (text == null) { 187 return NULLORDER; 188 } 189 NormalizerBase.Mode textMode = text.getMode(); 190 // convert the owner's mode to something the Normalizer understands 191 NormalizerBase.Mode ownerMode = 192 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 193 if (textMode != ownerMode) { 194 text.setMode(ownerMode); 195 } 196 197 // if buffer contains any decomposed char values 198 // return their strength orders before continuing in 199 // the Normalizer's CharacterIterator. 200 if (buffer != null) { 201 if (expIndex < buffer.length) { 202 return strengthOrder(buffer[expIndex++]); 203 } else { 204 buffer = null; 205 expIndex = 0; 206 } 207 } else if (swapOrder != 0) { 208 if (Character.isSupplementaryCodePoint(swapOrder)) { 209 char[] chars = Character.toChars(swapOrder); 210 swapOrder = chars[1]; 211 return chars[0] << 16; 212 } 213 int order = swapOrder << 16; 214 swapOrder = 0; 215 return order; 216 } 217 int ch = text.next(); 218 219 // are we at the end of Normalizer's text? 220 if (ch == NormalizerBase.DONE) { 221 return NULLORDER; 222 } 223 224 int value = ordering.getUnicodeOrder(ch); 225 if (value == RuleBasedCollator.UNMAPPED) { 226 swapOrder = ch; 227 return UNMAPPEDCHARVALUE; 228 } 229 else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) { 230 value = nextContractChar(ch); 231 } 232 if (value >= RuleBasedCollator.EXPANDCHARINDEX) { 233 buffer = ordering.getExpandValueList(value); 234 expIndex = 0; 235 value = buffer[expIndex++]; 236 } 237 238 if (ordering.isSEAsianSwapping()) { 239 int consonant; 240 if (isThaiPreVowel(ch)) { 241 consonant = text.next(); 242 if (isThaiBaseConsonant(consonant)) { 243 buffer = makeReorderedBuffer(consonant, value, buffer, true); 244 value = buffer[0]; 245 expIndex = 1; 246 } else if (consonant != NormalizerBase.DONE) { 247 text.previous(); 248 } 249 } 250 if (isLaoPreVowel(ch)) { 251 consonant = text.next(); 252 if (isLaoBaseConsonant(consonant)) { 253 buffer = makeReorderedBuffer(consonant, value, buffer, true); 254 value = buffer[0]; 255 expIndex = 1; 256 } else if (consonant != NormalizerBase.DONE) { 257 text.previous(); 258 } 259 } 260 } 261 262 return strengthOrder(value); 263 } 264 265 /** 266 * Get the previous collation element in the string. <p>This iterator iterates 267 * over a sequence of collation elements that were built from the string. 268 * Because there isn't necessarily a one-to-one mapping from characters to 269 * collation elements, this doesn't mean the same thing as "return the 270 * collation element [or ordering priority] of the previous character in the 271 * string".</p> 272 * <p>This function updates the iterator's internal pointer to point to the 273 * collation element preceding the one it's currently pointing to and then 274 * returns that element, while next() returns the current element and then 275 * updates the pointer. This means that when you change direction while 276 * iterating (i.e., call next() and then call previous(), or call previous() 277 * and then call next()), you'll get back the same element twice.</p> 278 * 279 * @return the previous collation element 280 * @since 1.2 281 */ previous()282 public int previous() 283 { 284 if (text == null) { 285 return NULLORDER; 286 } 287 NormalizerBase.Mode textMode = text.getMode(); 288 // convert the owner's mode to something the Normalizer understands 289 NormalizerBase.Mode ownerMode = 290 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 291 if (textMode != ownerMode) { 292 text.setMode(ownerMode); 293 } 294 if (buffer != null) { 295 if (expIndex > 0) { 296 return strengthOrder(buffer[--expIndex]); 297 } else { 298 buffer = null; 299 expIndex = 0; 300 } 301 } else if (swapOrder != 0) { 302 if (Character.isSupplementaryCodePoint(swapOrder)) { 303 char[] chars = Character.toChars(swapOrder); 304 swapOrder = chars[1]; 305 return chars[0] << 16; 306 } 307 int order = swapOrder << 16; 308 swapOrder = 0; 309 return order; 310 } 311 int ch = text.previous(); 312 if (ch == NormalizerBase.DONE) { 313 return NULLORDER; 314 } 315 316 int value = ordering.getUnicodeOrder(ch); 317 318 if (value == RuleBasedCollator.UNMAPPED) { 319 swapOrder = UNMAPPEDCHARVALUE; 320 return ch; 321 } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) { 322 value = prevContractChar(ch); 323 } 324 if (value >= RuleBasedCollator.EXPANDCHARINDEX) { 325 buffer = ordering.getExpandValueList(value); 326 expIndex = buffer.length; 327 value = buffer[--expIndex]; 328 } 329 330 if (ordering.isSEAsianSwapping()) { 331 int vowel; 332 if (isThaiBaseConsonant(ch)) { 333 vowel = text.previous(); 334 if (isThaiPreVowel(vowel)) { 335 buffer = makeReorderedBuffer(vowel, value, buffer, false); 336 expIndex = buffer.length - 1; 337 value = buffer[expIndex]; 338 } else { 339 text.next(); 340 } 341 } 342 if (isLaoBaseConsonant(ch)) { 343 vowel = text.previous(); 344 if (isLaoPreVowel(vowel)) { 345 buffer = makeReorderedBuffer(vowel, value, buffer, false); 346 expIndex = buffer.length - 1; 347 value = buffer[expIndex]; 348 } else { 349 text.next(); 350 } 351 } 352 } 353 354 return strengthOrder(value); 355 } 356 357 /** 358 * Return the primary component of a collation element. 359 * @param order the collation element 360 * @return the element's primary component 361 */ primaryOrder(int order)362 public static final int primaryOrder(int order) 363 { 364 order &= RBCollationTables.PRIMARYORDERMASK; 365 return (order >>> RBCollationTables.PRIMARYORDERSHIFT); 366 } 367 /** 368 * Return the secondary component of a collation element. 369 * @param order the collation element 370 * @return the element's secondary component 371 */ secondaryOrder(int order)372 public static final short secondaryOrder(int order) 373 { 374 order = order & RBCollationTables.SECONDARYORDERMASK; 375 return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT)); 376 } 377 /** 378 * Return the tertiary component of a collation element. 379 * @param order the collation element 380 * @return the element's tertiary component 381 */ tertiaryOrder(int order)382 public static final short tertiaryOrder(int order) 383 { 384 return ((short)(order &= RBCollationTables.TERTIARYORDERMASK)); 385 } 386 387 /** 388 * Get the comparison order in the desired strength. Ignore the other 389 * differences. 390 * @param order The order value 391 */ strengthOrder(int order)392 final int strengthOrder(int order) 393 { 394 int s = owner.getStrength(); 395 if (s == Collator.PRIMARY) 396 { 397 order &= RBCollationTables.PRIMARYDIFFERENCEONLY; 398 } else if (s == Collator.SECONDARY) 399 { 400 order &= RBCollationTables.SECONDARYDIFFERENCEONLY; 401 } 402 return order; 403 } 404 405 /** 406 * Sets the iterator to point to the collation element corresponding to 407 * the specified character (the parameter is a CHARACTER offset in the 408 * original string, not an offset into its corresponding sequence of 409 * collation elements). The value returned by the next call to next() 410 * will be the collation element corresponding to the specified position 411 * in the text. If that position is in the middle of a contracting 412 * character sequence, the result of the next call to next() is the 413 * collation element for that sequence. This means that getOffset() 414 * is not guaranteed to return the same value as was passed to a preceding 415 * call to setOffset(). 416 * 417 * @param newOffset The new character offset into the original text. 418 * @since 1.2 419 */ 420 @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated setOffset(int newOffset)421 public void setOffset(int newOffset) 422 { 423 if (text != null) { 424 if (newOffset < text.getBeginIndex() 425 || newOffset >= text.getEndIndex()) { 426 text.setIndexOnly(newOffset); 427 } else { 428 int c = text.setIndex(newOffset); 429 430 // if the desired character isn't used in a contracting character 431 // sequence, bypass all the backing-up logic-- we're sitting on 432 // the right character already 433 if (ordering.usedInContractSeq(c)) { 434 // walk backwards through the string until we see a character 435 // that DOESN'T participate in a contracting character sequence 436 while (ordering.usedInContractSeq(c)) { 437 c = text.previous(); 438 } 439 // now walk forward using this object's next() method until 440 // we pass the starting point and set our current position 441 // to the beginning of the last "character" before or at 442 // our starting position 443 int last = text.getIndex(); 444 while (text.getIndex() <= newOffset) { 445 last = text.getIndex(); 446 next(); 447 } 448 text.setIndexOnly(last); 449 // we don't need this, since last is the last index 450 // that is the starting of the contraction which encompass 451 // newOffset 452 // text.previous(); 453 } 454 } 455 } 456 buffer = null; 457 expIndex = 0; 458 swapOrder = 0; 459 } 460 461 /** 462 * Returns the character offset in the original text corresponding to the next 463 * collation element. (That is, getOffset() returns the position in the text 464 * corresponding to the collation element that will be returned by the next 465 * call to next().) This value will always be the index of the FIRST character 466 * corresponding to the collation element (a contracting character sequence is 467 * when two or more characters all correspond to the same collation element). 468 * This means if you do setOffset(x) followed immediately by getOffset(), getOffset() 469 * won't necessarily return x. 470 * 471 * @return The character offset in the original text corresponding to the collation 472 * element that will be returned by the next call to next(). 473 * @since 1.2 474 */ getOffset()475 public int getOffset() 476 { 477 return (text != null) ? text.getIndex() : 0; 478 } 479 480 481 /** 482 * Return the maximum length of any expansion sequences that end 483 * with the specified comparison order. 484 * @param order a collation order returned by previous or next. 485 * @return the maximum length of any expansion sequences ending 486 * with the specified order. 487 * @since 1.2 488 */ getMaxExpansion(int order)489 public int getMaxExpansion(int order) 490 { 491 return ordering.getMaxExpansion(order); 492 } 493 494 /** 495 * Set a new string over which to iterate. 496 * 497 * @param source the new source text 498 * @since 1.2 499 */ setText(String source)500 public void setText(String source) 501 { 502 buffer = null; 503 swapOrder = 0; 504 expIndex = 0; 505 NormalizerBase.Mode mode = 506 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 507 if (text == null) { 508 text = new NormalizerBase(source, mode); 509 } else { 510 text.setMode(mode); 511 text.setText(source); 512 } 513 } 514 515 /** 516 * Set a new string over which to iterate. 517 * 518 * @param source the new source text. 519 * @since 1.2 520 */ setText(CharacterIterator source)521 public void setText(CharacterIterator source) 522 { 523 buffer = null; 524 swapOrder = 0; 525 expIndex = 0; 526 NormalizerBase.Mode mode = 527 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 528 if (text == null) { 529 text = new NormalizerBase(source, mode); 530 } else { 531 text.setMode(mode); 532 text.setText(source); 533 } 534 } 535 536 //============================================================ 537 // privates 538 //============================================================ 539 540 /** 541 * Determine if a character is a Thai vowel (which sorts after 542 * its base consonant). 543 */ isThaiPreVowel(int ch)544 private static final boolean isThaiPreVowel(int ch) { 545 return (ch >= 0x0e40) && (ch <= 0x0e44); 546 } 547 548 /** 549 * Determine if a character is a Thai base consonant 550 */ isThaiBaseConsonant(int ch)551 private static final boolean isThaiBaseConsonant(int ch) { 552 return (ch >= 0x0e01) && (ch <= 0x0e2e); 553 } 554 555 /** 556 * Determine if a character is a Lao vowel (which sorts after 557 * its base consonant). 558 */ isLaoPreVowel(int ch)559 private static final boolean isLaoPreVowel(int ch) { 560 return (ch >= 0x0ec0) && (ch <= 0x0ec4); 561 } 562 563 /** 564 * Determine if a character is a Lao base consonant 565 */ isLaoBaseConsonant(int ch)566 private static final boolean isLaoBaseConsonant(int ch) { 567 return (ch >= 0x0e81) && (ch <= 0x0eae); 568 } 569 570 /** 571 * This method produces a buffer which contains the collation 572 * elements for the two characters, with colFirst's values preceding 573 * another character's. Presumably, the other character precedes colFirst 574 * in logical order (otherwise you wouldn't need this method would you?). 575 * The assumption is that the other char's value(s) have already been 576 * computed. If this char has a single element it is passed to this 577 * method as lastValue, and lastExpansion is null. If it has an 578 * expansion it is passed in lastExpansion, and colLastValue is ignored. 579 */ makeReorderedBuffer(int colFirst, int lastValue, int[] lastExpansion, boolean forward)580 private int[] makeReorderedBuffer(int colFirst, 581 int lastValue, 582 int[] lastExpansion, 583 boolean forward) { 584 585 int[] result; 586 587 int firstValue = ordering.getUnicodeOrder(colFirst); 588 if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) { 589 firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst); 590 } 591 592 int[] firstExpansion = null; 593 if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) { 594 firstExpansion = ordering.getExpandValueList(firstValue); 595 } 596 597 if (!forward) { 598 int temp1 = firstValue; 599 firstValue = lastValue; 600 lastValue = temp1; 601 int[] temp2 = firstExpansion; 602 firstExpansion = lastExpansion; 603 lastExpansion = temp2; 604 } 605 606 if (firstExpansion == null && lastExpansion == null) { 607 result = new int [2]; 608 result[0] = firstValue; 609 result[1] = lastValue; 610 } 611 else { 612 int firstLength = firstExpansion==null? 1 : firstExpansion.length; 613 int lastLength = lastExpansion==null? 1 : lastExpansion.length; 614 result = new int[firstLength + lastLength]; 615 616 if (firstExpansion == null) { 617 result[0] = firstValue; 618 } 619 else { 620 System.arraycopy(firstExpansion, 0, result, 0, firstLength); 621 } 622 623 if (lastExpansion == null) { 624 result[firstLength] = lastValue; 625 } 626 else { 627 System.arraycopy(lastExpansion, 0, result, firstLength, lastLength); 628 } 629 } 630 631 return result; 632 } 633 634 /** 635 * Check if a comparison order is ignorable. 636 * @return true if a character is ignorable, false otherwise. 637 */ isIgnorable(int order)638 static final boolean isIgnorable(int order) 639 { 640 return ((primaryOrder(order) == 0) ? true : false); 641 } 642 643 /** 644 * Get the ordering priority of the next contracting character in the 645 * string. 646 * @param ch the starting character of a contracting character token 647 * @return the next contracting character's ordering. Returns NULLORDER 648 * if the end of string is reached. 649 */ nextContractChar(int ch)650 private int nextContractChar(int ch) 651 { 652 // First get the ordering of this single character, 653 // which is always the first element in the list 654 Vector<EntryPair> list = ordering.getContractValues(ch); 655 EntryPair pair = list.firstElement(); 656 int order = pair.value; 657 658 // find out the length of the longest contracting character sequence in the list. 659 // There's logic in the builder code to make sure the longest sequence is always 660 // the last. 661 pair = list.lastElement(); 662 int maxLength = pair.entryName.length(); 663 664 // (the Normalizer is cloned here so that the seeking we do in the next loop 665 // won't affect our real position in the text) 666 NormalizerBase tempText = (NormalizerBase)text.clone(); 667 668 // extract the next maxLength characters in the string (we have to do this using the 669 // Normalizer to ensure that our offsets correspond to those the rest of the 670 // iterator is using) and store it in "fragment". 671 tempText.previous(); 672 key.setLength(0); 673 int c = tempText.next(); 674 while (maxLength > 0 && c != NormalizerBase.DONE) { 675 if (Character.isSupplementaryCodePoint(c)) { 676 key.append(Character.toChars(c)); 677 maxLength -= 2; 678 } else { 679 key.append((char)c); 680 --maxLength; 681 } 682 c = tempText.next(); 683 } 684 String fragment = key.toString(); 685 // now that we have that fragment, iterate through this list looking for the 686 // longest sequence that matches the characters in the actual text. (maxLength 687 // is used here to keep track of the length of the longest sequence) 688 // Upon exit from this loop, maxLength will contain the length of the matching 689 // sequence and order will contain the collation-element value corresponding 690 // to this sequence 691 maxLength = 1; 692 for (int i = list.size() - 1; i > 0; i--) { 693 pair = list.elementAt(i); 694 if (!pair.fwd) 695 continue; 696 697 if (fragment.startsWith(pair.entryName) && pair.entryName.length() 698 > maxLength) { 699 maxLength = pair.entryName.length(); 700 order = pair.value; 701 } 702 } 703 704 // seek our current iteration position to the end of the matching sequence 705 // and return the appropriate collation-element value (if there was no matching 706 // sequence, we're already seeked to the right position and order already contains 707 // the correct collation-element value for the single character) 708 while (maxLength > 1) { 709 c = text.next(); 710 maxLength -= Character.charCount(c); 711 } 712 return order; 713 } 714 715 /** 716 * Get the ordering priority of the previous contracting character in the 717 * string. 718 * @param ch the starting character of a contracting character token 719 * @return the next contracting character's ordering. Returns NULLORDER 720 * if the end of string is reached. 721 */ prevContractChar(int ch)722 private int prevContractChar(int ch) 723 { 724 // This function is identical to nextContractChar(), except that we've 725 // switched things so that the next() and previous() calls on the Normalizer 726 // are switched and so that we skip entry pairs with the fwd flag turned on 727 // rather than off. Notice that we still use append() and startsWith() when 728 // working on the fragment. This is because the entry pairs that are used 729 // in reverse iteration have their names reversed already. 730 Vector<EntryPair> list = ordering.getContractValues(ch); 731 EntryPair pair = list.firstElement(); 732 int order = pair.value; 733 734 pair = list.lastElement(); 735 int maxLength = pair.entryName.length(); 736 737 NormalizerBase tempText = (NormalizerBase)text.clone(); 738 739 tempText.next(); 740 key.setLength(0); 741 int c = tempText.previous(); 742 while (maxLength > 0 && c != NormalizerBase.DONE) { 743 if (Character.isSupplementaryCodePoint(c)) { 744 key.append(Character.toChars(c)); 745 maxLength -= 2; 746 } else { 747 key.append((char)c); 748 --maxLength; 749 } 750 c = tempText.previous(); 751 } 752 String fragment = key.toString(); 753 754 maxLength = 1; 755 for (int i = list.size() - 1; i > 0; i--) { 756 pair = list.elementAt(i); 757 if (pair.fwd) 758 continue; 759 760 if (fragment.startsWith(pair.entryName) && pair.entryName.length() 761 > maxLength) { 762 maxLength = pair.entryName.length(); 763 order = pair.value; 764 } 765 } 766 767 while (maxLength > 1) { 768 c = text.previous(); 769 maxLength -= Character.charCount(c); 770 } 771 return order; 772 } 773 774 static final int UNMAPPEDCHARVALUE = 0x7FFF0000; 775 776 private NormalizerBase text = null; 777 private int[] buffer = null; 778 private int expIndex = 0; 779 private StringBuffer key = new StringBuffer(5); 780 private int swapOrder = 0; 781 private RBCollationTables ordering; 782 private RuleBasedCollator owner; 783 } 784