1 /*
2  * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 /*
27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29  *
30  *   The original version of this source code and documentation is copyrighted
31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32  * materials are provided under terms of a License Agreement between Taligent
33  * and Sun. This technology is protected by multiple US and International
34  * patents. This notice and attribution to Taligent may not be removed.
35  *   Taligent is a registered trademark of Taligent, Inc.
36  *
37  */
38 
39 package java.text;
40 
41 import java.lang.Character;
42 import java.util.Vector;
43 import sun.text.CollatorUtilities;
44 import sun.text.normalizer.NormalizerBase;
45 
46 /**
47  * The <code>CollationElementIterator</code> class is used as an iterator
48  * to walk through each character of an international string. Use the iterator
49  * to return the ordering priority of the positioned character. The ordering
50  * priority of a character, which we refer to as a key, defines how a character
51  * is collated in the given collation object.
52  *
53  * <p>
54  * For example, consider the following in Spanish:
55  * <blockquote>
56  * <pre>
57  * "ca" &rarr; the first key is key('c') and second key is key('a').
58  * "cha" &rarr; the first key is key('ch') and second key is key('a').
59  * </pre>
60  * </blockquote>
61  * And in German,
62  * <blockquote>
63  * <pre>
64  * "\u00e4b" &rarr; the first key is key('a'), the second key is key('e'), and
65  * the third key is key('b').
66  * </pre>
67  * </blockquote>
68  * The key of a character is an integer composed of primary order(short),
69  * secondary order(byte), and tertiary order(byte). Java strictly defines
70  * the size and signedness of its primitive data types. Therefore, the static
71  * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
72  * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
73  * and <code>short</code> respectively to ensure the correctness of the key
74  * value.
75  *
76  * <p>
77  * Example of the iterator usage,
78  * <blockquote>
79  * <pre>
80  *
81  *  String testString = "This is a test";
82  *  Collator col = Collator.getInstance();
83  *  if (col instanceof RuleBasedCollator) {
84  *      RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
85  *      CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
86  *      int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
87  *          :
88  *  }
89  * </pre>
90  * </blockquote>
91  *
92  * <p>
93  * <code>CollationElementIterator.next</code> returns the collation order
94  * of the next character. A collation order consists of primary order,
95  * secondary order and tertiary order. The data type of the collation
96  * order is <strong>int</strong>. The first 16 bits of a collation order
97  * is its primary order; the next 8 bits is the secondary order and the
98  * last 8 bits is the tertiary order.
99  *
100  * <p><b>Note:</b> <code>CollationElementIterator</code> is a part of
101  * <code>RuleBasedCollator</code> implementation. It is only usable
102  * with <code>RuleBasedCollator</code> instances.
103  *
104  * @see                Collator
105  * @see                RuleBasedCollator
106  * @author             Helena Shih, Laura Werner, Richard Gillam
107  */
108 public final class CollationElementIterator
109 {
110     /**
111      * Null order which indicates the end of string is reached by the
112      * cursor.
113      */
114     public final static int NULLORDER = 0xffffffff;
115 
116     /**
117      * CollationElementIterator constructor.  This takes the source string and
118      * the collation object.  The cursor will walk thru the source string based
119      * on the predefined collation rules.  If the source string is empty,
120      * NULLORDER will be returned on the calls to next().
121      * @param sourceText the source string.
122      * @param owner the collation object.
123      */
CollationElementIterator(String sourceText, RuleBasedCollator owner)124     CollationElementIterator(String sourceText, RuleBasedCollator owner) {
125         this.owner = owner;
126         ordering = owner.getTables();
127         if ( sourceText.length() != 0 ) {
128             NormalizerBase.Mode mode =
129                 CollatorUtilities.toNormalizerMode(owner.getDecomposition());
130             text = new NormalizerBase(sourceText, mode);
131         }
132     }
133 
134     /**
135      * CollationElementIterator constructor.  This takes the source string and
136      * the collation object.  The cursor will walk thru the source string based
137      * on the predefined collation rules.  If the source string is empty,
138      * NULLORDER will be returned on the calls to next().
139      * @param sourceText the source string.
140      * @param owner the collation object.
141      */
CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner)142     CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) {
143         this.owner = owner;
144         ordering = owner.getTables();
145         NormalizerBase.Mode mode =
146             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
147         text = new NormalizerBase(sourceText, mode);
148     }
149 
150     /**
151      * Resets the cursor to the beginning of the string.  The next call
152      * to next() will return the first collation element in the string.
153      */
reset()154     public void reset()
155     {
156         if (text != null) {
157             text.reset();
158             NormalizerBase.Mode mode =
159                 CollatorUtilities.toNormalizerMode(owner.getDecomposition());
160             text.setMode(mode);
161         }
162         buffer = null;
163         expIndex = 0;
164         swapOrder = 0;
165     }
166 
167     /**
168      * Get the next collation element in the string.  <p>This iterator iterates
169      * over a sequence of collation elements that were built from the string.
170      * Because there isn't necessarily a one-to-one mapping from characters to
171      * collation elements, this doesn't mean the same thing as "return the
172      * collation element [or ordering priority] of the next character in the
173      * string".</p>
174      * <p>This function returns the collation element that the iterator is currently
175      * pointing to and then updates the internal pointer to point to the next element.
176      * previous() updates the pointer first and then returns the element.  This
177      * means that when you change direction while iterating (i.e., call next() and
178      * then call previous(), or call previous() and then call next()), you'll get
179      * back the same element twice.</p>
180      *
181      * @return the next collation element
182      */
next()183     public int next()
184     {
185         if (text == null) {
186             return NULLORDER;
187         }
188         NormalizerBase.Mode textMode = text.getMode();
189         // convert the owner's mode to something the Normalizer understands
190         NormalizerBase.Mode ownerMode =
191             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
192         if (textMode != ownerMode) {
193             text.setMode(ownerMode);
194         }
195 
196         // if buffer contains any decomposed char values
197         // return their strength orders before continuing in
198         // the Normalizer's CharacterIterator.
199         if (buffer != null) {
200             if (expIndex < buffer.length) {
201                 return strengthOrder(buffer[expIndex++]);
202             } else {
203                 buffer = null;
204                 expIndex = 0;
205             }
206         } else if (swapOrder != 0) {
207             if (Character.isSupplementaryCodePoint(swapOrder)) {
208                 char[] chars = Character.toChars(swapOrder);
209                 swapOrder = chars[1];
210                 return chars[0] << 16;
211             }
212             int order = swapOrder << 16;
213             swapOrder = 0;
214             return order;
215         }
216         int ch  = text.next();
217 
218         // are we at the end of Normalizer's text?
219         if (ch == NormalizerBase.DONE) {
220             return NULLORDER;
221         }
222 
223         int value = ordering.getUnicodeOrder(ch);
224         if (value == RuleBasedCollator.UNMAPPED) {
225             swapOrder = ch;
226             return UNMAPPEDCHARVALUE;
227         }
228         else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
229             value = nextContractChar(ch);
230         }
231         if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
232             buffer = ordering.getExpandValueList(value);
233             expIndex = 0;
234             value = buffer[expIndex++];
235         }
236 
237         if (ordering.isSEAsianSwapping()) {
238             int consonant;
239             if (isThaiPreVowel(ch)) {
240                 consonant = text.next();
241                 if (isThaiBaseConsonant(consonant)) {
242                     buffer = makeReorderedBuffer(consonant, value, buffer, true);
243                     value = buffer[0];
244                     expIndex = 1;
245                 } else if (consonant != NormalizerBase.DONE) {
246                     text.previous();
247                 }
248             }
249             if (isLaoPreVowel(ch)) {
250                 consonant = text.next();
251                 if (isLaoBaseConsonant(consonant)) {
252                     buffer = makeReorderedBuffer(consonant, value, buffer, true);
253                     value = buffer[0];
254                     expIndex = 1;
255                 } else if (consonant != NormalizerBase.DONE) {
256                     text.previous();
257                 }
258             }
259         }
260 
261         return strengthOrder(value);
262     }
263 
264     /**
265      * Get the previous collation element in the string.  <p>This iterator iterates
266      * over a sequence of collation elements that were built from the string.
267      * Because there isn't necessarily a one-to-one mapping from characters to
268      * collation elements, this doesn't mean the same thing as "return the
269      * collation element [or ordering priority] of the previous character in the
270      * string".</p>
271      * <p>This function updates the iterator's internal pointer to point to the
272      * collation element preceding the one it's currently pointing to and then
273      * returns that element, while next() returns the current element and then
274      * updates the pointer.  This means that when you change direction while
275      * iterating (i.e., call next() and then call previous(), or call previous()
276      * and then call next()), you'll get back the same element twice.</p>
277      *
278      * @return the previous collation element
279      * @since 1.2
280      */
previous()281     public int previous()
282     {
283         if (text == null) {
284             return NULLORDER;
285         }
286         NormalizerBase.Mode textMode = text.getMode();
287         // convert the owner's mode to something the Normalizer understands
288         NormalizerBase.Mode ownerMode =
289             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
290         if (textMode != ownerMode) {
291             text.setMode(ownerMode);
292         }
293         if (buffer != null) {
294             if (expIndex > 0) {
295                 return strengthOrder(buffer[--expIndex]);
296             } else {
297                 buffer = null;
298                 expIndex = 0;
299             }
300         } else if (swapOrder != 0) {
301             if (Character.isSupplementaryCodePoint(swapOrder)) {
302                 char[] chars = Character.toChars(swapOrder);
303                 swapOrder = chars[1];
304                 return chars[0] << 16;
305             }
306             int order = swapOrder << 16;
307             swapOrder = 0;
308             return order;
309         }
310         int ch = text.previous();
311         if (ch == NormalizerBase.DONE) {
312             return NULLORDER;
313         }
314 
315         int value = ordering.getUnicodeOrder(ch);
316 
317         if (value == RuleBasedCollator.UNMAPPED) {
318             swapOrder = UNMAPPEDCHARVALUE;
319             return ch;
320         } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) {
321             value = prevContractChar(ch);
322         }
323         if (value >= RuleBasedCollator.EXPANDCHARINDEX) {
324             buffer = ordering.getExpandValueList(value);
325             expIndex = buffer.length;
326             value = buffer[--expIndex];
327         }
328 
329         if (ordering.isSEAsianSwapping()) {
330             int vowel;
331             if (isThaiBaseConsonant(ch)) {
332                 vowel = text.previous();
333                 if (isThaiPreVowel(vowel)) {
334                     buffer = makeReorderedBuffer(vowel, value, buffer, false);
335                     expIndex = buffer.length - 1;
336                     value = buffer[expIndex];
337                 } else {
338                     text.next();
339                 }
340             }
341             if (isLaoBaseConsonant(ch)) {
342                 vowel = text.previous();
343                 if (isLaoPreVowel(vowel)) {
344                     buffer = makeReorderedBuffer(vowel, value, buffer, false);
345                     expIndex = buffer.length - 1;
346                     value = buffer[expIndex];
347                 } else {
348                     text.next();
349                 }
350             }
351         }
352 
353         return strengthOrder(value);
354     }
355 
356     /**
357      * Return the primary component of a collation element.
358      * @param order the collation element
359      * @return the element's primary component
360      */
primaryOrder(int order)361     public final static int primaryOrder(int order)
362     {
363         order &= RBCollationTables.PRIMARYORDERMASK;
364         return (order >>> RBCollationTables.PRIMARYORDERSHIFT);
365     }
366     /**
367      * Return the secondary component of a collation element.
368      * @param order the collation element
369      * @return the element's secondary component
370      */
secondaryOrder(int order)371     public final static short secondaryOrder(int order)
372     {
373         order = order & RBCollationTables.SECONDARYORDERMASK;
374         return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT));
375     }
376     /**
377      * Return the tertiary component of a collation element.
378      * @param order the collation element
379      * @return the element's tertiary component
380      */
tertiaryOrder(int order)381     public final static short tertiaryOrder(int order)
382     {
383         return ((short)(order &= RBCollationTables.TERTIARYORDERMASK));
384     }
385 
386     /**
387      *  Get the comparison order in the desired strength.  Ignore the other
388      *  differences.
389      *  @param order The order value
390      */
strengthOrder(int order)391     final int strengthOrder(int order)
392     {
393         int s = owner.getStrength();
394         if (s == Collator.PRIMARY)
395         {
396             order &= RBCollationTables.PRIMARYDIFFERENCEONLY;
397         } else if (s == Collator.SECONDARY)
398         {
399             order &= RBCollationTables.SECONDARYDIFFERENCEONLY;
400         }
401         return order;
402     }
403 
404     /**
405      * Sets the iterator to point to the collation element corresponding to
406      * the specified character (the parameter is a CHARACTER offset in the
407      * original string, not an offset into its corresponding sequence of
408      * collation elements).  The value returned by the next call to next()
409      * will be the collation element corresponding to the specified position
410      * in the text.  If that position is in the middle of a contracting
411      * character sequence, the result of the next call to next() is the
412      * collation element for that sequence.  This means that getOffset()
413      * is not guaranteed to return the same value as was passed to a preceding
414      * call to setOffset().
415      *
416      * @param newOffset The new character offset into the original text.
417      * @since 1.2
418      */
419     @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
setOffset(int newOffset)420     public void setOffset(int newOffset)
421     {
422         if (text != null) {
423             if (newOffset < text.getBeginIndex()
424                 || newOffset >= text.getEndIndex()) {
425                     text.setIndexOnly(newOffset);
426             } else {
427                 int c = text.setIndex(newOffset);
428 
429                 // if the desired character isn't used in a contracting character
430                 // sequence, bypass all the backing-up logic-- we're sitting on
431                 // the right character already
432                 if (ordering.usedInContractSeq(c)) {
433                     // walk backwards through the string until we see a character
434                     // that DOESN'T participate in a contracting character sequence
435                     while (ordering.usedInContractSeq(c)) {
436                         c = text.previous();
437                     }
438                     // now walk forward using this object's next() method until
439                     // we pass the starting point and set our current position
440                     // to the beginning of the last "character" before or at
441                     // our starting position
442                     int last = text.getIndex();
443                     while (text.getIndex() <= newOffset) {
444                         last = text.getIndex();
445                         next();
446                     }
447                     text.setIndexOnly(last);
448                     // we don't need this, since last is the last index
449                     // that is the starting of the contraction which encompass
450                     // newOffset
451                     // text.previous();
452                 }
453             }
454         }
455         buffer = null;
456         expIndex = 0;
457         swapOrder = 0;
458     }
459 
460     /**
461      * Returns the character offset in the original text corresponding to the next
462      * collation element.  (That is, getOffset() returns the position in the text
463      * corresponding to the collation element that will be returned by the next
464      * call to next().)  This value will always be the index of the FIRST character
465      * corresponding to the collation element (a contracting character sequence is
466      * when two or more characters all correspond to the same collation element).
467      * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
468      * won't necessarily return x.
469      *
470      * @return The character offset in the original text corresponding to the collation
471      * element that will be returned by the next call to next().
472      * @since 1.2
473      */
getOffset()474     public int getOffset()
475     {
476         return (text != null) ? text.getIndex() : 0;
477     }
478 
479 
480     /**
481      * Return the maximum length of any expansion sequences that end
482      * with the specified comparison order.
483      * @param order a collation order returned by previous or next.
484      * @return the maximum length of any expansion sequences ending
485      *         with the specified order.
486      * @since 1.2
487      */
getMaxExpansion(int order)488     public int getMaxExpansion(int order)
489     {
490         return ordering.getMaxExpansion(order);
491     }
492 
493     /**
494      * Set a new string over which to iterate.
495      *
496      * @param source  the new source text
497      * @since 1.2
498      */
setText(String source)499     public void setText(String source)
500     {
501         buffer = null;
502         swapOrder = 0;
503         expIndex = 0;
504         NormalizerBase.Mode mode =
505             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
506         if (text == null) {
507             text = new NormalizerBase(source, mode);
508         } else {
509             text.setMode(mode);
510             text.setText(source);
511         }
512     }
513 
514     /**
515      * Set a new string over which to iterate.
516      *
517      * @param source  the new source text.
518      * @since 1.2
519      */
setText(CharacterIterator source)520     public void setText(CharacterIterator source)
521     {
522         buffer = null;
523         swapOrder = 0;
524         expIndex = 0;
525         NormalizerBase.Mode mode =
526             CollatorUtilities.toNormalizerMode(owner.getDecomposition());
527         if (text == null) {
528             text = new NormalizerBase(source, mode);
529         } else {
530             text.setMode(mode);
531             text.setText(source);
532         }
533     }
534 
535     //============================================================
536     // privates
537     //============================================================
538 
539     /**
540      * Determine if a character is a Thai vowel (which sorts after
541      * its base consonant).
542      */
isThaiPreVowel(int ch)543     private final static boolean isThaiPreVowel(int ch) {
544         return (ch >= 0x0e40) && (ch <= 0x0e44);
545     }
546 
547     /**
548      * Determine if a character is a Thai base consonant
549      */
isThaiBaseConsonant(int ch)550     private final static boolean isThaiBaseConsonant(int ch) {
551         return (ch >= 0x0e01) && (ch <= 0x0e2e);
552     }
553 
554     /**
555      * Determine if a character is a Lao vowel (which sorts after
556      * its base consonant).
557      */
isLaoPreVowel(int ch)558     private final static boolean isLaoPreVowel(int ch) {
559         return (ch >= 0x0ec0) && (ch <= 0x0ec4);
560     }
561 
562     /**
563      * Determine if a character is a Lao base consonant
564      */
isLaoBaseConsonant(int ch)565     private final static boolean isLaoBaseConsonant(int ch) {
566         return (ch >= 0x0e81) && (ch <= 0x0eae);
567     }
568 
569     /**
570      * This method produces a buffer which contains the collation
571      * elements for the two characters, with colFirst's values preceding
572      * another character's.  Presumably, the other character precedes colFirst
573      * in logical order (otherwise you wouldn't need this method would you?).
574      * The assumption is that the other char's value(s) have already been
575      * computed.  If this char has a single element it is passed to this
576      * method as lastValue, and lastExpansion is null.  If it has an
577      * expansion it is passed in lastExpansion, and colLastValue is ignored.
578      */
makeReorderedBuffer(int colFirst, int lastValue, int[] lastExpansion, boolean forward)579     private int[] makeReorderedBuffer(int colFirst,
580                                       int lastValue,
581                                       int[] lastExpansion,
582                                       boolean forward) {
583 
584         int[] result;
585 
586         int firstValue = ordering.getUnicodeOrder(colFirst);
587         if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) {
588             firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst);
589         }
590 
591         int[] firstExpansion = null;
592         if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) {
593             firstExpansion = ordering.getExpandValueList(firstValue);
594         }
595 
596         if (!forward) {
597             int temp1 = firstValue;
598             firstValue = lastValue;
599             lastValue = temp1;
600             int[] temp2 = firstExpansion;
601             firstExpansion = lastExpansion;
602             lastExpansion = temp2;
603         }
604 
605         if (firstExpansion == null && lastExpansion == null) {
606             result = new int [2];
607             result[0] = firstValue;
608             result[1] = lastValue;
609         }
610         else {
611             int firstLength = firstExpansion==null? 1 : firstExpansion.length;
612             int lastLength = lastExpansion==null? 1 : lastExpansion.length;
613             result = new int[firstLength + lastLength];
614 
615             if (firstExpansion == null) {
616                 result[0] = firstValue;
617             }
618             else {
619                 System.arraycopy(firstExpansion, 0, result, 0, firstLength);
620             }
621 
622             if (lastExpansion == null) {
623                 result[firstLength] = lastValue;
624             }
625             else {
626                 System.arraycopy(lastExpansion, 0, result, firstLength, lastLength);
627             }
628         }
629 
630         return result;
631     }
632 
633     /**
634      *  Check if a comparison order is ignorable.
635      *  @return true if a character is ignorable, false otherwise.
636      */
isIgnorable(int order)637     final static boolean isIgnorable(int order)
638     {
639         return ((primaryOrder(order) == 0) ? true : false);
640     }
641 
642     /**
643      * Get the ordering priority of the next contracting character in the
644      * string.
645      * @param ch the starting character of a contracting character token
646      * @return the next contracting character's ordering.  Returns NULLORDER
647      * if the end of string is reached.
648      */
nextContractChar(int ch)649     private int nextContractChar(int ch)
650     {
651         // First get the ordering of this single character,
652         // which is always the first element in the list
653         Vector<EntryPair> list = ordering.getContractValues(ch);
654         EntryPair pair = list.firstElement();
655         int order = pair.value;
656 
657         // find out the length of the longest contracting character sequence in the list.
658         // There's logic in the builder code to make sure the longest sequence is always
659         // the last.
660         pair = list.lastElement();
661         int maxLength = pair.entryName.length();
662 
663         // (the Normalizer is cloned here so that the seeking we do in the next loop
664         // won't affect our real position in the text)
665         NormalizerBase tempText = (NormalizerBase)text.clone();
666 
667         // extract the next maxLength characters in the string (we have to do this using the
668         // Normalizer to ensure that our offsets correspond to those the rest of the
669         // iterator is using) and store it in "fragment".
670         tempText.previous();
671         key.setLength(0);
672         int c = tempText.next();
673         while (maxLength > 0 && c != NormalizerBase.DONE) {
674             if (Character.isSupplementaryCodePoint(c)) {
675                 key.append(Character.toChars(c));
676                 maxLength -= 2;
677             } else {
678                 key.append((char)c);
679                 --maxLength;
680             }
681             c = tempText.next();
682         }
683         String fragment = key.toString();
684         // now that we have that fragment, iterate through this list looking for the
685         // longest sequence that matches the characters in the actual text.  (maxLength
686         // is used here to keep track of the length of the longest sequence)
687         // Upon exit from this loop, maxLength will contain the length of the matching
688         // sequence and order will contain the collation-element value corresponding
689         // to this sequence
690         maxLength = 1;
691         for (int i = list.size() - 1; i > 0; i--) {
692             pair = list.elementAt(i);
693             if (!pair.fwd)
694                 continue;
695 
696             if (fragment.startsWith(pair.entryName) && pair.entryName.length()
697                     > maxLength) {
698                 maxLength = pair.entryName.length();
699                 order = pair.value;
700             }
701         }
702 
703         // seek our current iteration position to the end of the matching sequence
704         // and return the appropriate collation-element value (if there was no matching
705         // sequence, we're already seeked to the right position and order already contains
706         // the correct collation-element value for the single character)
707         while (maxLength > 1) {
708             c = text.next();
709             maxLength -= Character.charCount(c);
710         }
711         return order;
712     }
713 
714     /**
715      * Get the ordering priority of the previous contracting character in the
716      * string.
717      * @param ch the starting character of a contracting character token
718      * @return the next contracting character's ordering.  Returns NULLORDER
719      * if the end of string is reached.
720      */
prevContractChar(int ch)721     private int prevContractChar(int ch)
722     {
723         // This function is identical to nextContractChar(), except that we've
724         // switched things so that the next() and previous() calls on the Normalizer
725         // are switched and so that we skip entry pairs with the fwd flag turned on
726         // rather than off.  Notice that we still use append() and startsWith() when
727         // working on the fragment.  This is because the entry pairs that are used
728         // in reverse iteration have their names reversed already.
729         Vector<EntryPair> list = ordering.getContractValues(ch);
730         EntryPair pair = list.firstElement();
731         int order = pair.value;
732 
733         pair = list.lastElement();
734         int maxLength = pair.entryName.length();
735 
736         NormalizerBase tempText = (NormalizerBase)text.clone();
737 
738         tempText.next();
739         key.setLength(0);
740         int c = tempText.previous();
741         while (maxLength > 0 && c != NormalizerBase.DONE) {
742             if (Character.isSupplementaryCodePoint(c)) {
743                 key.append(Character.toChars(c));
744                 maxLength -= 2;
745             } else {
746                 key.append((char)c);
747                 --maxLength;
748             }
749             c = tempText.previous();
750         }
751         String fragment = key.toString();
752 
753         maxLength = 1;
754         for (int i = list.size() - 1; i > 0; i--) {
755             pair = list.elementAt(i);
756             if (pair.fwd)
757                 continue;
758 
759             if (fragment.startsWith(pair.entryName) && pair.entryName.length()
760                     > maxLength) {
761                 maxLength = pair.entryName.length();
762                 order = pair.value;
763             }
764         }
765 
766         while (maxLength > 1) {
767             c = text.previous();
768             maxLength -= Character.charCount(c);
769         }
770         return order;
771     }
772 
773     final static int UNMAPPEDCHARVALUE = 0x7FFF0000;
774 
775     private NormalizerBase text = null;
776     private int[] buffer = null;
777     private int expIndex = 0;
778     private StringBuffer key = new StringBuffer(5);
779     private int swapOrder = 0;
780     private RBCollationTables ordering;
781     private RuleBasedCollator owner;
782 }
783