1 /*
2  * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 /*
27  * (C) Copyright Taligent, Inc. 1996-1998 -  All Rights Reserved
28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
29  *
30  *   The original version of this source code and documentation is copyrighted
31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
32  * materials are provided under terms of a License Agreement between Taligent
33  * and Sun. This technology is protected by multiple US and International
34  * patents. This notice and attribution to Taligent may not be removed.
35  *   Taligent is a registered trademark of Taligent, Inc.
36  *
37  */
38 
39 package java.text;
40 
41 import java.lang.ref.SoftReference;
42 import java.text.spi.CollatorProvider;
43 import java.util.Locale;
44 import java.util.ResourceBundle;
45 import java.util.concurrent.ConcurrentHashMap;
46 import java.util.concurrent.ConcurrentMap;
47 import sun.util.locale.provider.LocaleProviderAdapter;
48 import sun.util.locale.provider.LocaleServiceProviderPool;
49 
50 
51 /**
52  * The {@code Collator} class performs locale-sensitive
53  * {@code String} comparison. You use this class to build
54  * searching and sorting routines for natural language text.
55  *
56  * <p>
57  * {@code Collator} is an abstract base class. Subclasses
58  * implement specific collation strategies. One subclass,
59  * {@code RuleBasedCollator}, is currently provided with
60  * the Java Platform and is applicable to a wide set of languages. Other
61  * subclasses may be created to handle more specialized needs.
62  *
63  * <p>
64  * Like other locale-sensitive classes, you can use the static
65  * factory method, {@code getInstance}, to obtain the appropriate
66  * {@code Collator} object for a given locale. You will only need
67  * to look at the subclasses of {@code Collator} if you need
68  * to understand the details of a particular collation strategy or
69  * if you need to modify that strategy.
70  *
71  * <p>
72  * The following example shows how to compare two strings using
73  * the {@code Collator} for the default locale.
74  * <blockquote>
75  * <pre>{@code
76  * // Compare two strings in the default locale
77  * Collator myCollator = Collator.getInstance();
78  * if( myCollator.compare("abc", "ABC") < 0 )
79  *     System.out.println("abc is less than ABC");
80  * else
81  *     System.out.println("abc is greater than or equal to ABC");
82  * }</pre>
83  * </blockquote>
84  *
85  * <p>
86  * You can set a {@code Collator}'s <em>strength</em> property
87  * to determine the level of difference considered significant in
88  * comparisons. Four strengths are provided: {@code PRIMARY},
89  * {@code SECONDARY}, {@code TERTIARY}, and {@code IDENTICAL}.
90  * The exact assignment of strengths to language features is
91  * locale dependent.  For example, in Czech, "e" and "f" are considered
92  * primary differences, while "e" and "&#283;" are secondary differences,
93  * "e" and "E" are tertiary differences and "e" and "e" are identical.
94  * The following shows how both case and accents could be ignored for
95  * US English.
96  * <blockquote>
97  * <pre>
98  * //Get the Collator for US English and set its strength to PRIMARY
99  * Collator usCollator = Collator.getInstance(Locale.US);
100  * usCollator.setStrength(Collator.PRIMARY);
101  * if( usCollator.compare("abc", "ABC") == 0 ) {
102  *     System.out.println("Strings are equivalent");
103  * }
104  * </pre>
105  * </blockquote>
106  * <p>
107  * For comparing {@code String}s exactly once, the {@code compare}
108  * method provides the best performance. When sorting a list of
109  * {@code String}s however, it is generally necessary to compare each
110  * {@code String} multiple times. In this case, {@code CollationKey}s
111  * provide better performance. The {@code CollationKey} class converts
112  * a {@code String} to a series of bits that can be compared bitwise
113  * against other {@code CollationKey}s. A {@code CollationKey} is
114  * created by a {@code Collator} object for a given {@code String}.
115  * <br>
116  * <strong>Note:</strong> {@code CollationKey}s from different
117  * {@code Collator}s can not be compared. See the class description
118  * for {@link CollationKey}
119  * for an example using {@code CollationKey}s.
120  *
121  * @see         RuleBasedCollator
122  * @see         CollationKey
123  * @see         CollationElementIterator
124  * @see         Locale
125  * @author      Helena Shih, Laura Werner, Richard Gillam
126  * @since 1.1
127  */
128 
129 public abstract class Collator
130     implements java.util.Comparator<Object>, Cloneable
131 {
132     /**
133      * Collator strength value.  When set, only PRIMARY differences are
134      * considered significant during comparison. The assignment of strengths
135      * to language features is locale dependent. A common example is for
136      * different base letters ("a" vs "b") to be considered a PRIMARY difference.
137      * @see java.text.Collator#setStrength
138      * @see java.text.Collator#getStrength
139      */
140     public static final int PRIMARY = 0;
141     /**
142      * Collator strength value.  When set, only SECONDARY and above differences are
143      * considered significant during comparison. The assignment of strengths
144      * to language features is locale dependent. A common example is for
145      * different accented forms of the same base letter ("a" vs "\u00E4") to be
146      * considered a SECONDARY difference.
147      * @see java.text.Collator#setStrength
148      * @see java.text.Collator#getStrength
149      */
150     public static final int SECONDARY = 1;
151     /**
152      * Collator strength value.  When set, only TERTIARY and above differences are
153      * considered significant during comparison. The assignment of strengths
154      * to language features is locale dependent. A common example is for
155      * case differences ("a" vs "A") to be considered a TERTIARY difference.
156      * @see java.text.Collator#setStrength
157      * @see java.text.Collator#getStrength
158      */
159     public static final int TERTIARY = 2;
160 
161     /**
162      * Collator strength value.  When set, all differences are
163      * considered significant during comparison. The assignment of strengths
164      * to language features is locale dependent. A common example is for control
165      * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
166      * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
167      * level.  Additionally, differences between pre-composed accents such as
168      * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
169      * (A, combining-grave) will be considered significant at the IDENTICAL
170      * level if decomposition is set to NO_DECOMPOSITION.
171      */
172     public static final int IDENTICAL = 3;
173 
174     /**
175      * Decomposition mode value. With NO_DECOMPOSITION
176      * set, accented characters will not be decomposed for collation. This
177      * is the default setting and provides the fastest collation but
178      * will only produce correct results for languages that do not use accents.
179      * @see java.text.Collator#getDecomposition
180      * @see java.text.Collator#setDecomposition
181      */
182     public static final int NO_DECOMPOSITION = 0;
183 
184     /**
185      * Decomposition mode value. With CANONICAL_DECOMPOSITION
186      * set, characters that are canonical variants according to Unicode
187      * standard will be decomposed for collation. This should be used to get
188      * correct collation of accented characters.
189      * <p>
190      * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
191      * described in
192      * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
193      * Technical Report #15</a>.
194      * @see java.text.Collator#getDecomposition
195      * @see java.text.Collator#setDecomposition
196      */
197     public static final int CANONICAL_DECOMPOSITION = 1;
198 
199     /**
200      * Decomposition mode value. With FULL_DECOMPOSITION
201      * set, both Unicode canonical variants and Unicode compatibility variants
202      * will be decomposed for collation.  This causes not only accented
203      * characters to be collated, but also characters that have special formats
204      * to be collated with their norminal form. For example, the half-width and
205      * full-width ASCII and Katakana characters are then collated together.
206      * FULL_DECOMPOSITION is the most complete and therefore the slowest
207      * decomposition mode.
208      * <p>
209      * FULL_DECOMPOSITION corresponds to Normalization Form KD as
210      * described in
211      * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode
212      * Technical Report #15</a>.
213      * @see java.text.Collator#getDecomposition
214      * @see java.text.Collator#setDecomposition
215      */
216     public static final int FULL_DECOMPOSITION = 2;
217 
218     /**
219      * Gets the Collator for the current default locale.
220      * The default locale is determined by java.util.Locale.getDefault.
221      * @return the Collator for the default locale.(for example, en_US)
222      * @see java.util.Locale#getDefault
223      */
getInstance()224     public static synchronized Collator getInstance() {
225         return getInstance(Locale.getDefault());
226     }
227 
228     /**
229      * Gets the Collator for the desired locale.
230      * @param desiredLocale the desired locale.
231      * @return the Collator for the desired locale.
232      * @see java.util.Locale
233      * @see java.util.ResourceBundle
234      */
getInstance(Locale desiredLocale)235     public static Collator getInstance(Locale desiredLocale) {
236         SoftReference<Collator> ref = cache.get(desiredLocale);
237         Collator result = (ref != null) ? ref.get() : null;
238         if (result == null) {
239             LocaleProviderAdapter adapter;
240             adapter = LocaleProviderAdapter.getAdapter(CollatorProvider.class,
241                                                        desiredLocale);
242             CollatorProvider provider = adapter.getCollatorProvider();
243             result = provider.getInstance(desiredLocale);
244             if (result == null) {
245                 result = LocaleProviderAdapter.forJRE()
246                              .getCollatorProvider().getInstance(desiredLocale);
247             }
248             while (true) {
249                 if (ref != null) {
250                     // Remove the empty SoftReference if any
251                     cache.remove(desiredLocale, ref);
252                 }
253                 ref = cache.putIfAbsent(desiredLocale, new SoftReference<>(result));
254                 if (ref == null) {
255                     break;
256                 }
257                 Collator cachedColl = ref.get();
258                 if (cachedColl != null) {
259                     result = cachedColl;
260                     break;
261                 }
262             }
263         }
264         return (Collator) result.clone(); // make the world safe
265     }
266 
267     /**
268      * Compares the source string to the target string according to the
269      * collation rules for this Collator.  Returns an integer less than,
270      * equal to or greater than zero depending on whether the source String is
271      * less than, equal to or greater than the target string.  See the Collator
272      * class description for an example of use.
273      * <p>
274      * For a one time comparison, this method has the best performance. If a
275      * given String will be involved in multiple comparisons, CollationKey.compareTo
276      * has the best performance. See the Collator class description for an example
277      * using CollationKeys.
278      * @param source the source string.
279      * @param target the target string.
280      * @return Returns an integer value. Value is less than zero if source is less than
281      * target, value is zero if source and target are equal, value is greater than zero
282      * if source is greater than target.
283      * @see java.text.CollationKey
284      * @see java.text.Collator#getCollationKey
285      */
compare(String source, String target)286     public abstract int compare(String source, String target);
287 
288     /**
289      * Compares its two arguments for order.  Returns a negative integer,
290      * zero, or a positive integer as the first argument is less than, equal
291      * to, or greater than the second.
292      * <p>
293      * This implementation merely returns
294      *  {@code  compare((String)o1, (String)o2) }.
295      *
296      * @return a negative integer, zero, or a positive integer as the
297      *         first argument is less than, equal to, or greater than the
298      *         second.
299      * @throws    ClassCastException the arguments cannot be cast to Strings.
300      * @see java.util.Comparator
301      * @since   1.2
302      */
303     @Override
compare(Object o1, Object o2)304     public int compare(Object o1, Object o2) {
305     return compare((String)o1, (String)o2);
306     }
307 
308     /**
309      * Transforms the String into a series of bits that can be compared bitwise
310      * to other CollationKeys. CollationKeys provide better performance than
311      * Collator.compare when Strings are involved in multiple comparisons.
312      * See the Collator class description for an example using CollationKeys.
313      * @param source the string to be transformed into a collation key.
314      * @return the CollationKey for the given String based on this Collator's collation
315      * rules. If the source String is null, a null CollationKey is returned.
316      * @see java.text.CollationKey
317      * @see java.text.Collator#compare
318      */
getCollationKey(String source)319     public abstract CollationKey getCollationKey(String source);
320 
321     /**
322      * Convenience method for comparing the equality of two strings based on
323      * this Collator's collation rules.
324      * @param source the source string to be compared with.
325      * @param target the target string to be compared with.
326      * @return true if the strings are equal according to the collation
327      * rules.  false, otherwise.
328      * @see java.text.Collator#compare
329      */
equals(String source, String target)330     public boolean equals(String source, String target)
331     {
332         return (compare(source, target) == Collator.EQUAL);
333     }
334 
335     /**
336      * Returns this Collator's strength property.  The strength property determines
337      * the minimum level of difference considered significant during comparison.
338      * See the Collator class description for an example of use.
339      * @return this Collator's current strength property.
340      * @see java.text.Collator#setStrength
341      * @see java.text.Collator#PRIMARY
342      * @see java.text.Collator#SECONDARY
343      * @see java.text.Collator#TERTIARY
344      * @see java.text.Collator#IDENTICAL
345      */
getStrength()346     public synchronized int getStrength()
347     {
348         return strength;
349     }
350 
351     /**
352      * Sets this Collator's strength property.  The strength property determines
353      * the minimum level of difference considered significant during comparison.
354      * See the Collator class description for an example of use.
355      * @param newStrength  the new strength value.
356      * @see java.text.Collator#getStrength
357      * @see java.text.Collator#PRIMARY
358      * @see java.text.Collator#SECONDARY
359      * @see java.text.Collator#TERTIARY
360      * @see java.text.Collator#IDENTICAL
361      * @throws     IllegalArgumentException If the new strength value is not one of
362      * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
363      */
setStrength(int newStrength)364     public synchronized void setStrength(int newStrength) {
365         if ((newStrength != PRIMARY) &&
366             (newStrength != SECONDARY) &&
367             (newStrength != TERTIARY) &&
368             (newStrength != IDENTICAL)) {
369             throw new IllegalArgumentException("Incorrect comparison level.");
370         }
371         strength = newStrength;
372     }
373 
374     /**
375      * Get the decomposition mode of this Collator. Decomposition mode
376      * determines how Unicode composed characters are handled. Adjusting
377      * decomposition mode allows the user to select between faster and more
378      * complete collation behavior.
379      * <p>The three values for decomposition mode are:
380      * <UL>
381      * <LI>NO_DECOMPOSITION,
382      * <LI>CANONICAL_DECOMPOSITION
383      * <LI>FULL_DECOMPOSITION.
384      * </UL>
385      * See the documentation for these three constants for a description
386      * of their meaning.
387      * @return the decomposition mode
388      * @see java.text.Collator#setDecomposition
389      * @see java.text.Collator#NO_DECOMPOSITION
390      * @see java.text.Collator#CANONICAL_DECOMPOSITION
391      * @see java.text.Collator#FULL_DECOMPOSITION
392      */
getDecomposition()393     public synchronized int getDecomposition()
394     {
395         return decmp;
396     }
397     /**
398      * Set the decomposition mode of this Collator. See getDecomposition
399      * for a description of decomposition mode.
400      * @param decompositionMode  the new decomposition mode.
401      * @see java.text.Collator#getDecomposition
402      * @see java.text.Collator#NO_DECOMPOSITION
403      * @see java.text.Collator#CANONICAL_DECOMPOSITION
404      * @see java.text.Collator#FULL_DECOMPOSITION
405      * @throws    IllegalArgumentException If the given value is not a valid decomposition
406      * mode.
407      */
setDecomposition(int decompositionMode)408     public synchronized void setDecomposition(int decompositionMode) {
409         if ((decompositionMode != NO_DECOMPOSITION) &&
410             (decompositionMode != CANONICAL_DECOMPOSITION) &&
411             (decompositionMode != FULL_DECOMPOSITION)) {
412             throw new IllegalArgumentException("Wrong decomposition mode.");
413         }
414         decmp = decompositionMode;
415     }
416 
417     /**
418      * Returns an array of all locales for which the
419      * {@code getInstance} methods of this class can return
420      * localized instances.
421      * The returned array represents the union of locales supported
422      * by the Java runtime and by installed
423      * {@link java.text.spi.CollatorProvider CollatorProvider} implementations.
424      * It must contain at least a Locale instance equal to
425      * {@link java.util.Locale#US Locale.US}.
426      *
427      * @return An array of locales for which localized
428      *         {@code Collator} instances are available.
429      */
getAvailableLocales()430     public static synchronized Locale[] getAvailableLocales() {
431         LocaleServiceProviderPool pool =
432             LocaleServiceProviderPool.getPool(CollatorProvider.class);
433         return pool.getAvailableLocales();
434     }
435 
436     /**
437      * Overrides Cloneable
438      */
439     @Override
clone()440     public Object clone()
441     {
442         try {
443             return (Collator)super.clone();
444         } catch (CloneNotSupportedException e) {
445             throw new InternalError(e);
446         }
447     }
448 
449     /**
450      * Compares the equality of two Collators.
451      * @param that the Collator to be compared with this.
452      * @return true if this Collator is the same as that Collator;
453      * false otherwise.
454      */
455     @Override
equals(Object that)456     public boolean equals(Object that)
457     {
458         if (this == that) {
459             return true;
460         }
461         if (that == null) {
462             return false;
463         }
464         if (getClass() != that.getClass()) {
465             return false;
466         }
467         Collator other = (Collator) that;
468         return ((strength == other.strength) &&
469                 (decmp == other.decmp));
470     }
471 
472     /**
473      * Generates the hash code for this Collator.
474      */
475     @Override
hashCode()476     public abstract int hashCode();
477 
478     /**
479      * Default constructor.  This constructor is
480      * protected so subclasses can get access to it. Users typically create
481      * a Collator sub-class by calling the factory method getInstance.
482      * @see java.text.Collator#getInstance
483      */
Collator()484     protected Collator()
485     {
486         strength = TERTIARY;
487         decmp = CANONICAL_DECOMPOSITION;
488     }
489 
490     private int strength = 0;
491     private int decmp = 0;
492     private static final ConcurrentMap<Locale, SoftReference<Collator>> cache
493             = new ConcurrentHashMap<>();
494 
495     //
496     // FIXME: These three constants should be removed.
497     //
498     /**
499      * LESS is returned if source string is compared to be less than target
500      * string in the compare() method.
501      * @see java.text.Collator#compare
502      */
503     static final int LESS = -1;
504     /**
505      * EQUAL is returned if source string is compared to be equal to target
506      * string in the compare() method.
507      * @see java.text.Collator#compare
508      */
509     static final int EQUAL = 0;
510     /**
511      * GREATER is returned if source string is compared to be greater than
512      * target string in the compare() method.
513      * @see java.text.Collator#compare
514      */
515     static final int GREATER = 1;
516  }
517