1 /* 2 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 * (C) Copyright Taligent, Inc. 1996-1998 - All Rights Reserved 28 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved 29 * 30 * The original version of this source code and documentation is copyrighted 31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These 32 * materials are provided under terms of a License Agreement between Taligent 33 * and Sun. This technology is protected by multiple US and International 34 * patents. This notice and attribution to Taligent may not be removed. 35 * Taligent is a registered trademark of Taligent, Inc. 36 * 37 */ 38 39 package java.text; 40 41 import java.lang.ref.SoftReference; 42 import java.text.spi.CollatorProvider; 43 import java.util.Locale; 44 import java.util.ResourceBundle; 45 import java.util.concurrent.ConcurrentHashMap; 46 import java.util.concurrent.ConcurrentMap; 47 import sun.util.locale.provider.LocaleProviderAdapter; 48 import sun.util.locale.provider.LocaleServiceProviderPool; 49 50 51 /** 52 * The {@code Collator} class performs locale-sensitive 53 * {@code String} comparison. You use this class to build 54 * searching and sorting routines for natural language text. 55 * 56 * <p> 57 * {@code Collator} is an abstract base class. Subclasses 58 * implement specific collation strategies. One subclass, 59 * {@code RuleBasedCollator}, is currently provided with 60 * the Java Platform and is applicable to a wide set of languages. Other 61 * subclasses may be created to handle more specialized needs. 62 * 63 * <p> 64 * Like other locale-sensitive classes, you can use the static 65 * factory method, {@code getInstance}, to obtain the appropriate 66 * {@code Collator} object for a given locale. You will only need 67 * to look at the subclasses of {@code Collator} if you need 68 * to understand the details of a particular collation strategy or 69 * if you need to modify that strategy. 70 * 71 * <p> 72 * The following example shows how to compare two strings using 73 * the {@code Collator} for the default locale. 74 * <blockquote> 75 * <pre>{@code 76 * // Compare two strings in the default locale 77 * Collator myCollator = Collator.getInstance(); 78 * if( myCollator.compare("abc", "ABC") < 0 ) 79 * System.out.println("abc is less than ABC"); 80 * else 81 * System.out.println("abc is greater than or equal to ABC"); 82 * }</pre> 83 * </blockquote> 84 * 85 * <p> 86 * You can set a {@code Collator}'s <em>strength</em> property 87 * to determine the level of difference considered significant in 88 * comparisons. Four strengths are provided: {@code PRIMARY}, 89 * {@code SECONDARY}, {@code TERTIARY}, and {@code IDENTICAL}. 90 * The exact assignment of strengths to language features is 91 * locale dependent. For example, in Czech, "e" and "f" are considered 92 * primary differences, while "e" and "ě" are secondary differences, 93 * "e" and "E" are tertiary differences and "e" and "e" are identical. 94 * The following shows how both case and accents could be ignored for 95 * US English. 96 * <blockquote> 97 * <pre> 98 * //Get the Collator for US English and set its strength to PRIMARY 99 * Collator usCollator = Collator.getInstance(Locale.US); 100 * usCollator.setStrength(Collator.PRIMARY); 101 * if( usCollator.compare("abc", "ABC") == 0 ) { 102 * System.out.println("Strings are equivalent"); 103 * } 104 * </pre> 105 * </blockquote> 106 * <p> 107 * For comparing {@code String}s exactly once, the {@code compare} 108 * method provides the best performance. When sorting a list of 109 * {@code String}s however, it is generally necessary to compare each 110 * {@code String} multiple times. In this case, {@code CollationKey}s 111 * provide better performance. The {@code CollationKey} class converts 112 * a {@code String} to a series of bits that can be compared bitwise 113 * against other {@code CollationKey}s. A {@code CollationKey} is 114 * created by a {@code Collator} object for a given {@code String}. 115 * <br> 116 * <strong>Note:</strong> {@code CollationKey}s from different 117 * {@code Collator}s can not be compared. See the class description 118 * for {@link CollationKey} 119 * for an example using {@code CollationKey}s. 120 * 121 * @see RuleBasedCollator 122 * @see CollationKey 123 * @see CollationElementIterator 124 * @see Locale 125 * @author Helena Shih, Laura Werner, Richard Gillam 126 * @since 1.1 127 */ 128 129 public abstract class Collator 130 implements java.util.Comparator<Object>, Cloneable 131 { 132 /** 133 * Collator strength value. When set, only PRIMARY differences are 134 * considered significant during comparison. The assignment of strengths 135 * to language features is locale dependent. A common example is for 136 * different base letters ("a" vs "b") to be considered a PRIMARY difference. 137 * @see java.text.Collator#setStrength 138 * @see java.text.Collator#getStrength 139 */ 140 public static final int PRIMARY = 0; 141 /** 142 * Collator strength value. When set, only SECONDARY and above differences are 143 * considered significant during comparison. The assignment of strengths 144 * to language features is locale dependent. A common example is for 145 * different accented forms of the same base letter ("a" vs "\u00E4") to be 146 * considered a SECONDARY difference. 147 * @see java.text.Collator#setStrength 148 * @see java.text.Collator#getStrength 149 */ 150 public static final int SECONDARY = 1; 151 /** 152 * Collator strength value. When set, only TERTIARY and above differences are 153 * considered significant during comparison. The assignment of strengths 154 * to language features is locale dependent. A common example is for 155 * case differences ("a" vs "A") to be considered a TERTIARY difference. 156 * @see java.text.Collator#setStrength 157 * @see java.text.Collator#getStrength 158 */ 159 public static final int TERTIARY = 2; 160 161 /** 162 * Collator strength value. When set, all differences are 163 * considered significant during comparison. The assignment of strengths 164 * to language features is locale dependent. A common example is for control 165 * characters ("\u0001" vs "\u0002") to be considered equal at the 166 * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL 167 * level. Additionally, differences between pre-composed accents such as 168 * "\u00C0" (A-grave) and combining accents such as "A\u0300" 169 * (A, combining-grave) will be considered significant at the IDENTICAL 170 * level if decomposition is set to NO_DECOMPOSITION. 171 */ 172 public static final int IDENTICAL = 3; 173 174 /** 175 * Decomposition mode value. With NO_DECOMPOSITION 176 * set, accented characters will not be decomposed for collation. This 177 * is the default setting and provides the fastest collation but 178 * will only produce correct results for languages that do not use accents. 179 * @see java.text.Collator#getDecomposition 180 * @see java.text.Collator#setDecomposition 181 */ 182 public static final int NO_DECOMPOSITION = 0; 183 184 /** 185 * Decomposition mode value. With CANONICAL_DECOMPOSITION 186 * set, characters that are canonical variants according to Unicode 187 * standard will be decomposed for collation. This should be used to get 188 * correct collation of accented characters. 189 * <p> 190 * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as 191 * described in 192 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode 193 * Technical Report #15</a>. 194 * @see java.text.Collator#getDecomposition 195 * @see java.text.Collator#setDecomposition 196 */ 197 public static final int CANONICAL_DECOMPOSITION = 1; 198 199 /** 200 * Decomposition mode value. With FULL_DECOMPOSITION 201 * set, both Unicode canonical variants and Unicode compatibility variants 202 * will be decomposed for collation. This causes not only accented 203 * characters to be collated, but also characters that have special formats 204 * to be collated with their norminal form. For example, the half-width and 205 * full-width ASCII and Katakana characters are then collated together. 206 * FULL_DECOMPOSITION is the most complete and therefore the slowest 207 * decomposition mode. 208 * <p> 209 * FULL_DECOMPOSITION corresponds to Normalization Form KD as 210 * described in 211 * <a href="http://www.unicode.org/unicode/reports/tr15/tr15-23.html">Unicode 212 * Technical Report #15</a>. 213 * @see java.text.Collator#getDecomposition 214 * @see java.text.Collator#setDecomposition 215 */ 216 public static final int FULL_DECOMPOSITION = 2; 217 218 /** 219 * Gets the Collator for the current default locale. 220 * The default locale is determined by java.util.Locale.getDefault. 221 * @return the Collator for the default locale.(for example, en_US) 222 * @see java.util.Locale#getDefault 223 */ getInstance()224 public static synchronized Collator getInstance() { 225 return getInstance(Locale.getDefault()); 226 } 227 228 /** 229 * Gets the Collator for the desired locale. 230 * @param desiredLocale the desired locale. 231 * @return the Collator for the desired locale. 232 * @see java.util.Locale 233 * @see java.util.ResourceBundle 234 */ getInstance(Locale desiredLocale)235 public static Collator getInstance(Locale desiredLocale) { 236 SoftReference<Collator> ref = cache.get(desiredLocale); 237 Collator result = (ref != null) ? ref.get() : null; 238 if (result == null) { 239 LocaleProviderAdapter adapter; 240 adapter = LocaleProviderAdapter.getAdapter(CollatorProvider.class, 241 desiredLocale); 242 CollatorProvider provider = adapter.getCollatorProvider(); 243 result = provider.getInstance(desiredLocale); 244 if (result == null) { 245 result = LocaleProviderAdapter.forJRE() 246 .getCollatorProvider().getInstance(desiredLocale); 247 } 248 while (true) { 249 if (ref != null) { 250 // Remove the empty SoftReference if any 251 cache.remove(desiredLocale, ref); 252 } 253 ref = cache.putIfAbsent(desiredLocale, new SoftReference<>(result)); 254 if (ref == null) { 255 break; 256 } 257 Collator cachedColl = ref.get(); 258 if (cachedColl != null) { 259 result = cachedColl; 260 break; 261 } 262 } 263 } 264 return (Collator) result.clone(); // make the world safe 265 } 266 267 /** 268 * Compares the source string to the target string according to the 269 * collation rules for this Collator. Returns an integer less than, 270 * equal to or greater than zero depending on whether the source String is 271 * less than, equal to or greater than the target string. See the Collator 272 * class description for an example of use. 273 * <p> 274 * For a one time comparison, this method has the best performance. If a 275 * given String will be involved in multiple comparisons, CollationKey.compareTo 276 * has the best performance. See the Collator class description for an example 277 * using CollationKeys. 278 * @param source the source string. 279 * @param target the target string. 280 * @return Returns an integer value. Value is less than zero if source is less than 281 * target, value is zero if source and target are equal, value is greater than zero 282 * if source is greater than target. 283 * @see java.text.CollationKey 284 * @see java.text.Collator#getCollationKey 285 */ compare(String source, String target)286 public abstract int compare(String source, String target); 287 288 /** 289 * Compares its two arguments for order. Returns a negative integer, 290 * zero, or a positive integer as the first argument is less than, equal 291 * to, or greater than the second. 292 * <p> 293 * This implementation merely returns 294 * {@code compare((String)o1, (String)o2) }. 295 * 296 * @return a negative integer, zero, or a positive integer as the 297 * first argument is less than, equal to, or greater than the 298 * second. 299 * @throws ClassCastException the arguments cannot be cast to Strings. 300 * @see java.util.Comparator 301 * @since 1.2 302 */ 303 @Override compare(Object o1, Object o2)304 public int compare(Object o1, Object o2) { 305 return compare((String)o1, (String)o2); 306 } 307 308 /** 309 * Transforms the String into a series of bits that can be compared bitwise 310 * to other CollationKeys. CollationKeys provide better performance than 311 * Collator.compare when Strings are involved in multiple comparisons. 312 * See the Collator class description for an example using CollationKeys. 313 * @param source the string to be transformed into a collation key. 314 * @return the CollationKey for the given String based on this Collator's collation 315 * rules. If the source String is null, a null CollationKey is returned. 316 * @see java.text.CollationKey 317 * @see java.text.Collator#compare 318 */ getCollationKey(String source)319 public abstract CollationKey getCollationKey(String source); 320 321 /** 322 * Convenience method for comparing the equality of two strings based on 323 * this Collator's collation rules. 324 * @param source the source string to be compared with. 325 * @param target the target string to be compared with. 326 * @return true if the strings are equal according to the collation 327 * rules. false, otherwise. 328 * @see java.text.Collator#compare 329 */ equals(String source, String target)330 public boolean equals(String source, String target) 331 { 332 return (compare(source, target) == Collator.EQUAL); 333 } 334 335 /** 336 * Returns this Collator's strength property. The strength property determines 337 * the minimum level of difference considered significant during comparison. 338 * See the Collator class description for an example of use. 339 * @return this Collator's current strength property. 340 * @see java.text.Collator#setStrength 341 * @see java.text.Collator#PRIMARY 342 * @see java.text.Collator#SECONDARY 343 * @see java.text.Collator#TERTIARY 344 * @see java.text.Collator#IDENTICAL 345 */ getStrength()346 public synchronized int getStrength() 347 { 348 return strength; 349 } 350 351 /** 352 * Sets this Collator's strength property. The strength property determines 353 * the minimum level of difference considered significant during comparison. 354 * See the Collator class description for an example of use. 355 * @param newStrength the new strength value. 356 * @see java.text.Collator#getStrength 357 * @see java.text.Collator#PRIMARY 358 * @see java.text.Collator#SECONDARY 359 * @see java.text.Collator#TERTIARY 360 * @see java.text.Collator#IDENTICAL 361 * @throws IllegalArgumentException If the new strength value is not one of 362 * PRIMARY, SECONDARY, TERTIARY or IDENTICAL. 363 */ setStrength(int newStrength)364 public synchronized void setStrength(int newStrength) { 365 if ((newStrength != PRIMARY) && 366 (newStrength != SECONDARY) && 367 (newStrength != TERTIARY) && 368 (newStrength != IDENTICAL)) { 369 throw new IllegalArgumentException("Incorrect comparison level."); 370 } 371 strength = newStrength; 372 } 373 374 /** 375 * Get the decomposition mode of this Collator. Decomposition mode 376 * determines how Unicode composed characters are handled. Adjusting 377 * decomposition mode allows the user to select between faster and more 378 * complete collation behavior. 379 * <p>The three values for decomposition mode are: 380 * <UL> 381 * <LI>NO_DECOMPOSITION, 382 * <LI>CANONICAL_DECOMPOSITION 383 * <LI>FULL_DECOMPOSITION. 384 * </UL> 385 * See the documentation for these three constants for a description 386 * of their meaning. 387 * @return the decomposition mode 388 * @see java.text.Collator#setDecomposition 389 * @see java.text.Collator#NO_DECOMPOSITION 390 * @see java.text.Collator#CANONICAL_DECOMPOSITION 391 * @see java.text.Collator#FULL_DECOMPOSITION 392 */ getDecomposition()393 public synchronized int getDecomposition() 394 { 395 return decmp; 396 } 397 /** 398 * Set the decomposition mode of this Collator. See getDecomposition 399 * for a description of decomposition mode. 400 * @param decompositionMode the new decomposition mode. 401 * @see java.text.Collator#getDecomposition 402 * @see java.text.Collator#NO_DECOMPOSITION 403 * @see java.text.Collator#CANONICAL_DECOMPOSITION 404 * @see java.text.Collator#FULL_DECOMPOSITION 405 * @throws IllegalArgumentException If the given value is not a valid decomposition 406 * mode. 407 */ setDecomposition(int decompositionMode)408 public synchronized void setDecomposition(int decompositionMode) { 409 if ((decompositionMode != NO_DECOMPOSITION) && 410 (decompositionMode != CANONICAL_DECOMPOSITION) && 411 (decompositionMode != FULL_DECOMPOSITION)) { 412 throw new IllegalArgumentException("Wrong decomposition mode."); 413 } 414 decmp = decompositionMode; 415 } 416 417 /** 418 * Returns an array of all locales for which the 419 * {@code getInstance} methods of this class can return 420 * localized instances. 421 * The returned array represents the union of locales supported 422 * by the Java runtime and by installed 423 * {@link java.text.spi.CollatorProvider CollatorProvider} implementations. 424 * It must contain at least a Locale instance equal to 425 * {@link java.util.Locale#US Locale.US}. 426 * 427 * @return An array of locales for which localized 428 * {@code Collator} instances are available. 429 */ getAvailableLocales()430 public static synchronized Locale[] getAvailableLocales() { 431 LocaleServiceProviderPool pool = 432 LocaleServiceProviderPool.getPool(CollatorProvider.class); 433 return pool.getAvailableLocales(); 434 } 435 436 /** 437 * Overrides Cloneable 438 */ 439 @Override clone()440 public Object clone() 441 { 442 try { 443 return (Collator)super.clone(); 444 } catch (CloneNotSupportedException e) { 445 throw new InternalError(e); 446 } 447 } 448 449 /** 450 * Compares the equality of two Collators. 451 * @param that the Collator to be compared with this. 452 * @return true if this Collator is the same as that Collator; 453 * false otherwise. 454 */ 455 @Override equals(Object that)456 public boolean equals(Object that) 457 { 458 if (this == that) { 459 return true; 460 } 461 if (that == null) { 462 return false; 463 } 464 if (getClass() != that.getClass()) { 465 return false; 466 } 467 Collator other = (Collator) that; 468 return ((strength == other.strength) && 469 (decmp == other.decmp)); 470 } 471 472 /** 473 * Generates the hash code for this Collator. 474 */ 475 @Override hashCode()476 public abstract int hashCode(); 477 478 /** 479 * Default constructor. This constructor is 480 * protected so subclasses can get access to it. Users typically create 481 * a Collator sub-class by calling the factory method getInstance. 482 * @see java.text.Collator#getInstance 483 */ Collator()484 protected Collator() 485 { 486 strength = TERTIARY; 487 decmp = CANONICAL_DECOMPOSITION; 488 } 489 490 private int strength = 0; 491 private int decmp = 0; 492 private static final ConcurrentMap<Locale, SoftReference<Collator>> cache 493 = new ConcurrentHashMap<>(); 494 495 // 496 // FIXME: These three constants should be removed. 497 // 498 /** 499 * LESS is returned if source string is compared to be less than target 500 * string in the compare() method. 501 * @see java.text.Collator#compare 502 */ 503 static final int LESS = -1; 504 /** 505 * EQUAL is returned if source string is compared to be equal to target 506 * string in the compare() method. 507 * @see java.text.Collator#compare 508 */ 509 static final int EQUAL = 0; 510 /** 511 * GREATER is returned if source string is compared to be greater than 512 * target string in the compare() method. 513 * @see java.text.Collator#compare 514 */ 515 static final int GREATER = 1; 516 } 517