1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ******************************************************************************
5  * Copyright (C) 1996-2014, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ******************************************************************************
8  */
9 
10 /**
11  * File coll.cpp
12  *
13  * Created by: Helena Shih
14  *
15  * Modification History:
16  *
17  *  Date        Name        Description
18  *  2/5/97      aliu        Modified createDefault to load collation data from
19  *                          binary files when possible.  Added related methods
20  *                          createCollationFromFile, chopLocale, createPathName.
21  *  2/11/97     aliu        Added methods addToCache, findInCache, which implement
22  *                          a Collation cache.  Modified createDefault to look in
23  *                          cache first, and also to store newly created Collation
24  *                          objects in the cache.  Modified to not use gLocPath.
25  *  2/12/97     aliu        Modified to create objects from RuleBasedCollator cache.
26  *                          Moved cache out of Collation class.
27  *  2/13/97     aliu        Moved several methods out of this class and into
28  *                          RuleBasedCollator, with modifications.  Modified
29  *                          createDefault() to call new RuleBasedCollator(Locale&)
30  *                          constructor.  General clean up and documentation.
31  *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
32  *                          constructor.
33  * 05/06/97     helena      Added memory allocation error detection.
34  * 05/08/97     helena      Added createInstance().
35  *  6/20/97     helena      Java class name change.
36  * 04/23/99     stephen     Removed EDecompositionMode, merged with
37  *                          Normalizer::EMode
38  * 11/23/9      srl         Inlining of some critical functions
39  * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
40  * 2012-2014    markus      Rewritten in C++ again.
41  */
42 
43 #include "utypeinfo.h"  // for 'typeid' to work
44 
45 #include "unicode/utypes.h"
46 
47 #if !UCONFIG_NO_COLLATION
48 
49 #include "unicode/coll.h"
50 #include "unicode/tblcoll.h"
51 #include "collationdata.h"
52 #include "collationroot.h"
53 #include "collationtailoring.h"
54 #include "ucol_imp.h"
55 #include "cstring.h"
56 #include "cmemory.h"
57 #include "umutex.h"
58 #include "servloc.h"
59 #include "uassert.h"
60 #include "ustrenum.h"
61 #include "uresimp.h"
62 #include "ucln_in.h"
63 
64 static icu::Locale* availableLocaleList = NULL;
65 static int32_t  availableLocaleListCount;
66 #if !UCONFIG_NO_SERVICE
67 static icu::ICULocaleService* gService = NULL;
68 static icu::UInitOnce gServiceInitOnce = U_INITONCE_INITIALIZER;
69 #endif
70 static icu::UInitOnce gAvailableLocaleListInitOnce = U_INITONCE_INITIALIZER;
71 
72 /**
73  * Release all static memory held by collator.
74  */
75 U_CDECL_BEGIN
collator_cleanup(void)76 static UBool U_CALLCONV collator_cleanup(void) {
77 #if !UCONFIG_NO_SERVICE
78     if (gService) {
79         delete gService;
80         gService = NULL;
81     }
82     gServiceInitOnce.reset();
83 #endif
84     if (availableLocaleList) {
85         delete []availableLocaleList;
86         availableLocaleList = NULL;
87     }
88     availableLocaleListCount = 0;
89     gAvailableLocaleListInitOnce.reset();
90     return TRUE;
91 }
92 
93 U_CDECL_END
94 
95 U_NAMESPACE_BEGIN
96 
97 #if !UCONFIG_NO_SERVICE
98 
99 // ------------------------------------------
100 //
101 // Registration
102 //
103 
104 //-------------------------------------------
105 
~CollatorFactory()106 CollatorFactory::~CollatorFactory() {}
107 
108 //-------------------------------------------
109 
110 UBool
visible(void) const111 CollatorFactory::visible(void) const {
112     return TRUE;
113 }
114 
115 //-------------------------------------------
116 
117 UnicodeString&
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & result)118 CollatorFactory::getDisplayName(const Locale& objectLocale,
119                                 const Locale& displayLocale,
120                                 UnicodeString& result)
121 {
122   return objectLocale.getDisplayName(displayLocale, result);
123 }
124 
125 // -------------------------------------
126 
127 class ICUCollatorFactory : public ICUResourceBundleFactory {
128  public:
ICUCollatorFactory()129     ICUCollatorFactory() : ICUResourceBundleFactory(UnicodeString(U_ICUDATA_COLL, -1, US_INV)) { }
130     virtual ~ICUCollatorFactory();
131  protected:
132     virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
133 };
134 
~ICUCollatorFactory()135 ICUCollatorFactory::~ICUCollatorFactory() {}
136 
137 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const138 ICUCollatorFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const {
139     if (handlesKey(key, status)) {
140         const LocaleKey& lkey = (const LocaleKey&)key;
141         Locale loc;
142         // make sure the requested locale is correct
143         // default LocaleFactory uses currentLocale since that's the one vetted by handlesKey
144         // but for ICU rb resources we use the actual one since it will fallback again
145         lkey.canonicalLocale(loc);
146 
147         return Collator::makeInstance(loc, status);
148     }
149     return NULL;
150 }
151 
152 // -------------------------------------
153 
154 class ICUCollatorService : public ICULocaleService {
155 public:
ICUCollatorService()156     ICUCollatorService()
157         : ICULocaleService(UNICODE_STRING_SIMPLE("Collator"))
158     {
159         UErrorCode status = U_ZERO_ERROR;
160         registerFactory(new ICUCollatorFactory(), status);
161     }
162 
163     virtual ~ICUCollatorService();
164 
cloneInstance(UObject * instance) const165     virtual UObject* cloneInstance(UObject* instance) const {
166         return ((Collator*)instance)->clone();
167     }
168 
handleDefault(const ICUServiceKey & key,UnicodeString * actualID,UErrorCode & status) const169     virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualID, UErrorCode& status) const {
170         LocaleKey& lkey = (LocaleKey&)key;
171         if (actualID) {
172             // Ugly Hack Alert! We return an empty actualID to signal
173             // to callers that this is a default object, not a "real"
174             // service-created object. (TODO remove in 3.0) [aliu]
175             actualID->truncate(0);
176         }
177         Locale loc("");
178         lkey.canonicalLocale(loc);
179         return Collator::makeInstance(loc, status);
180     }
181 
getKey(ICUServiceKey & key,UnicodeString * actualReturn,UErrorCode & status) const182     virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const {
183         UnicodeString ar;
184         if (actualReturn == NULL) {
185             actualReturn = &ar;
186         }
187         return (Collator*)ICULocaleService::getKey(key, actualReturn, status);
188     }
189 
isDefault() const190     virtual UBool isDefault() const {
191         return countFactories() == 1;
192     }
193 };
194 
~ICUCollatorService()195 ICUCollatorService::~ICUCollatorService() {}
196 
197 // -------------------------------------
198 
initService()199 static void U_CALLCONV initService() {
200     gService = new ICUCollatorService();
201     ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
202 }
203 
204 
205 static ICULocaleService*
getService(void)206 getService(void)
207 {
208     umtx_initOnce(gServiceInitOnce, &initService);
209     return gService;
210 }
211 
212 // -------------------------------------
213 
214 static inline UBool
hasService(void)215 hasService(void)
216 {
217     UBool retVal = !gServiceInitOnce.isReset() && (getService() != NULL);
218     return retVal;
219 }
220 
221 #endif /* UCONFIG_NO_SERVICE */
222 
223 static void U_CALLCONV
initAvailableLocaleList(UErrorCode & status)224 initAvailableLocaleList(UErrorCode &status) {
225     U_ASSERT(availableLocaleListCount == 0);
226     U_ASSERT(availableLocaleList == NULL);
227     // for now, there is a hardcoded list, so just walk through that list and set it up.
228     UResourceBundle *index = NULL;
229     StackUResourceBundle installed;
230     int32_t i = 0;
231 
232     index = ures_openDirect(U_ICUDATA_COLL, "res_index", &status);
233     ures_getByKey(index, "InstalledLocales", installed.getAlias(), &status);
234 
235     if(U_SUCCESS(status)) {
236         availableLocaleListCount = ures_getSize(installed.getAlias());
237         availableLocaleList = new Locale[availableLocaleListCount];
238 
239         if (availableLocaleList != NULL) {
240             ures_resetIterator(installed.getAlias());
241             while(ures_hasNext(installed.getAlias())) {
242                 const char *tempKey = NULL;
243                 ures_getNextString(installed.getAlias(), NULL, &tempKey, &status);
244                 availableLocaleList[i++] = Locale(tempKey);
245             }
246         }
247         U_ASSERT(availableLocaleListCount == i);
248     }
249     ures_close(index);
250     ucln_i18n_registerCleanup(UCLN_I18N_COLLATOR, collator_cleanup);
251 }
252 
isAvailableLocaleListInitialized(UErrorCode & status)253 static UBool isAvailableLocaleListInitialized(UErrorCode &status) {
254     umtx_initOnce(gAvailableLocaleListInitOnce, &initAvailableLocaleList, status);
255     return U_SUCCESS(status);
256 }
257 
258 
259 // Collator public methods -----------------------------------------------
260 
261 namespace {
262 
263 static const struct {
264     const char *name;
265     UColAttribute attr;
266 } collAttributes[] = {
267     { "colStrength", UCOL_STRENGTH },
268     { "colBackwards", UCOL_FRENCH_COLLATION },
269     { "colCaseLevel", UCOL_CASE_LEVEL },
270     { "colCaseFirst", UCOL_CASE_FIRST },
271     { "colAlternate", UCOL_ALTERNATE_HANDLING },
272     { "colNormalization", UCOL_NORMALIZATION_MODE },
273     { "colNumeric", UCOL_NUMERIC_COLLATION }
274 };
275 
276 static const struct {
277     const char *name;
278     UColAttributeValue value;
279 } collAttributeValues[] = {
280     { "primary", UCOL_PRIMARY },
281     { "secondary", UCOL_SECONDARY },
282     { "tertiary", UCOL_TERTIARY },
283     { "quaternary", UCOL_QUATERNARY },
284     // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
285     { "identical", UCOL_IDENTICAL },
286     { "no", UCOL_OFF },
287     { "yes", UCOL_ON },
288     { "shifted", UCOL_SHIFTED },
289     { "non-ignorable", UCOL_NON_IGNORABLE },
290     { "lower", UCOL_LOWER_FIRST },
291     { "upper", UCOL_UPPER_FIRST }
292 };
293 
294 static const char *collReorderCodes[UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST] = {
295     "space", "punct", "symbol", "currency", "digit"
296 };
297 
getReorderCode(const char * s)298 int32_t getReorderCode(const char *s) {
299     for (int32_t i = 0; i < UPRV_LENGTHOF(collReorderCodes); ++i) {
300         if (uprv_stricmp(s, collReorderCodes[i]) == 0) {
301             return UCOL_REORDER_CODE_FIRST + i;
302         }
303     }
304     // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
305     // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
306     // Avoid introducing synonyms/aliases.
307     return -1;
308 }
309 
310 /**
311  * Sets collation attributes according to locale keywords. See
312  * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
313  *
314  * Using "alias" keywords and values where defined:
315  * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
316  * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
317  */
setAttributesFromKeywords(const Locale & loc,Collator & coll,UErrorCode & errorCode)318 void setAttributesFromKeywords(const Locale &loc, Collator &coll, UErrorCode &errorCode) {
319     if (U_FAILURE(errorCode)) {
320         return;
321     }
322     if (uprv_strcmp(loc.getName(), loc.getBaseName()) == 0) {
323         // No keywords.
324         return;
325     }
326     char value[1024];  // The reordering value could be long.
327     // Check for collation keywords that were already deprecated
328     // before any were supported in createInstance() (except for "collation").
329     int32_t length = loc.getKeywordValue("colHiraganaQuaternary", value, UPRV_LENGTHOF(value), errorCode);
330     if (U_FAILURE(errorCode)) {
331         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
332         return;
333     }
334     if (length != 0) {
335         errorCode = U_UNSUPPORTED_ERROR;
336         return;
337     }
338     length = loc.getKeywordValue("variableTop", value, UPRV_LENGTHOF(value), errorCode);
339     if (U_FAILURE(errorCode)) {
340         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
341         return;
342     }
343     if (length != 0) {
344         errorCode = U_UNSUPPORTED_ERROR;
345         return;
346     }
347     // Parse known collation keywords, ignore others.
348     if (errorCode == U_STRING_NOT_TERMINATED_WARNING) {
349         errorCode = U_ZERO_ERROR;
350     }
351     for (int32_t i = 0; i < UPRV_LENGTHOF(collAttributes); ++i) {
352         length = loc.getKeywordValue(collAttributes[i].name, value, UPRV_LENGTHOF(value), errorCode);
353         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
354             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
355             return;
356         }
357         if (length == 0) { continue; }
358         for (int32_t j = 0;; ++j) {
359             if (j == UPRV_LENGTHOF(collAttributeValues)) {
360                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
361                 return;
362             }
363             if (uprv_stricmp(value, collAttributeValues[j].name) == 0) {
364                 coll.setAttribute(collAttributes[i].attr, collAttributeValues[j].value, errorCode);
365                 break;
366             }
367         }
368     }
369     length = loc.getKeywordValue("colReorder", value, UPRV_LENGTHOF(value), errorCode);
370     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
371         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
372         return;
373     }
374     if (length != 0) {
375         int32_t codes[USCRIPT_CODE_LIMIT + UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST];
376         int32_t codesLength = 0;
377         char *scriptName = value;
378         for (;;) {
379             if (codesLength == UPRV_LENGTHOF(codes)) {
380                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
381                 return;
382             }
383             char *limit = scriptName;
384             char c;
385             while ((c = *limit) != 0 && c != '-') { ++limit; }
386             *limit = 0;
387             int32_t code;
388             if ((limit - scriptName) == 4) {
389                 // Strict parsing, accept only 4-letter script codes, not long names.
390                 code = u_getPropertyValueEnum(UCHAR_SCRIPT, scriptName);
391             } else {
392                 code = getReorderCode(scriptName);
393             }
394             if (code < 0) {
395                 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
396                 return;
397             }
398             codes[codesLength++] = code;
399             if (c == 0) { break; }
400             scriptName = limit + 1;
401         }
402         coll.setReorderCodes(codes, codesLength, errorCode);
403     }
404     length = loc.getKeywordValue("kv", value, UPRV_LENGTHOF(value), errorCode);
405     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
406         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
407         return;
408     }
409     if (length != 0) {
410         int32_t code = getReorderCode(value);
411         if (code < 0) {
412             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
413             return;
414         }
415         coll.setMaxVariable((UColReorderCode)code, errorCode);
416     }
417     if (U_FAILURE(errorCode)) {
418         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
419     }
420 }
421 
422 }  // namespace
423 
createInstance(UErrorCode & success)424 Collator* U_EXPORT2 Collator::createInstance(UErrorCode& success)
425 {
426     return createInstance(Locale::getDefault(), success);
427 }
428 
createInstance(const Locale & desiredLocale,UErrorCode & status)429 Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale,
430                                    UErrorCode& status)
431 {
432     if (U_FAILURE(status))
433         return 0;
434     if (desiredLocale.isBogus()) {
435         // Locale constructed from malformed locale ID or language tag.
436         status = U_ILLEGAL_ARGUMENT_ERROR;
437         return NULL;
438     }
439 
440     Collator* coll;
441 #if !UCONFIG_NO_SERVICE
442     if (hasService()) {
443         Locale actualLoc;
444         coll = (Collator*)gService->get(desiredLocale, &actualLoc, status);
445     } else
446 #endif
447     {
448         coll = makeInstance(desiredLocale, status);
449         // Either returns NULL with U_FAILURE(status), or non-NULL with U_SUCCESS(status)
450     }
451     // The use of *coll in setAttributesFromKeywords can cause the NULL check to be
452     // optimized out of the delete even though setAttributesFromKeywords returns
453     // immediately if U_FAILURE(status), so we add a check here.
454     if (U_FAILURE(status)) {
455         return NULL;
456     }
457     setAttributesFromKeywords(desiredLocale, *coll, status);
458     if (U_FAILURE(status)) {
459         delete coll;
460         return NULL;
461     }
462     return coll;
463 }
464 
465 
makeInstance(const Locale & desiredLocale,UErrorCode & status)466 Collator* Collator::makeInstance(const Locale&  desiredLocale, UErrorCode& status) {
467     const CollationCacheEntry *entry = CollationLoader::loadTailoring(desiredLocale, status);
468     if (U_SUCCESS(status)) {
469         Collator *result = new RuleBasedCollator(entry);
470         if (result != NULL) {
471             // Both the unified cache's get() and the RBC constructor
472             // did addRef(). Undo one of them.
473             entry->removeRef();
474             return result;
475         }
476         status = U_MEMORY_ALLOCATION_ERROR;
477     }
478     if (entry != NULL) {
479         // Undo the addRef() from the cache.get().
480         entry->removeRef();
481     }
482     return NULL;
483 }
484 
485 Collator *
safeClone() const486 Collator::safeClone() const {
487     return clone();
488 }
489 
490 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target) const491 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
492                                     const UnicodeString& target) const
493 {
494     UErrorCode ec = U_ZERO_ERROR;
495     return (EComparisonResult)compare(source, target, ec);
496 }
497 
498 // implement deprecated, previously abstract method
compare(const UnicodeString & source,const UnicodeString & target,int32_t length) const499 Collator::EComparisonResult Collator::compare(const UnicodeString& source,
500                                     const UnicodeString& target,
501                                     int32_t length) const
502 {
503     UErrorCode ec = U_ZERO_ERROR;
504     return (EComparisonResult)compare(source, target, length, ec);
505 }
506 
507 // implement deprecated, previously abstract method
compare(const UChar * source,int32_t sourceLength,const UChar * target,int32_t targetLength) const508 Collator::EComparisonResult Collator::compare(const UChar* source, int32_t sourceLength,
509                                     const UChar* target, int32_t targetLength)
510                                     const
511 {
512     UErrorCode ec = U_ZERO_ERROR;
513     return (EComparisonResult)compare(source, sourceLength, target, targetLength, ec);
514 }
515 
compare(UCharIterator &,UCharIterator &,UErrorCode & status) const516 UCollationResult Collator::compare(UCharIterator &/*sIter*/,
517                                    UCharIterator &/*tIter*/,
518                                    UErrorCode &status) const {
519     if(U_SUCCESS(status)) {
520         // Not implemented in the base class.
521         status = U_UNSUPPORTED_ERROR;
522     }
523     return UCOL_EQUAL;
524 }
525 
compareUTF8(const StringPiece & source,const StringPiece & target,UErrorCode & status) const526 UCollationResult Collator::compareUTF8(const StringPiece &source,
527                                        const StringPiece &target,
528                                        UErrorCode &status) const {
529     if(U_FAILURE(status)) {
530         return UCOL_EQUAL;
531     }
532     UCharIterator sIter, tIter;
533     uiter_setUTF8(&sIter, source.data(), source.length());
534     uiter_setUTF8(&tIter, target.data(), target.length());
535     return compare(sIter, tIter, status);
536 }
537 
equals(const UnicodeString & source,const UnicodeString & target) const538 UBool Collator::equals(const UnicodeString& source,
539                        const UnicodeString& target) const
540 {
541     UErrorCode ec = U_ZERO_ERROR;
542     return (compare(source, target, ec) == UCOL_EQUAL);
543 }
544 
greaterOrEqual(const UnicodeString & source,const UnicodeString & target) const545 UBool Collator::greaterOrEqual(const UnicodeString& source,
546                                const UnicodeString& target) const
547 {
548     UErrorCode ec = U_ZERO_ERROR;
549     return (compare(source, target, ec) != UCOL_LESS);
550 }
551 
greater(const UnicodeString & source,const UnicodeString & target) const552 UBool Collator::greater(const UnicodeString& source,
553                         const UnicodeString& target) const
554 {
555     UErrorCode ec = U_ZERO_ERROR;
556     return (compare(source, target, ec) == UCOL_GREATER);
557 }
558 
559 // this API  ignores registered collators, since it returns an
560 // array of indefinite lifetime
getAvailableLocales(int32_t & count)561 const Locale* U_EXPORT2 Collator::getAvailableLocales(int32_t& count)
562 {
563     UErrorCode status = U_ZERO_ERROR;
564     Locale *result = NULL;
565     count = 0;
566     if (isAvailableLocaleListInitialized(status))
567     {
568         result = availableLocaleList;
569         count = availableLocaleListCount;
570     }
571     return result;
572 }
573 
getDisplayName(const Locale & objectLocale,const Locale & displayLocale,UnicodeString & name)574 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
575                                         const Locale& displayLocale,
576                                         UnicodeString& name)
577 {
578 #if !UCONFIG_NO_SERVICE
579     if (hasService()) {
580         UnicodeString locNameStr;
581         LocaleUtility::initNameFromLocale(objectLocale, locNameStr);
582         return gService->getDisplayName(locNameStr, name, displayLocale);
583     }
584 #endif
585     return objectLocale.getDisplayName(displayLocale, name);
586 }
587 
getDisplayName(const Locale & objectLocale,UnicodeString & name)588 UnicodeString& U_EXPORT2 Collator::getDisplayName(const Locale& objectLocale,
589                                         UnicodeString& name)
590 {
591     return getDisplayName(objectLocale, Locale::getDefault(), name);
592 }
593 
594 /* This is useless information */
595 /*void Collator::getVersion(UVersionInfo versionInfo) const
596 {
597   if (versionInfo!=NULL)
598     uprv_memcpy(versionInfo, fVersion, U_MAX_VERSION_LENGTH);
599 }
600 */
601 
602 // UCollator protected constructor destructor ----------------------------
603 
604 /**
605 * Default constructor.
606 * Constructor is different from the old default Collator constructor.
* The task for determining the default collation strength and normalization mode
608 * is left to the child class.
609 */
Collator()610 Collator::Collator()
611 : UObject()
612 {
613 }
614 
615 /**
616 * Constructor.
617 * Empty constructor, does not handle the arguments.
618 * This constructor is done for backward compatibility with 1.7 and 1.8.
619 * The task for handling the argument collation strength and normalization
620 * mode is left to the child class.
621 * @param collationStrength collation strength
622 * @param decompositionMode
623 * @deprecated 2.4 use the default constructor instead
624 */
Collator(UCollationStrength,UNormalizationMode)625 Collator::Collator(UCollationStrength, UNormalizationMode )
626 : UObject()
627 {
628 }
629 
~Collator()630 Collator::~Collator()
631 {
632 }
633 
// Copy constructor: only the UObject base is copied.
Collator::Collator(const Collator &other) : UObject(other) {}
638 
operator ==(const Collator & other) const639 UBool Collator::operator==(const Collator& other) const
640 {
641     // Subclasses: Call this method and then add more specific checks.
642     return typeid(*this) == typeid(other);
643 }
644 
operator !=(const Collator & other) const645 UBool Collator::operator!=(const Collator& other) const
646 {
647     return (UBool)!(*this == other);
648 }
649 
getBound(const uint8_t * source,int32_t sourceLength,UColBoundMode boundType,uint32_t noOfLevels,uint8_t * result,int32_t resultLength,UErrorCode & status)650 int32_t U_EXPORT2 Collator::getBound(const uint8_t       *source,
651                            int32_t             sourceLength,
652                            UColBoundMode       boundType,
653                            uint32_t            noOfLevels,
654                            uint8_t             *result,
655                            int32_t             resultLength,
656                            UErrorCode          &status)
657 {
658     return ucol_getBound(source, sourceLength, boundType, noOfLevels, result, resultLength, &status);
659 }
660 
661 void
setLocales(const Locale &,const Locale &,const Locale &)662 Collator::setLocales(const Locale& /* requestedLocale */, const Locale& /* validLocale */, const Locale& /*actualLocale*/) {
663 }
664 
getTailoredSet(UErrorCode & status) const665 UnicodeSet *Collator::getTailoredSet(UErrorCode &status) const
666 {
667     if(U_FAILURE(status)) {
668         return NULL;
669     }
670     // everything can be changed
671     return new UnicodeSet(0, 0x10FFFF);
672 }
673 
674 // -------------------------------------
675 
676 #if !UCONFIG_NO_SERVICE
677 URegistryKey U_EXPORT2
registerInstance(Collator * toAdopt,const Locale & locale,UErrorCode & status)678 Collator::registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status)
679 {
680     if (U_SUCCESS(status)) {
681         // Set the collator locales while registering so that createInstance()
682         // need not guess whether the collator's locales are already set properly
683         // (as they are by the data loader).
684         toAdopt->setLocales(locale, locale, locale);
685         return getService()->registerInstance(toAdopt, locale, status);
686     }
687     return NULL;
688 }
689 
690 // -------------------------------------
691 
692 class CFactory : public LocaleKeyFactory {
693 private:
694     CollatorFactory* _delegate;
695     Hashtable* _ids;
696 
697 public:
CFactory(CollatorFactory * delegate,UErrorCode & status)698     CFactory(CollatorFactory* delegate, UErrorCode& status)
699         : LocaleKeyFactory(delegate->visible() ? VISIBLE : INVISIBLE)
700         , _delegate(delegate)
701         , _ids(NULL)
702     {
703         if (U_SUCCESS(status)) {
704             int32_t count = 0;
705             _ids = new Hashtable(status);
706             if (_ids) {
707                 const UnicodeString * idlist = _delegate->getSupportedIDs(count, status);
708                 for (int i = 0; i < count; ++i) {
709                     _ids->put(idlist[i], (void*)this, status);
710                     if (U_FAILURE(status)) {
711                         delete _ids;
712                         _ids = NULL;
713                         return;
714                     }
715                 }
716             } else {
717                 status = U_MEMORY_ALLOCATION_ERROR;
718             }
719         }
720     }
721 
722     virtual ~CFactory();
723 
724     virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
725 
726 protected:
getSupportedIDs(UErrorCode & status) const727     virtual const Hashtable* getSupportedIDs(UErrorCode& status) const
728     {
729         if (U_SUCCESS(status)) {
730             return _ids;
731         }
732         return NULL;
733     }
734 
735     virtual UnicodeString&
736         getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
737 };
738 
~CFactory()739 CFactory::~CFactory()
740 {
741     delete _delegate;
742     delete _ids;
743 }
744 
745 UObject*
create(const ICUServiceKey & key,const ICUService *,UErrorCode & status) const746 CFactory::create(const ICUServiceKey& key, const ICUService* /* service */, UErrorCode& status) const
747 {
748     if (handlesKey(key, status)) {
749         const LocaleKey& lkey = (const LocaleKey&)key;
750         Locale validLoc;
751         lkey.currentLocale(validLoc);
752         return _delegate->createCollator(validLoc);
753     }
754     return NULL;
755 }
756 
757 UnicodeString&
getDisplayName(const UnicodeString & id,const Locale & locale,UnicodeString & result) const758 CFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const
759 {
760     if ((_coverage & 0x1) == 0) {
761         UErrorCode status = U_ZERO_ERROR;
762         const Hashtable* ids = getSupportedIDs(status);
763         if (ids && (ids->get(id) != NULL)) {
764             Locale loc;
765             LocaleUtility::initLocaleFromName(id, loc);
766             return _delegate->getDisplayName(loc, locale, result);
767         }
768     }
769     result.setToBogus();
770     return result;
771 }
772 
773 URegistryKey U_EXPORT2
registerFactory(CollatorFactory * toAdopt,UErrorCode & status)774 Collator::registerFactory(CollatorFactory* toAdopt, UErrorCode& status)
775 {
776     if (U_SUCCESS(status)) {
777         CFactory* f = new CFactory(toAdopt, status);
778         if (f) {
779             return getService()->registerFactory(f, status);
780         }
781         status = U_MEMORY_ALLOCATION_ERROR;
782     }
783     return NULL;
784 }
785 
786 // -------------------------------------
787 
788 UBool U_EXPORT2
unregister(URegistryKey key,UErrorCode & status)789 Collator::unregister(URegistryKey key, UErrorCode& status)
790 {
791     if (U_SUCCESS(status)) {
792         if (hasService()) {
793             return gService->unregister(key, status);
794         }
795         status = U_ILLEGAL_ARGUMENT_ERROR;
796     }
797     return FALSE;
798 }
799 #endif /* UCONFIG_NO_SERVICE */
800 
801 class CollationLocaleListEnumeration : public StringEnumeration {
802 private:
803     int32_t index;
804 public:
805     static UClassID U_EXPORT2 getStaticClassID(void);
806     virtual UClassID getDynamicClassID(void) const;
807 public:
CollationLocaleListEnumeration()808     CollationLocaleListEnumeration()
809         : index(0)
810     {
811         // The global variables should already be initialized.
812         //isAvailableLocaleListInitialized(status);
813     }
814 
815     virtual ~CollationLocaleListEnumeration();
816 
clone() const817     virtual StringEnumeration * clone() const
818     {
819         CollationLocaleListEnumeration *result = new CollationLocaleListEnumeration();
820         if (result) {
821             result->index = index;
822         }
823         return result;
824     }
825 
count(UErrorCode &) const826     virtual int32_t count(UErrorCode &/*status*/) const {
827         return availableLocaleListCount;
828     }
829 
next(int32_t * resultLength,UErrorCode &)830     virtual const char* next(int32_t* resultLength, UErrorCode& /*status*/) {
831         const char* result;
832         if(index < availableLocaleListCount) {
833             result = availableLocaleList[index++].getName();
834             if(resultLength != NULL) {
835                 *resultLength = (int32_t)uprv_strlen(result);
836             }
837         } else {
838             if(resultLength != NULL) {
839                 *resultLength = 0;
840             }
841             result = NULL;
842         }
843         return result;
844     }
845 
snext(UErrorCode & status)846     virtual const UnicodeString* snext(UErrorCode& status) {
847         int32_t resultLength = 0;
848         const char *s = next(&resultLength, status);
849         return setChars(s, resultLength, status);
850     }
851 
reset(UErrorCode &)852     virtual void reset(UErrorCode& /*status*/) {
853         index = 0;
854     }
855 };
856 
~CollationLocaleListEnumeration()857 CollationLocaleListEnumeration::~CollationLocaleListEnumeration() {}
858 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)859 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationLocaleListEnumeration)
860 
861 
862 // -------------------------------------
863 
864 StringEnumeration* U_EXPORT2
865 Collator::getAvailableLocales(void)
866 {
867 #if !UCONFIG_NO_SERVICE
868     if (hasService()) {
869         return getService()->getAvailableLocales();
870     }
871 #endif /* UCONFIG_NO_SERVICE */
872     UErrorCode status = U_ZERO_ERROR;
873     if (isAvailableLocaleListInitialized(status)) {
874         return new CollationLocaleListEnumeration();
875     }
876     return NULL;
877 }
878 
879 StringEnumeration* U_EXPORT2
getKeywords(UErrorCode & status)880 Collator::getKeywords(UErrorCode& status) {
881     return UStringEnumeration::fromUEnumeration(
882             ucol_getKeywords(&status), status);
883 }
884 
885 StringEnumeration* U_EXPORT2
getKeywordValues(const char * keyword,UErrorCode & status)886 Collator::getKeywordValues(const char *keyword, UErrorCode& status) {
887     return UStringEnumeration::fromUEnumeration(
888             ucol_getKeywordValues(keyword, &status), status);
889 }
890 
891 StringEnumeration* U_EXPORT2
getKeywordValuesForLocale(const char * key,const Locale & locale,UBool commonlyUsed,UErrorCode & status)892 Collator::getKeywordValuesForLocale(const char* key, const Locale& locale,
893                                     UBool commonlyUsed, UErrorCode& status) {
894     return UStringEnumeration::fromUEnumeration(
895             ucol_getKeywordValuesForLocale(
896                     key, locale.getName(), commonlyUsed, &status),
897             status);
898 }
899 
900 Locale U_EXPORT2
getFunctionalEquivalent(const char * keyword,const Locale & locale,UBool & isAvailable,UErrorCode & status)901 Collator::getFunctionalEquivalent(const char* keyword, const Locale& locale,
902                                   UBool& isAvailable, UErrorCode& status) {
903     // This is a wrapper over ucol_getFunctionalEquivalent
904     char loc[ULOC_FULLNAME_CAPACITY];
905     /*int32_t len =*/ ucol_getFunctionalEquivalent(loc, sizeof(loc),
906                     keyword, locale.getName(), &isAvailable, &status);
907     if (U_FAILURE(status)) {
908         *loc = 0; // root
909     }
910     return Locale::createFromName(loc);
911 }
912 
913 Collator::ECollationStrength
getStrength(void) const914 Collator::getStrength(void) const {
915     UErrorCode intStatus = U_ZERO_ERROR;
916     return (ECollationStrength)getAttribute(UCOL_STRENGTH, intStatus);
917 }
918 
919 void
setStrength(ECollationStrength newStrength)920 Collator::setStrength(ECollationStrength newStrength) {
921     UErrorCode intStatus = U_ZERO_ERROR;
922     setAttribute(UCOL_STRENGTH, (UColAttributeValue)newStrength, intStatus);
923 }
924 
925 Collator &
setMaxVariable(UColReorderCode,UErrorCode & errorCode)926 Collator::setMaxVariable(UColReorderCode /*group*/, UErrorCode &errorCode) {
927     if (U_SUCCESS(errorCode)) {
928         errorCode = U_UNSUPPORTED_ERROR;
929     }
930     return *this;
931 }
932 
933 UColReorderCode
getMaxVariable() const934 Collator::getMaxVariable() const {
935     return UCOL_REORDER_CODE_PUNCTUATION;
936 }
937 
938 int32_t
getReorderCodes(int32_t *,int32_t,UErrorCode & status) const939 Collator::getReorderCodes(int32_t* /* dest*/,
940                           int32_t /* destCapacity*/,
941                           UErrorCode& status) const
942 {
943     if (U_SUCCESS(status)) {
944         status = U_UNSUPPORTED_ERROR;
945     }
946     return 0;
947 }
948 
949 void
setReorderCodes(const int32_t *,int32_t,UErrorCode & status)950 Collator::setReorderCodes(const int32_t* /* reorderCodes */,
951                           int32_t /* reorderCodesLength */,
952                           UErrorCode& status)
953 {
954     if (U_SUCCESS(status)) {
955         status = U_UNSUPPORTED_ERROR;
956     }
957 }
958 
959 int32_t
getEquivalentReorderCodes(int32_t reorderCode,int32_t * dest,int32_t capacity,UErrorCode & errorCode)960 Collator::getEquivalentReorderCodes(int32_t reorderCode,
961                                     int32_t *dest, int32_t capacity,
962                                     UErrorCode &errorCode) {
963     if(U_FAILURE(errorCode)) { return 0; }
964     if(capacity < 0 || (dest == NULL && capacity > 0)) {
965         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
966         return 0;
967     }
968     const CollationData *baseData = CollationRoot::getData(errorCode);
969     if(U_FAILURE(errorCode)) { return 0; }
970     return baseData->getEquivalentScripts(reorderCode, dest, capacity, errorCode);
971 }
972 
973 int32_t
internalGetShortDefinitionString(const char *,char *,int32_t,UErrorCode & status) const974 Collator::internalGetShortDefinitionString(const char * /*locale*/,
975                                                              char * /*buffer*/,
976                                                              int32_t /*capacity*/,
977                                                              UErrorCode &status) const {
978   if(U_SUCCESS(status)) {
979     status = U_UNSUPPORTED_ERROR; /* Shouldn't happen, internal function */
980   }
981   return 0;
982 }
983 
984 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const985 Collator::internalCompareUTF8(const char *left, int32_t leftLength,
986                               const char *right, int32_t rightLength,
987                               UErrorCode &errorCode) const {
988     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
989     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
990         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
991         return UCOL_EQUAL;
992     }
993     return compareUTF8(
994             StringPiece(left, (leftLength < 0) ? static_cast<int32_t>(uprv_strlen(left)) : leftLength),
995             StringPiece(right, (rightLength < 0) ? static_cast<int32_t>(uprv_strlen(right)) : rightLength),
996             errorCode);
997 }
998 
999 int32_t
internalNextSortKeyPart(UCharIterator *,uint32_t[2],uint8_t *,int32_t,UErrorCode & errorCode) const1000 Collator::internalNextSortKeyPart(UCharIterator * /*iter*/, uint32_t /*state*/[2],
1001                                   uint8_t * /*dest*/, int32_t /*count*/, UErrorCode &errorCode) const {
1002     if (U_SUCCESS(errorCode)) {
1003         errorCode = U_UNSUPPORTED_ERROR;
1004     }
1005     return 0;
1006 }
1007 
1008 // UCollator private data members ----------------------------------------
1009 
1010 /* This is useless information */
1011 /*const UVersionInfo Collator::fVersion = {1, 1, 0, 0};*/
1012 
1013 // -------------------------------------
1014 
1015 U_NAMESPACE_END
1016 
1017 #endif /* #if !UCONFIG_NO_COLLATION */
1018 
1019 /* eof */
1020