1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * loadednormalizer2impl.cpp
9 *
10 * created on: 2014sep03
11 * created by: Markus W. Scherer
12 */
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_NORMALIZATION
17 
18 #include "unicode/udata.h"
19 #include "unicode/localpointer.h"
20 #include "unicode/normalizer2.h"
21 #include "unicode/unistr.h"
22 #include "unicode/unorm.h"
23 #include "cstring.h"
24 #include "mutex.h"
25 #include "norm2allmodes.h"
26 #include "normalizer2impl.h"
27 #include "uassert.h"
28 #include "ucln_cmn.h"
29 #include "uhash.h"
30 
31 U_NAMESPACE_BEGIN
32 
33 class LoadedNormalizer2Impl : public Normalizer2Impl {
34 public:
LoadedNormalizer2Impl()35     LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
36     virtual ~LoadedNormalizer2Impl();
37 
38     void load(const char *packageName, const char *name, UErrorCode &errorCode);
39 
40 private:
41     static UBool U_CALLCONV
42     isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
43 
44     UDataMemory *memory;
45     UTrie2 *ownedTrie;
46 };
47 
~LoadedNormalizer2Impl()48 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
49     udata_close(memory);
50     utrie2_close(ownedTrie);
51 }
52 
53 UBool U_CALLCONV
isAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)54 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
55                                     const char * /* type */, const char * /*name*/,
56                                     const UDataInfo *pInfo) {
57     if(
58         pInfo->size>=20 &&
59         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
60         pInfo->charsetFamily==U_CHARSET_FAMILY &&
61         pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
62         pInfo->dataFormat[1]==0x72 &&
63         pInfo->dataFormat[2]==0x6d &&
64         pInfo->dataFormat[3]==0x32 &&
65         pInfo->formatVersion[0]==3
66     ) {
67         // Normalizer2Impl *me=(Normalizer2Impl *)context;
68         // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
69         return TRUE;
70     } else {
71         return FALSE;
72     }
73 }
74 
75 void
load(const char * packageName,const char * name,UErrorCode & errorCode)76 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
77     if(U_FAILURE(errorCode)) {
78         return;
79     }
80     memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
81     if(U_FAILURE(errorCode)) {
82         return;
83     }
84     const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
85     const int32_t *inIndexes=(const int32_t *)inBytes;
86     int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
87     if(indexesLength<=IX_MIN_LCCC_CP) {
88         errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
89         return;
90     }
91 
92     int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
93     int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
94     ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
95                                         inBytes+offset, nextOffset-offset, NULL,
96                                         &errorCode);
97     if(U_FAILURE(errorCode)) {
98         return;
99     }
100 
101     offset=nextOffset;
102     nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
103     const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
104 
105     // smallFCD: new in formatVersion 2
106     offset=nextOffset;
107     const uint8_t *inSmallFCD=inBytes+offset;
108 
109     init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
110 }
111 
112 // instance cache ---------------------------------------------------------- ***
113 
114 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)115 Norm2AllModes::createInstance(const char *packageName,
116                               const char *name,
117                               UErrorCode &errorCode) {
118     if(U_FAILURE(errorCode)) {
119         return NULL;
120     }
121     LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
122     if(impl==NULL) {
123         errorCode=U_MEMORY_ALLOCATION_ERROR;
124         return NULL;
125     }
126     impl->load(packageName, name, errorCode);
127     return createInstance(impl, errorCode);
128 }
129 
130 U_CDECL_BEGIN
131 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
132 U_CDECL_END
133 
134 static Norm2AllModes *nfkcSingleton;
135 static Norm2AllModes *nfkc_cfSingleton;
136 static UHashtable    *cache=NULL;
137 
138 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
139 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
140 
141 // UInitOnce singleton initialization function
initSingletons(const char * what,UErrorCode & errorCode)142 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
143     if (uprv_strcmp(what, "nfkc") == 0) {
144         nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
145     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
146         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
147     } else {
148         U_ASSERT(FALSE);   // Unknown singleton
149     }
150     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
151 }
152 
153 U_CDECL_BEGIN
154 
deleteNorm2AllModes(void * allModes)155 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
156     delete (Norm2AllModes *)allModes;
157 }
158 
uprv_loaded_normalizer2_cleanup()159 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
160     delete nfkcSingleton;
161     nfkcSingleton = NULL;
162     delete nfkc_cfSingleton;
163     nfkc_cfSingleton = NULL;
164     uhash_close(cache);
165     cache=NULL;
166     nfkcInitOnce.reset();
167     nfkc_cfInitOnce.reset();
168     return TRUE;
169 }
170 
171 U_CDECL_END
172 
173 const Norm2AllModes *
getNFKCInstance(UErrorCode & errorCode)174 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
175     if(U_FAILURE(errorCode)) { return NULL; }
176     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
177     return nfkcSingleton;
178 }
179 
180 const Norm2AllModes *
getNFKC_CFInstance(UErrorCode & errorCode)181 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
182     if(U_FAILURE(errorCode)) { return NULL; }
183     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
184     return nfkc_cfSingleton;
185 }
186 
187 const Normalizer2 *
getNFKCInstance(UErrorCode & errorCode)188 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
189     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
190     return allModes!=NULL ? &allModes->comp : NULL;
191 }
192 
193 const Normalizer2 *
getNFKDInstance(UErrorCode & errorCode)194 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
195     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
196     return allModes!=NULL ? &allModes->decomp : NULL;
197 }
198 
199 const Normalizer2 *
getNFKCCasefoldInstance(UErrorCode & errorCode)200 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
201     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
202     return allModes!=NULL ? &allModes->comp : NULL;
203 }
204 
205 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)206 Normalizer2::getInstance(const char *packageName,
207                          const char *name,
208                          UNormalization2Mode mode,
209                          UErrorCode &errorCode) {
210     if(U_FAILURE(errorCode)) {
211         return NULL;
212     }
213     if(name==NULL || *name==0) {
214         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
215         return NULL;
216     }
217     const Norm2AllModes *allModes=NULL;
218     if(packageName==NULL) {
219         if(0==uprv_strcmp(name, "nfc")) {
220             allModes=Norm2AllModes::getNFCInstance(errorCode);
221         } else if(0==uprv_strcmp(name, "nfkc")) {
222             allModes=Norm2AllModes::getNFKCInstance(errorCode);
223         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
224             allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
225         }
226     }
227     if(allModes==NULL && U_SUCCESS(errorCode)) {
228         {
229             Mutex lock;
230             if(cache!=NULL) {
231                 allModes=(Norm2AllModes *)uhash_get(cache, name);
232             }
233         }
234         if(allModes==NULL) {
235             ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
236             LocalPointer<Norm2AllModes> localAllModes(
237                 Norm2AllModes::createInstance(packageName, name, errorCode));
238             if(U_SUCCESS(errorCode)) {
239                 Mutex lock;
240                 if(cache==NULL) {
241                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
242                     if(U_FAILURE(errorCode)) {
243                         return NULL;
244                     }
245                     uhash_setKeyDeleter(cache, uprv_free);
246                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
247                 }
248                 void *temp=uhash_get(cache, name);
249                 if(temp==NULL) {
250                     int32_t keyLength=uprv_strlen(name)+1;
251                     char *nameCopy=(char *)uprv_malloc(keyLength);
252                     if(nameCopy==NULL) {
253                         errorCode=U_MEMORY_ALLOCATION_ERROR;
254                         return NULL;
255                     }
256                     uprv_memcpy(nameCopy, name, keyLength);
257                     allModes=localAllModes.getAlias();
258                     uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
259                 } else {
260                     // race condition
261                     allModes=(Norm2AllModes *)temp;
262                 }
263             }
264         }
265     }
266     if(allModes!=NULL && U_SUCCESS(errorCode)) {
267         switch(mode) {
268         case UNORM2_COMPOSE:
269             return &allModes->comp;
270         case UNORM2_DECOMPOSE:
271             return &allModes->decomp;
272         case UNORM2_FCD:
273             return &allModes->fcd;
274         case UNORM2_COMPOSE_CONTIGUOUS:
275             return &allModes->fcc;
276         default:
277             break;  // do nothing
278         }
279     }
280     return NULL;
281 }
282 
283 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)284 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
285     if(U_FAILURE(errorCode)) {
286         return NULL;
287     }
288     switch(mode) {
289     case UNORM_NFD:
290         return Normalizer2::getNFDInstance(errorCode);
291     case UNORM_NFKD:
292         return Normalizer2::getNFKDInstance(errorCode);
293     case UNORM_NFC:
294         return Normalizer2::getNFCInstance(errorCode);
295     case UNORM_NFKC:
296         return Normalizer2::getNFKCInstance(errorCode);
297     case UNORM_FCD:
298         return getFCDInstance(errorCode);
299     default:  // UNORM_NONE
300         return getNoopInstance(errorCode);
301     }
302 }
303 
304 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)305 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
306     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
307     return allModes!=NULL ? allModes->impl : NULL;
308 }
309 
310 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)311 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
312     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
313     return allModes!=NULL ? allModes->impl : NULL;
314 }
315 
316 U_NAMESPACE_END
317 
318 // C API ------------------------------------------------------------------- ***
319 
320 U_NAMESPACE_USE
321 
322 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode * pErrorCode)323 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
324     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
325 }
326 
327 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode * pErrorCode)328 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
329     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
330 }
331 
332 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode * pErrorCode)333 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
334     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
335 }
336 
337 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode * pErrorCode)338 unorm2_getInstance(const char *packageName,
339                    const char *name,
340                    UNormalization2Mode mode,
341                    UErrorCode *pErrorCode) {
342     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
343 }
344 
345 U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)346 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
347     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
348         return UNORM_YES;
349     }
350     UErrorCode errorCode=U_ZERO_ERROR;
351     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
352     if(U_SUCCESS(errorCode)) {
353         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
354     } else {
355         return UNORM_MAYBE;
356     }
357 }
358 
359 #endif  // !UCONFIG_NO_NORMALIZATION
360