1 /*
2 *******************************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * loadednormalizer2impl.cpp
7 *
8 * created on: 2014sep03
9 * created by: Markus W. Scherer
10 */
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_NORMALIZATION
15 
16 #include "unicode/udata.h"
17 #include "unicode/localpointer.h"
18 #include "unicode/normalizer2.h"
19 #include "unicode/unistr.h"
20 #include "unicode/unorm.h"
21 #include "cstring.h"
22 #include "mutex.h"
23 #include "norm2allmodes.h"
24 #include "normalizer2impl.h"
25 #include "uassert.h"
26 #include "ucln_cmn.h"
27 #include "uhash.h"
28 
29 U_NAMESPACE_BEGIN
30 
31 class LoadedNormalizer2Impl : public Normalizer2Impl {
32 public:
LoadedNormalizer2Impl()33     LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
34     virtual ~LoadedNormalizer2Impl();
35 
36     void load(const char *packageName, const char *name, UErrorCode &errorCode);
37 
38 private:
39     static UBool U_CALLCONV
40     isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
41 
42     UDataMemory *memory;
43     UTrie2 *ownedTrie;
44 };
45 
~LoadedNormalizer2Impl()46 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
47     udata_close(memory);
48     utrie2_close(ownedTrie);
49 }
50 
51 UBool U_CALLCONV
isAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)52 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
53                                     const char * /* type */, const char * /*name*/,
54                                     const UDataInfo *pInfo) {
55     if(
56         pInfo->size>=20 &&
57         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
58         pInfo->charsetFamily==U_CHARSET_FAMILY &&
59         pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
60         pInfo->dataFormat[1]==0x72 &&
61         pInfo->dataFormat[2]==0x6d &&
62         pInfo->dataFormat[3]==0x32 &&
63         pInfo->formatVersion[0]==2
64     ) {
65         // Normalizer2Impl *me=(Normalizer2Impl *)context;
66         // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
67         return TRUE;
68     } else {
69         return FALSE;
70     }
71 }
72 
73 void
load(const char * packageName,const char * name,UErrorCode & errorCode)74 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
75     if(U_FAILURE(errorCode)) {
76         return;
77     }
78     memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
79     if(U_FAILURE(errorCode)) {
80         return;
81     }
82     const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
83     const int32_t *inIndexes=(const int32_t *)inBytes;
84     int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
85     if(indexesLength<=IX_MIN_MAYBE_YES) {
86         errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
87         return;
88     }
89 
90     int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
91     int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
92     ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
93                                         inBytes+offset, nextOffset-offset, NULL,
94                                         &errorCode);
95     if(U_FAILURE(errorCode)) {
96         return;
97     }
98 
99     offset=nextOffset;
100     nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
101     const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
102 
103     // smallFCD: new in formatVersion 2
104     offset=nextOffset;
105     const uint8_t *inSmallFCD=inBytes+offset;
106 
107     init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
108 }
109 
110 // instance cache ---------------------------------------------------------- ***
111 
112 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)113 Norm2AllModes::createInstance(const char *packageName,
114                               const char *name,
115                               UErrorCode &errorCode) {
116     if(U_FAILURE(errorCode)) {
117         return NULL;
118     }
119     LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
120     if(impl==NULL) {
121         errorCode=U_MEMORY_ALLOCATION_ERROR;
122         return NULL;
123     }
124     impl->load(packageName, name, errorCode);
125     return createInstance(impl, errorCode);
126 }
127 
128 U_CDECL_BEGIN
129 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
130 U_CDECL_END
131 
132 static Norm2AllModes *nfkcSingleton;
133 static Norm2AllModes *nfkc_cfSingleton;
134 static UHashtable    *cache=NULL;
135 
136 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
137 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
138 
139 // UInitOnce singleton initialization function
initSingletons(const char * what,UErrorCode & errorCode)140 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
141     if (uprv_strcmp(what, "nfkc") == 0) {
142         nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
143     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
144         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
145     } else {
146         U_ASSERT(FALSE);   // Unknown singleton
147     }
148     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
149 }
150 
151 U_CDECL_BEGIN
152 
deleteNorm2AllModes(void * allModes)153 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
154     delete (Norm2AllModes *)allModes;
155 }
156 
uprv_loaded_normalizer2_cleanup()157 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
158     delete nfkcSingleton;
159     nfkcSingleton = NULL;
160     delete nfkc_cfSingleton;
161     nfkc_cfSingleton = NULL;
162     uhash_close(cache);
163     cache=NULL;
164     nfkcInitOnce.reset();
165     nfkc_cfInitOnce.reset();
166     return TRUE;
167 }
168 
169 U_CDECL_END
170 
171 const Norm2AllModes *
getNFKCInstance(UErrorCode & errorCode)172 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
173     if(U_FAILURE(errorCode)) { return NULL; }
174     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
175     return nfkcSingleton;
176 }
177 
178 const Norm2AllModes *
getNFKC_CFInstance(UErrorCode & errorCode)179 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
180     if(U_FAILURE(errorCode)) { return NULL; }
181     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
182     return nfkc_cfSingleton;
183 }
184 
185 const Normalizer2 *
getNFKCInstance(UErrorCode & errorCode)186 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
187     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
188     return allModes!=NULL ? &allModes->comp : NULL;
189 }
190 
191 const Normalizer2 *
getNFKDInstance(UErrorCode & errorCode)192 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
193     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
194     return allModes!=NULL ? &allModes->decomp : NULL;
195 }
196 
197 const Normalizer2 *
getNFKCCasefoldInstance(UErrorCode & errorCode)198 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
199     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
200     return allModes!=NULL ? &allModes->comp : NULL;
201 }
202 
203 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)204 Normalizer2::getInstance(const char *packageName,
205                          const char *name,
206                          UNormalization2Mode mode,
207                          UErrorCode &errorCode) {
208     if(U_FAILURE(errorCode)) {
209         return NULL;
210     }
211     if(name==NULL || *name==0) {
212         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
213         return NULL;
214     }
215     const Norm2AllModes *allModes=NULL;
216     if(packageName==NULL) {
217         if(0==uprv_strcmp(name, "nfc")) {
218             allModes=Norm2AllModes::getNFCInstance(errorCode);
219         } else if(0==uprv_strcmp(name, "nfkc")) {
220             allModes=Norm2AllModes::getNFKCInstance(errorCode);
221         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
222             allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
223         }
224     }
225     if(allModes==NULL && U_SUCCESS(errorCode)) {
226         {
227             Mutex lock;
228             if(cache!=NULL) {
229                 allModes=(Norm2AllModes *)uhash_get(cache, name);
230             }
231         }
232         if(allModes==NULL) {
233             LocalPointer<Norm2AllModes> localAllModes(
234                 Norm2AllModes::createInstance(packageName, name, errorCode));
235             if(U_SUCCESS(errorCode)) {
236                 Mutex lock;
237                 if(cache==NULL) {
238                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
239                     if(U_FAILURE(errorCode)) {
240                         return NULL;
241                     }
242                     uhash_setKeyDeleter(cache, uprv_free);
243                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
244                 }
245                 void *temp=uhash_get(cache, name);
246                 if(temp==NULL) {
247                     int32_t keyLength=uprv_strlen(name)+1;
248                     char *nameCopy=(char *)uprv_malloc(keyLength);
249                     if(nameCopy==NULL) {
250                         errorCode=U_MEMORY_ALLOCATION_ERROR;
251                         return NULL;
252                     }
253                     uprv_memcpy(nameCopy, name, keyLength);
254                     allModes=localAllModes.getAlias();
255                     uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
256                 } else {
257                     // race condition
258                     allModes=(Norm2AllModes *)temp;
259                 }
260             }
261         }
262     }
263     if(allModes!=NULL && U_SUCCESS(errorCode)) {
264         switch(mode) {
265         case UNORM2_COMPOSE:
266             return &allModes->comp;
267         case UNORM2_DECOMPOSE:
268             return &allModes->decomp;
269         case UNORM2_FCD:
270             return &allModes->fcd;
271         case UNORM2_COMPOSE_CONTIGUOUS:
272             return &allModes->fcc;
273         default:
274             break;  // do nothing
275         }
276     }
277     return NULL;
278 }
279 
280 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)281 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
282     if(U_FAILURE(errorCode)) {
283         return NULL;
284     }
285     switch(mode) {
286     case UNORM_NFD:
287         return Normalizer2::getNFDInstance(errorCode);
288     case UNORM_NFKD:
289         return Normalizer2::getNFKDInstance(errorCode);
290     case UNORM_NFC:
291         return Normalizer2::getNFCInstance(errorCode);
292     case UNORM_NFKC:
293         return Normalizer2::getNFKCInstance(errorCode);
294     case UNORM_FCD:
295         return getFCDInstance(errorCode);
296     default:  // UNORM_NONE
297         return getNoopInstance(errorCode);
298     }
299 }
300 
301 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)302 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
303     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
304     return allModes!=NULL ? allModes->impl : NULL;
305 }
306 
307 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)308 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
309     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
310     return allModes!=NULL ? allModes->impl : NULL;
311 }
312 
313 U_NAMESPACE_END
314 
315 // C API ------------------------------------------------------------------- ***
316 
317 U_NAMESPACE_USE
318 
319 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode * pErrorCode)320 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
321     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
322 }
323 
324 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode * pErrorCode)325 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
326     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
327 }
328 
329 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode * pErrorCode)330 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
331     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
332 }
333 
334 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode * pErrorCode)335 unorm2_getInstance(const char *packageName,
336                    const char *name,
337                    UNormalization2Mode mode,
338                    UErrorCode *pErrorCode) {
339     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
340 }
341 
342 U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)343 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
344     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
345         return UNORM_YES;
346     }
347     UErrorCode errorCode=U_ZERO_ERROR;
348     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
349     if(U_SUCCESS(errorCode)) {
350         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
351     } else {
352         return UNORM_MAYBE;
353     }
354 }
355 
356 #endif  // !UCONFIG_NO_NORMALIZATION
357