1 /*
2  ********************************************************************
3  * COPYRIGHT:
4  * Copyright (c) 1996-2016, International Business Machines Corporation and
5  * others. All Rights Reserved.
6  ********************************************************************
7  *
8  *  ucnv_bld.cpp:
9  *
10  *  Defines functions that are used in the creation/initialization/deletion
11  *  of converters and related structures.
 *  uses ucnv_io.h routines to access disk information
13  *  is used by ucnv.h to implement public API create/delete/flushCache routines
14  * Modification History:
15  *
16  *   Date        Name        Description
17  *
18  *   06/20/2000  helena      OS/400 port changes; mostly typecast.
19  *   06/29/2000  helena      Major rewrite of the callback interface.
20 */
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_CONVERSION
25 
26 #include "unicode/putil.h"
27 #include "unicode/udata.h"
28 #include "unicode/ucnv.h"
29 #include "unicode/uloc.h"
30 #include "mutex.h"
31 #include "putilimp.h"
32 #include "uassert.h"
33 #include "utracimp.h"
34 #include "ucnv_io.h"
35 #include "ucnv_bld.h"
36 #include "ucnvmbcs.h"
37 #include "ucnv_ext.h"
38 #include "ucnv_cnv.h"
39 #include "ucnv_imp.h"
40 #include "uhash.h"
41 #include "umutex.h"
42 #include "cstring.h"
43 #include "cmemory.h"
44 #include "ucln_cmn.h"
45 #include "ustr_cnv.h"
46 
47 
/*
 * Debug logging hook for the converter cache.
 * The "#if 0" branch is disabled, so UCNV_DEBUG_LOG(x,y,z) expands to nothing
 * and every use of it in this file compiles away.
 * Flipping the condition would route the calls to an external
 * UCNV_DEBUG_LOG() function, with __LINE__ appended as a fourth argument.
 */
#if 0
#include <stdio.h>
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
# define UCNV_DEBUG_LOG(x,y,z)
#endif
55 
/*
 * Table of the built-in shared-data structures, indexed by UConverterType.
 *
 * The code in this file looks entries up as converterData[type], so each slot
 * position has to correspond to the numeric value of the matching
 * UConverterType constant (the enum is declared elsewhere -- keep the two in
 * sync when adding a type).
 *
 * A NULL slot means that no shared data is available for that type in the
 * current build configuration (UCONFIG_NO_LEGACY_CONVERSION and/or
 * UCONFIG_ONLY_HTML_CONVERSION); users of the table check for NULL before
 * dereferencing an entry.
 */
static const UConverterSharedData * const
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
    NULL, NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_MBCSData,
#endif

    &_Latin1Data,
    &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL,
#else
    &_UTF32BEData, &_UTF32LEData,
#endif
    NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_ISO2022Data,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
#else
    &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
    &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
    &_HZData,
#endif

#if UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_SCSUData,
#endif


#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_ISCIIData,
#endif

    &_ASCIIData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
#else
    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_CompoundTextData
#endif
};
117 
/*
 * Maps the names of algorithmic converters to their UConverterType.
 *
 * getAlgorithmicTypeFromName() performs a binary search over this table with
 * uprv_strcmp(), so the entries MUST stay sorted in strcmp (byte) order in
 * every build configuration -- note that "lmbcs11" therefore sorts before
 * "lmbcs2".
 *
 * Each name must be given in its stripped form: lower case, with all spaces,
 * dashes and underscores removed, because the lookup key is normalized that
 * way before the search.
 */
static struct {
  const char *name;
  const UConverterType type;
} const cnvNameType[] = {
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "bocu1", UCNV_BOCU1 },
  { "cesu8", UCNV_CESU8 },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "hz",UCNV_HZ },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "imapmailboxname", UCNV_IMAP_MAILBOX },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "iscii", UCNV_ISCII },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION
  { "iso2022", UCNV_ISO_2022 },
#endif
  { "iso88591", UCNV_LATIN_1 },
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  { "lmbcs1", UCNV_LMBCS_1 },
  { "lmbcs11",UCNV_LMBCS_11 },
  { "lmbcs16",UCNV_LMBCS_16 },
  { "lmbcs17",UCNV_LMBCS_17 },
  { "lmbcs18",UCNV_LMBCS_18 },
  { "lmbcs19",UCNV_LMBCS_19 },
  { "lmbcs2", UCNV_LMBCS_2 },
  { "lmbcs3", UCNV_LMBCS_3 },
  { "lmbcs4", UCNV_LMBCS_4 },
  { "lmbcs5", UCNV_LMBCS_5 },
  { "lmbcs6", UCNV_LMBCS_6 },
  { "lmbcs8", UCNV_LMBCS_8 },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "scsu", UCNV_SCSU },
#endif
  { "usascii", UCNV_US_ASCII },
  { "utf16", UCNV_UTF16 },
  { "utf16be", UCNV_UTF16_BigEndian },
  { "utf16le", UCNV_UTF16_LittleEndian },
  /* "platform"/"opposite" endian aliases resolve at compile time */
#if U_IS_BIG_ENDIAN
  { "utf16oppositeendian", UCNV_UTF16_LittleEndian },
  { "utf16platformendian", UCNV_UTF16_BigEndian },
#else
  { "utf16oppositeendian", UCNV_UTF16_BigEndian},
  { "utf16platformendian", UCNV_UTF16_LittleEndian },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "utf32", UCNV_UTF32 },
  { "utf32be", UCNV_UTF32_BigEndian },
  { "utf32le", UCNV_UTF32_LittleEndian },
#if U_IS_BIG_ENDIAN
  { "utf32oppositeendian", UCNV_UTF32_LittleEndian },
  { "utf32platformendian", UCNV_UTF32_BigEndian },
#else
  { "utf32oppositeendian", UCNV_UTF32_BigEndian },
  { "utf32platformendian", UCNV_UTF32_LittleEndian },
#endif
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "utf7", UCNV_UTF7 },
#endif
  { "utf8", UCNV_UTF8 },
#if !UCONFIG_ONLY_HTML_CONVERSION
  { "x11compoundtext", UCNV_COMPOUND_TEXT}
#endif
};
191 
192 
/* File-scope state of the converter cache. */

/* Cache of loaded, shareable converter data, keyed by converter name.      */
/* Created lazily by ucnv_shareConverterData(); NULL until then.            */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER;  /* Mutex for synchronizing cnv cache access. */
                                                    /* NOTE(review): an earlier note here said    */
                                                    /* the global mutex is used for reference     */
                                                    /* count updates, but the updates visible in  */
                                                    /* this file (ucnv_incrementRefCount,         */
                                                    /* ucnv_unloadSharedDataIfReady) are made     */
                                                    /* while holding cnvCacheMutex.               */

/* Cached list of available converter names, its length, and the init-once */
/* guard for it. All three are reset by ucnv_flushAvailableConverterCache(). */
static const char **gAvailableConverters = NULL;
static uint16_t gAvailableConverterCount = 0;
static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER;

#if !U_CHARSET_IS_UTF8

/* This contains the resolved converter name. So no further alias lookup is needed again. */
static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for the terminating NUL */
static const char *gDefaultConverterName = NULL;

/*
If the default converter is an algorithmic converter, this is the cached value.
We don't cache a full UConverter and clone it because ucnv_clone doesn't have
less overhead than an algorithmic open. We don't cache non-algorithmic converters
because ucnv_flushCache must be able to unload the default converter and its table.
*/
static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;

/* Does gDefaultConverterName have a converter option and require extra parsing? */
static UBool gDefaultConverterContainsOption;

#endif  /* !U_CHARSET_IS_UTF8 */

/* Data item type passed to udata_openChoice() when loading a converter file. */
static const char DATA_TYPE[] = "cnv";
223 
224 /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup().
225  *                       If it is ever to be called from elsewhere, synchronization
226  *                       will need to be considered.
227  */
228 static void
ucnv_flushAvailableConverterCache()229 ucnv_flushAvailableConverterCache() {
230     gAvailableConverterCount = 0;
231     if (gAvailableConverters) {
232         uprv_free((char **)gAvailableConverters);
233         gAvailableConverters = NULL;
234     }
235     gAvailableConvertersInitOnce.reset();
236 }
237 
238 /* ucnv_cleanup - delete all storage held by the converter cache, except any  */
239 /*                in use by open converters.                                  */
240 /*                Not thread safe.                                            */
241 /*                Not supported API.                                          */
ucnv_cleanup(void)242 static UBool U_CALLCONV ucnv_cleanup(void) {
243     ucnv_flushCache();
244     if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
245         uhash_close(SHARED_DATA_HASHTABLE);
246         SHARED_DATA_HASHTABLE = NULL;
247     }
248 
249     /* Isn't called from flushCache because other threads may have preexisting references to the table. */
250     ucnv_flushAvailableConverterCache();
251 
252 #if !U_CHARSET_IS_UTF8
253     gDefaultConverterName = NULL;
254     gDefaultConverterNameBuffer[0] = 0;
255     gDefaultConverterContainsOption = FALSE;
256     gDefaultAlgorithmicSharedData = NULL;
257 #endif
258 
259     return (SHARED_DATA_HASHTABLE == NULL);
260 }
261 
262 static UBool U_CALLCONV
isCnvAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)263 isCnvAcceptable(void * /*context*/,
264                 const char * /*type*/, const char * /*name*/,
265                 const UDataInfo *pInfo) {
266     return (UBool)(
267         pInfo->size>=20 &&
268         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
269         pInfo->charsetFamily==U_CHARSET_FAMILY &&
270         pInfo->sizeofUChar==U_SIZEOF_UCHAR &&
271         pInfo->dataFormat[0]==0x63 &&   /* dataFormat="cnvt" */
272         pInfo->dataFormat[1]==0x6e &&
273         pInfo->dataFormat[2]==0x76 &&
274         pInfo->dataFormat[3]==0x74 &&
275         pInfo->formatVersion[0]==6);  /* Everything will be version 6 */
276 }
277 
278 /**
279  * Un flatten shared data from a UDATA..
280  */
281 static UConverterSharedData*
ucnv_data_unFlattenClone(UConverterLoadArgs * pArgs,UDataMemory * pData,UErrorCode * status)282 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status)
283 {
284     /* UDataInfo info; -- necessary only if some converters have different formatVersion */
285     const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
286     const UConverterStaticData *source = (const UConverterStaticData *) raw;
287     UConverterSharedData *data;
288     UConverterType type = (UConverterType)source->conversionType;
289 
290     if(U_FAILURE(*status))
291         return NULL;
292 
293     if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES ||
294         converterData[type] == NULL ||
295         !converterData[type]->isReferenceCounted ||
296         converterData[type]->referenceCounter != 1 ||
297         source->structSize != sizeof(UConverterStaticData))
298     {
299         *status = U_INVALID_TABLE_FORMAT;
300         return NULL;
301     }
302 
303     data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData));
304     if(data == NULL) {
305         *status = U_MEMORY_ALLOCATION_ERROR;
306         return NULL;
307     }
308 
309     /* copy initial values from the static structure for this type */
310     uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData));
311 
312     data->staticData = source;
313 
314     data->sharedDataCached = FALSE;
315 
316     /* fill in fields from the loaded data */
317     data->dataMemory = (void*)pData; /* for future use */
318 
319     if(data->impl->load != NULL) {
320         data->impl->load(data, pArgs, raw + source->structSize, status);
321         if(U_FAILURE(*status)) {
322             uprv_free(data);
323             return NULL;
324         }
325     }
326     return data;
327 }
328 
329 /*Takes an alias name gets an actual converter file name
330  *goes to disk and opens it.
331  *allocates the memory and returns a new UConverter object
332  */
createConverterFromFile(UConverterLoadArgs * pArgs,UErrorCode * err)333 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err)
334 {
335     UDataMemory *data;
336     UConverterSharedData *sharedData;
337 
338     UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD);
339 
340     if (U_FAILURE (*err)) {
341         UTRACE_EXIT_STATUS(*err);
342         return NULL;
343     }
344 
345     UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg);
346 
347     data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err);
348     if(U_FAILURE(*err))
349     {
350         UTRACE_EXIT_STATUS(*err);
351         return NULL;
352     }
353 
354     sharedData = ucnv_data_unFlattenClone(pArgs, data, err);
355     if(U_FAILURE(*err))
356     {
357         udata_close(data);
358         UTRACE_EXIT_STATUS(*err);
359         return NULL;
360     }
361 
362     /*
363      * TODO Store pkg in a field in the shared data so that delta-only converters
364      * can load base converters from the same package.
365      * If the pkg name is longer than the field, then either do not load the converter
366      * in the first place, or just set the pkg field to "".
367      */
368 
369     UTRACE_EXIT_PTR_STATUS(sharedData, *err);
370     return sharedData;
371 }
372 
373 /*returns a converter type from a string
374  */
375 static const UConverterSharedData *
getAlgorithmicTypeFromName(const char * realName)376 getAlgorithmicTypeFromName(const char *realName)
377 {
378     uint32_t mid, start, limit;
379     uint32_t lastMid;
380     int result;
381     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
382 
383     /* Lower case and remove ignoreable characters. */
384     ucnv_io_stripForCompare(strippedName, realName);
385 
386     /* do a binary search for the alias */
387     start = 0;
388     limit = UPRV_LENGTHOF(cnvNameType);
389     mid = limit;
390     lastMid = UINT32_MAX;
391 
392     for (;;) {
393         mid = (uint32_t)((start + limit) / 2);
394         if (lastMid == mid) {   /* Have we moved? */
395             break;  /* We haven't moved, and it wasn't found. */
396         }
397         lastMid = mid;
398         result = uprv_strcmp(strippedName, cnvNameType[mid].name);
399 
400         if (result < 0) {
401             limit = mid;
402         } else if (result > 0) {
403             start = mid;
404         } else {
405             return converterData[cnvNameType[mid].type];
406         }
407     }
408 
409     return NULL;
410 }
411 
/*
* Multiplier applied to the number of known converters to get the initial
* size of the shared data hash table (see ucnv_shareConverterData()).
* On small platforms, or when just a couple of converters are used, this
* number should be 2. When memory is plentiful, or when ucnv_countAvailable
* is ever used with a lot of available converters, this should be 4.
* Larger numbers reduce the number of hash collisions, but use more memory.
*/
#define UCNV_CACHE_LOAD_FACTOR 2
421 
422 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
423 /*   Will always be called with the cnvCacheMutex alrady being held   */
424 /*     by the calling function.                                       */
425 /* Stores the shared data in the SHARED_DATA_HASHTABLE
426  * @param data The shared data
427  */
428 static void
ucnv_shareConverterData(UConverterSharedData * data)429 ucnv_shareConverterData(UConverterSharedData * data)
430 {
431     UErrorCode err = U_ZERO_ERROR;
432     /*Lazy evaluates the Hashtable itself */
433     /*void *sanity = NULL;*/
434 
435     if (SHARED_DATA_HASHTABLE == NULL)
436     {
437         SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
438                             ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
439                             &err);
440         ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
441 
442         if (U_FAILURE(err))
443             return;
444     }
445 
446     /* ### check to see if the element is not already there! */
447 
448     /*
449     sanity =   ucnv_getSharedConverterData (data->staticData->name);
450     if(sanity != NULL)
451     {
452     UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity);
453     }
454     UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity);
455     */
456 
457     /* Mark it shared */
458     data->sharedDataCached = TRUE;
459 
460     uhash_put(SHARED_DATA_HASHTABLE,
461             (void*) data->staticData->name, /* Okay to cast away const as long as
462             keyDeleter == NULL */
463             data,
464             &err);
465     UCNV_DEBUG_LOG("put", data->staticData->name,data);
466 
467 }
468 
469 /*  Look up a converter name in the shared data cache.                    */
470 /*    cnvCacheMutex must be held by the caller to protect the hash table. */
471 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there)
472  * @param name The name of the shared data
473  * @return the shared data from the SHARED_DATA_HASHTABLE
474  */
475 static UConverterSharedData *
ucnv_getSharedConverterData(const char * name)476 ucnv_getSharedConverterData(const char *name)
477 {
478     /*special case when no Table has yet been created we return NULL */
479     if (SHARED_DATA_HASHTABLE == NULL)
480     {
481         return NULL;
482     }
483     else
484     {
485         UConverterSharedData *rc;
486 
487         rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name);
488         UCNV_DEBUG_LOG("get",name,rc);
489         return rc;
490     }
491 }
492 
493 /*frees the string of memory blocks associates with a sharedConverter
494  *if and only if the referenceCounter == 0
495  */
496 /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to
497  * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and
498  * returns TRUE,
499  * otherwise returns FALSE
500  * @param sharedConverterData The shared data
501  * @return if not it frees all the memory stemming from sharedConverterData and
502  * returns TRUE, otherwise returns FALSE
503  */
504 static UBool
ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)505 ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)
506 {
507     UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
508     UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData);
509 
510     if (deadSharedData->referenceCounter > 0) {
511         UTRACE_EXIT_VALUE((int32_t)FALSE);
512         return FALSE;
513     }
514 
515     if (deadSharedData->impl->unload != NULL) {
516         deadSharedData->impl->unload(deadSharedData);
517     }
518 
519     if(deadSharedData->dataMemory != NULL)
520     {
521         UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory;
522         udata_close(data);
523     }
524 
525     uprv_free(deadSharedData);
526 
527     UTRACE_EXIT_VALUE((int32_t)TRUE);
528     return TRUE;
529 }
530 
531 /**
532  * Load a non-algorithmic converter.
533  * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
534  */
535 UConverterSharedData *
ucnv_load(UConverterLoadArgs * pArgs,UErrorCode * err)536 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) {
537     UConverterSharedData *mySharedConverterData;
538 
539     if(err == NULL || U_FAILURE(*err)) {
540         return NULL;
541     }
542 
543     if(pArgs->pkg != NULL && *pArgs->pkg != 0) {
544         /* application-provided converters are not currently cached */
545         return createConverterFromFile(pArgs, err);
546     }
547 
548     mySharedConverterData = ucnv_getSharedConverterData(pArgs->name);
549     if (mySharedConverterData == NULL)
550     {
551         /*Not cached, we need to stream it in from file */
552         mySharedConverterData = createConverterFromFile(pArgs, err);
553         if (U_FAILURE (*err) || (mySharedConverterData == NULL))
554         {
555             return NULL;
556         }
557         else if (!pArgs->onlyTestIsLoadable)
558         {
559             /* share it with other library clients */
560             ucnv_shareConverterData(mySharedConverterData);
561         }
562     }
563     else
564     {
565         /* The data for this converter was already in the cache.            */
566         /* Update the reference counter on the shared data: one more client */
567         mySharedConverterData->referenceCounter++;
568     }
569 
570     return mySharedConverterData;
571 }
572 
573 /**
574  * Unload a non-algorithmic converter.
575  * It must be sharedData->isReferenceCounted
576  * and this function must be called inside umtx_lock(&cnvCacheMutex).
577  */
578 U_CAPI void
ucnv_unload(UConverterSharedData * sharedData)579 ucnv_unload(UConverterSharedData *sharedData) {
580     if(sharedData != NULL) {
581         if (sharedData->referenceCounter > 0) {
582             sharedData->referenceCounter--;
583         }
584 
585         if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) {
586             ucnv_deleteSharedConverterData(sharedData);
587         }
588     }
589 }
590 
591 U_CFUNC void
ucnv_unloadSharedDataIfReady(UConverterSharedData * sharedData)592 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData)
593 {
594     if(sharedData != NULL && sharedData->isReferenceCounted) {
595         umtx_lock(&cnvCacheMutex);
596         ucnv_unload(sharedData);
597         umtx_unlock(&cnvCacheMutex);
598     }
599 }
600 
601 U_CFUNC void
ucnv_incrementRefCount(UConverterSharedData * sharedData)602 ucnv_incrementRefCount(UConverterSharedData *sharedData)
603 {
604     if(sharedData != NULL && sharedData->isReferenceCounted) {
605         umtx_lock(&cnvCacheMutex);
606         sharedData->referenceCounter++;
607         umtx_unlock(&cnvCacheMutex);
608     }
609 }
610 
/*
 * Splits a converter name string of the form
 *     name[<sep>option]...
 * (<sep> being UCNV_OPTION_SEP_CHAR) into its pieces.
 *
 * *pPieces must be initialized.
 * The name without options will be copied to pPieces->cnvName.
 * The locale and options will be copied to pPieces only if present in inName,
 * otherwise the existing values in pPieces remain.
 * *pArgs will be set to the pPieces values.
 *
 * Recognized options are "locale=<value>", "version=<digit>" and "swaplfnl";
 * any other option is skipped.
 *
 * @param inName  the name string to parse
 * @param pPieces receives the converter name, locale and option bits
 * @param pArgs   name, locale and options are pointed at/copied from pPieces
 * @param err     set to U_ILLEGAL_ARGUMENT_ERROR if the converter name or a
 *                locale value is too long for its buffer. On the name error
 *                path pArgs->name is left pointing at inName and
 *                pPieces->cnvName is emptied.
 */
static void
parseConverterOptions(const char *inName,
                      UConverterNamePieces *pPieces,
                      UConverterLoadArgs *pArgs,
                      UErrorCode *err)
{
    char *cnvName = pPieces->cnvName;
    char c;
    int32_t len = 0;

    pArgs->name=inName;
    pArgs->locale=pPieces->locale;
    pArgs->options=pPieces->options;

    /* copy the converter name itself to cnvName */
    while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
        /* at most UCNV_MAX_CONVERTER_NAME_LENGTH-1 name characters are accepted */
        if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) {
            *err = U_ILLEGAL_ARGUMENT_ERROR;    /* bad name */
            pPieces->cnvName[0]=0;
            return;
        }
        *cnvName++=c;
        inName++;
    }
    *cnvName=0;
    pArgs->name=pPieces->cnvName;

    /* parse options. No more name copying should occur. */
    while((c=*inName)!=0) {
        if(c==UCNV_OPTION_SEP_CHAR) {
            ++inName;
        }

        /* inName is behind an option separator */
        if(uprv_strncmp(inName, "locale=", 7)==0) {
            /* do not modify locale itself in case we have multiple locale options */
            char *dest=pPieces->locale;

            /* copy the locale option value */
            inName+=7;
            len=0;
            while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) {
                ++inName;

                /* at most ULOC_FULLNAME_CAPACITY-1 locale characters are accepted */
                if(++len>=ULOC_FULLNAME_CAPACITY) {
                    *err=U_ILLEGAL_ARGUMENT_ERROR;    /* bad name */
                    pPieces->locale[0]=0;
                    return;
                }

                *dest++=c;
            }
            *dest=0;
        } else if(uprv_strncmp(inName, "version=", 8)==0) {
            /* copy the version option value into bits 3..0 of pPieces->options */
            inName+=8;
            c=*inName;
            if(c==0) {
                /* "version=" at the very end of the string: clear the version bits */
                pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION);
                return;
            } else if((uint8_t)(c-'0')<10) {
                /* a single decimal digit replaces the version bits */
                pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0');
                ++inName;
            }
            /* any other character is not consumed here; the next loop iteration deals with it */
        } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) {
            inName+=8;
            pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL);
        /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */
        } else {
            /* ignore any other options until we define some */
            /* (skips up to and including the next separator, or stops at the end of the string) */
            while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) {
            }
            if(c==0) {
                return;
            }
        }
    }
}
696 
697 /*Logic determines if the converter is Algorithmic AND/OR cached
698  *depending on that:
699  * -we either go to get data from disk and cache it (Data=TRUE, Cached=False)
700  * -Get it from a Hashtable (Data=X, Cached=TRUE)
701  * -Call dataConverter initializer (Data=TRUE, Cached=TRUE)
702  * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE)
703  */
704 U_CFUNC UConverterSharedData *
ucnv_loadSharedData(const char * converterName,UConverterNamePieces * pPieces,UConverterLoadArgs * pArgs,UErrorCode * err)705 ucnv_loadSharedData(const char *converterName,
706                     UConverterNamePieces *pPieces,
707                     UConverterLoadArgs *pArgs,
708                     UErrorCode * err) {
709     UConverterNamePieces stackPieces;
710     UConverterLoadArgs stackArgs;
711     UConverterSharedData *mySharedConverterData = NULL;
712     UErrorCode internalErrorCode = U_ZERO_ERROR;
713     UBool mayContainOption = TRUE;
714     UBool checkForAlgorithmic = TRUE;
715 
716     if (U_FAILURE (*err)) {
717         return NULL;
718     }
719 
720     if(pPieces == NULL) {
721         if(pArgs != NULL) {
722             /*
723              * Bad: We may set pArgs pointers to stackPieces fields
724              * which will be invalid after this function returns.
725              */
726             *err = U_INTERNAL_PROGRAM_ERROR;
727             return NULL;
728         }
729         pPieces = &stackPieces;
730     }
731     if(pArgs == NULL) {
732         uprv_memset(&stackArgs, 0, sizeof(stackArgs));
733         stackArgs.size = (int32_t)sizeof(stackArgs);
734         pArgs = &stackArgs;
735     }
736 
737     pPieces->cnvName[0] = 0;
738     pPieces->locale[0] = 0;
739     pPieces->options = 0;
740 
741     pArgs->name = converterName;
742     pArgs->locale = pPieces->locale;
743     pArgs->options = pPieces->options;
744 
745     /* In case "name" is NULL we want to open the default converter. */
746     if (converterName == NULL) {
747 #if U_CHARSET_IS_UTF8
748         pArgs->name = "UTF-8";
749         return (UConverterSharedData *)converterData[UCNV_UTF8];
750 #else
751         /* Call ucnv_getDefaultName first to query the name from the OS. */
752         pArgs->name = ucnv_getDefaultName();
753         if (pArgs->name == NULL) {
754             *err = U_MISSING_RESOURCE_ERROR;
755             return NULL;
756         }
757         mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData;
758         checkForAlgorithmic = FALSE;
759         mayContainOption = gDefaultConverterContainsOption;
760         /* the default converter name is already canonical */
761 #endif
762     }
763     else if(UCNV_FAST_IS_UTF8(converterName)) {
764         /* fastpath for UTF-8 */
765         pArgs->name = "UTF-8";
766         return (UConverterSharedData *)converterData[UCNV_UTF8];
767     }
768     else {
769         /* separate the converter name from the options */
770         parseConverterOptions(converterName, pPieces, pArgs, err);
771         if (U_FAILURE(*err)) {
772             /* Very bad name used. */
773             return NULL;
774         }
775 
776         /* get the canonical converter name */
777         pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode);
778         if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) {
779             /*
780             * set the input name in case the converter was added
781             * without updating the alias table, or when there is no alias table
782             */
783             pArgs->name = pPieces->cnvName;
784         } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) {
785             *err = U_AMBIGUOUS_ALIAS_WARNING;
786         }
787     }
788 
789     /* separate the converter name from the options */
790     if(mayContainOption && pArgs->name != pPieces->cnvName) {
791         parseConverterOptions(pArgs->name, pPieces, pArgs, err);
792     }
793 
794     /* get the shared data for an algorithmic converter, if it is one */
795     if (checkForAlgorithmic) {
796         mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name);
797     }
798     if (mySharedConverterData == NULL)
799     {
800         /* it is a data-based converter, get its shared data.               */
801         /* Hold the cnvCacheMutex through the whole process of checking the */
802         /*   converter data cache, and adding new entries to the cache      */
803         /*   to prevent other threads from modifying the cache during the   */
804         /*   process.                                                       */
805         pArgs->nestedLoads=1;
806         pArgs->pkg=NULL;
807 
808         umtx_lock(&cnvCacheMutex);
809         mySharedConverterData = ucnv_load(pArgs, err);
810         umtx_unlock(&cnvCacheMutex);
811         if (U_FAILURE (*err) || (mySharedConverterData == NULL))
812         {
813             return NULL;
814         }
815     }
816 
817     return mySharedConverterData;
818 }
819 
820 U_CAPI UConverter *
ucnv_createConverter(UConverter * myUConverter,const char * converterName,UErrorCode * err)821 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err)
822 {
823     UConverterNamePieces stackPieces;
824     UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
825     UConverterSharedData *mySharedConverterData;
826 
827     UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
828 
829     if(U_SUCCESS(*err)) {
830         UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName);
831 
832         mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err);
833 
834         myUConverter = ucnv_createConverterFromSharedData(
835             myUConverter, mySharedConverterData,
836             &stackArgs,
837             err);
838 
839         if(U_SUCCESS(*err)) {
840             UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
841             return myUConverter;
842         }
843     }
844 
845     /* exit with error */
846     UTRACE_EXIT_STATUS(*err);
847     return NULL;
848 }
849 
850 U_CFUNC UBool
ucnv_canCreateConverter(const char * converterName,UErrorCode * err)851 ucnv_canCreateConverter(const char *converterName, UErrorCode *err) {
852     UConverter myUConverter;
853     UConverterNamePieces stackPieces;
854     UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
855     UConverterSharedData *mySharedConverterData;
856 
857     UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN);
858 
859     if(U_SUCCESS(*err)) {
860         UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName);
861 
862         stackArgs.onlyTestIsLoadable=TRUE;
863         mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err);
864         ucnv_createConverterFromSharedData(
865             &myUConverter, mySharedConverterData,
866             &stackArgs,
867             err);
868         ucnv_unloadSharedDataIfReady(mySharedConverterData);
869     }
870 
871     UTRACE_EXIT_STATUS(*err);
872     return U_SUCCESS(*err);
873 }
874 
875 UConverter *
ucnv_createAlgorithmicConverter(UConverter * myUConverter,UConverterType type,const char * locale,uint32_t options,UErrorCode * err)876 ucnv_createAlgorithmicConverter(UConverter *myUConverter,
877                                 UConverterType type,
878                                 const char *locale, uint32_t options,
879                                 UErrorCode *err) {
880     UConverter *cnv;
881     const UConverterSharedData *sharedData;
882     UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
883 
884     UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC);
885     UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type);
886 
887     if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) {
888         *err = U_ILLEGAL_ARGUMENT_ERROR;
889         UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
890         return NULL;
891     }
892 
893     sharedData = converterData[type];
894     if(sharedData == NULL || sharedData->isReferenceCounted) {
895         /* not a valid type, or not an algorithmic converter */
896         *err = U_ILLEGAL_ARGUMENT_ERROR;
897         UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR);
898         return NULL;
899     }
900 
901     stackArgs.name = "";
902     stackArgs.options = options;
903     stackArgs.locale=locale;
904     cnv = ucnv_createConverterFromSharedData(
905             myUConverter, (UConverterSharedData *)sharedData,
906             &stackArgs, err);
907 
908     UTRACE_EXIT_PTR_STATUS(cnv, *err);
909     return cnv;
910 }
911 
912 U_CFUNC UConverter*
ucnv_createConverterFromPackage(const char * packageName,const char * converterName,UErrorCode * err)913 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err)
914 {
915     UConverter *myUConverter;
916     UConverterSharedData *mySharedConverterData;
917     UConverterNamePieces stackPieces;
918     UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
919 
920     UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE);
921 
922     if(U_FAILURE(*err)) {
923         UTRACE_EXIT_STATUS(*err);
924         return NULL;
925     }
926 
927     UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName);
928 
929     /* first, get the options out of the converterName string */
930     stackPieces.cnvName[0] = 0;
931     stackPieces.locale[0] = 0;
932     stackPieces.options = 0;
933     parseConverterOptions(converterName, &stackPieces, &stackArgs, err);
934     if (U_FAILURE(*err)) {
935         /* Very bad name used. */
936         UTRACE_EXIT_STATUS(*err);
937         return NULL;
938     }
939     stackArgs.nestedLoads=1;
940     stackArgs.pkg=packageName;
941 
942     /* open the data, unflatten the shared structure */
943     mySharedConverterData = createConverterFromFile(&stackArgs, err);
944 
945     if (U_FAILURE(*err)) {
946         UTRACE_EXIT_STATUS(*err);
947         return NULL;
948     }
949 
950     /* create the actual converter */
951     myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err);
952 
953     if (U_FAILURE(*err)) {
954         ucnv_close(myUConverter);
955         UTRACE_EXIT_STATUS(*err);
956         return NULL;
957     }
958 
959     UTRACE_EXIT_PTR_STATUS(myUConverter, *err);
960     return myUConverter;
961 }
962 
963 
964 U_CFUNC UConverter*
ucnv_createConverterFromSharedData(UConverter * myUConverter,UConverterSharedData * mySharedConverterData,UConverterLoadArgs * pArgs,UErrorCode * err)965 ucnv_createConverterFromSharedData(UConverter *myUConverter,
966                                    UConverterSharedData *mySharedConverterData,
967                                    UConverterLoadArgs *pArgs,
968                                    UErrorCode *err)
969 {
970     UBool isCopyLocal;
971 
972     if(U_FAILURE(*err)) {
973         ucnv_unloadSharedDataIfReady(mySharedConverterData);
974         return myUConverter;
975     }
976     if(myUConverter == NULL)
977     {
978         myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
979         if(myUConverter == NULL)
980         {
981             *err = U_MEMORY_ALLOCATION_ERROR;
982             ucnv_unloadSharedDataIfReady(mySharedConverterData);
983             return NULL;
984         }
985         isCopyLocal = FALSE;
986     } else {
987         isCopyLocal = TRUE;
988     }
989 
990     /* initialize the converter */
991     uprv_memset(myUConverter, 0, sizeof(UConverter));
992     myUConverter->isCopyLocal = isCopyLocal;
993     /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */
994     myUConverter->sharedData = mySharedConverterData;
995     myUConverter->options = pArgs->options;
996     if(!pArgs->onlyTestIsLoadable) {
997         myUConverter->preFromUFirstCP = U_SENTINEL;
998         myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK;
999         myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK;
1000         myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus;
1001         myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar;
1002         myUConverter->subChar1 = mySharedConverterData->staticData->subChar1;
1003         myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen;
1004         myUConverter->subChars = (uint8_t *)myUConverter->subUChars;
1005         uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen);
1006         myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */
1007     }
1008 
1009     if(mySharedConverterData->impl->open != NULL) {
1010         mySharedConverterData->impl->open(myUConverter, pArgs, err);
1011         if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) {
1012             /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */
1013             ucnv_close(myUConverter);
1014             return NULL;
1015         }
1016     }
1017 
1018     return myUConverter;
1019 }
1020 
1021 /*Frees all shared immutable objects that aren't referred to (reference count = 0)
1022  */
1023 U_CAPI int32_t U_EXPORT2
ucnv_flushCache()1024 ucnv_flushCache ()
1025 {
1026     UConverterSharedData *mySharedData = NULL;
1027     int32_t pos;
1028     int32_t tableDeletedNum = 0;
1029     const UHashElement *e;
1030     /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/
1031     int32_t i, remaining;
1032 
1033     UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE);
1034 
1035     /* Close the default converter without creating a new one so that everything will be flushed. */
1036     u_flushDefaultConverter();
1037 
1038     /*if shared data hasn't even been lazy evaluated yet
1039     * return 0
1040     */
1041     if (SHARED_DATA_HASHTABLE == NULL) {
1042         UTRACE_EXIT_VALUE((int32_t)0);
1043         return 0;
1044     }
1045 
1046     /*creates an enumeration to iterate through every element in the
1047     * table
1048     *
1049     * Synchronization:  holding cnvCacheMutex will prevent any other thread from
1050     *                   accessing or modifying the hash table during the iteration.
1051     *                   The reference count of an entry may be decremented by
1052     *                   ucnv_close while the iteration is in process, but this is
1053     *                   benign.  It can't be incremented (in ucnv_createConverter())
1054     *                   because the sequence of looking up in the cache + incrementing
1055     *                   is protected by cnvCacheMutex.
1056     */
1057     umtx_lock(&cnvCacheMutex);
1058     /*
1059      * double loop: A delta/extension-only converter has a pointer to its base table's
1060      * shared data; the first iteration of the outer loop may see the delta converter
1061      * before the base converter, and unloading the delta converter may get the base
1062      * converter's reference counter down to 0.
1063      */
1064     i = 0;
1065     do {
1066         remaining = 0;
1067         pos = UHASH_FIRST;
1068         while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL)
1069         {
1070             mySharedData = (UConverterSharedData *) e->value.pointer;
1071             /*deletes only if reference counter == 0 */
1072             if (mySharedData->referenceCounter == 0)
1073             {
1074                 tableDeletedNum++;
1075 
1076                 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData);
1077 
1078                 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
1079                 mySharedData->sharedDataCached = FALSE;
1080                 ucnv_deleteSharedConverterData (mySharedData);
1081             } else {
1082                 ++remaining;
1083             }
1084         }
1085     } while(++i == 1 && remaining > 0);
1086     umtx_unlock(&cnvCacheMutex);
1087 
1088     UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining);
1089 
1090     UTRACE_EXIT_VALUE(tableDeletedNum);
1091     return tableDeletedNum;
1092 }
1093 
1094 /* available converters list --------------------------------------------------- */
1095 
initAvailableConvertersList(UErrorCode & errCode)1096 static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) {
1097     U_ASSERT(gAvailableConverterCount == 0);
1098     U_ASSERT(gAvailableConverters == NULL);
1099 
1100     ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
1101     UEnumeration *allConvEnum = ucnv_openAllNames(&errCode);
1102     int32_t allConverterCount = uenum_count(allConvEnum, &errCode);
1103     if (U_FAILURE(errCode)) {
1104         return;
1105     }
1106 
1107     /* We can't have more than "*converterTable" converters to open */
1108     gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*));
1109     if (!gAvailableConverters) {
1110         errCode = U_MEMORY_ALLOCATION_ERROR;
1111         return;
1112     }
1113 
1114     /* Open the default converter to make sure that it has first dibs in the hash table. */
1115     UErrorCode localStatus = U_ZERO_ERROR;
1116     UConverter tempConverter;
1117     ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus));
1118 
1119     gAvailableConverterCount = 0;
1120 
1121     for (int32_t idx = 0; idx < allConverterCount; idx++) {
1122         localStatus = U_ZERO_ERROR;
1123         const char *converterName = uenum_next(allConvEnum, NULL, &localStatus);
1124         if (ucnv_canCreateConverter(converterName, &localStatus)) {
1125             gAvailableConverters[gAvailableConverterCount++] = converterName;
1126         }
1127     }
1128 
1129     uenum_close(allConvEnum);
1130 }
1131 
1132 
haveAvailableConverterList(UErrorCode * pErrorCode)1133 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) {
1134     umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode);
1135     return U_SUCCESS(*pErrorCode);
1136 }
1137 
1138 U_CFUNC uint16_t
ucnv_bld_countAvailableConverters(UErrorCode * pErrorCode)1139 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) {
1140     if (haveAvailableConverterList(pErrorCode)) {
1141         return gAvailableConverterCount;
1142     }
1143     return 0;
1144 }
1145 
1146 U_CFUNC const char *
ucnv_bld_getAvailableConverter(uint16_t n,UErrorCode * pErrorCode)1147 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) {
1148     if (haveAvailableConverterList(pErrorCode)) {
1149         if (n < gAvailableConverterCount) {
1150             return gAvailableConverters[n];
1151         }
1152         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1153     }
1154     return NULL;
1155 }
1156 
1157 /* default converter name --------------------------------------------------- */
1158 
1159 #if !U_CHARSET_IS_UTF8
1160 /*
1161 Copy the canonical converter name.
1162 ucnv_getDefaultName must be thread safe, which can call this function.
1163 
1164 ucnv_setDefaultName calls this function and it doesn't have to be
1165 thread safe because there is no reliable/safe way to reset the
1166 converter in use in all threads. If you did reset the converter, you
1167 would not be sure that retrieving a default converter for one string
1168 would be the same type of default converter for a successive string.
1169 Since the name is a returned via ucnv_getDefaultName without copying,
1170 you shouldn't be modifying or deleting the string from a separate thread.
1171 */
1172 static inline void
internalSetName(const char * name,UErrorCode * status)1173 internalSetName(const char *name, UErrorCode *status) {
1174     UConverterNamePieces stackPieces;
1175     UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
1176     int32_t length=(int32_t)(uprv_strlen(name));
1177     UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL);
1178     const UConverterSharedData *algorithmicSharedData;
1179 
1180     stackArgs.name = name;
1181     if(containsOption) {
1182         stackPieces.cnvName[0] = 0;
1183         stackPieces.locale[0] = 0;
1184         stackPieces.options = 0;
1185         parseConverterOptions(name, &stackPieces, &stackArgs, status);
1186         if(U_FAILURE(*status)) {
1187             return;
1188         }
1189     }
1190     algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name);
1191 
1192     umtx_lock(&cnvCacheMutex);
1193 
1194     gDefaultAlgorithmicSharedData = algorithmicSharedData;
1195     gDefaultConverterContainsOption = containsOption;
1196     uprv_memcpy(gDefaultConverterNameBuffer, name, length);
1197     gDefaultConverterNameBuffer[length]=0;
1198 
1199     /* gDefaultConverterName MUST be the last global var set by this function.  */
1200     /*    It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */
1201     //    But there is nothing here preventing that from being reordered, either by the compiler
1202     //             or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough.
1203     //             -- Andy
1204     gDefaultConverterName = gDefaultConverterNameBuffer;
1205 
1206     ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);
1207 
1208     umtx_unlock(&cnvCacheMutex);
1209 }
1210 #endif
1211 
1212 /*
1213  * In order to be really thread-safe, the get function would have to take
1214  * a buffer parameter and copy the current string inside a mutex block.
1215  * This implementation only tries to be really thread-safe while
1216  * setting the name.
1217  * It assumes that setting a pointer is atomic.
1218  */
1219 
1220 U_CAPI const char*  U_EXPORT2
ucnv_getDefaultName()1221 ucnv_getDefaultName() {
1222 #if U_CHARSET_IS_UTF8
1223     return "UTF-8";
1224 #else
1225     /* local variable to be thread-safe */
1226     const char *name;
1227 
1228     /*
1229     Concurrent calls to ucnv_getDefaultName must be thread safe,
1230     but ucnv_setDefaultName is not thread safe.
1231     */
1232     {
1233         icu::Mutex lock(&cnvCacheMutex);
1234         name = gDefaultConverterName;
1235     }
1236     if(name==NULL) {
1237         UErrorCode errorCode = U_ZERO_ERROR;
1238         UConverter *cnv = NULL;
1239 
1240         name = uprv_getDefaultCodepage();
1241 
1242         /* if the name is there, test it out and get the canonical name with options */
1243         if(name != NULL) {
1244             cnv = ucnv_open(name, &errorCode);
1245             if(U_SUCCESS(errorCode) && cnv != NULL) {
1246                 name = ucnv_getName(cnv, &errorCode);
1247             }
1248         }
1249 
1250         if(name == NULL || name[0] == 0
1251             || U_FAILURE(errorCode) || cnv == NULL
1252             || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer))
1253         {
1254             /* Panic time, let's use a fallback. */
1255 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY)
1256             name = "US-ASCII";
1257             /* there is no 'algorithmic' converter for EBCDIC */
1258 #elif U_PLATFORM == U_PF_OS390
1259             name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING;
1260 #else
1261             name = "ibm-37_P100-1995";
1262 #endif
1263         }
1264 
1265         internalSetName(name, &errorCode);
1266 
1267         /* The close may make the current name go away. */
1268         ucnv_close(cnv);
1269     }
1270 
1271     return name;
1272 #endif
1273 }
1274 
1275 #if U_CHARSET_IS_UTF8
ucnv_setDefaultName(const char *)1276 U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {}
1277 #else
1278 /*
1279 This function is not thread safe, and it can't be thread safe.
1280 See internalSetName or the API reference for details.
1281 */
1282 U_CAPI void U_EXPORT2
ucnv_setDefaultName(const char * converterName)1283 ucnv_setDefaultName(const char *converterName) {
1284     if(converterName==NULL) {
1285         /* reset to the default codepage */
1286         gDefaultConverterName=NULL;
1287     } else {
1288         UErrorCode errorCode = U_ZERO_ERROR;
1289         UConverter *cnv = NULL;
1290         const char *name = NULL;
1291 
1292         /* if the name is there, test it out and get the canonical name with options */
1293         cnv = ucnv_open(converterName, &errorCode);
1294         if(U_SUCCESS(errorCode) && cnv != NULL) {
1295             name = ucnv_getName(cnv, &errorCode);
1296         }
1297 
1298         if(U_SUCCESS(errorCode) && name!=NULL) {
1299             internalSetName(name, &errorCode);
1300         }
1301         /* else this converter is bad to use. Don't change it to a bad value. */
1302 
1303         /* The close may make the current name go away. */
1304         ucnv_close(cnv);
1305 
1306         /* reset the converter cache */
1307         u_flushDefaultConverter();
1308     }
1309 }
1310 #endif
1311 
1312 /* data swapping ------------------------------------------------------------ */
1313 
1314 /* most of this might belong more properly into ucnvmbcs.c, but that is so large */
1315 
1316 #if !UCONFIG_NO_LEGACY_CONVERSION
1317 
1318 U_CAPI int32_t U_EXPORT2
ucnv_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)1319 ucnv_swap(const UDataSwapper *ds,
1320           const void *inData, int32_t length, void *outData,
1321           UErrorCode *pErrorCode) {
1322     const UDataInfo *pInfo;
1323     int32_t headerSize;
1324 
1325     const uint8_t *inBytes;
1326     uint8_t *outBytes;
1327 
1328     uint32_t offset, count, staticDataSize;
1329     int32_t size;
1330 
1331     const UConverterStaticData *inStaticData;
1332     UConverterStaticData *outStaticData;
1333 
1334     const _MBCSHeader *inMBCSHeader;
1335     _MBCSHeader *outMBCSHeader;
1336     _MBCSHeader mbcsHeader;
1337     uint32_t mbcsHeaderLength;
1338     UBool noFromU=FALSE;
1339 
1340     uint8_t outputType;
1341 
1342     int32_t maxFastUChar, mbcsIndexLength;
1343 
1344     const int32_t *inExtIndexes;
1345     int32_t extOffset;
1346 
1347     /* udata_swapDataHeader checks the arguments */
1348     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1349     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1350         return 0;
1351     }
1352 
1353     /* check data format and format version */
1354     pInfo=(const UDataInfo *)((const char *)inData+4);
1355     if(!(
1356         pInfo->dataFormat[0]==0x63 &&   /* dataFormat="cnvt" */
1357         pInfo->dataFormat[1]==0x6e &&
1358         pInfo->dataFormat[2]==0x76 &&
1359         pInfo->dataFormat[3]==0x74 &&
1360         pInfo->formatVersion[0]==6 &&
1361         pInfo->formatVersion[1]>=2
1362     )) {
1363         udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n",
1364                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1365                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1366                          pInfo->formatVersion[0], pInfo->formatVersion[1]);
1367         *pErrorCode=U_UNSUPPORTED_ERROR;
1368         return 0;
1369     }
1370 
1371     inBytes=(const uint8_t *)inData+headerSize;
1372     outBytes=(uint8_t *)outData+headerSize;
1373 
1374     /* read the initial UConverterStaticData structure after the UDataInfo header */
1375     inStaticData=(const UConverterStaticData *)inBytes;
1376     outStaticData=(UConverterStaticData *)outBytes;
1377 
1378     if(length<0) {
1379         staticDataSize=ds->readUInt32(inStaticData->structSize);
1380     } else {
1381         length-=headerSize;
1382         if( length<(int32_t)sizeof(UConverterStaticData) ||
1383             (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
1384         ) {
1385             udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
1386                              length);
1387             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1388             return 0;
1389         }
1390     }
1391 
1392     if(length>=0) {
1393         /* swap the static data */
1394         if(inStaticData!=outStaticData) {
1395             uprv_memcpy(outStaticData, inStaticData, staticDataSize);
1396         }
1397 
1398         ds->swapArray32(ds, &inStaticData->structSize, 4,
1399                            &outStaticData->structSize, pErrorCode);
1400         ds->swapArray32(ds, &inStaticData->codepage, 4,
1401                            &outStaticData->codepage, pErrorCode);
1402 
1403         ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name),
1404                             outStaticData->name, pErrorCode);
1405         if(U_FAILURE(*pErrorCode)) {
1406             udata_printError(ds, "ucnv_swap(): error swapping converter name\n");
1407             return 0;
1408         }
1409     }
1410 
1411     inBytes+=staticDataSize;
1412     outBytes+=staticDataSize;
1413     if(length>=0) {
1414         length-=(int32_t)staticDataSize;
1415     }
1416 
1417     /* check for supported conversionType values */
1418     if(inStaticData->conversionType==UCNV_MBCS) {
1419         /* swap MBCS data */
1420         inMBCSHeader=(const _MBCSHeader *)inBytes;
1421         outMBCSHeader=(_MBCSHeader *)outBytes;
1422 
1423         if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) {
1424             udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
1425                                 length);
1426             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1427             return 0;
1428         }
1429         if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
1430             mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
1431         } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
1432                   ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))&
1433                    MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
1434         ) {
1435             mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK;
1436             noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0);
1437         } else {
1438             udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
1439                              inMBCSHeader->version[0], inMBCSHeader->version[1]);
1440             *pErrorCode=U_UNSUPPORTED_ERROR;
1441             return 0;
1442         }
1443 
1444         uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4);
1445         mbcsHeader.countStates=         ds->readUInt32(inMBCSHeader->countStates);
1446         mbcsHeader.countToUFallbacks=   ds->readUInt32(inMBCSHeader->countToUFallbacks);
1447         mbcsHeader.offsetToUCodeUnits=  ds->readUInt32(inMBCSHeader->offsetToUCodeUnits);
1448         mbcsHeader.offsetFromUTable=    ds->readUInt32(inMBCSHeader->offsetFromUTable);
1449         mbcsHeader.offsetFromUBytes=    ds->readUInt32(inMBCSHeader->offsetFromUBytes);
1450         mbcsHeader.flags=               ds->readUInt32(inMBCSHeader->flags);
1451         mbcsHeader.fromUBytesLength=    ds->readUInt32(inMBCSHeader->fromUBytesLength);
1452         /* mbcsHeader.options have been read above */
1453 
1454         extOffset=(int32_t)(mbcsHeader.flags>>8);
1455         outputType=(uint8_t)mbcsHeader.flags;
1456         if(noFromU && outputType==MBCS_OUTPUT_1) {
1457             udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
1458             *pErrorCode=U_UNSUPPORTED_ERROR;
1459             return 0;
1460         }
1461 
1462         /* make sure that the output type is known */
1463         switch(outputType) {
1464         case MBCS_OUTPUT_1:
1465         case MBCS_OUTPUT_2:
1466         case MBCS_OUTPUT_3:
1467         case MBCS_OUTPUT_4:
1468         case MBCS_OUTPUT_3_EUC:
1469         case MBCS_OUTPUT_4_EUC:
1470         case MBCS_OUTPUT_2_SISO:
1471         case MBCS_OUTPUT_EXT_ONLY:
1472             /* OK */
1473             break;
1474         default:
1475             udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n",
1476                              outputType);
1477             *pErrorCode=U_UNSUPPORTED_ERROR;
1478             return 0;
1479         }
1480 
1481         /* calculate the length of the MBCS data */
1482 
1483         /*
1484          * utf8Friendly MBCS files (mbcsHeader.version 4.3)
1485          * contain an additional mbcsIndex table:
1486          *   uint16_t[(maxFastUChar+1)>>6];
1487          * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff).
1488          */
1489         maxFastUChar=0;
1490         mbcsIndexLength=0;
1491         if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 &&
1492             mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0
1493         ) {
1494             maxFastUChar=(maxFastUChar<<8)|0xff;
1495             mbcsIndexLength=((maxFastUChar+1)>>6)*2;  /* number of bytes */
1496         }
1497 
1498         if(extOffset==0) {
1499             size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength);
1500             if(!noFromU) {
1501                 size+=(int32_t)mbcsHeader.fromUBytesLength;
1502             }
1503 
1504             /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
1505             inExtIndexes=NULL;
1506         } else {
1507             /* there is extension data after the base data, see ucnv_ext.h */
1508             if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
1509                 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
1510                                  length);
1511                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1512                 return 0;
1513             }
1514 
1515             inExtIndexes=(const int32_t *)(inBytes+extOffset);
1516             size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]);
1517         }
1518 
1519         if(length>=0) {
1520             if(length<size) {
1521                 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
1522                                  length);
1523                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1524                 return 0;
1525             }
1526 
1527             /* copy the data for inaccessible bytes */
1528             if(inBytes!=outBytes) {
1529                 uprv_memcpy(outBytes, inBytes, size);
1530             }
1531 
1532             /* swap the MBCSHeader, except for the version field */
1533             count=mbcsHeaderLength*4;
1534             ds->swapArray32(ds, &inMBCSHeader->countStates, count-4,
1535                                &outMBCSHeader->countStates, pErrorCode);
1536 
1537             if(outputType==MBCS_OUTPUT_EXT_ONLY) {
1538                 /*
1539                  * extension-only file,
1540                  * contains a base name instead of normal base table data
1541                  */
1542 
1543                 /* swap the base name, between the header and the extension data */
1544                 const char *inBaseName=(const char *)inBytes+count;
1545                 char *outBaseName=(char *)outBytes+count;
1546                 ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName),
1547                                     outBaseName, pErrorCode);
1548             } else {
1549                 /* normal file with base table data */
1550 
1551                 /* swap the state table, 1kB per state */
1552                 offset=count;
1553                 count=mbcsHeader.countStates*1024;
1554                 ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1555                                    outBytes+offset, pErrorCode);
1556 
1557                 /* swap the toUFallbacks[] */
1558                 offset+=count;
1559                 count=mbcsHeader.countToUFallbacks*8;
1560                 ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1561                                    outBytes+offset, pErrorCode);
1562 
1563                 /* swap the unicodeCodeUnits[] */
1564                 offset=mbcsHeader.offsetToUCodeUnits;
1565                 count=mbcsHeader.offsetFromUTable-offset;
1566                 ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1567                                    outBytes+offset, pErrorCode);
1568 
1569                 /* offset to the stage 1 table, independent of the outputType */
1570                 offset=mbcsHeader.offsetFromUTable;
1571 
1572                 if(outputType==MBCS_OUTPUT_1) {
1573                     /* SBCS: swap the fromU tables, all 16 bits wide */
1574                     count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength;
1575                     ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1576                                        outBytes+offset, pErrorCode);
1577                 } else {
1578                     /* otherwise: swap the stage tables separately */
1579 
1580                     /* stage 1 table: uint16_t[0x440 or 0x40] */
1581                     if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
1582                         count=0x440*2; /* for all of Unicode */
1583                     } else {
1584                         count=0x40*2; /* only BMP */
1585                     }
1586                     ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1587                                        outBytes+offset, pErrorCode);
1588 
1589                     /* stage 2 table: uint32_t[] */
1590                     offset+=count;
1591                     count=mbcsHeader.offsetFromUBytes-offset;
1592                     ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1593                                        outBytes+offset, pErrorCode);
1594 
1595                     /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
1596                     offset=mbcsHeader.offsetFromUBytes;
1597                     count= noFromU ? 0 : mbcsHeader.fromUBytesLength;
1598                     switch(outputType) {
1599                     case MBCS_OUTPUT_2:
1600                     case MBCS_OUTPUT_3_EUC:
1601                     case MBCS_OUTPUT_2_SISO:
1602                         ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1603                                            outBytes+offset, pErrorCode);
1604                         break;
1605                     case MBCS_OUTPUT_4:
1606                         ds->swapArray32(ds, inBytes+offset, (int32_t)count,
1607                                            outBytes+offset, pErrorCode);
1608                         break;
1609                     default:
1610                         /* just uint8_t[], nothing to swap */
1611                         break;
1612                     }
1613 
1614                     if(mbcsIndexLength!=0) {
1615                         offset+=count;
1616                         count=mbcsIndexLength;
1617                         ds->swapArray16(ds, inBytes+offset, (int32_t)count,
1618                                            outBytes+offset, pErrorCode);
1619                     }
1620                 }
1621             }
1622 
1623             if(extOffset!=0) {
1624                 /* swap the extension data */
1625                 inBytes+=extOffset;
1626                 outBytes+=extOffset;
1627 
1628                 /* swap toUTable[] */
1629                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]);
1630                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]);
1631                 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
1632 
1633                 /* swap toUUChars[] */
1634                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]);
1635                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]);
1636                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1637 
1638                 /* swap fromUTableUChars[] */
1639                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]);
1640                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]);
1641                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1642 
1643                 /* swap fromUTableValues[] */
1644                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]);
1645                 /* same length as for fromUTableUChars[] */
1646                 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
1647 
1648                 /* no need to swap fromUBytes[] */
1649 
1650                 /* swap fromUStage12[] */
1651                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]);
1652                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]);
1653                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1654 
1655                 /* swap fromUStage3[] */
1656                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]);
1657                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]);
1658                 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);
1659 
1660                 /* swap fromUStage3b[] */
1661                 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]);
1662                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]);
1663                 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);
1664 
1665                 /* swap indexes[] */
1666                 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]);
1667                 ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode);
1668             }
1669         }
1670     } else {
1671         udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n",
1672                          inStaticData->conversionType);
1673         *pErrorCode=U_UNSUPPORTED_ERROR;
1674         return 0;
1675     }
1676 
1677     return headerSize+(int32_t)staticDataSize+size;
1678 }
1679 
1680 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1681 
1682 #endif
1683