1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2004-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 #ifndef ULOCIMP_H
11 #define ULOCIMP_H
12 
13 #include "unicode/bytestream.h"
14 #include "unicode/uloc.h"
15 
16 #include "charstr.h"
17 
18 /**
19  * Create an iterator over the specified keywords list
20  * @param keywordList double-null terminated list. Will be copied.
21  * @param keywordListSize size in bytes of keywordList
22  * @param status err code
23  * @return enumeration (owned by caller) of the keyword list.
24  * @internal ICU 3.0
25  */
26 U_CAPI UEnumeration* U_EXPORT2
27 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
28 
29 /**
30  * Look up a resource bundle table item with fallback on the table level.
31  * This is accessible so it can be called by C++ code.
32  */
33 U_CAPI const UChar * U_EXPORT2
34 uloc_getTableStringWithFallback(
35     const char *path,
36     const char *locale,
37     const char *tableKey,
38     const char *subTableKey,
39     const char *itemKey,
40     int32_t *pLength,
41     UErrorCode *pErrorCode);
42 
/* Returns true if a is an ID separator ('_' or '-'), false otherwise.
 * The argument is parenthesized so that expressions containing lower-precedence
 * operators (e.g. a conditional expression) are compared as a whole.
 * Note: a is evaluated up to twice, so it must not have side effects. */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
45 
46 U_CFUNC const char*
47 uloc_getCurrentCountryID(const char* oldID);
48 
49 U_CFUNC const char*
50 uloc_getCurrentLanguageID(const char* oldID);
51 
52 U_CFUNC void
53 ulocimp_getKeywords(const char *localeID,
54              char prev,
55              icu::ByteSink& sink,
56              UBool valuesToo,
57              UErrorCode *status);
58 
59 icu::CharString U_EXPORT2
60 ulocimp_getLanguage(const char *localeID,
61                     const char **pEnd,
62                     UErrorCode &status);
63 
64 icu::CharString U_EXPORT2
65 ulocimp_getScript(const char *localeID,
66                   const char **pEnd,
67                   UErrorCode &status);
68 
69 icu::CharString U_EXPORT2
70 ulocimp_getCountry(const char *localeID,
71                    const char **pEnd,
72                    UErrorCode &status);
73 
74 U_CAPI void U_EXPORT2
75 ulocimp_getName(const char* localeID,
76                 icu::ByteSink& sink,
77                 UErrorCode* err);
78 
79 U_CAPI void U_EXPORT2
80 ulocimp_getBaseName(const char* localeID,
81                     icu::ByteSink& sink,
82                     UErrorCode* err);
83 
84 U_CAPI void U_EXPORT2
85 ulocimp_canonicalize(const char* localeID,
86                      icu::ByteSink& sink,
87                      UErrorCode* err);
88 
89 U_CAPI void U_EXPORT2
90 ulocimp_getKeywordValue(const char* localeID,
91                         const char* keywordName,
92                         icu::ByteSink& sink,
93                         UErrorCode* status);
94 
95 /**
96  * Writes a well-formed language tag for this locale ID.
97  *
98  * **Note**: When `strict` is false, any locale fields which do not satisfy the
99  * BCP47 syntax requirement will be omitted from the result.  When `strict` is
100  * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
101  * fields do not satisfy the BCP47 syntax requirement.
102  *
103  * @param localeID  the input locale ID
104  * @param sink      the output sink receiving the BCP47 language
105  *                  tag for this Locale.
106  * @param strict    boolean value indicating if the function returns
107  *                  an error for an ill-formed input locale ID.
108  * @param err       error information if receiving the language
109  *                  tag failed.
 * @note            The tag is delivered through `sink`; the function itself returns nothing.
111  *
112  * @internal ICU 64
113  */
114 U_CAPI void U_EXPORT2
115 ulocimp_toLanguageTag(const char* localeID,
116                       icu::ByteSink& sink,
117                       UBool strict,
118                       UErrorCode* err);
119 
120 /**
121  * Returns a locale ID for the specified BCP47 language tag string.
122  * If the specified language tag contains any ill-formed subtags,
123  * the first such subtag and all following subtags are ignored.
124  * <p>
125  * This implements the 'Language-Tag' production of BCP 47, and so
126  * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
127  * (regular and irregular) as well as private use language tags.
128  *
129  * Private use tags are represented as 'x-whatever',
130  * and legacy tags are converted to their canonical replacements where they exist.
131  *
132  * Note that a few legacy tags have no modern replacement;
133  * these will be converted using the fallback described in
134  * the first paragraph, so some information might be lost.
135  *
136  * @param langtag   the input BCP47 language tag.
137  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
138  * @param sink      the output sink receiving a locale ID for the
139  *                  specified BCP47 language tag.
140  * @param parsedLength  if not NULL, successfully parsed length
141  *                      for the input language tag is set.
 * @param err       error information if receiving the locale ID
 *                  failed.
144  * @internal ICU 63
145  */
146 U_CAPI void U_EXPORT2
147 ulocimp_forLanguageTag(const char* langtag,
148                        int32_t tagLen,
149                        icu::ByteSink& sink,
150                        int32_t* parsedLength,
151                        UErrorCode* err);
152 
153 /**
154  * Get the region to use for supplemental data lookup. Uses
155  * (1) any region specified by locale tag "rg"; if none then
156  * (2) any unicode_region_tag in the locale ID; if none then
157  * (3) if inferRegion is true, the region suggested by
158  * getLikelySubtags on the localeID.
159  * If no region is found, returns length 0.
160  *
161  * @param localeID
162  *     The complete locale ID (with keywords) from which
163  *     to get the region to use for supplemental data.
164  * @param inferRegion
165  *     If true, will try to infer region from localeID if
166  *     no other region is found.
167  * @param region
168  *     Buffer in which to put the region ID found; should
169  *     have a capacity at least ULOC_COUNTRY_CAPACITY.
170  * @param regionCapacity
171  *     The actual capacity of the region buffer.
172  * @param status
173  *     Pointer to in/out UErrorCode value for latest status.
174  * @return
175  *     The length of any region code found, or 0 if none.
176  * @internal ICU 57
177  */
178 U_CAPI int32_t U_EXPORT2
179 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
180                                      char *region, int32_t regionCapacity, UErrorCode* status);
181 
182 /**
183  * Add the likely subtags for a provided locale ID, per the algorithm described
184  * in the following CLDR technical report:
185  *
186  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
187  *
 * If localeID is already in the maximal form, or there is no data available
 * for maximization, it will be copied to the output sink.  For example,
 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
191  *
192  * Examples:
193  *
194  * "en" maximizes to "en_Latn_US"
195  *
 * "de" maximizes to "de_Latn_DE"
197  *
198  * "sr" maximizes to "sr_Cyrl_RS"
199  *
200  * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
201  *
202  * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
203  *
204  * @param localeID The locale to maximize
205  * @param sink The output sink receiving the maximized locale
206  * @param err Error information if maximizing the locale failed.  If the length
207  * of the localeID and the null-terminator is greater than the maximum allowed size,
208  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
209  * @internal ICU 64
210  */
211 U_CAPI void U_EXPORT2
212 ulocimp_addLikelySubtags(const char* localeID,
213                          icu::ByteSink& sink,
214                          UErrorCode* err);
215 
216 /**
217  * Minimize the subtags for a provided locale ID, per the algorithm described
218  * in the following CLDR technical report:
219  *
220  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
221  *
 * If localeID is already in the minimal form, or there is no data available
 * for minimization, it will be copied to the output sink.  Since the
224  * minimization algorithm relies on proper maximization, see the comments
225  * for ulocimp_addLikelySubtags for reasons why there might not be any data.
226  *
227  * Examples:
228  *
229  * "en_Latn_US" minimizes to "en"
230  *
 * "de_Latn_DE" minimizes to "de"
232  *
233  * "sr_Cyrl_RS" minimizes to "sr"
234  *
235  * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
236  * script, and minimizing to "zh" would imply "zh_Hans_CN".)
237  *
238  * @param localeID The locale to minimize
 * @param sink The output sink receiving the minimized locale
240  * @param err Error information if minimizing the locale failed.  If the length
241  * of the localeID and the null-terminator is greater than the maximum allowed size,
242  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
243  * @internal ICU 64
244  */
245 U_CAPI void U_EXPORT2
246 ulocimp_minimizeSubtags(const char* localeID,
247                         icu::ByteSink& sink,
248                         UErrorCode* err);
249 
250 U_CAPI const char * U_EXPORT2
251 locale_getKeywordsStart(const char *localeID);
252 
253 U_CFUNC UBool
254 ultag_isExtensionSubtags(const char* s, int32_t len);
255 
256 U_CFUNC UBool
257 ultag_isLanguageSubtag(const char* s, int32_t len);
258 
259 U_CFUNC UBool
260 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
261 
262 U_CFUNC UBool
263 ultag_isRegionSubtag(const char* s, int32_t len);
264 
265 U_CFUNC UBool
266 ultag_isScriptSubtag(const char* s, int32_t len);
267 
268 U_CFUNC UBool
269 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
270 
271 U_CFUNC UBool
272 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
273 
274 U_CFUNC UBool
275 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
276 
277 U_CFUNC UBool
278 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
279 
280 U_CFUNC UBool
281 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
282 
283 U_CFUNC UBool
284 ultag_isUnicodeLocaleType(const char* s, int32_t len);
285 
286 U_CFUNC UBool
287 ultag_isVariantSubtags(const char* s, int32_t len);
288 
289 U_CAPI const char * U_EXPORT2
290 ultag_getTKeyStart(const char *localeID);
291 
292 U_CFUNC const char*
293 ulocimp_toBcpKey(const char* key);
294 
295 U_CFUNC const char*
296 ulocimp_toLegacyKey(const char* key);
297 
298 U_CFUNC const char*
299 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
300 
301 U_CFUNC const char*
302 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
303 
304 /* Function for testing purpose */
305 U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
306 
307 // Return true if the value is already canonicalized.
308 U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
309 
310 /**
311  * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
312  * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
313  * and then, if it's not big enough, reallocate it on the heap and try again.
314  *
315  * You use it like this:
316  * UErrorCode err = U_ZERO_ERROR;
317  *
318  * PreflightingLocaleIDBuffer tempBuffer;
319  * do {
320  *     tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
321  * } while (tempBuffer.needToTryAgain(&err));
322  * if (U_SUCCESS(err)) {
323  *     uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
324  * }
325  */
326 class PreflightingLocaleIDBuffer {
327 private:
328     char stackBuffer[ULOC_FULLNAME_CAPACITY];
329     char* heapBuffer = nullptr;
330     int32_t capacity = ULOC_FULLNAME_CAPACITY;
331 
332 public:
333     int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;
334 
335     // No heap allocation. Use only on the stack.
336     static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
337     static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
338 #if U_HAVE_PLACEMENT_NEW
339     static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
340 #endif
341 
PreflightingLocaleIDBuffer()342     PreflightingLocaleIDBuffer() {}
343 
~PreflightingLocaleIDBuffer()344     ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }
345 
getBuffer()346     char* getBuffer() {
347         if (heapBuffer == nullptr) {
348             return stackBuffer;
349         } else {
350             return heapBuffer;
351         }
352     }
353 
getCapacity()354     int32_t getCapacity() {
355         return capacity;
356     }
357 
needToTryAgain(UErrorCode * err)358     bool needToTryAgain(UErrorCode* err) {
359         if (heapBuffer != nullptr) {
360             return false;
361         }
362 
363         if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
364             int32_t newCapacity = requestedCapacity + 2;    // one for the terminating null, one just for paranoia
365             heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
366             if (heapBuffer == nullptr) {
367                 *err = U_MEMORY_ALLOCATION_ERROR;
368             } else {
369                 *err = U_ZERO_ERROR;
370                 capacity = newCapacity;
371             }
372             return U_SUCCESS(*err);
373         }
374         return false;
375     }
376 };
377 
378 #endif
379