1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2004-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 
10 #ifndef ULOCIMP_H
11 #define ULOCIMP_H
12 
13 #include "unicode/bytestream.h"
14 #include "unicode/uloc.h"
15 
16 /**
17  * Create an iterator over the specified keywords list
18  * @param keywordList double-null terminated list. Will be copied.
19  * @param keywordListSize size in bytes of keywordList
20  * @param status err code
21  * @return enumeration (owned by caller) of the keyword list.
22  * @internal ICU 3.0
23  */
24 U_CAPI UEnumeration* U_EXPORT2
25 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
26 
27 /**
28  * Look up a resource bundle table item with fallback on the table level.
29  * This is accessible so it can be called by C++ code.
30  */
31 U_CAPI const UChar * U_EXPORT2
32 uloc_getTableStringWithFallback(
33     const char *path,
34     const char *locale,
35     const char *tableKey,
36     const char *subTableKey,
37     const char *itemKey,
38     int32_t *pLength,
39     UErrorCode *pErrorCode);
40 
/**
 * Evaluates to true if `a` is a locale ID separator ('_' or '-') and to
 * false otherwise.
 *
 * The argument is parenthesized at each use so that an expression argument
 * built from lower-precedence operators (for example `cond ? x : y`) is
 * compared as a whole instead of being re-associated with `==` / `||`.
 * Note that `a` can be evaluated twice, so it must not have side effects.
 */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
43 
44 U_CFUNC const char*
45 uloc_getCurrentCountryID(const char* oldID);
46 
47 U_CFUNC const char*
48 uloc_getCurrentLanguageID(const char* oldID);
49 
50 U_CFUNC int32_t
51 ulocimp_getLanguage(const char *localeID,
52                     char *language, int32_t languageCapacity,
53                     const char **pEnd);
54 
55 U_CFUNC int32_t
56 ulocimp_getScript(const char *localeID,
57                    char *script, int32_t scriptCapacity,
58                    const char **pEnd);
59 
60 U_CFUNC int32_t
61 ulocimp_getCountry(const char *localeID,
62                    char *country, int32_t countryCapacity,
63                    const char **pEnd);
64 
65 U_STABLE void U_EXPORT2
66 ulocimp_getName(const char* localeID,
67                 icu::ByteSink& sink,
68                 UErrorCode* err);
69 
70 U_STABLE void U_EXPORT2
71 ulocimp_getBaseName(const char* localeID,
72                     icu::ByteSink& sink,
73                     UErrorCode* err);
74 
75 U_STABLE void U_EXPORT2
76 ulocimp_canonicalize(const char* localeID,
77                      icu::ByteSink& sink,
78                      UErrorCode* err);
79 
80 /**
81  * Writes a well-formed language tag for this locale ID.
82  *
83  * **Note**: When `strict` is FALSE, any locale fields which do not satisfy the
84  * BCP47 syntax requirement will be omitted from the result.  When `strict` is
85  * TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
86  * fields do not satisfy the BCP47 syntax requirement.
87  *
88  * @param localeID  the input locale ID
89  * @param sink      the output sink receiving the BCP47 language
90  *                  tag for this Locale.
91  * @param strict    boolean value indicating if the function returns
92  *                  an error for an ill-formed input locale ID.
93  * @param err       error information if receiving the language
94  *                  tag failed.
95  * @return          The length of the BCP47 language tag.
96  *
97  * @internal ICU 64
98  */
99 U_STABLE void U_EXPORT2
100 ulocimp_toLanguageTag(const char* localeID,
101                       icu::ByteSink& sink,
102                       UBool strict,
103                       UErrorCode* err);
104 
105 /**
106  * Returns a locale ID for the specified BCP47 language tag string.
107  * If the specified language tag contains any ill-formed subtags,
108  * the first such subtag and all following subtags are ignored.
109  * <p>
110  * This implements the 'Language-Tag' production of BCP47, and so
111  * supports grandfathered (regular and irregular) as well as private
112  * use language tags.  Private use tags are represented as 'x-whatever',
113  * and grandfathered tags are converted to their canonical replacements
114  * where they exist.  Note that a few grandfathered tags have no modern
115  * replacement, these will be converted using the fallback described in
116  * the first paragraph, so some information might be lost.
117  * @param langtag   the input BCP47 language tag.
118  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
119  * @param sink      the output sink receiving a locale ID for the
120  *                  specified BCP47 language tag.
121  * @param parsedLength  if not NULL, successfully parsed length
122  *                      for the input language tag is set.
123  * @param err       error information if receiving the locald ID
124  *                  failed.
125  * @internal ICU 63
126  */
127 U_CAPI void U_EXPORT2
128 ulocimp_forLanguageTag(const char* langtag,
129                        int32_t tagLen,
130                        icu::ByteSink& sink,
131                        int32_t* parsedLength,
132                        UErrorCode* err);
133 
134 /**
135  * Get the region to use for supplemental data lookup. Uses
136  * (1) any region specified by locale tag "rg"; if none then
137  * (2) any unicode_region_tag in the locale ID; if none then
138  * (3) if inferRegion is TRUE, the region suggested by
139  * getLikelySubtags on the localeID.
140  * If no region is found, returns length 0.
141  *
142  * @param localeID
143  *     The complete locale ID (with keywords) from which
144  *     to get the region to use for supplemental data.
145  * @param inferRegion
146  *     If TRUE, will try to infer region from localeID if
147  *     no other region is found.
148  * @param region
149  *     Buffer in which to put the region ID found; should
150  *     have a capacity at least ULOC_COUNTRY_CAPACITY.
151  * @param regionCapacity
152  *     The actual capacity of the region buffer.
153  * @param status
154  *     Pointer to in/out UErrorCode value for latest status.
155  * @return
156  *     The length of any region code found, or 0 if none.
157  * @internal ICU 57
158  */
159 U_CAPI int32_t U_EXPORT2
160 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
161                                      char *region, int32_t regionCapacity, UErrorCode* status);
162 
163 /**
164  * Add the likely subtags for a provided locale ID, per the algorithm described
165  * in the following CLDR technical report:
166  *
167  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
168  *
169  * If localeID is already in the maximal form, or there is no data available
170  * for maximization, it will be copied to the output buffer.  For example,
171  * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
172  *
173  * Examples:
174  *
175  * "en" maximizes to "en_Latn_US"
176  *
177  * "de" maximizes to "de_Latn_US"
178  *
179  * "sr" maximizes to "sr_Cyrl_RS"
180  *
181  * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
182  *
183  * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
184  *
185  * @param localeID The locale to maximize
186  * @param sink The output sink receiving the maximized locale
187  * @param err Error information if maximizing the locale failed.  If the length
188  * of the localeID and the null-terminator is greater than the maximum allowed size,
189  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
190  * @internal ICU 64
191  */
192 U_STABLE void U_EXPORT2
193 ulocimp_addLikelySubtags(const char* localeID,
194                          icu::ByteSink& sink,
195                          UErrorCode* err);
196 
197 /**
198  * Minimize the subtags for a provided locale ID, per the algorithm described
199  * in the following CLDR technical report:
200  *
201  *   http://www.unicode.org/reports/tr35/#Likely_Subtags
202  *
203  * If localeID is already in the minimal form, or there is no data available
204  * for minimization, it will be copied to the output buffer.  Since the
205  * minimization algorithm relies on proper maximization, see the comments
206  * for ulocimp_addLikelySubtags for reasons why there might not be any data.
207  *
208  * Examples:
209  *
210  * "en_Latn_US" minimizes to "en"
211  *
212  * "de_Latn_US" minimizes to "de"
213  *
214  * "sr_Cyrl_RS" minimizes to "sr"
215  *
216  * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
217  * script, and minimizing to "zh" would imply "zh_Hans_CN".)
218  *
219  * @param localeID The locale to minimize
220  * @param sink The output sink receiving the maximized locale
221  * @param err Error information if minimizing the locale failed.  If the length
222  * of the localeID and the null-terminator is greater than the maximum allowed size,
223  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
224  * @internal ICU 64
225  */
226 U_STABLE void U_EXPORT2
227 ulocimp_minimizeSubtags(const char* localeID,
228                         icu::ByteSink& sink,
229                         UErrorCode* err);
230 
231 U_CAPI const char * U_EXPORT2
232 locale_getKeywordsStart(const char *localeID);
233 
234 U_CFUNC UBool
235 ultag_isExtensionSubtags(const char* s, int32_t len);
236 
237 U_CFUNC UBool
238 ultag_isLanguageSubtag(const char* s, int32_t len);
239 
240 U_CFUNC UBool
241 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
242 
243 U_CFUNC UBool
244 ultag_isRegionSubtag(const char* s, int32_t len);
245 
246 U_CFUNC UBool
247 ultag_isScriptSubtag(const char* s, int32_t len);
248 
249 U_CFUNC UBool
250 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
251 
252 U_CFUNC UBool
253 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
254 
255 U_CFUNC UBool
256 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
257 
258 U_CFUNC UBool
259 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
260 
261 U_CFUNC UBool
262 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
263 
264 U_CFUNC UBool
265 ultag_isUnicodeLocaleType(const char* s, int32_t len);
266 
267 U_CFUNC UBool
268 ultag_isVariantSubtags(const char* s, int32_t len);
269 
270 U_CFUNC const char*
271 ulocimp_toBcpKey(const char* key);
272 
273 U_CFUNC const char*
274 ulocimp_toLegacyKey(const char* key);
275 
276 U_CFUNC const char*
277 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
278 
279 U_CFUNC const char*
280 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
281 
282 #endif
283