1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // casemap.h
5 // created: 2017jan12 Markus W. Scherer
6 
7 #ifndef __CASEMAP_H__
8 #define __CASEMAP_H__
9 
10 #include "unicode/utypes.h"
11 
12 #if U_SHOW_CPLUSPLUS_API
13 
14 #include "unicode/stringpiece.h"
15 #include "unicode/uobject.h"
16 
17 /**
18  * \file
19  * \brief C++ API: Low-level C++ case mapping functions.
20  */
21 
22 U_NAMESPACE_BEGIN
23 
24 class BreakIterator;
25 class ByteSink;
26 class Edits;
27 
28 /**
29  * Low-level C++ case mapping functions.
30  *
31  * @stable ICU 59
32  */
33 class U_COMMON_API CaseMap U_FINAL : public UMemory {
34 public:
35     /**
36      * Lowercases a UTF-16 string and optionally records edits.
37      * Casing is locale-dependent and context-sensitive.
38      * The result may be longer or shorter than the original.
39      * The source string and the destination buffer must not overlap.
40      *
41      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
42      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
43      * @param src       The original string.
44      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
45      * @param dest      A buffer for the result string. The result will be NUL-terminated if
46      *                  the buffer is large enough.
47      *                  The contents is undefined in case of failure.
48      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
49      *                  dest may be NULL and the function will only return the length of the result
50      *                  without writing any of the result string.
51      * @param edits     Records edits for index mapping, working with styled text,
52      *                  and getting only changes (if any).
53      *                  The Edits contents is undefined if any error occurs.
54      *                  This function calls edits->reset() first unless
55      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
56      * @param errorCode Reference to an in/out error code value
57      *                  which must not indicate a failure before the function call.
58      * @return The length of the result string, if successful.
59      *         When the result would be longer than destCapacity,
60      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
61      *
62      * @see u_strToLower
63      * @stable ICU 59
64      */
65      static int32_t toLower(
66             const char *locale, uint32_t options,
67             const char16_t *src, int32_t srcLength,
68             char16_t *dest, int32_t destCapacity, Edits *edits,
69             UErrorCode &errorCode);
70 
71     /**
72      * Uppercases a UTF-16 string and optionally records edits.
73      * Casing is locale-dependent and context-sensitive.
74      * The result may be longer or shorter than the original.
75      * The source string and the destination buffer must not overlap.
76      *
77      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
78      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
79      * @param src       The original string.
80      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
81      * @param dest      A buffer for the result string. The result will be NUL-terminated if
82      *                  the buffer is large enough.
83      *                  The contents is undefined in case of failure.
84      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
85      *                  dest may be NULL and the function will only return the length of the result
86      *                  without writing any of the result string.
87      * @param edits     Records edits for index mapping, working with styled text,
88      *                  and getting only changes (if any).
89      *                  The Edits contents is undefined if any error occurs.
90      *                  This function calls edits->reset() first unless
91      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
92      * @param errorCode Reference to an in/out error code value
93      *                  which must not indicate a failure before the function call.
94      * @return The length of the result string, if successful.
95      *         When the result would be longer than destCapacity,
96      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
97      *
98      * @see u_strToUpper
99      * @stable ICU 59
100      */
101     static int32_t toUpper(
102             const char *locale, uint32_t options,
103             const char16_t *src, int32_t srcLength,
104             char16_t *dest, int32_t destCapacity, Edits *edits,
105             UErrorCode &errorCode);
106 
107 #if !UCONFIG_NO_BREAK_ITERATION
108 
109     /**
110      * Titlecases a UTF-16 string and optionally records edits.
111      * Casing is locale-dependent and context-sensitive.
112      * The result may be longer or shorter than the original.
113      * The source string and the destination buffer must not overlap.
114      *
115      * Titlecasing uses a break iterator to find the first characters of words
116      * that are to be titlecased. It titlecases those characters and lowercases
117      * all others. (This can be modified with options bits.)
118      *
119      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
120      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
121      *                  U_TITLECASE_NO_LOWERCASE,
122      *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
123      *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
124      * @param iter      A break iterator to find the first characters of words that are to be titlecased.
125      *                  It is set to the source string (setText())
126      *                  and used one or more times for iteration (first() and next()).
127      *                  If NULL, then a word break iterator for the locale is used
128      *                  (or something equivalent).
129      * @param src       The original string.
130      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
131      * @param dest      A buffer for the result string. The result will be NUL-terminated if
132      *                  the buffer is large enough.
133      *                  The contents is undefined in case of failure.
134      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
135      *                  dest may be NULL and the function will only return the length of the result
136      *                  without writing any of the result string.
137      * @param edits     Records edits for index mapping, working with styled text,
138      *                  and getting only changes (if any).
139      *                  The Edits contents is undefined if any error occurs.
140      *                  This function calls edits->reset() first unless
141      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
142      * @param errorCode Reference to an in/out error code value
143      *                  which must not indicate a failure before the function call.
144      * @return The length of the result string, if successful.
145      *         When the result would be longer than destCapacity,
146      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
147      *
148      * @see u_strToTitle
149      * @see ucasemap_toTitle
150      * @stable ICU 59
151      */
152     static int32_t toTitle(
153             const char *locale, uint32_t options, BreakIterator *iter,
154             const char16_t *src, int32_t srcLength,
155             char16_t *dest, int32_t destCapacity, Edits *edits,
156             UErrorCode &errorCode);
157 
158 #endif  // UCONFIG_NO_BREAK_ITERATION
159 
160     /**
161      * Case-folds a UTF-16 string and optionally records edits.
162      *
163      * Case folding is locale-independent and not context-sensitive,
164      * but there is an option for whether to include or exclude mappings for dotted I
165      * and dotless i that are marked with 'T' in CaseFolding.txt.
166      *
167      * The result may be longer or shorter than the original.
168      * The source string and the destination buffer must not overlap.
169      *
170      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
171      *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
172      * @param src       The original string.
173      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
174      * @param dest      A buffer for the result string. The result will be NUL-terminated if
175      *                  the buffer is large enough.
176      *                  The contents is undefined in case of failure.
177      * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
178      *                  dest may be NULL and the function will only return the length of the result
179      *                  without writing any of the result string.
180      * @param edits     Records edits for index mapping, working with styled text,
181      *                  and getting only changes (if any).
182      *                  The Edits contents is undefined if any error occurs.
183      *                  This function calls edits->reset() first unless
184      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
185      * @param errorCode Reference to an in/out error code value
186      *                  which must not indicate a failure before the function call.
187      * @return The length of the result string, if successful.
188      *         When the result would be longer than destCapacity,
189      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
190      *
191      * @see u_strFoldCase
192      * @stable ICU 59
193      */
194     static int32_t fold(
195             uint32_t options,
196             const char16_t *src, int32_t srcLength,
197             char16_t *dest, int32_t destCapacity, Edits *edits,
198             UErrorCode &errorCode);
199 
200     /**
201      * Lowercases a UTF-8 string and optionally records edits.
202      * Casing is locale-dependent and context-sensitive.
203      * The result may be longer or shorter than the original.
204      *
205      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
206      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
207      * @param src       The original string.
208      * @param sink      A ByteSink to which the result string is written.
209      *                  sink.Flush() is called at the end.
210      * @param edits     Records edits for index mapping, working with styled text,
211      *                  and getting only changes (if any).
212      *                  The Edits contents is undefined if any error occurs.
213      *                  This function calls edits->reset() first unless
214      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
215      * @param errorCode Reference to an in/out error code value
216      *                  which must not indicate a failure before the function call.
217      *
218      * @see ucasemap_utf8ToLower
219      * @stable ICU 60
220      */
221     static void utf8ToLower(
222             const char *locale, uint32_t options,
223             StringPiece src, ByteSink &sink, Edits *edits,
224             UErrorCode &errorCode);
225 
226     /**
227      * Uppercases a UTF-8 string and optionally records edits.
228      * Casing is locale-dependent and context-sensitive.
229      * The result may be longer or shorter than the original.
230      *
231      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
232      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
233      * @param src       The original string.
234      * @param sink      A ByteSink to which the result string is written.
235      *                  sink.Flush() is called at the end.
236      * @param edits     Records edits for index mapping, working with styled text,
237      *                  and getting only changes (if any).
238      *                  The Edits contents is undefined if any error occurs.
239      *                  This function calls edits->reset() first unless
240      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
241      * @param errorCode Reference to an in/out error code value
242      *                  which must not indicate a failure before the function call.
243      *
244      * @see ucasemap_utf8ToUpper
245      * @stable ICU 60
246      */
247     static void utf8ToUpper(
248             const char *locale, uint32_t options,
249             StringPiece src, ByteSink &sink, Edits *edits,
250             UErrorCode &errorCode);
251 
252 #if !UCONFIG_NO_BREAK_ITERATION
253 
254     /**
255      * Titlecases a UTF-8 string and optionally records edits.
256      * Casing is locale-dependent and context-sensitive.
257      * The result may be longer or shorter than the original.
258      *
259      * Titlecasing uses a break iterator to find the first characters of words
260      * that are to be titlecased. It titlecases those characters and lowercases
261      * all others. (This can be modified with options bits.)
262      *
263      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
264      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
265      *                  U_TITLECASE_NO_LOWERCASE,
266      *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
267      *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
268      * @param iter      A break iterator to find the first characters of words that are to be titlecased.
269      *                  It is set to the source string (setUText())
270      *                  and used one or more times for iteration (first() and next()).
271      *                  If NULL, then a word break iterator for the locale is used
272      *                  (or something equivalent).
273      * @param src       The original string.
274      * @param sink      A ByteSink to which the result string is written.
275      *                  sink.Flush() is called at the end.
276      * @param edits     Records edits for index mapping, working with styled text,
277      *                  and getting only changes (if any).
278      *                  The Edits contents is undefined if any error occurs.
279      *                  This function calls edits->reset() first unless
280      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
281      * @param errorCode Reference to an in/out error code value
282      *                  which must not indicate a failure before the function call.
283      *
284      * @see ucasemap_utf8ToTitle
285      * @stable ICU 60
286      */
287     static void utf8ToTitle(
288             const char *locale, uint32_t options, BreakIterator *iter,
289             StringPiece src, ByteSink &sink, Edits *edits,
290             UErrorCode &errorCode);
291 
292 #endif  // UCONFIG_NO_BREAK_ITERATION
293 
294     /**
295      * Case-folds a UTF-8 string and optionally records edits.
296      *
297      * Case folding is locale-independent and not context-sensitive,
298      * but there is an option for whether to include or exclude mappings for dotted I
299      * and dotless i that are marked with 'T' in CaseFolding.txt.
300      *
301      * The result may be longer or shorter than the original.
302      *
303      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
304      * @param src       The original string.
305      * @param sink      A ByteSink to which the result string is written.
306      *                  sink.Flush() is called at the end.
307      * @param edits     Records edits for index mapping, working with styled text,
308      *                  and getting only changes (if any).
309      *                  The Edits contents is undefined if any error occurs.
310      *                  This function calls edits->reset() first unless
311      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
312      * @param errorCode Reference to an in/out error code value
313      *                  which must not indicate a failure before the function call.
314      *
315      * @see ucasemap_utf8FoldCase
316      * @stable ICU 60
317      */
318     static void utf8Fold(
319             uint32_t options,
320             StringPiece src, ByteSink &sink, Edits *edits,
321             UErrorCode &errorCode);
322 
323     /**
324      * Lowercases a UTF-8 string and optionally records edits.
325      * Casing is locale-dependent and context-sensitive.
326      * The result may be longer or shorter than the original.
327      * The source string and the destination buffer must not overlap.
328      *
329      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
330      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
331      * @param src       The original string.
332      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
333      * @param dest      A buffer for the result string. The result will be NUL-terminated if
334      *                  the buffer is large enough.
335      *                  The contents is undefined in case of failure.
336      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
337      *                  dest may be NULL and the function will only return the length of the result
338      *                  without writing any of the result string.
339      * @param edits     Records edits for index mapping, working with styled text,
340      *                  and getting only changes (if any).
341      *                  The Edits contents is undefined if any error occurs.
342      *                  This function calls edits->reset() first unless
343      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
344      * @param errorCode Reference to an in/out error code value
345      *                  which must not indicate a failure before the function call.
346      * @return The length of the result string, if successful.
347      *         When the result would be longer than destCapacity,
348      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
349      *
350      * @see ucasemap_utf8ToLower
351      * @stable ICU 59
352      */
353     static int32_t utf8ToLower(
354             const char *locale, uint32_t options,
355             const char *src, int32_t srcLength,
356             char *dest, int32_t destCapacity, Edits *edits,
357             UErrorCode &errorCode);
358 
359     /**
360      * Uppercases a UTF-8 string and optionally records edits.
361      * Casing is locale-dependent and context-sensitive.
362      * The result may be longer or shorter than the original.
363      * The source string and the destination buffer must not overlap.
364      *
365      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
366      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
367      * @param src       The original string.
368      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
369      * @param dest      A buffer for the result string. The result will be NUL-terminated if
370      *                  the buffer is large enough.
371      *                  The contents is undefined in case of failure.
372      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
373      *                  dest may be NULL and the function will only return the length of the result
374      *                  without writing any of the result string.
375      * @param edits     Records edits for index mapping, working with styled text,
376      *                  and getting only changes (if any).
377      *                  The Edits contents is undefined if any error occurs.
378      *                  This function calls edits->reset() first unless
379      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
380      * @param errorCode Reference to an in/out error code value
381      *                  which must not indicate a failure before the function call.
382      * @return The length of the result string, if successful.
383      *         When the result would be longer than destCapacity,
384      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
385      *
386      * @see ucasemap_utf8ToUpper
387      * @stable ICU 59
388      */
389     static int32_t utf8ToUpper(
390             const char *locale, uint32_t options,
391             const char *src, int32_t srcLength,
392             char *dest, int32_t destCapacity, Edits *edits,
393             UErrorCode &errorCode);
394 
395 #if !UCONFIG_NO_BREAK_ITERATION
396 
397     /**
398      * Titlecases a UTF-8 string and optionally records edits.
399      * Casing is locale-dependent and context-sensitive.
400      * The result may be longer or shorter than the original.
401      * The source string and the destination buffer must not overlap.
402      *
403      * Titlecasing uses a break iterator to find the first characters of words
404      * that are to be titlecased. It titlecases those characters and lowercases
405      * all others. (This can be modified with options bits.)
406      *
407      * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
408      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
409      *                  U_TITLECASE_NO_LOWERCASE,
410      *                  U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
411      *                  U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
412      * @param iter      A break iterator to find the first characters of words that are to be titlecased.
413      *                  It is set to the source string (setUText())
414      *                  and used one or more times for iteration (first() and next()).
415      *                  If NULL, then a word break iterator for the locale is used
416      *                  (or something equivalent).
417      * @param src       The original string.
418      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
419      * @param dest      A buffer for the result string. The result will be NUL-terminated if
420      *                  the buffer is large enough.
421      *                  The contents is undefined in case of failure.
422      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
423      *                  dest may be NULL and the function will only return the length of the result
424      *                  without writing any of the result string.
425      * @param edits     Records edits for index mapping, working with styled text,
426      *                  and getting only changes (if any).
427      *                  The Edits contents is undefined if any error occurs.
428      *                  This function calls edits->reset() first unless
429      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
430      * @param errorCode Reference to an in/out error code value
431      *                  which must not indicate a failure before the function call.
432      * @return The length of the result string, if successful.
433      *         When the result would be longer than destCapacity,
434      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
435      *
436      * @see ucasemap_utf8ToTitle
437      * @stable ICU 59
438      */
439     static int32_t utf8ToTitle(
440             const char *locale, uint32_t options, BreakIterator *iter,
441             const char *src, int32_t srcLength,
442             char *dest, int32_t destCapacity, Edits *edits,
443             UErrorCode &errorCode);
444 
445 #endif  // UCONFIG_NO_BREAK_ITERATION
446 
447     /**
448      * Case-folds a UTF-8 string and optionally records edits.
449      *
450      * Case folding is locale-independent and not context-sensitive,
451      * but there is an option for whether to include or exclude mappings for dotted I
452      * and dotless i that are marked with 'T' in CaseFolding.txt.
453      *
454      * The result may be longer or shorter than the original.
455      * The source string and the destination buffer must not overlap.
456      *
457      * @param options   Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
458      *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
459      * @param src       The original string.
460      * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
461      * @param dest      A buffer for the result string. The result will be NUL-terminated if
462      *                  the buffer is large enough.
463      *                  The contents is undefined in case of failure.
464      * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
465      *                  dest may be NULL and the function will only return the length of the result
466      *                  without writing any of the result string.
467      * @param edits     Records edits for index mapping, working with styled text,
468      *                  and getting only changes (if any).
469      *                  The Edits contents is undefined if any error occurs.
470      *                  This function calls edits->reset() first unless
471      *                  options includes U_EDITS_NO_RESET. edits can be NULL.
472      * @param errorCode Reference to an in/out error code value
473      *                  which must not indicate a failure before the function call.
474      * @return The length of the result string, if successful.
475      *         When the result would be longer than destCapacity,
476      *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
477      *
478      * @see ucasemap_utf8FoldCase
479      * @stable ICU 59
480      */
481     static int32_t utf8Fold(
482             uint32_t options,
483             const char *src, int32_t srcLength,
484             char *dest, int32_t destCapacity, Edits *edits,
485             UErrorCode &errorCode);
486 
487 private:
488     CaseMap() = delete;
489     CaseMap(const CaseMap &other) = delete;
490     CaseMap &operator=(const CaseMap &other) = delete;
491 };
492 
493 U_NAMESPACE_END
494 
495 #endif /* U_SHOW_CPLUSPLUS_API */
496 
497 #endif  // __CASEMAP_H__
498