1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2008-2011, International Business Machines
7 *   Corporation, Google and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 */
11 /*
12  * Author : eldawy@google.com (Mohamed Eldawy)
13  * ucnvsel.h
14  *
15  * Purpose: To generate a list of encodings capable of handling
16  * a given Unicode text
17  *
18  * Started 09-April-2008
19  */
20 
21 #ifndef __ICU_UCNV_SEL_H__
22 #define __ICU_UCNV_SEL_H__
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_CONVERSION
27 
28 #include "unicode/uset.h"
29 #include "unicode/utf16.h"
30 #include "unicode/uenum.h"
31 #include "unicode/ucnv.h"
32 
33 #if U_SHOW_CPLUSPLUS_API
34 #include "unicode/localpointer.h"
35 #endif   // U_SHOW_CPLUSPLUS_API
36 
37 /**
38  * \file
39  *
40  * A converter selector is built with a set of encoding/charset names
41  * and given an input string returns the set of names of the
42  * corresponding converters which can convert the string.
43  *
44  * A converter selector can be serialized into a buffer and reopened
45  * from the serialized form.
46  */
47 
48 struct UConverterSelector;
49 /**
50  * @{
51  * Typedef for the converter selector data structure (only forward-declared in this header).
52  */
53 typedef struct UConverterSelector UConverterSelector;
54 /** @} */
55 
56 /**
57  * Open a selector.
58  * If converterListSize is 0, build for all available converters.
59  * If excludedCodePoints is NULL, don't exclude any code points.
60  *
61  * @param converterList an array of pointers to the encoding names to be considered.
62  *                      Can be NULL if converterListSize==0.
63  *                      The list and the names will be cloned, and the caller
64  *                      retains ownership of the original.
65  * @param converterListSize number of encodings in the above list.
66  *                          If 0, builds a selector for all available converters.
67  * @param excludedCodePoints a set of code points to be excluded from consideration.
68  *                           That is, excluded code points in a string do not change
69  *                           the selection result. (They might be handled by a callback.)
70  *                           Use NULL to exclude nothing.
71  * @param whichSet which converter set to use. Use this to determine whether
72  *                 to consider only roundtrip mappings or also fallbacks.
73  * @param status an in/out ICU UErrorCode
74  * @return the new selector; to be closed with ucnvsel_close()
75  *
76  * @stable ICU 4.2
77  */
78 U_CAPI UConverterSelector* U_EXPORT2
79 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
80              const USet* excludedCodePoints,
81              const UConverterUnicodeSet whichSet, UErrorCode* status);
82 
83 /**
84  * Closes a selector.
85  * If any Enumerations were returned by ucnvsel_select*, they become invalid.
86  * They can be closed before or after calling ucnvsel_close,
87  * but should never be used after the selector is closed.
88  *
89  * @see ucnvsel_selectForString
90  * @see ucnvsel_selectForUTF8
91  *
92  * @param sel selector to close
93  *
94  * @stable ICU 4.2
95  */
96 U_CAPI void U_EXPORT2
97 ucnvsel_close(UConverterSelector *sel);
98 
99 #if U_SHOW_CPLUSPLUS_API
100 
101 U_NAMESPACE_BEGIN
102 
103 /**
104  * \class LocalUConverterSelectorPointer
105  * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
106  * For most methods see the LocalPointerBase base class.
107  *
108  * @see LocalPointerBase
109  * @see LocalPointer
110  * @stable ICU 4.4
111  */
112 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
113 
114 U_NAMESPACE_END
115 
116 #endif   // U_SHOW_CPLUSPLUS_API
117 
118 /**
119  * Open a selector from its serialized form (see ucnvsel_serialize()).
120  * The buffer must remain valid and unchanged for the lifetime of the selector.
121  * This is much faster than creating a selector from scratch.
122  * Using a serialized form from a different machine (endianness/charset) is supported.
123  *
124  * @param buffer pointer to the serialized form of a converter selector;
125  *               must be 32-bit-aligned
126  * @param length the capacity of this buffer (can be equal to or larger than
127  *               the actual data length)
128  * @param status an in/out ICU UErrorCode
129  * @return the new selector; to be closed with ucnvsel_close()
130  *
131  * @stable ICU 4.2
132  */
133 U_CAPI UConverterSelector* U_EXPORT2
134 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
135 
136 /**
137  * Serialize a selector into a linear buffer.
138  * The serialized form is portable to different machines.
139  *
140  * @param sel selector to serialize
141  * @param buffer pointer to 32-bit-aligned memory to be filled with the
142  *               serialized form of this converter selector
143  * @param bufferCapacity the capacity of this buffer
144  * @param status an in/out ICU UErrorCode
145  * @return the required buffer capacity to hold the serialized data (even if the call fails
146  *         with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
147  *
148  * @stable ICU 4.2
149  */
150 U_CAPI int32_t U_EXPORT2
151 ucnvsel_serialize(const UConverterSelector* sel,
152                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
153 
154 /**
155  * Select converters that can map all characters in a UTF-16 string,
156  * ignoring the excluded code points.
157  *
158  * @param sel the selector to query
159  * @param s UTF-16 string
160  * @param length length of the string, or -1 if NUL-terminated
161  * @param status an in/out ICU UErrorCode
162  * @return an enumeration containing the names of the selected encodings.
163  *         The returned encoding names and their order will be the same as
164  *         supplied when building the selector.
165  *
166  * @stable ICU 4.2
167  */
168 U_CAPI UEnumeration * U_EXPORT2
169 ucnvsel_selectForString(const UConverterSelector* sel,
170                         const UChar *s, int32_t length, UErrorCode *status);
171 
172 /**
173  * Select converters that can map all characters in a UTF-8 string,
174  * ignoring the excluded code points.
175  *
176  * @param sel the selector to query
177  * @param s UTF-8 string
178  * @param length length of the string, or -1 if NUL-terminated
179  * @param status an in/out ICU UErrorCode
180  * @return an enumeration containing the names of the selected encodings.
181  *         The returned encoding names and their order will be the same as
182  *         supplied when building the selector.
183  *
184  * @stable ICU 4.2
185  */
186 U_CAPI UEnumeration * U_EXPORT2
187 ucnvsel_selectForUTF8(const UConverterSelector* sel,
188                       const char *s, int32_t length, UErrorCode *status);
189 
190 #endif  /* !UCONFIG_NO_CONVERSION */
191 
192 #endif  /* __ICU_UCNV_SEL_H__ */
193