1 /*-
2  * Copyright (c) 1999,2000
3  *	Konstantin Chuguev.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by Konstantin Chuguev
16  *	and its contributors.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  *	iconv (Charset Conversion Library) v2.0
31  */
32 
33 #ifndef _ICONV_H_
34 #define _ICONV_H_
35 
36 #include <sys/types.h>	/* size_t */
37 
/*
 * iconv_t:	charset conversion descriptor type
 *
 * Opaque to callers: a plain `void *' handle.  It is the return type of
 * iconv_open() and the first argument of iconv() and iconv_close().
 */
typedef void *iconv_t;

__BEGIN_DECLS

/*
 * Public conversion interface.  Parameter names are deliberately left out
 * of the prototypes.
 * NOTE(review): the argument order is presumably the POSIX one --
 * iconv_open(tocode, fromcode) and
 * iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) -- confirm against
 * the implementation.  Note that the input buffer is declared
 * `const char **' here.
 */
iconv_t	iconv_open(const char *, const char *);
size_t	iconv(iconv_t, const char **, size_t *, char **, size_t *);
int	iconv_close(iconv_t);

__END_DECLS
50 
51 
52 #ifdef ICONV_INTERNAL
53 /*
54  * Internal declarations for charset conversion library and modules.
55  */
56 
57 #include <stddef.h>		/* NULL */
58 /* #include <sys/types.h>	   size_t
59  * (already included in ICONV_H public section) */
60 
61 
/*
 * Unicode character types, special code values and helpers.
 */

/* Character storage types; ucs_t is a macro alias for the 32-bit type. */
typedef u_int32_t ucs4_t;	/* Unicode scalar character [D28] */
typedef u_int16_t ucs2_t;	/* Unicode character [D5] */
#define ucs_t			ucs4_t

/* Special 16-bit code values */
#define UCS_CHAR_NONE			0xFFFF
#define UCS_CHAR_INVALID		0xFFFE
#define UCS_CHAR_ZERO_WIDTH_NBSP	0xFEFF	/* U+FEFF */

/*
 * Yields the bits of ch above the low 16 (bits 16..31); the result is
 * non-zero exactly when one of those bits is set.
 */
#define iconv_char32bit(ch)	(0xFFFF0000 & (ch))
75 
76 
/*
 * Builtin CCS and CES data.
 */

/*
 * One entry of a builtin lookup table: a string key paired with an opaque
 * read-only pointer.
 * NOTE(review): presumably `key' is a charset/encoding name and `value'
 * the matching CCS table or CES descriptor -- confirm in the table
 * definitions.
 */
typedef struct {
	const char *key;
	const void *value;
} iconv_builtin_table;

/*
 * The builtin tables themselves are defined elsewhere.  They are declared
 * as arrays of unknown size, so the element count is not available here.
 * NOTE(review): check the definitions for how the end of each table is
 * marked.
 */
extern const iconv_builtin_table iconv_builtin_ccs[];
extern const iconv_builtin_table iconv_builtin_ces[];
88 
89 
/*
 * Table-driven coded character set (CCS) definitions
 */
struct iconv_ccs;

/* Basic CCS functions */
/* Converter: takes a table pointer and one 16-bit character, returns one. */
typedef ucs2_t iconv_ccs_convert_t(const void *table, ucs2_t ch);
/* Close hook: receives the CCS descriptor itself; returns an int status. */
typedef int iconv_ccs_close_t(struct iconv_ccs *desc);

/*
 * CCS structure
 *
 * Holds a pair of tables and the functions used to look characters up in
 * them.  The ICONV_CCS_CONVERT_* macros call convert_from_ucs with
 * from_ucs and convert_to_ucs with to_ucs.
 */
struct iconv_ccs {
	const void *		from_ucs;	/* FROM table pointer */
	const void *		to_ucs;		/* TO table pointer */
	iconv_ccs_convert_t *	convert_from_ucs;	/* converter  */
	iconv_ccs_convert_t *	convert_to_ucs;		/*  functions */
	iconv_ccs_close_t *	close;		/* close function */
	void *			extra;		/* optional extra data */
	unsigned int		nbits;		/* number of bits */
};

/*
 * CCS initialisation function
 * NOTE(review): presumably fills in *ccs for the character set called
 * `name' and returns an errno-style status -- the implementation is not
 * visible from this header; confirm.
 */
int iconv_ccs_init(struct iconv_ccs *ccs, const char *name);
112 
/*
 * CCS conversion macros
 *
 * Each macro calls one of the descriptor's converter functions, passing the
 * matching table pointer and the character `ch'.  `ccs' is expanded twice,
 * so pass an expression without side effects.
 */
#define ICONV_CCS_CONVERT_FROM_UCS(ccs, ch) \
        ((ccs)->convert_from_ucs((ccs)->from_ucs, (ch)))
#define ICONV_CCS_CONVERT_TO_UCS(ccs, ch) \
        ((ccs)->convert_to_ucs((ccs)->to_ucs, (ch)))
/*
 * NOTE(review): defined with an empty replacement list, so every use
 * expands to nothing.  It looks like an unfinished accessor for the
 * `nbits' member of struct iconv_ccs -- confirm before relying on it.
 */
#define ICONV_CCS_NBITS
119 
/*
 * Module-driven character encoding scheme (CES) definitions
 */
struct iconv_ces;

/*
 * Basic CES function types
 *
 * init:   the three arguments are named data, cs_name and desc_data in the
 *         module_init() generated by ICONV_CES_MODULE_DECL.
 * close:  takes a `void *'; ICONV_CES_CLOSE passes the CES instance
 *         pointer to the instance's close member.
 * reset:  takes a `void *'; ICONV_CES_RESET passes the instance's `data'
 *         member.
 * convert_from_ucs: takes the CES instance, one ucs_t character and an
 *         output buffer cursor with its remaining byte count.
 * convert_to_ucs:   takes the CES instance and an input buffer cursor with
 *         its remaining byte count, and returns one ucs_t.
 * NOTE(review): the meaning of the return values (error codes, byte
 * counts, special UCS_CHAR_* values) is not visible in this header --
 * confirm against the implementations.
 */
typedef int  iconv_ces_init_t(void **, const char *, const void *);
typedef	int iconv_ces_close_t(void *);
typedef	void iconv_ces_reset_t(void *);
typedef ssize_t iconv_ces_convert_from_ucs_t(
	struct iconv_ces *data, ucs_t in, unsigned char **outbuf,
	size_t *outbytesleft);
typedef ucs_t iconv_ces_convert_to_ucs_t(
	struct iconv_ces *data, const unsigned char **inbuf,
	size_t *inbytesleft);
135 
/*
 * CES descriptor structure - CES class data
 *
 * A table of function pointers shared by all instances of one encoding
 * scheme, plus an optional class data pointer.  The *_MODULE_DECL macros
 * in this header define objects of this type using positional
 * initialisers, so the member order below is part of the interface.
 */
struct iconv_ces_desc {
	iconv_ces_init_t *	init;
	iconv_ces_close_t *	close;	/* class-specific close function */
	iconv_ces_reset_t *	reset;
	iconv_ces_convert_from_ucs_t * convert_from_ucs;
	iconv_ces_convert_to_ucs_t * convert_to_ucs;
	const void *data;	/* optional specific CES class data */
};

/* explicit CES class for table (CCS) driven charsets (defined elsewhere) */
extern const struct iconv_ces_desc iconv_ces_table_driven;
148 
/*
 * CES structure - CES instance data
 *
 * One instance of an encoding scheme: a pointer to its class descriptor,
 * an instance-level close function and two opaque pointers.
 */
struct iconv_ces {
	const struct iconv_ces_desc *desc;  /* descriptor/class pointer */
	iconv_ces_close_t *close;	/* instance-specific close function */
	void		  *data;	/* optional extra data */
	void		  *handle;	/* optional handle */
};

/*
 * Basic CES functions and macros
 * NOTE(review): iconv_ces_init() presumably fills in *ces for the encoding
 * called `name' and returns an errno-style status -- the implementation is
 * not visible from this header; confirm.
 */
int iconv_ces_init(struct iconv_ces *ces, const char *name);
159 
/*
 * Call-through macros for a CES instance.  The instance argument is
 * expanded more than once in each macro, so pass an expression without
 * side effects.
 *
 * ICONV_CES_CLOSE  calls the instance's own close member, passing the
 *                  instance pointer itself.
 * ICONV_CES_RESET  calls the class (desc) reset function, passing the
 *                  instance's `data' member.
 * ICONV_CES_CONVERT_FROM_UCS / ICONV_CES_CONVERT_TO_UCS call the class
 *                  converters, passing the instance pointer followed by
 *                  the remaining arguments unchanged.
 */
#define ICONV_CES_CLOSE(ces)	((ces)->close((ces)))
#define ICONV_CES_RESET(ces)	((ces)->desc->reset((ces)->data))
#define ICONV_CES_CONVERT_FROM_UCS(cesd, in, outbuf, outbytes) \
	    ((cesd)->desc->convert_from_ucs((cesd), (in), (outbuf), (outbytes)))
#define ICONV_CES_CONVERT_TO_UCS(cesd, inbuf, inbytes) \
	    ((cesd)->desc->convert_to_ucs((cesd), (inbuf), (inbytes)))
166 
167 
/*
 * Virtual CES initialisation function type
 *
 * The module_init() generated by ICONV_CES_MODULE_DECL calls a function of
 * this type with its own `data' pointer, the descriptor data and an element
 * count (the number of entries in the module's ccsattr array).
 */
typedef int iconv_ces_init_int_t(void **, const void *, size_t);

/*
 * CES subclass macros (for EUC and ISO-2022)
 *
 * ICONV_CES_DRIVER_DECL(name) declares the five driver functions
 * iconv_<name>_init, iconv_<name>_close, iconv_<name>_reset,
 * iconv_<name>_convert_from_ucs and iconv_<name>_convert_to_ucs by way of
 * the function typedefs.  The expansion has no trailing semicolon; the
 * invocation supplies it.
 */
#define ICONV_CES_DRIVER_DECL(name) \
	iconv_ces_init_int_t iconv_##name##_init; \
	iconv_ces_close_t iconv_##name##_close; \
	iconv_ces_reset_t iconv_##name##_reset; \
	iconv_ces_convert_from_ucs_t iconv_##name##_convert_from_ucs; \
	iconv_ces_convert_to_ucs_t iconv_##name##_convert_to_ucs
178 
/*
 * CES functions and macros for stateless encodings
 *
 * The three functions below are declared here and defined elsewhere.
 */
iconv_ces_init_t  iconv_ces_init_null;
iconv_ces_close_t iconv_ces_close_null;
iconv_ces_reset_t iconv_ces_reset_null;

/*
 * Defines the descriptor object iconv_ces_<name> with the *_null
 * init/close/reset functions and no class data.  The identifiers
 * convert_from_ucs and convert_to_ucs are taken from the scope where the
 * macro is used, so the module must define functions with exactly those
 * names before invoking it.  The expansion is an object definition without
 * a trailing semicolon; the invocation supplies it.
 */
#define ICONV_CES_STATELESS_MODULE_DECL(name) \
	const struct iconv_ces_desc iconv_ces_##name = { \
		iconv_ces_init_null, \
		iconv_ces_close_null, \
		iconv_ces_reset_null, \
		convert_from_ucs, \
		convert_to_ucs, \
		NULL \
	}
193 
/*
 * CES functions and macros for stateful (integer state) encodings
 *
 * The three functions below are declared here and defined elsewhere.
 */
iconv_ces_init_t  iconv_ces_init_state;
iconv_ces_close_t iconv_ces_close_state;
iconv_ces_reset_t iconv_ces_reset_state;

/*
 * Defines the descriptor object iconv_ces_<name> with the *_state
 * init/close/reset functions and no class data.  The identifiers
 * convert_from_ucs and convert_to_ucs are taken from the scope where the
 * macro is used, so the module must define functions with exactly those
 * names before invoking it.  The expansion is an object definition without
 * a trailing semicolon; the invocation supplies it.
 */
#define ICONV_CES_STATEFUL_MODULE_DECL(name) \
	const struct iconv_ces_desc iconv_ces_##name = { \
		iconv_ces_init_state, \
		iconv_ces_close_state, \
		iconv_ces_reset_state, \
		convert_from_ucs, \
		convert_to_ucs, \
		NULL \
	}
208 
/*
 * CES functions and macros for other encodings
 *
 * ICONV_CES_MODULE_DECL(type, name) expands to two definitions:
 *
 *  1. a static module_init() that forwards to iconv_<type>_init(), passing
 *     its `data' and `desc_data' arguments plus the number of elements in
 *     the module's `ccsattr' array (its size divided by the size of one
 *     iconv_ces_<type>_ccs).  The cs_name argument is not used.
 *  2. the descriptor object iconv_ces_<name>, built from module_init, the
 *     iconv_<type>_* driver functions and the address of `ccsattr'.
 *
 * `ccsattr' is taken from the scope where the macro is used, so the module
 * must define it (as an array, for the sizeof division to give a count)
 * before invoking the macro.  Because module_init has a fixed name, the
 * macro can be used only once per translation unit.
 */
#define ICONV_CES_MODULE_DECL(type, name) \
	static int \
	module_init(void **data, const char *cs_name, const void *desc_data) \
	{ \
		return iconv_##type##_init(data, desc_data, \
		                           sizeof(ccsattr) / \
		                           sizeof(iconv_ces_##type##_ccs)); \
	} \
	\
	const struct iconv_ces_desc iconv_ces_##name = { \
		module_init, \
		iconv_##type##_close, \
		iconv_##type##_reset, \
		iconv_##type##_convert_from_ucs, \
		iconv_##type##_convert_to_ucs, \
		&ccsattr \
	}
227 
228 
/*
 * EUC character encoding schemes and functions
 */

/*
 * Per-CCS attributes of an EUC encoding; this is the element type that
 * ICONV_CES_MODULE_DECL(euc, ...) divides sizeof(ccsattr) by.
 * NOTE(review): presumably `name' is the CCS name and `prefix'/`prefixlen'
 * the byte sequence (and its length) that introduces characters of that
 * CCS -- confirm against the EUC driver.
 */
typedef struct {
	const char	*name;
	const char	*prefix;
	size_t		prefixlen;
} iconv_ces_euc_ccs;

/*
 * Declares the five iconv_euc_* driver functions.  The #define that
 * follows then makes every later use of the name iconv_euc_reset
 * (including the one produced by token pasting in ICONV_CES_MODULE_DECL)
 * refer to iconv_ces_reset_null instead.  The order matters: the
 * declaration above is expanded before the alias exists.
 */
ICONV_CES_DRIVER_DECL(euc);
#define iconv_euc_reset	iconv_ces_reset_null
241 
242 
/*
 * ISO-2022 character encoding schemes and functions
 */

/* Shift identifiers; the values are consecutive, starting at zero. */
enum {
	ICONV_SHIFT_SI  = 0,
	ICONV_SHIFT_SO  = 1,
	ICONV_SHIFT_SS2 = 2,
	ICONV_SHIFT_SS3 = 3
};

/*
 * Per-CCS attributes of an ISO-2022 encoding.  Objects of this type are
 * meant to be written with positional initialisers, so the member order is
 * part of the interface: name, designator, designatorlen, shift.
 */
typedef struct {
	const char *name;		/* CCS name */
	const char *designator;		/* designator byte sequence */
	size_t designatorlen;		/* length of designator */
	int shift;			/* one of ICONV_SHIFT_* */
} iconv_ces_iso2022_ccs;
255 
/*
 * Declares the ISO-2022 driver functions iconv_iso2022_init, _close,
 * _reset, _convert_from_ucs and _convert_to_ucs (defined elsewhere).
 */
ICONV_CES_DRIVER_DECL(iso2022);
257 
258 
/*
 * Converter structure and functions
 */

/*
 * Converter entry points.  iconv_conv_t has the same shape as the public
 * iconv() prototype, except that the first argument is an untyped pointer
 * and the buffers are `unsigned char'.  iconv_close_t takes the same kind
 * of untyped pointer.
 * NOTE(review): that pointer is presumably the converter object itself --
 * confirm against the converter implementations.
 */
typedef size_t iconv_conv_t(void *, const unsigned char **, size_t *,
                            unsigned char **, size_t *);
typedef int iconv_close_t(void *);

/* Generic converter structure: the two entry points of one converter */
typedef struct {
	iconv_conv_t *convert;
	iconv_close_t *close;
} iconv_converter;


/*
 * Converter initialisers
 *
 * Both take the target charset name first and the source charset name
 * second, and return a pointer to a converter.
 * NOTE(review): ownership of the returned object and the failure value
 * (presumably NULL) are not visible here -- confirm.
 */
iconv_converter *iconv_unicode_conv_init(const char *to, const char *from);
iconv_converter *iconv_null_conv_init(const char *to, const char *from);
277 
278 
/*
 * iconv auxiliary functions
 *
 * NOTE(review): the implementations are not visible from this header; the
 * remarks below follow from the names and signatures only and need to be
 * confirmed.
 *
 * iconv_malloc:      presumably allocates `size' bytes, stores the pointer
 *                    through `pp' and returns a status code.
 * iconv_filesize:    presumably looks `filename' up along `pathlist',
 *                    writes the resolved path to `result' and returns the
 *                    file size.  `result' has no length parameter, so the
 *                    required buffer capacity must be checked in the
 *                    implementation.
 * iconv_mmap/iconv_munmap: presumably map `size' bytes of a file read-only
 *                    and release that mapping again.
 * iconv_get_aliases: takes two name pointers by reference; presumably
 *                    replaces them with canonical names.
 */

int iconv_malloc(size_t size, void **pp);
off_t iconv_filesize(const char *pathlist, const char *filename, char *result);
const void *iconv_mmap(const char *filename, size_t size);
int iconv_munmap(const void *addr, size_t size);
int iconv_get_aliases(char **name1, char **name2);
288 
289 
/*
 * iconv debug functions
 *
 * With ICONV_DEBUG defined:
 *   idebug(format, ...)      forwards to iconv_debug() together with the
 *                            caller's file name, line number and function
 *                            name.
 *   iconv_warnx(format, ...) calls warnx(3) with the calling function's
 *                            name and ": " placed in front of the message.
 * Without ICONV_DEBUG both macros expand to nothing, so their arguments
 * are never evaluated.
 *
 * For iconv_warnx() `format' must be a string literal, because it is
 * joined to the "%s: " prefix by literal concatenation.  The function name
 * is passed as the "%s" argument: __func__ (like __FUNCTION__) is a
 * variable, not a string literal, so it cannot itself be concatenated with
 * a literal.
 *
 * The named variadic parameter (`args...') and `, ## args' are GNU C
 * extensions.
 */

#ifdef ICONV_DEBUG
void iconv_debug(const char *file, int line, const char *function,
	const char *format, ...);
#define idebug(format, args...) \
		iconv_debug(__FILE__, __LINE__, __func__, format , ## args)
#include <err.h>
#define iconv_warnx(format, args...) \
		warnx("%s: " format, __func__ , ## args)
#else
#define idebug(format, args...)
#define iconv_warnx(format, args...)
#endif
306 
307 #endif /* ICONV_INTERNAL */
308 
309 #endif /* _ICONV_H_ */
310