1 /*	$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $	*/
2 
3 /*-
4  * Copyright (c)2003 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <machine/endian.h>
41 #include <sys/queue.h>
42 
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_hash.h"
49 #include "citrus_iconv.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56 
57 /* ---------------------------------------------------------------------- */
58 
/*
 * Module boilerplate for the "iconv_std" converter, expanded from macros
 * that are defined outside this file (presumably citrus_iconv.h -- TODO
 * confirm).  Judging by how the names are used below, they are expected to
 * declare the static _citrus_iconv_std_iconv_* entry points and to provide
 * the _citrus_iconv_std_iconv_ops table that the getops routine copies out.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
61 
62 
63 /* ---------------------------------------------------------------------- */
64 
65 int
_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops * ops,size_t lenops,u_int32_t expected_version)66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67 			       u_int32_t expected_version)
68 {
69 	if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70 		return (EINVAL);
71 
72 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
73 	       sizeof(_citrus_iconv_std_iconv_ops));
74 
75 	return (0);
76 }
77 
78 /* ---------------------------------------------------------------------- */
79 
80 /*
81  * convenience routines for stdenc.
82  */
83 static __inline void
save_encoding_state(struct _citrus_iconv_std_encoding * se)84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
85 {
86 	if (se->se_ps)
87 		memcpy(se->se_pssaved, se->se_ps,
88 		       _stdenc_get_state_size(se->se_handle));
89 }
90 
91 static __inline void
restore_encoding_state(struct _citrus_iconv_std_encoding * se)92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93 {
94 	if (se->se_ps)
95 		memcpy(se->se_ps, se->se_pssaved,
96 		       _stdenc_get_state_size(se->se_handle));
97 }
98 
99 static __inline void
init_encoding_state(struct _citrus_iconv_std_encoding * se)100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 	if (se->se_ps)
103 		_stdenc_init_state(se->se_handle, se->se_ps);
104 }
105 
106 static __inline int
mbtocsx(struct _citrus_iconv_std_encoding * se,_csid_t * csid,_index_t * idx,const char ** s,size_t n,size_t * nresult)107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 	_csid_t *csid, _index_t *idx, const char **s, size_t n,
109 	size_t *nresult)
110 {
111 	return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 			      nresult);
113 }
114 
115 static __inline int
cstombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_csid_t csid,_index_t idx,size_t * nresult)116 cstombx(struct _citrus_iconv_std_encoding *se,
117 	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118 {
119 	return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120 			      nresult);
121 }
122 
123 static __inline int
wctombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_wc_t wc,size_t * nresult)124 wctombx(struct _citrus_iconv_std_encoding *se,
125 	char *s, size_t n, _wc_t wc, size_t *nresult)
126 {
127 	return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128 }
129 
130 static __inline int
put_state_resetx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,size_t * nresult)131 put_state_resetx(struct _citrus_iconv_std_encoding *se,
132 		 char *s, size_t n, size_t *nresult)
133 {
134 	return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
135 }
136 
137 static __inline int
get_state_desc_gen(struct _citrus_iconv_std_encoding * se,int * rstate)138 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
139 {
140 	int ret;
141 	struct _stdenc_state_desc ssd;
142 
143 	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
144 				     _STDENC_SDID_GENERIC, &ssd);
145 	if (!ret)
146 		*rstate = ssd.u.generic.state;
147 
148 	return ret;
149 }
150 
151 /*
152  * init encoding context
153  */
154 static int
init_encoding(struct _citrus_iconv_std_encoding * se,struct _stdenc * cs,void * ps1,void * ps2)155 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
156 	      void *ps1, void *ps2)
157 {
158 	int ret = -1;
159 
160 	se->se_handle = cs;
161 	se->se_ps = ps1;
162 	se->se_pssaved = ps2;
163 
164 	if (se->se_ps)
165 		ret = _stdenc_init_state(cs, se->se_ps);
166 	if (!ret && se->se_pssaved)
167 		ret = _stdenc_init_state(cs, se->se_pssaved);
168 
169 	return ret;
170 }
171 
/*
 * Open the charset mapper that translates src to dst and make sure it is
 * usable by this module.
 *
 * A mapper is accepted only if its source maximum and destination maximum
 * are both 1 and its state size is 0; otherwise it is closed again and
 * EINVAL is returned.
 *
 * On success returns 0, stores the mapper in *rcm and the value reported
 * by _csmapper_open() in *rnorm.  Errors from _csmapper_open() are
 * returned unchanged and leave *rcm untouched.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}
192 
193 static void
close_dsts(struct _citrus_iconv_std_dst_list * dl)194 close_dsts(struct _citrus_iconv_std_dst_list *dl)
195 {
196 	struct _citrus_iconv_std_dst *sd;
197 
198 	while ((sd=TAILQ_FIRST(dl)) != NULL) {
199 		TAILQ_REMOVE(dl, sd, sd_entry);
200 		_csmapper_close(sd->sd_mapper);
201 		free(sd);
202 	}
203 }
204 
205 static int
open_dsts(struct _citrus_iconv_std_dst_list * dl,const struct _esdb_charset * ec,const struct _esdb * dbdst)206 open_dsts(struct _citrus_iconv_std_dst_list *dl,
207 	  const struct _esdb_charset *ec, const struct _esdb *dbdst)
208 {
209 	int i, ret;
210 	struct _citrus_iconv_std_dst *sd, *sdtmp;
211 	unsigned long norm;
212 
213 	sd = malloc(sizeof(*sd));
214 	if (sd == NULL)
215 		return errno;
216 
217 	for (i=0; i<dbdst->db_num_charsets; i++) {
218 		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
219 				    dbdst->db_charsets[i].ec_csname, &norm);
220 		if (ret == 0) {
221 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
222 			sd->sd_norm = norm;
223 			/* insert this mapper by sorted order. */
224 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
225 				if (sdtmp->sd_norm > norm) {
226 					TAILQ_INSERT_BEFORE(sdtmp, sd,
227 							    sd_entry);
228 					sd = NULL;
229 					break;
230 				}
231 			}
232 			if (sd)
233 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
234 			sd = malloc(sizeof(*sd));
235 			if (sd == NULL) {
236 				ret = errno;
237 				close_dsts(dl);
238 				return ret;
239 			}
240 		} else if (ret != ENOENT) {
241 			close_dsts(dl);
242 			free(sd);
243 			return ret;
244 		}
245 	}
246 	free(sd);
247 	return 0;
248 }
249 
250 static void
close_srcs(struct _citrus_iconv_std_src_list * sl)251 close_srcs(struct _citrus_iconv_std_src_list *sl)
252 {
253 	struct _citrus_iconv_std_src *ss;
254 
255 	while ((ss=TAILQ_FIRST(sl)) != NULL) {
256 		TAILQ_REMOVE(sl, ss, ss_entry);
257 		close_dsts(&ss->ss_dsts);
258 		free(ss);
259 	}
260 }
261 
262 static int
open_srcs(struct _citrus_iconv_std_src_list * sl,const struct _esdb * dbsrc,const struct _esdb * dbdst)263 open_srcs(struct _citrus_iconv_std_src_list *sl,
264 	  const struct _esdb *dbsrc, const struct _esdb *dbdst)
265 {
266 	int i, ret, count = 0;
267 	struct _citrus_iconv_std_src *ss;
268 
269 	ss = malloc(sizeof(*ss));
270 	if (ss == NULL)
271 		return errno;
272 
273 	TAILQ_INIT(&ss->ss_dsts);
274 
275 	for (i=0; i<dbsrc->db_num_charsets; i++) {
276 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
277 		if (ret)
278 			goto err;
279 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
280 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
281 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
282 			ss = malloc(sizeof(*ss));
283 			if (ss == NULL) {
284 				ret = errno;
285 				goto err;
286 			}
287 			count++;
288 			TAILQ_INIT(&ss->ss_dsts);
289 		}
290 	}
291 	free(ss);
292 
293 	return count ? 0 : ENOENT;
294 
295 err:
296 	free(ss);
297 	close_srcs(sl);
298 	return ret;
299 }
300 
301 /* do convert a character */
302 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
303 static int
304 /*ARGSUSED*/
do_conv(const struct _citrus_iconv_std_shared * is,struct _citrus_iconv_std_context * sc,_csid_t * csid,_index_t * idx)305 do_conv(const struct _citrus_iconv_std_shared *is,
306 	struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
307 {
308 	_index_t tmpidx;
309 	int ret;
310 	struct _citrus_iconv_std_src *ss;
311 	struct _citrus_iconv_std_dst *sd;
312 
313 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
314 		if (ss->ss_csid == *csid) {
315 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
316 				ret = _csmapper_convert(sd->sd_mapper,
317 							&tmpidx, *idx, NULL);
318 				switch (ret) {
319 				case _MAPPER_CONVERT_SUCCESS:
320 					*csid = sd->sd_csid;
321 					*idx = tmpidx;
322 					return 0;
323 				case _MAPPER_CONVERT_NONIDENTICAL:
324 					break;
325 				case _MAPPER_CONVERT_SRC_MORE:
326 					/*FALLTHROUGH*/
327 				case _MAPPER_CONVERT_DST_MORE:
328 					/*FALLTHROUGH*/
329 				case _MAPPER_CONVERT_FATAL:
330 					return EINVAL;
331 				case _MAPPER_CONVERT_ILSEQ:
332 					return EILSEQ;
333 				}
334 			}
335 			break;
336 		}
337 	}
338 
339 	return E_NO_CORRESPONDING_CHAR;
340 }
341 /* ---------------------------------------------------------------------- */
342 
343 static int
344 /*ARGSUSED*/
_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared * ci,const char * __restrict curdir,const char * __restrict src,const char * __restrict dst,const void * __restrict var,size_t lenvar)345 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
346 				    const char * __restrict curdir,
347 				    const char * __restrict src,
348 				    const char * __restrict dst,
349 				    const void * __restrict var, size_t lenvar)
350 {
351 	int ret;
352 	struct _citrus_iconv_std_shared *is;
353 	struct _citrus_esdb esdbsrc, esdbdst;
354 
355 	is = malloc(sizeof(*is));
356 	if (is==NULL) {
357 		ret = errno;
358 		goto err0;
359 	}
360 	ret = _citrus_esdb_open(&esdbsrc, src);
361 	if (ret)
362 		goto err1;
363 	ret = _citrus_esdb_open(&esdbdst, dst);
364 	if (ret)
365 		goto err2;
366 	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
367 			   esdbsrc.db_variable, esdbsrc.db_len_variable);
368 	if (ret)
369 		goto err3;
370 	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
371 			   esdbdst.db_variable, esdbdst.db_len_variable);
372 	if (ret)
373 		goto err4;
374 	is->is_use_invalid = esdbdst.db_use_invalid;
375 	is->is_invalid = esdbdst.db_invalid;
376 
377 	TAILQ_INIT(&is->is_srcs);
378 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
379 	if (ret)
380 		goto err5;
381 
382 	_esdb_close(&esdbsrc);
383 	_esdb_close(&esdbdst);
384 	ci->ci_closure = is;
385 
386 	return 0;
387 
388 err5:
389 	_stdenc_close(is->is_dst_encoding);
390 err4:
391 	_stdenc_close(is->is_src_encoding);
392 err3:
393 	_esdb_close(&esdbdst);
394 err2:
395 	_esdb_close(&esdbsrc);
396 err1:
397 	free(is);
398 err0:
399 	return ret;
400 }
401 
402 static void
_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared * ci)403 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
404 {
405 	struct _citrus_iconv_std_shared *is = ci->ci_closure;
406 
407 	if (is == NULL)
408 		return;
409 
410 	_stdenc_close(is->is_src_encoding);
411 	_stdenc_close(is->is_dst_encoding);
412 	close_srcs(&is->is_srcs);
413 	free(is);
414 }
415 
416 static int
_citrus_iconv_std_iconv_init_context(struct _citrus_iconv * cv)417 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
418 {
419 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
420 	struct _citrus_iconv_std_context *sc;
421 	size_t szpssrc, szpsdst, sz;
422 	char *ptr;
423 
424 	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
425 	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
426 
427 	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
428 	sc = malloc(sz);
429 	if (sc == NULL)
430 		return errno;
431 
432 	ptr = (char *)&sc[1];
433 	if (szpssrc)
434 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
435 			      ptr, ptr+szpssrc);
436 	else
437 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
438 			      NULL, NULL);
439 	ptr += szpssrc*2;
440 	if (szpsdst)
441 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
442 			      ptr, ptr+szpsdst);
443 	else
444 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
445 			      NULL, NULL);
446 
447 	cv->cv_closure = (void *)sc;
448 
449 	return 0;
450 }
451 
452 static void
_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv * cv)453 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
454 {
455 	free(cv->cv_closure);
456 }
457 
458 static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,const char * __restrict * __restrict in,size_t * __restrict inbytes,char * __restrict * __restrict out,size_t * __restrict outbytes,u_int32_t flags,size_t * __restrict invalids)459 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
460 				const char * __restrict * __restrict in,
461 				size_t * __restrict inbytes,
462 				char * __restrict * __restrict out,
463 				size_t * __restrict outbytes, u_int32_t flags,
464 				size_t * __restrict invalids)
465 {
466 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
467 	struct _citrus_iconv_std_context *sc = cv->cv_closure;
468 	_index_t idx;
469 	_csid_t csid;
470 	int ret, state;
471 	size_t szrin, szrout;
472 	size_t inval;
473 	const char *tmpin;
474 
475 	inval = 0;
476 	if (in==NULL || *in==NULL) {
477 		/* special cases */
478 		if (out!=NULL && *out!=NULL) {
479 			/* init output state and store the shift sequence */
480 			save_encoding_state(&sc->sc_src_encoding);
481 			save_encoding_state(&sc->sc_dst_encoding);
482 			szrout = 0;
483 
484 			ret = put_state_resetx(&sc->sc_dst_encoding,
485 					       *out, *outbytes,
486 					       &szrout);
487 			if (ret)
488 				goto err;
489 
490 			if (szrout == (size_t)-2) {
491 				/* too small to store the character */
492 				ret = EINVAL;
493 				goto err;
494 			}
495 			*out += szrout;
496 			*outbytes -= szrout;
497 		} else
498 			/* otherwise, discard the shift sequence */
499 			init_encoding_state(&sc->sc_dst_encoding);
500 		init_encoding_state(&sc->sc_src_encoding);
501 		*invalids = 0;
502 		return 0;
503 	}
504 
505 	/* normal case */
506 	for (;;) {
507 		if (*inbytes==0) {
508 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
509 			if (state == _STDENC_SDGEN_INITIAL ||
510 			    state == _STDENC_SDGEN_STABLE)
511 				break;
512 		}
513 
514 		/* save the encoding states for the error recovery */
515 		save_encoding_state(&sc->sc_src_encoding);
516 		save_encoding_state(&sc->sc_dst_encoding);
517 
518 		/* mb -> csid/index */
519 		tmpin = *in;
520 		szrin = szrout = 0;
521 		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
522 			      &tmpin, *inbytes, &szrin);
523 		if (ret)
524 			goto err;
525 
526 		if (szrin == (size_t)-2) {
527 			/* incompleted character */
528 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
529 			if (ret) {
530 				ret = EINVAL;
531 				goto err;
532 			}
533 			switch (state) {
534 			case _STDENC_SDGEN_INITIAL:
535 			case _STDENC_SDGEN_STABLE:
536 				/* fetch shift sequences only. */
537 				goto next;
538 			}
539 			ret = EINVAL;
540 			goto err;
541 		}
542 		/* convert the character */
543 		ret = do_conv(is, sc, &csid, &idx);
544 		if (ret) {
545 			if (ret == E_NO_CORRESPONDING_CHAR) {
546 				inval++;
547 				szrout = 0;
548 				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
549 				    is->is_use_invalid) {
550 					ret = wctombx(&sc->sc_dst_encoding,
551 						      *out, *outbytes,
552 						      is->is_invalid,
553 						      &szrout);
554 					if (ret)
555 						goto err;
556 				}
557 				goto next;
558 			} else {
559 				goto err;
560 			}
561 		}
562 		/* csid/index -> mb */
563 		ret = cstombx(&sc->sc_dst_encoding,
564 			      *out, *outbytes, csid, idx, &szrout);
565 		if (ret)
566 			goto err;
567 next:
568 		_DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
569 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
570 		*in = tmpin;
571 		*outbytes -= szrout;
572 		*out += szrout;
573 	}
574 	*invalids = inval;
575 
576 	return 0;
577 
578 err:
579 	restore_encoding_state(&sc->sc_src_encoding);
580 	restore_encoding_state(&sc->sc_dst_encoding);
581 err_norestore:
582 	*invalids = inval;
583 
584 	return ret;
585 }
586