1 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */
2
3 /*-
4 * Copyright (c)2003 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $");
32 #endif /* LIBC_SCCS and not lint */
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <machine/endian.h>
41 #include <sys/queue.h>
42
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_hash.h"
49 #include "citrus_iconv.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56
57 /* ---------------------------------------------------------------------- */
58
59 _CITRUS_ICONV_DECLS(iconv_std);
60 _CITRUS_ICONV_DEF_OPS(iconv_std);
61
62
63 /* ---------------------------------------------------------------------- */
64
65 int
_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops * ops,size_t lenops,u_int32_t expected_version)66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67 u_int32_t expected_version)
68 {
69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70 return (EINVAL);
71
72 memcpy(ops, &_citrus_iconv_std_iconv_ops,
73 sizeof(_citrus_iconv_std_iconv_ops));
74
75 return (0);
76 }
77
78 /* ---------------------------------------------------------------------- */
79
80 /*
81 * convenience routines for stdenc.
82 */
83 static __inline void
save_encoding_state(struct _citrus_iconv_std_encoding * se)84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
85 {
86 if (se->se_ps)
87 memcpy(se->se_pssaved, se->se_ps,
88 _stdenc_get_state_size(se->se_handle));
89 }
90
91 static __inline void
restore_encoding_state(struct _citrus_iconv_std_encoding * se)92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93 {
94 if (se->se_ps)
95 memcpy(se->se_ps, se->se_pssaved,
96 _stdenc_get_state_size(se->se_handle));
97 }
98
99 static __inline void
init_encoding_state(struct _citrus_iconv_std_encoding * se)100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 if (se->se_ps)
103 _stdenc_init_state(se->se_handle, se->se_ps);
104 }
105
106 static __inline int
mbtocsx(struct _citrus_iconv_std_encoding * se,_csid_t * csid,_index_t * idx,const char ** s,size_t n,size_t * nresult)107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 _csid_t *csid, _index_t *idx, const char **s, size_t n,
109 size_t *nresult)
110 {
111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 nresult);
113 }
114
115 static __inline int
cstombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_csid_t csid,_index_t idx,size_t * nresult)116 cstombx(struct _citrus_iconv_std_encoding *se,
117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118 {
119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120 nresult);
121 }
122
123 static __inline int
wctombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_wc_t wc,size_t * nresult)124 wctombx(struct _citrus_iconv_std_encoding *se,
125 char *s, size_t n, _wc_t wc, size_t *nresult)
126 {
127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128 }
129
130 static __inline int
put_state_resetx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,size_t * nresult)131 put_state_resetx(struct _citrus_iconv_std_encoding *se,
132 char *s, size_t n, size_t *nresult)
133 {
134 return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
135 }
136
137 static __inline int
get_state_desc_gen(struct _citrus_iconv_std_encoding * se,int * rstate)138 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
139 {
140 int ret;
141 struct _stdenc_state_desc ssd;
142
143 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
144 _STDENC_SDID_GENERIC, &ssd);
145 if (!ret)
146 *rstate = ssd.u.generic.state;
147
148 return ret;
149 }
150
151 /*
152 * init encoding context
153 */
154 static int
init_encoding(struct _citrus_iconv_std_encoding * se,struct _stdenc * cs,void * ps1,void * ps2)155 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
156 void *ps1, void *ps2)
157 {
158 int ret = -1;
159
160 se->se_handle = cs;
161 se->se_ps = ps1;
162 se->se_pssaved = ps2;
163
164 if (se->se_ps)
165 ret = _stdenc_init_state(cs, se->se_ps);
166 if (!ret && se->se_pssaved)
167 ret = _stdenc_init_state(cs, se->se_pssaved);
168
169 return ret;
170 }
171
/*
 * Open a charset mapper from src to dst and hand it back through *rcm.
 *
 * Only mappers whose source maximum and destination maximum are both 1
 * and whose state size is 0 are accepted; any other mapper is closed
 * again and EINVAL is returned.  A failure of _csmapper_open() is
 * returned as is.  *rnorm is filled in by _csmapper_open().
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}
192
193 static void
close_dsts(struct _citrus_iconv_std_dst_list * dl)194 close_dsts(struct _citrus_iconv_std_dst_list *dl)
195 {
196 struct _citrus_iconv_std_dst *sd;
197
198 while ((sd=TAILQ_FIRST(dl)) != NULL) {
199 TAILQ_REMOVE(dl, sd, sd_entry);
200 _csmapper_close(sd->sd_mapper);
201 free(sd);
202 }
203 }
204
205 static int
open_dsts(struct _citrus_iconv_std_dst_list * dl,const struct _esdb_charset * ec,const struct _esdb * dbdst)206 open_dsts(struct _citrus_iconv_std_dst_list *dl,
207 const struct _esdb_charset *ec, const struct _esdb *dbdst)
208 {
209 int i, ret;
210 struct _citrus_iconv_std_dst *sd, *sdtmp;
211 unsigned long norm;
212
213 sd = malloc(sizeof(*sd));
214 if (sd == NULL)
215 return errno;
216
217 for (i=0; i<dbdst->db_num_charsets; i++) {
218 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
219 dbdst->db_charsets[i].ec_csname, &norm);
220 if (ret == 0) {
221 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
222 sd->sd_norm = norm;
223 /* insert this mapper by sorted order. */
224 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
225 if (sdtmp->sd_norm > norm) {
226 TAILQ_INSERT_BEFORE(sdtmp, sd,
227 sd_entry);
228 sd = NULL;
229 break;
230 }
231 }
232 if (sd)
233 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
234 sd = malloc(sizeof(*sd));
235 if (sd == NULL) {
236 ret = errno;
237 close_dsts(dl);
238 return ret;
239 }
240 } else if (ret != ENOENT) {
241 close_dsts(dl);
242 free(sd);
243 return ret;
244 }
245 }
246 free(sd);
247 return 0;
248 }
249
250 static void
close_srcs(struct _citrus_iconv_std_src_list * sl)251 close_srcs(struct _citrus_iconv_std_src_list *sl)
252 {
253 struct _citrus_iconv_std_src *ss;
254
255 while ((ss=TAILQ_FIRST(sl)) != NULL) {
256 TAILQ_REMOVE(sl, ss, ss_entry);
257 close_dsts(&ss->ss_dsts);
258 free(ss);
259 }
260 }
261
262 static int
open_srcs(struct _citrus_iconv_std_src_list * sl,const struct _esdb * dbsrc,const struct _esdb * dbdst)263 open_srcs(struct _citrus_iconv_std_src_list *sl,
264 const struct _esdb *dbsrc, const struct _esdb *dbdst)
265 {
266 int i, ret, count = 0;
267 struct _citrus_iconv_std_src *ss;
268
269 ss = malloc(sizeof(*ss));
270 if (ss == NULL)
271 return errno;
272
273 TAILQ_INIT(&ss->ss_dsts);
274
275 for (i=0; i<dbsrc->db_num_charsets; i++) {
276 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
277 if (ret)
278 goto err;
279 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
280 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
281 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
282 ss = malloc(sizeof(*ss));
283 if (ss == NULL) {
284 ret = errno;
285 goto err;
286 }
287 count++;
288 TAILQ_INIT(&ss->ss_dsts);
289 }
290 }
291 free(ss);
292
293 return count ? 0 : ENOENT;
294
295 err:
296 free(ss);
297 close_srcs(sl);
298 return ret;
299 }
300
/* Convert a single character (charset id + index) between charsets. */
302 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
303 static int
304 /*ARGSUSED*/
do_conv(const struct _citrus_iconv_std_shared * is,struct _citrus_iconv_std_context * sc,_csid_t * csid,_index_t * idx)305 do_conv(const struct _citrus_iconv_std_shared *is,
306 struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
307 {
308 _index_t tmpidx;
309 int ret;
310 struct _citrus_iconv_std_src *ss;
311 struct _citrus_iconv_std_dst *sd;
312
313 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
314 if (ss->ss_csid == *csid) {
315 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
316 ret = _csmapper_convert(sd->sd_mapper,
317 &tmpidx, *idx, NULL);
318 switch (ret) {
319 case _MAPPER_CONVERT_SUCCESS:
320 *csid = sd->sd_csid;
321 *idx = tmpidx;
322 return 0;
323 case _MAPPER_CONVERT_NONIDENTICAL:
324 break;
325 case _MAPPER_CONVERT_SRC_MORE:
326 /*FALLTHROUGH*/
327 case _MAPPER_CONVERT_DST_MORE:
328 /*FALLTHROUGH*/
329 case _MAPPER_CONVERT_FATAL:
330 return EINVAL;
331 case _MAPPER_CONVERT_ILSEQ:
332 return EILSEQ;
333 }
334 }
335 break;
336 }
337 }
338
339 return E_NO_CORRESPONDING_CHAR;
340 }
341 /* ---------------------------------------------------------------------- */
342
343 static int
344 /*ARGSUSED*/
_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared * ci,const char * __restrict curdir,const char * __restrict src,const char * __restrict dst,const void * __restrict var,size_t lenvar)345 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
346 const char * __restrict curdir,
347 const char * __restrict src,
348 const char * __restrict dst,
349 const void * __restrict var, size_t lenvar)
350 {
351 int ret;
352 struct _citrus_iconv_std_shared *is;
353 struct _citrus_esdb esdbsrc, esdbdst;
354
355 is = malloc(sizeof(*is));
356 if (is==NULL) {
357 ret = errno;
358 goto err0;
359 }
360 ret = _citrus_esdb_open(&esdbsrc, src);
361 if (ret)
362 goto err1;
363 ret = _citrus_esdb_open(&esdbdst, dst);
364 if (ret)
365 goto err2;
366 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
367 esdbsrc.db_variable, esdbsrc.db_len_variable);
368 if (ret)
369 goto err3;
370 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
371 esdbdst.db_variable, esdbdst.db_len_variable);
372 if (ret)
373 goto err4;
374 is->is_use_invalid = esdbdst.db_use_invalid;
375 is->is_invalid = esdbdst.db_invalid;
376
377 TAILQ_INIT(&is->is_srcs);
378 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
379 if (ret)
380 goto err5;
381
382 _esdb_close(&esdbsrc);
383 _esdb_close(&esdbdst);
384 ci->ci_closure = is;
385
386 return 0;
387
388 err5:
389 _stdenc_close(is->is_dst_encoding);
390 err4:
391 _stdenc_close(is->is_src_encoding);
392 err3:
393 _esdb_close(&esdbdst);
394 err2:
395 _esdb_close(&esdbsrc);
396 err1:
397 free(is);
398 err0:
399 return ret;
400 }
401
402 static void
_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared * ci)403 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
404 {
405 struct _citrus_iconv_std_shared *is = ci->ci_closure;
406
407 if (is == NULL)
408 return;
409
410 _stdenc_close(is->is_src_encoding);
411 _stdenc_close(is->is_dst_encoding);
412 close_srcs(&is->is_srcs);
413 free(is);
414 }
415
416 static int
_citrus_iconv_std_iconv_init_context(struct _citrus_iconv * cv)417 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
418 {
419 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
420 struct _citrus_iconv_std_context *sc;
421 size_t szpssrc, szpsdst, sz;
422 char *ptr;
423
424 szpssrc = _stdenc_get_state_size(is->is_src_encoding);
425 szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
426
427 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
428 sc = malloc(sz);
429 if (sc == NULL)
430 return errno;
431
432 ptr = (char *)&sc[1];
433 if (szpssrc)
434 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
435 ptr, ptr+szpssrc);
436 else
437 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
438 NULL, NULL);
439 ptr += szpssrc*2;
440 if (szpsdst)
441 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
442 ptr, ptr+szpsdst);
443 else
444 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
445 NULL, NULL);
446
447 cv->cv_closure = (void *)sc;
448
449 return 0;
450 }
451
452 static void
_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv * cv)453 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
454 {
455 free(cv->cv_closure);
456 }
457
458 static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,const char * __restrict * __restrict in,size_t * __restrict inbytes,char * __restrict * __restrict out,size_t * __restrict outbytes,u_int32_t flags,size_t * __restrict invalids)459 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
460 const char * __restrict * __restrict in,
461 size_t * __restrict inbytes,
462 char * __restrict * __restrict out,
463 size_t * __restrict outbytes, u_int32_t flags,
464 size_t * __restrict invalids)
465 {
466 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
467 struct _citrus_iconv_std_context *sc = cv->cv_closure;
468 _index_t idx;
469 _csid_t csid;
470 int ret, state;
471 size_t szrin, szrout;
472 size_t inval;
473 const char *tmpin;
474
475 inval = 0;
476 if (in==NULL || *in==NULL) {
477 /* special cases */
478 if (out!=NULL && *out!=NULL) {
479 /* init output state and store the shift sequence */
480 save_encoding_state(&sc->sc_src_encoding);
481 save_encoding_state(&sc->sc_dst_encoding);
482 szrout = 0;
483
484 ret = put_state_resetx(&sc->sc_dst_encoding,
485 *out, *outbytes,
486 &szrout);
487 if (ret)
488 goto err;
489
490 if (szrout == (size_t)-2) {
491 /* too small to store the character */
492 ret = EINVAL;
493 goto err;
494 }
495 *out += szrout;
496 *outbytes -= szrout;
497 } else
498 /* otherwise, discard the shift sequence */
499 init_encoding_state(&sc->sc_dst_encoding);
500 init_encoding_state(&sc->sc_src_encoding);
501 *invalids = 0;
502 return 0;
503 }
504
505 /* normal case */
506 for (;;) {
507 if (*inbytes==0) {
508 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
509 if (state == _STDENC_SDGEN_INITIAL ||
510 state == _STDENC_SDGEN_STABLE)
511 break;
512 }
513
514 /* save the encoding states for the error recovery */
515 save_encoding_state(&sc->sc_src_encoding);
516 save_encoding_state(&sc->sc_dst_encoding);
517
518 /* mb -> csid/index */
519 tmpin = *in;
520 szrin = szrout = 0;
521 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
522 &tmpin, *inbytes, &szrin);
523 if (ret)
524 goto err;
525
526 if (szrin == (size_t)-2) {
527 /* incompleted character */
528 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
529 if (ret) {
530 ret = EINVAL;
531 goto err;
532 }
533 switch (state) {
534 case _STDENC_SDGEN_INITIAL:
535 case _STDENC_SDGEN_STABLE:
536 /* fetch shift sequences only. */
537 goto next;
538 }
539 ret = EINVAL;
540 goto err;
541 }
542 /* convert the character */
543 ret = do_conv(is, sc, &csid, &idx);
544 if (ret) {
545 if (ret == E_NO_CORRESPONDING_CHAR) {
546 inval++;
547 szrout = 0;
548 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
549 is->is_use_invalid) {
550 ret = wctombx(&sc->sc_dst_encoding,
551 *out, *outbytes,
552 is->is_invalid,
553 &szrout);
554 if (ret)
555 goto err;
556 }
557 goto next;
558 } else {
559 goto err;
560 }
561 }
562 /* csid/index -> mb */
563 ret = cstombx(&sc->sc_dst_encoding,
564 *out, *outbytes, csid, idx, &szrout);
565 if (ret)
566 goto err;
567 next:
568 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
569 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
570 *in = tmpin;
571 *outbytes -= szrout;
572 *out += szrout;
573 }
574 *invalids = inval;
575
576 return 0;
577
578 err:
579 restore_encoding_state(&sc->sc_src_encoding);
580 restore_encoding_state(&sc->sc_dst_encoding);
581 err_norestore:
582 *invalids = inval;
583
584 return ret;
585 }
586