1 /* $FreeBSD: head/lib/libiconv_modules/iconv_std/citrus_iconv_std.c 281550 2015-04-15 09:09:20Z tijl $ */
2 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */
3
4 /*-
5 * Copyright (c)2003 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 #include <sys/endian.h>
32 #include <sys/queue.h>
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41
42 #include "citrus_namespace.h"
43 #include "citrus_types.h"
44 #include "citrus_module.h"
45 #include "citrus_region.h"
46 #include "citrus_mmap.h"
47 #include "citrus_hash.h"
48 #include "citrus_iconv.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_mapper.h"
51 #include "citrus_csmapper.h"
52 #include "citrus_memstream.h"
53 #include "citrus_iconv_std.h"
54 #include "citrus_esdb.h"
55
56 /* ---------------------------------------------------------------------- */
57
58 _CITRUS_ICONV_DECLS(iconv_std);
59 _CITRUS_ICONV_DEF_OPS(iconv_std);
60
61
62 /* ---------------------------------------------------------------------- */
63
64 int
_citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops * ops)65 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops)
66 {
67
68 memcpy(ops, &_citrus_iconv_std_iconv_ops,
69 sizeof(_citrus_iconv_std_iconv_ops));
70
71 return (0);
72 }
73
74 /* ---------------------------------------------------------------------- */
75
76 /*
77 * convenience routines for stdenc.
78 */
79 static __inline void
save_encoding_state(struct _citrus_iconv_std_encoding * se)80 save_encoding_state(struct _citrus_iconv_std_encoding *se)
81 {
82
83 if (se->se_ps)
84 memcpy(se->se_pssaved, se->se_ps,
85 _stdenc_get_state_size(se->se_handle));
86 }
87
88 static __inline void
restore_encoding_state(struct _citrus_iconv_std_encoding * se)89 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
90 {
91
92 if (se->se_ps)
93 memcpy(se->se_ps, se->se_pssaved,
94 _stdenc_get_state_size(se->se_handle));
95 }
96
97 static __inline void
init_encoding_state(struct _citrus_iconv_std_encoding * se)98 init_encoding_state(struct _citrus_iconv_std_encoding *se)
99 {
100
101 if (se->se_ps)
102 _stdenc_init_state(se->se_handle, se->se_ps);
103 }
104
105 static __inline int
mbtocsx(struct _citrus_iconv_std_encoding * se,_csid_t * csid,_index_t * idx,char ** s,size_t n,size_t * nresult,struct iconv_hooks * hooks)106 mbtocsx(struct _citrus_iconv_std_encoding *se,
107 _csid_t *csid, _index_t *idx, char **s, size_t n, size_t *nresult,
108 struct iconv_hooks *hooks)
109 {
110
111 return (_stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 nresult, hooks));
113 }
114
115 static __inline int
cstombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_csid_t csid,_index_t idx,size_t * nresult,struct iconv_hooks * hooks)116 cstombx(struct _citrus_iconv_std_encoding *se,
117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult,
118 struct iconv_hooks *hooks)
119 {
120
121 return (_stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
122 nresult, hooks));
123 }
124
125 static __inline int
wctombx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,_wc_t wc,size_t * nresult,struct iconv_hooks * hooks)126 wctombx(struct _citrus_iconv_std_encoding *se,
127 char *s, size_t n, _wc_t wc, size_t *nresult,
128 struct iconv_hooks *hooks)
129 {
130
131 return (_stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult,
132 hooks));
133 }
134
135 static __inline int
put_state_resetx(struct _citrus_iconv_std_encoding * se,char * s,size_t n,size_t * nresult)136 put_state_resetx(struct _citrus_iconv_std_encoding *se, char *s, size_t n,
137 size_t *nresult)
138 {
139
140 return (_stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult));
141 }
142
143 static __inline int
get_state_desc_gen(struct _citrus_iconv_std_encoding * se,int * rstate)144 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
145 {
146 struct _stdenc_state_desc ssd;
147 int ret;
148
149 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
150 _STDENC_SDID_GENERIC, &ssd);
151 if (!ret)
152 *rstate = ssd.u.generic.state;
153
154 return (ret);
155 }
156
157 /*
158 * init encoding context
159 */
160 static int
init_encoding(struct _citrus_iconv_std_encoding * se,struct _stdenc * cs,void * ps1,void * ps2)161 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
162 void *ps1, void *ps2)
163 {
164 int ret = -1;
165
166 se->se_handle = cs;
167 se->se_ps = ps1;
168 se->se_pssaved = ps2;
169
170 if (se->se_ps)
171 ret = _stdenc_init_state(cs, se->se_ps);
172 if (!ret && se->se_pssaved)
173 ret = _stdenc_init_state(cs, se->se_pssaved);
174
175 return (ret);
176 }
177
/*
 * Open a charset mapper from `src' to `dst' and accept it only if it maps
 * one source unit to one destination unit and keeps no state (source max
 * and destination max both 1, state size 0).
 *
 * On success stores the mapper in *rcm and returns 0.  Returns the error
 * from _csmapper_open() if opening fails, or EINVAL (after closing the
 * mapper) if the mapper does not have the required shape.  `rnorm' is
 * handed straight to _csmapper_open().
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}
198
199 static void
close_dsts(struct _citrus_iconv_std_dst_list * dl)200 close_dsts(struct _citrus_iconv_std_dst_list *dl)
201 {
202 struct _citrus_iconv_std_dst *sd;
203
204 while ((sd = TAILQ_FIRST(dl)) != NULL) {
205 TAILQ_REMOVE(dl, sd, sd_entry);
206 _csmapper_close(sd->sd_mapper);
207 free(sd);
208 }
209 }
210
211 static int
open_dsts(struct _citrus_iconv_std_dst_list * dl,const struct _esdb_charset * ec,const struct _esdb * dbdst)212 open_dsts(struct _citrus_iconv_std_dst_list *dl,
213 const struct _esdb_charset *ec, const struct _esdb *dbdst)
214 {
215 struct _citrus_iconv_std_dst *sd, *sdtmp;
216 unsigned long norm;
217 int i, ret;
218
219 sd = malloc(sizeof(*sd));
220 if (sd == NULL)
221 return (errno);
222
223 for (i = 0; i < dbdst->db_num_charsets; i++) {
224 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
225 dbdst->db_charsets[i].ec_csname, &norm);
226 if (ret == 0) {
227 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
228 sd->sd_norm = norm;
229 /* insert this mapper by sorted order. */
230 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
231 if (sdtmp->sd_norm > norm) {
232 TAILQ_INSERT_BEFORE(sdtmp, sd,
233 sd_entry);
234 sd = NULL;
235 break;
236 }
237 }
238 if (sd)
239 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
240 sd = malloc(sizeof(*sd));
241 if (sd == NULL) {
242 ret = errno;
243 close_dsts(dl);
244 return (ret);
245 }
246 } else if (ret != ENOENT) {
247 close_dsts(dl);
248 free(sd);
249 return (ret);
250 }
251 }
252 free(sd);
253 return (0);
254 }
255
256 static void
close_srcs(struct _citrus_iconv_std_src_list * sl)257 close_srcs(struct _citrus_iconv_std_src_list *sl)
258 {
259 struct _citrus_iconv_std_src *ss;
260
261 while ((ss = TAILQ_FIRST(sl)) != NULL) {
262 TAILQ_REMOVE(sl, ss, ss_entry);
263 close_dsts(&ss->ss_dsts);
264 free(ss);
265 }
266 }
267
268 static int
open_srcs(struct _citrus_iconv_std_src_list * sl,const struct _esdb * dbsrc,const struct _esdb * dbdst)269 open_srcs(struct _citrus_iconv_std_src_list *sl,
270 const struct _esdb *dbsrc, const struct _esdb *dbdst)
271 {
272 struct _citrus_iconv_std_src *ss;
273 int count = 0, i, ret;
274
275 ss = malloc(sizeof(*ss));
276 if (ss == NULL)
277 return (errno);
278
279 TAILQ_INIT(&ss->ss_dsts);
280
281 for (i = 0; i < dbsrc->db_num_charsets; i++) {
282 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
283 if (ret)
284 goto err;
285 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
286 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
287 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
288 ss = malloc(sizeof(*ss));
289 if (ss == NULL) {
290 ret = errno;
291 goto err;
292 }
293 count++;
294 TAILQ_INIT(&ss->ss_dsts);
295 }
296 }
297 free(ss);
298
299 return (count ? 0 : ENOENT);
300
301 err:
302 free(ss);
303 close_srcs(sl);
304 return (ret);
305 }
306
307 /* do convert a character */
308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
309 static int
310 /*ARGSUSED*/
do_conv(const struct _citrus_iconv_std_shared * is,_csid_t * csid,_index_t * idx)311 do_conv(const struct _citrus_iconv_std_shared *is,
312 _csid_t *csid, _index_t *idx)
313 {
314 struct _citrus_iconv_std_dst *sd;
315 struct _citrus_iconv_std_src *ss;
316 _index_t tmpidx;
317 int ret;
318
319 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
320 if (ss->ss_csid == *csid) {
321 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
322 ret = _csmapper_convert(sd->sd_mapper,
323 &tmpidx, *idx, NULL);
324 switch (ret) {
325 case _MAPPER_CONVERT_SUCCESS:
326 *csid = sd->sd_csid;
327 *idx = tmpidx;
328 return (0);
329 case _MAPPER_CONVERT_NONIDENTICAL:
330 break;
331 case _MAPPER_CONVERT_SRC_MORE:
332 /*FALLTHROUGH*/
333 case _MAPPER_CONVERT_DST_MORE:
334 /*FALLTHROUGH*/
335 case _MAPPER_CONVERT_ILSEQ:
336 return (EILSEQ);
337 case _MAPPER_CONVERT_FATAL:
338 return (EINVAL);
339 }
340 }
341 break;
342 }
343 }
344
345 return (E_NO_CORRESPONDING_CHAR);
346 }
347 /* ---------------------------------------------------------------------- */
348
349 static int
350 /*ARGSUSED*/
_citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared * ci,const char * __restrict src,const char * __restrict dst)351 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
352 const char * __restrict src, const char * __restrict dst)
353 {
354 struct _citrus_esdb esdbdst, esdbsrc;
355 struct _citrus_iconv_std_shared *is;
356 int ret;
357
358 is = malloc(sizeof(*is));
359 if (is == NULL) {
360 ret = errno;
361 goto err0;
362 }
363 ret = _citrus_esdb_open(&esdbsrc, src);
364 if (ret)
365 goto err1;
366 ret = _citrus_esdb_open(&esdbdst, dst);
367 if (ret)
368 goto err2;
369 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
370 esdbsrc.db_variable, esdbsrc.db_len_variable);
371 if (ret)
372 goto err3;
373 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
374 esdbdst.db_variable, esdbdst.db_len_variable);
375 if (ret)
376 goto err4;
377 is->is_use_invalid = esdbdst.db_use_invalid;
378 is->is_invalid = esdbdst.db_invalid;
379
380 TAILQ_INIT(&is->is_srcs);
381 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
382 if (ret)
383 goto err5;
384
385 _esdb_close(&esdbsrc);
386 _esdb_close(&esdbdst);
387 ci->ci_closure = is;
388
389 return (0);
390
391 err5:
392 _stdenc_close(is->is_dst_encoding);
393 err4:
394 _stdenc_close(is->is_src_encoding);
395 err3:
396 _esdb_close(&esdbdst);
397 err2:
398 _esdb_close(&esdbsrc);
399 err1:
400 free(is);
401 err0:
402 return (ret);
403 }
404
405 static void
_citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared * ci)406 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
407 {
408 struct _citrus_iconv_std_shared *is = ci->ci_closure;
409
410 if (is == NULL)
411 return;
412
413 _stdenc_close(is->is_src_encoding);
414 _stdenc_close(is->is_dst_encoding);
415 close_srcs(&is->is_srcs);
416 free(is);
417 }
418
419 static int
_citrus_iconv_std_iconv_init_context(struct _citrus_iconv * cv)420 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
421 {
422 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
423 struct _citrus_iconv_std_context *sc;
424 char *ptr;
425 size_t sz, szpsdst, szpssrc;
426
427 szpssrc = _stdenc_get_state_size(is->is_src_encoding);
428 szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
429
430 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
431 sc = malloc(sz);
432 if (sc == NULL)
433 return (errno);
434
435 ptr = (char *)&sc[1];
436 if (szpssrc > 0)
437 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
438 ptr, ptr+szpssrc);
439 else
440 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
441 NULL, NULL);
442 ptr += szpssrc*2;
443 if (szpsdst > 0)
444 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
445 ptr, ptr+szpsdst);
446 else
447 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
448 NULL, NULL);
449
450 cv->cv_closure = (void *)sc;
451
452 return (0);
453 }
454
455 static void
_citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv * cv)456 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
457 {
458
459 free(cv->cv_closure);
460 }
461
462 static int
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,char * __restrict * __restrict in,size_t * __restrict inbytes,char * __restrict * __restrict out,size_t * __restrict outbytes,uint32_t flags,size_t * __restrict invalids)463 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
464 char * __restrict * __restrict in, size_t * __restrict inbytes,
465 char * __restrict * __restrict out, size_t * __restrict outbytes,
466 uint32_t flags, size_t * __restrict invalids)
467 {
468 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
469 struct _citrus_iconv_std_context *sc = cv->cv_closure;
470 _csid_t csid;
471 _index_t idx;
472 char *tmpin;
473 size_t inval, szrin, szrout;
474 int ret, state = 0;
475
476 inval = 0;
477 if (in == NULL || *in == NULL) {
478 /* special cases */
479 if (out != NULL && *out != NULL) {
480 /* init output state and store the shift sequence */
481 save_encoding_state(&sc->sc_src_encoding);
482 save_encoding_state(&sc->sc_dst_encoding);
483 szrout = 0;
484
485 ret = put_state_resetx(&sc->sc_dst_encoding,
486 *out, *outbytes, &szrout);
487 if (ret)
488 goto err;
489
490 if (szrout == (size_t)-2) {
491 /* too small to store the character */
492 ret = EINVAL;
493 goto err;
494 }
495 *out += szrout;
496 *outbytes -= szrout;
497 } else
498 /* otherwise, discard the shift sequence */
499 init_encoding_state(&sc->sc_dst_encoding);
500 init_encoding_state(&sc->sc_src_encoding);
501 *invalids = 0;
502 return (0);
503 }
504
505 /* normal case */
506 for (;;) {
507 if (*inbytes == 0) {
508 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
509 if (state == _STDENC_SDGEN_INITIAL ||
510 state == _STDENC_SDGEN_STABLE)
511 break;
512 }
513
514 /* save the encoding states for the error recovery */
515 save_encoding_state(&sc->sc_src_encoding);
516 save_encoding_state(&sc->sc_dst_encoding);
517
518 /* mb -> csid/index */
519 tmpin = *in;
520 szrin = szrout = 0;
521 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, &tmpin,
522 *inbytes, &szrin, cv->cv_shared->ci_hooks);
523 if (ret)
524 goto err;
525
526 if (szrin == (size_t)-2) {
527 /* incompleted character */
528 ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
529 if (ret) {
530 ret = EINVAL;
531 goto err;
532 }
533 switch (state) {
534 case _STDENC_SDGEN_INITIAL:
535 case _STDENC_SDGEN_STABLE:
536 /* fetch shift sequences only. */
537 goto next;
538 }
539 ret = EINVAL;
540 goto err;
541 }
542 /* convert the character */
543 ret = do_conv(is, &csid, &idx);
544 if (ret) {
545 if (ret == E_NO_CORRESPONDING_CHAR) {
546 /*
547 * GNU iconv returns EILSEQ when no
548 * corresponding character in the output.
549 * Some software depends on this behavior
550 * though this is against POSIX specification.
551 */
552 if (cv->cv_shared->ci_ilseq_invalid != 0) {
553 ret = EILSEQ;
554 goto err;
555 }
556 inval++;
557 szrout = 0;
558 if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&
559 !cv->cv_shared->ci_discard_ilseq) &&
560 is->is_use_invalid) {
561 ret = wctombx(&sc->sc_dst_encoding,
562 *out, *outbytes, is->is_invalid,
563 &szrout, cv->cv_shared->ci_hooks);
564 if (ret)
565 goto err;
566 }
567 goto next;
568 } else
569 goto err;
570 }
571 /* csid/index -> mb */
572 ret = cstombx(&sc->sc_dst_encoding,
573 *out, *outbytes, csid, idx, &szrout,
574 cv->cv_shared->ci_hooks);
575 if (ret)
576 goto err;
577 next:
578 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
579 *in = tmpin;
580 *outbytes -= szrout;
581 *out += szrout;
582 }
583 *invalids = inval;
584
585 return (0);
586
587 err:
588 restore_encoding_state(&sc->sc_src_encoding);
589 restore_encoding_state(&sc->sc_dst_encoding);
590 *invalids = inval;
591
592 return (ret);
593 }
594