1 /* $FreeBSD: head/lib/libiconv_modules/HZ/citrus_hz.c 281550 2015-04-15 09:09:20Z tijl $ */
2 /* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */
3
4 /*-
5 * Copyright (c)2004, 2006 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30
31 #include <sys/cdefs.h>
32 #include <sys/queue.h>
33 #include <sys/types.h>
34
35 #include <assert.h>
36 #include <errno.h>
37 #include <limits.h>
38 #include <stddef.h>
39 #include <stdint.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wchar.h>
43
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_stdenc.h"
49
50 #include "citrus_hz.h"
51 #include "citrus_prop.h"
52
53 /*
54 * wchar_t mapping:
55 *
56 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx
57 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx
58 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
59 */
60
/* Lead byte of every two-byte shift sequence recognised by this module. */
#define ESCAPE_CHAR	'~'

/*
 * Character set classes.  The numeric values are used as indices into
 * ranges[], so the enum and the table must stay in step.
 */
typedef enum {
	CTRL	= 0,
	ASCII	= 1,
	GB2312	= 2,
	CS94	= 3,
	CS96	= 4
} charset_t;

/* Inclusive range of 7-bit values one byte of a character may take. */
typedef struct {
	int	start;		/* lowest valid value */
	int	end;		/* highest valid value */
	int	width;		/* number of values in [start, end] */
} range_t;

/*
 * Valid byte range for each charset_t.  Designated initializers tie every
 * row to its enumerator instead of relying on positional order.
 */
static const range_t ranges[] = {
#define RANGE(start, end)	{ (start), (end), ((end) - (start)) + 1 }
	[CTRL]		= RANGE(0x00, 0x1F),
	[ASCII]		= RANGE(0x20, 0x7F),
	[GB2312]	= RANGE(0x21, 0x7E),
	[CS94]		= RANGE(0x21, 0x7E),
	[CS96]		= RANGE(0x20, 0x7F),
#undef RANGE
};
82
83 typedef struct escape_t escape_t;
84 typedef struct {
85 charset_t charset;
86 escape_t *escape;
87 ssize_t length;
88 #define ROWCOL_MAX 3
89 } graphic_t;
90
91 typedef TAILQ_HEAD(escape_list, escape_t) escape_list;
92 struct escape_t {
93 TAILQ_ENTRY(escape_t) entry;
94 escape_list *set;
95 graphic_t *left;
96 graphic_t *right;
97 int ch;
98 };
99
100 #define GL(escape) ((escape)->left)
101 #define GR(escape) ((escape)->right)
102 #define SET(escape) ((escape)->set)
103 #define ESC(escape) ((escape)->ch)
104 #define INIT(escape) (TAILQ_FIRST(SET(escape)))
105
106 static __inline escape_t *
find_escape(escape_list * set,int ch)107 find_escape(escape_list *set, int ch)
108 {
109 escape_t *escape;
110
111 TAILQ_FOREACH(escape, set, entry) {
112 if (ESC(escape) == ch)
113 break;
114 }
115
116 return (escape);
117 }
118
119 typedef struct {
120 escape_list e0;
121 escape_list e1;
122 graphic_t *ascii;
123 graphic_t *gb2312;
124 } _HZEncodingInfo;
125
126 #define E0SET(ei) (&(ei)->e0)
127 #define E1SET(ei) (&(ei)->e1)
128 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei)))
129 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei)))
130
131 typedef struct {
132 escape_t *inuse;
133 int chlen;
134 char ch[ROWCOL_MAX];
135 } _HZState;
136
137 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
138 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
139
140 #define _FUNCNAME(m) _citrus_HZ_##m
141 #define _ENCODING_INFO _HZEncodingInfo
142 #define _ENCODING_STATE _HZState
143 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
144 #define _ENCODING_IS_STATE_DEPENDENT 1
145 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL)
146
147 static __inline void
_citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,_HZState * __restrict psenc)148 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei,
149 _HZState * __restrict psenc)
150 {
151
152 psenc->chlen = 0;
153 psenc->inuse = INIT0(ei);
154 }
155
#if 0
/*
 * Compiled out.  Byte-wise copies of the state into and out of an opaque
 * caller-supplied buffer.
 */
static __inline void
/*ARGSUSED*/
_citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _HZState * __restrict psenc)
{

	memcpy(pspriv, psenc, sizeof(*psenc));
}

static __inline void
/*ARGSUSED*/
_citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused,
    _HZState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy(psenc, pspriv, sizeof(*psenc));
}
#endif
175
176 static int
_citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,wchar_t * __restrict pwc,char ** __restrict s,size_t n,_HZState * __restrict psenc,size_t * __restrict nresult)177 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei,
178 wchar_t * __restrict pwc, char ** __restrict s, size_t n,
179 _HZState * __restrict psenc, size_t * __restrict nresult)
180 {
181 escape_t *candidate, *init;
182 graphic_t *graphic;
183 const range_t *range;
184 char *s0;
185 wchar_t wc;
186 int bit, ch, head, len, tail;
187
188 if (*s == NULL) {
189 _citrus_HZ_init_state(ei, psenc);
190 *nresult = 1;
191 return (0);
192 }
193 s0 = *s;
194 if (psenc->chlen < 0 || psenc->inuse == NULL)
195 return (EINVAL);
196
197 wc = (wchar_t)0;
198 bit = head = tail = 0;
199 graphic = NULL;
200 for (len = 0; len <= MB_LEN_MAX;) {
201 if (psenc->chlen == tail) {
202 if (n-- < 1) {
203 *s = s0;
204 *nresult = (size_t)-2;
205 return (0);
206 }
207 psenc->ch[psenc->chlen++] = *s0++;
208 ++len;
209 }
210 ch = (unsigned char)psenc->ch[tail++];
211 if (tail == 1) {
212 if ((ch & ~0x80) <= 0x1F) {
213 if (psenc->inuse != INIT0(ei))
214 break;
215 wc = (wchar_t)ch;
216 goto done;
217 }
218 if (ch & 0x80) {
219 graphic = GR(psenc->inuse);
220 bit = 0x80;
221 ch &= ~0x80;
222 } else {
223 graphic = GL(psenc->inuse);
224 if (ch == ESCAPE_CHAR)
225 continue;
226 bit = 0x0;
227 }
228 if (graphic == NULL)
229 break;
230 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) {
231 if (tail < psenc->chlen)
232 return (EINVAL);
233 if (ch == ESCAPE_CHAR) {
234 ++head;
235 } else if (ch == '\n') {
236 if (psenc->inuse != INIT0(ei))
237 break;
238 tail = psenc->chlen = 0;
239 continue;
240 } else {
241 candidate = NULL;
242 init = INIT0(ei);
243 if (psenc->inuse == init) {
244 init = INIT1(ei);
245 } else if (INIT(psenc->inuse) == init) {
246 if (ESC(init) != ch)
247 break;
248 candidate = init;
249 }
250 if (candidate == NULL) {
251 candidate = find_escape(
252 SET(psenc->inuse), ch);
253 if (candidate == NULL) {
254 if (init == NULL ||
255 ESC(init) != ch)
256 break;
257 candidate = init;
258 }
259 }
260 psenc->inuse = candidate;
261 tail = psenc->chlen = 0;
262 continue;
263 }
264 } else if (ch & 0x80) {
265 if (graphic != GR(psenc->inuse))
266 break;
267 ch &= ~0x80;
268 } else {
269 if (graphic != GL(psenc->inuse))
270 break;
271 }
272 range = &ranges[(size_t)graphic->charset];
273 if (range->start > ch || range->end < ch)
274 break;
275 wc <<= 8;
276 wc |= ch;
277 if (graphic->length == (tail - head)) {
278 if (graphic->charset > GB2312)
279 bit |= ESC(psenc->inuse) << 24;
280 wc |= bit;
281 goto done;
282 }
283 }
284 *nresult = (size_t)-1;
285 return (EILSEQ);
286 done:
287 if (tail < psenc->chlen)
288 return (EINVAL);
289 *s = s0;
290 if (pwc != NULL)
291 *pwc = wc;
292 psenc->chlen = 0;
293 *nresult = (wc == 0) ? 0 : len;
294
295 return (0);
296 }
297
298 static int
_citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_HZState * __restrict psenc,size_t * __restrict nresult)299 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei,
300 char * __restrict s, size_t n, wchar_t wc,
301 _HZState * __restrict psenc, size_t * __restrict nresult)
302 {
303 escape_t *candidate, *init;
304 graphic_t *graphic;
305 const range_t *range;
306 size_t len;
307 int bit, ch;
308
309 if (psenc->chlen != 0 || psenc->inuse == NULL)
310 return (EINVAL);
311 if (wc & 0x80) {
312 bit = 0x80;
313 wc &= ~0x80;
314 } else {
315 bit = 0x0;
316 }
317 if ((uint32_t)wc <= 0x1F) {
318 candidate = INIT0(ei);
319 graphic = (bit == 0) ? candidate->left : candidate->right;
320 if (graphic == NULL)
321 goto ilseq;
322 range = &ranges[(size_t)CTRL];
323 len = 1;
324 } else if ((uint32_t)wc <= 0x7F) {
325 graphic = ei->ascii;
326 if (graphic == NULL)
327 goto ilseq;
328 candidate = graphic->escape;
329 range = &ranges[(size_t)graphic->charset];
330 len = graphic->length;
331 } else if ((uint32_t)wc <= 0x7F7F) {
332 graphic = ei->gb2312;
333 if (graphic == NULL)
334 goto ilseq;
335 candidate = graphic->escape;
336 range = &ranges[(size_t)graphic->charset];
337 len = graphic->length;
338 } else {
339 ch = (wc >> 24) & 0xFF;
340 candidate = find_escape(E0SET(ei), ch);
341 if (candidate == NULL) {
342 candidate = find_escape(E1SET(ei), ch);
343 if (candidate == NULL)
344 goto ilseq;
345 }
346 wc &= ~0xFF000000;
347 graphic = (bit == 0) ? candidate->left : candidate->right;
348 if (graphic == NULL)
349 goto ilseq;
350 range = &ranges[(size_t)graphic->charset];
351 len = graphic->length;
352 }
353 if (psenc->inuse != candidate) {
354 init = INIT0(ei);
355 if (SET(psenc->inuse) == SET(candidate)) {
356 if (INIT(psenc->inuse) != init ||
357 psenc->inuse == init || candidate == init)
358 init = NULL;
359 } else if (candidate == (init = INIT(candidate))) {
360 init = NULL;
361 }
362 if (init != NULL) {
363 if (n < 2)
364 return (E2BIG);
365 n -= 2;
366 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
367 psenc->ch[psenc->chlen++] = ESC(init);
368 }
369 if (n < 2)
370 return (E2BIG);
371 n -= 2;
372 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
373 psenc->ch[psenc->chlen++] = ESC(candidate);
374 psenc->inuse = candidate;
375 }
376 if (n < len)
377 return (E2BIG);
378 while (len-- > 0) {
379 ch = (wc >> (len * 8)) & 0xFF;
380 if (range->start > ch || range->end < ch)
381 goto ilseq;
382 psenc->ch[psenc->chlen++] = ch | bit;
383 }
384 memcpy(s, psenc->ch, psenc->chlen);
385 *nresult = psenc->chlen;
386 psenc->chlen = 0;
387
388 return (0);
389
390 ilseq:
391 *nresult = (size_t)-1;
392 return (EILSEQ);
393 }
394
395 static __inline int
_citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,char * __restrict s,size_t n,_HZState * __restrict psenc,size_t * __restrict nresult)396 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei,
397 char * __restrict s, size_t n, _HZState * __restrict psenc,
398 size_t * __restrict nresult)
399 {
400 escape_t *candidate;
401
402 if (psenc->chlen != 0 || psenc->inuse == NULL)
403 return (EINVAL);
404 candidate = INIT0(ei);
405 if (psenc->inuse != candidate) {
406 if (n < 2)
407 return (E2BIG);
408 n -= 2;
409 psenc->ch[psenc->chlen++] = ESCAPE_CHAR;
410 psenc->ch[psenc->chlen++] = ESC(candidate);
411 }
412 if (n < 1)
413 return (E2BIG);
414 if (psenc->chlen > 0)
415 memcpy(s, psenc->ch, psenc->chlen);
416 *nresult = psenc->chlen;
417 _citrus_HZ_init_state(ei, psenc);
418
419 return (0);
420 }
421
422 static __inline int
_citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,_HZState * __restrict psenc,int * __restrict rstate)423 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei,
424 _HZState * __restrict psenc, int * __restrict rstate)
425 {
426
427 if (psenc->chlen < 0 || psenc->inuse == NULL)
428 return (EINVAL);
429 *rstate = (psenc->chlen == 0)
430 ? ((psenc->inuse == INIT0(ei))
431 ? _STDENC_SDGEN_INITIAL
432 : _STDENC_SDGEN_STABLE)
433 : ((psenc->ch[0] == ESCAPE_CHAR)
434 ? _STDENC_SDGEN_INCOMPLETE_SHIFT
435 : _STDENC_SDGEN_INCOMPLETE_CHAR);
436
437 return (0);
438 }
439
440 static __inline int
441 /*ARGSUSED*/
_citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)442 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused,
443 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
444 {
445 int bit;
446
447 if (wc & 0x80) {
448 bit = 0x80;
449 wc &= ~0x80;
450 } else
451 bit = 0x0;
452 if ((uint32_t)wc <= 0x7F) {
453 *csid = (_csid_t)bit;
454 *idx = (_index_t)wc;
455 } else if ((uint32_t)wc <= 0x7F7F) {
456 *csid = (_csid_t)(bit | 0x8000);
457 *idx = (_index_t)wc;
458 } else {
459 *csid = (_index_t)(wc & ~0x00FFFF7F);
460 *idx = (_csid_t)(wc & 0x00FFFF7F);
461 }
462
463 return (0);
464 }
465
466 static __inline int
467 /*ARGSUSED*/
_citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,wchar_t * __restrict wc,_csid_t csid,_index_t idx)468 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused,
469 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
470 {
471
472 *wc = (wchar_t)idx;
473 switch (csid) {
474 case 0x80:
475 case 0x8080:
476 *wc |= (wchar_t)0x80;
477 /*FALLTHROUGH*/
478 case 0x0:
479 case 0x8000:
480 break;
481 default:
482 *wc |= (wchar_t)csid;
483 }
484
485 return (0);
486 }
487
488 static void
_citrus_HZ_encoding_module_uninit(_HZEncodingInfo * ei)489 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei)
490 {
491 escape_t *escape;
492
493 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) {
494 TAILQ_REMOVE(E0SET(ei), escape, entry);
495 free(GL(escape));
496 free(GR(escape));
497 free(escape);
498 }
499 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) {
500 TAILQ_REMOVE(E1SET(ei), escape, entry);
501 free(GL(escape));
502 free(GR(escape));
503 free(escape);
504 }
505 }
506
507 static int
_citrus_HZ_parse_char(void * context,const char * name __unused,const char * s)508 _citrus_HZ_parse_char(void *context, const char *name __unused, const char *s)
509 {
510 escape_t *escape;
511 void **p;
512
513 p = (void **)context;
514 escape = (escape_t *)p[0];
515 if (escape->ch != '\0')
516 return (EINVAL);
517 escape->ch = *s++;
518 if (escape->ch == ESCAPE_CHAR || *s != '\0')
519 return (EINVAL);
520
521 return (0);
522 }
523
524 static int
_citrus_HZ_parse_graphic(void * context,const char * name,const char * s)525 _citrus_HZ_parse_graphic(void *context, const char *name, const char *s)
526 {
527 _HZEncodingInfo *ei;
528 escape_t *escape;
529 graphic_t *graphic;
530 void **p;
531
532 p = (void **)context;
533 escape = (escape_t *)p[0];
534 ei = (_HZEncodingInfo *)p[1];
535 graphic = calloc(1, sizeof(*graphic));
536 if (graphic == NULL)
537 return (ENOMEM);
538 if (strcmp("GL", name) == 0) {
539 if (GL(escape) != NULL)
540 goto release;
541 GL(escape) = graphic;
542 } else if (strcmp("GR", name) == 0) {
543 if (GR(escape) != NULL)
544 goto release;
545 GR(escape) = graphic;
546 } else {
547 release:
548 free(graphic);
549 return (EINVAL);
550 }
551 graphic->escape = escape;
552 if (_bcs_strncasecmp("ASCII", s, 5) == 0) {
553 if (s[5] != '\0')
554 return (EINVAL);
555 graphic->charset = ASCII;
556 graphic->length = 1;
557 ei->ascii = graphic;
558 return (0);
559 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) {
560 if (s[6] != '\0')
561 return (EINVAL);
562 graphic->charset = GB2312;
563 graphic->length = 2;
564 ei->gb2312 = graphic;
565 return (0);
566 } else if (strncmp("94*", s, 3) == 0)
567 graphic->charset = CS94;
568 else if (strncmp("96*", s, 3) == 0)
569 graphic->charset = CS96;
570 else
571 return (EINVAL);
572 s += 3;
573 switch(*s) {
574 case '1': case '2': case '3':
575 graphic->length = (size_t)(*s - '0');
576 if (*++s == '\0')
577 break;
578 /*FALLTHROUGH*/
579 default:
580 return (EINVAL);
581 }
582 return (0);
583 }
584
585 static const _citrus_prop_hint_t escape_hints[] = {
586 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char),
587 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic),
588 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic),
589 _CITRUS_PROP_HINT_END
590 };
591
592 static int
_citrus_HZ_parse_escape(void * context,const char * name,const char * s)593 _citrus_HZ_parse_escape(void *context, const char *name, const char *s)
594 {
595 _HZEncodingInfo *ei;
596 escape_t *escape;
597 void *p[2];
598
599 ei = (_HZEncodingInfo *)context;
600 escape = calloc(1, sizeof(*escape));
601 if (escape == NULL)
602 return (EINVAL);
603 if (strcmp("0", name) == 0) {
604 escape->set = E0SET(ei);
605 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry);
606 } else if (strcmp("1", name) == 0) {
607 escape->set = E1SET(ei);
608 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry);
609 } else {
610 free(escape);
611 return (EINVAL);
612 }
613 p[0] = (void *)escape;
614 p[1] = (void *)ei;
615 return (_citrus_prop_parse_variable(
616 escape_hints, (void *)&p[0], s, strlen(s)));
617 }
618
619 static const _citrus_prop_hint_t root_hints[] = {
620 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape),
621 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape),
622 _CITRUS_PROP_HINT_END
623 };
624
625 static int
_citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)626 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei,
627 const void * __restrict var, size_t lenvar)
628 {
629 int errnum;
630
631 memset(ei, 0, sizeof(*ei));
632 TAILQ_INIT(E0SET(ei));
633 TAILQ_INIT(E1SET(ei));
634 errnum = _citrus_prop_parse_variable(
635 root_hints, (void *)ei, var, lenvar);
636 if (errnum != 0)
637 _citrus_HZ_encoding_module_uninit(ei);
638 return (errnum);
639 }
640
641 /* ----------------------------------------------------------------------
642 * public interface for stdenc
643 */
644
645 _CITRUS_STDENC_DECLS(HZ);
646 _CITRUS_STDENC_DEF_OPS(HZ);
647
648 #include "citrus_stdenc_template.h"
649