xref: /netbsd/lib/libc/citrus/modules/citrus_gbk2k.c (revision 6550d01e)
1 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */
2 
3 /*-
4  * Copyright (c)2003 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <wchar.h>
41 #include <sys/types.h>
42 #include <limits.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_ctype.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_gbk2k.h"
51 
52 
53 /* ----------------------------------------------------------------------
 * private definitions used by the templates
55  */
56 
/*
 * Conversion state: the bytes of a multibyte character collected so far.
 */
typedef struct _GBK2KState {
	char	ch[4];		/* pending bytes, in input order */
	int	chlen;		/* number of valid bytes in ch[] */
} _GBK2KState;
61 
/*
 * Per-encoding configuration.
 */
typedef struct {
	/*
	 * Longest multibyte character accepted, in bytes: 4 by default,
	 * 2 when the module is initialised with the "2byte" option.
	 */
	int	mb_cur_max;
} _GBK2KEncodingInfo;
65 
66 typedef struct {
67 	_GBK2KEncodingInfo	ei;
68 	struct {
69 		/* for future multi-locale facility */
70 		_GBK2KState	s_mblen;
71 		_GBK2KState	s_mbrlen;
72 		_GBK2KState	s_mbrtowc;
73 		_GBK2KState	s_mbtowc;
74 		_GBK2KState	s_mbsrtowcs;
75 		_GBK2KState	s_wcrtomb;
76 		_GBK2KState	s_wcsrtombs;
77 		_GBK2KState	s_wctomb;
78 	} states;
79 } _GBK2KCTypeInfo;
80 
/*
 * Accessors into _GBK2KCTypeInfo: _CEI_TO_EI() yields a pointer to the
 * embedded encoding info, _CEI_TO_STATE() names the per-function state
 * member states.s_<_func_>.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and settings for this encoding.  _FUNCNAME() prefixes a symbol
 * with _citrus_GBK2K_.  The remaining macros are not referenced in the
 * code visible here; presumably they are consumed by the template
 * headers included at the end of this file -- confirm there.
 */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _CTYPE_INFO			_GBK2KCTypeInfo
#define _ENCODING_STATE			_GBK2KState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
91 
92 static __inline void
93 /*ARGSUSED*/
94 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,
95 			 _GBK2KState * __restrict s)
96 {
97 	memset(s, 0, sizeof(*s));
98 }
99 
100 static __inline void
101 /*ARGSUSED*/
102 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,
103 			 void * __restrict pspriv,
104 			 const _GBK2KState * __restrict s)
105 {
106 	memcpy(pspriv, (const void *)s, sizeof(*s));
107 }
108 
109 static __inline void
110 /*ARGSUSED*/
111 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,
112 			   _GBK2KState * __restrict s,
113 			   const void * __restrict pspriv)
114 {
115 	memcpy((void *)s, pspriv, sizeof(*s));
116 }
117 
/*
 * Return non-zero if the low 8 bits of `c' form a single-byte character
 * (0x00-0x7f).
 */
static __inline int
_mb_singlebyte(int c)
{
	const int byte = c & 0xff;

	return (byte < 0x80);
}
124 
/*
 * Return non-zero if the low 8 bits of `c' can start a multibyte
 * character (0x81-0xfe).
 */
static __inline int
_mb_leadbyte(int c)
{
	const int byte = c & 0xff;

	return (byte >= 0x81 && byte <= 0xfe);
}
131 
/*
 * Return non-zero if the low 8 bits of `c' are a valid second byte of a
 * two-byte character (0x40-0x7e or 0x80-0xfe).
 */
static __inline int
_mb_trailbyte(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x40 || byte > 0xfe)
		return (0);
	/* 0x7f is the only hole inside 0x40-0xfe */
	return (byte != 0x7f);
}
138 
/*
 * Return non-zero if the low 8 bits of `c' are in 0x30-0x39, the range
 * used for the second and fourth bytes of a four-byte character.
 */
static __inline int
_mb_surrogate(int c)
{
	const int byte = c & 0xff;

	return (byte >= 0x30 && byte <= 0x39);
}
145 
/*
 * Return the number of bytes (1, 2 or 4) needed to hold the wide
 * character `v', judged only by which of its upper bits are set.
 */
static __inline int
_mb_count(wchar_t v)
{
	const u_int32_t code = (u_int32_t)v; /* XXX */

	if ((code >> 8) == 0)
		return (1);
	return ((code >> 16) == 0) ? 2 : 4;
}
158 
/*
 * Helpers that operate on a local variable named `psenc':
 * _PSENC is the most recently stored byte (only meaningful while
 * psenc->chlen > 0); _PUSH_PSENC(c) appends `c' to the pending bytes,
 * increments chlen and evaluates to the stored value.  Neither macro
 * checks bounds.
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
161 
162 static int
163 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
164 			   wchar_t * __restrict pwc,
165 			   const char ** __restrict s, size_t n,
166 			   _GBK2KState * __restrict psenc,
167 			   size_t * __restrict nresult)
168 {
169 	int chlenbak, len;
170 	const char *s0, *s1;
171 	wchar_t wc;
172 
173 	_DIAGASSERT(ei != NULL);
174 	/* pwc may be NULL */
175 	_DIAGASSERT(s != NULL);
176 	_DIAGASSERT(psenc != NULL);
177 
178 	s0 = *s;
179 
180 	if (s0 == NULL) {
181 		/* _citrus_GBK2K_init_state(ei, psenc); */
182 		psenc->chlen = 0;
183 		*nresult = 0;
184 		return (0);
185 	}
186 
187 	chlenbak = psenc->chlen;
188 
189 	switch (psenc->chlen) {
190 	case 3:
191 		if (!_mb_leadbyte (_PSENC))
192 			goto invalid;
193 	/* FALLTHROUGH */
194 	case 2:
195 		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
196 			goto invalid;
197 	/* FALLTHROUGH */
198 	case 1:
199 		if (!_mb_leadbyte (_PSENC))
200 			goto invalid;
201 	/* FALLTHOROUGH */
202 	case 0:
203 		break;
204 	default:
205 		goto invalid;
206 	}
207 
208 	for (;;) {
209 		if (n-- < 1)
210 			goto restart;
211 
212 		_PUSH_PSENC(*s0++);
213 
214 		switch (psenc->chlen) {
215 		case 1:
216 			if (_mb_singlebyte(_PSENC))
217 				goto convert;
218 			if (_mb_leadbyte  (_PSENC))
219 				continue;
220 			goto ilseq;
221 		case 2:
222 			if (_mb_trailbyte (_PSENC))
223 				goto convert;
224 			if (ei->mb_cur_max == 4 &&
225 			    _mb_surrogate (_PSENC))
226 				continue;
227 			goto ilseq;
228 		case 3:
229 			if (_mb_leadbyte  (_PSENC))
230 				continue;
231 			goto ilseq;
232 		case 4:
233 			if (_mb_surrogate (_PSENC))
234 				goto convert;
235 			goto ilseq;
236 		}
237 	}
238 
239 convert:
240 	len = psenc->chlen;
241 	s1  = &psenc->ch[0];
242 	wc  = 0;
243 	while (len-- > 0)
244 		wc = (wc << 8) | (*s1++ & 0xff);
245 
246 	if (pwc != NULL)
247 		*pwc = wc;
248 	*s = s0;
249 	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
250 	/* _citrus_GBK2K_init_state(ei, psenc); */
251 	psenc->chlen = 0;
252 
253 	return (0);
254 
255 restart:
256 	*s = s0;
257 	*nresult = (size_t)-2;
258 
259 	return (0);
260 
261 invalid:
262 	return (EINVAL);
263 
264 ilseq:
265 	*nresult = (size_t)-1;
266 	return (EILSEQ);
267 }
268 
269 static int
270 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
271 			   char * __restrict s, size_t n, wchar_t wc,
272 			   _GBK2KState * __restrict psenc,
273 			   size_t * __restrict nresult)
274 {
275 	int len, ret;
276 
277 	_DIAGASSERT(ei != NULL);
278 	_DIAGASSERT(s != NULL);
279 	_DIAGASSERT(psenc != NULL);
280 
281 	if (psenc->chlen != 0) {
282 		ret = EINVAL;
283 		goto err;
284 	}
285 
286 	len = _mb_count(wc);
287 	if (n < len) {
288 		ret = E2BIG;
289 		goto err;
290 	}
291 
292 	switch (len) {
293 	case 1:
294 		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
295 			ret = EILSEQ;
296 			goto err;
297 		}
298 		break;
299 	case 2:
300 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
301 		    !_mb_trailbyte (_PUSH_PSENC(wc     ))) {
302 			ret = EILSEQ;
303 			goto err;
304 		}
305 		break;
306 	case 4:
307 		if (ei->mb_cur_max != 4 ||
308 		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
309 		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
310 		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
311 		    !_mb_surrogate (_PUSH_PSENC(wc      ))) {
312 			ret = EILSEQ;
313 			goto err;
314 		}
315 		break;
316 	}
317 
318 	_DIAGASSERT(len == psenc->chlen);
319 
320 	memcpy(s, psenc->ch, psenc->chlen);
321 	*nresult = psenc->chlen;
322 	/* _citrus_GBK2K_init_state(ei, psenc); */
323 	psenc->chlen = 0;
324 
325 	return (0);
326 
327 err:
328 	*nresult = (size_t)-1;
329 	return ret;
330 }
331 
332 static __inline int
333 /*ARGSUSED*/
334 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei,
335 			    _csid_t * __restrict csid,
336 			    _index_t * __restrict idx, wchar_t wc)
337 {
338 	u_int8_t ch, cl;
339 
340 	_DIAGASSERT(csid != NULL && idx != NULL);
341 
342 	if ((u_int32_t)wc<0x80) {
343 		/* ISO646 */
344 		*csid = 0;
345 		*idx = (_index_t)wc;
346 	} else if ((u_int32_t)wc>=0x10000) {
347 		/* GBKUCS : XXX */
348 		*csid = 3;
349 		*idx = (_index_t)wc;
350 	} else {
351 		ch = (u_int8_t)(wc >> 8);
352 		cl = (u_int8_t)wc;
353 		if (ch>=0xA1 && cl>=0xA1) {
354 			/* EUC G1 */
355 			*csid = 1;
356 			*idx = (_index_t)wc & 0x7F7FU;
357 		} else {
358 			/* extended area (0x8140-) */
359 			*csid = 2;
360 			*idx = (_index_t)wc;
361 		}
362 	}
363 
364 	return 0;
365 }
366 
367 static __inline int
368 /*ARGSUSED*/
369 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
370 			    wchar_t * __restrict wc,
371 			    _csid_t csid, _index_t idx)
372 {
373 
374 	_DIAGASSERT(wc != NULL);
375 
376 	switch (csid) {
377 	case 0:
378 		/* ISO646 */
379 		*wc = (wchar_t)idx;
380 		break;
381 	case 1:
382 		/* EUC G1 */
383 		*wc = (wchar_t)idx | 0x8080U;
384 		break;
385 	case 2:
386 		/* extended area */
387 		*wc = (wchar_t)idx;
388 		break;
389 	case 3:
390 		/* GBKUCS : XXX */
391 		if (ei->mb_cur_max != 4)
392 			return EINVAL;
393 		*wc = (wchar_t)idx;
394 		break;
395 	default:
396 		return EILSEQ;
397 	}
398 
399 	return 0;
400 }
401 
402 static __inline int
403 /*ARGSUSED*/
404 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei,
405 					    _GBK2KState * __restrict psenc,
406 					    int * __restrict rstate)
407 {
408 
409 	if (psenc->chlen == 0)
410 		*rstate = _STDENC_SDGEN_INITIAL;
411 	else
412 		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
413 
414 	return 0;
415 }
416 
417 static int
418 /*ARGSUSED*/
419 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
420 				   const void * __restrict var, size_t lenvar)
421 {
422 	const char *p;
423 
424 	_DIAGASSERT(ei != NULL);
425 
426 	p = var;
427 #define MATCH(x, act)                                           \
428 do {                                                            \
429         if (lenvar >= (sizeof(#x)-1) &&                         \
430             _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {       \
431                 act;                                            \
432                 lenvar -= sizeof(#x)-1;                         \
433                 p += sizeof(#x)-1;                              \
434         }                                                       \
435 } while (/*CONSTCOND*/0)
436 	memset((void *)ei, 0, sizeof(*ei));
437 	ei->mb_cur_max = 4;
438 	while (lenvar>0) {
439 		switch (_bcs_tolower(*p)) {
440 		case '2':
441 			MATCH("2byte", ei->mb_cur_max = 2);
442 			break;
443 		}
444 		p++;
445 		lenvar--;
446 	}
447 
448 	return (0);
449 }
450 
451 static void
452 /*ARGSUSED*/
453 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei)
454 {
455 }
456 
457 
458 /* ----------------------------------------------------------------------
459  * public interface for ctype
460  */
461 
/*
 * Instantiate the ctype interface for GBK2K.  The template is included
 * only after the private definitions above; presumably it builds the
 * generic entry points from _FUNCNAME(), the _CEI_* accessors and the
 * *_priv helpers of this file -- confirm in citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(GBK2K);
_CITRUS_CTYPE_DEF_OPS(GBK2K);

#include "citrus_ctype_template.h"
466 
467 /* ----------------------------------------------------------------------
468  * public interface for stdenc
469  */
470 
/*
 * Instantiate the stdenc interface for GBK2K.  As with the ctype
 * template, this presumably relies on the private definitions and the
 * stdenc_* helpers defined earlier in this file -- confirm in
 * citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

#include "citrus_stdenc_template.h"
475