xref: /netbsd/lib/libc/citrus/modules/citrus_gbk2k.c (revision abd1934e)
1 /* $NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $ */
2 
3 /*-
4  * Copyright (c)2003 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <wchar.h>
41 #include <sys/types.h>
42 #include <limits.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_ctype.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_gbk2k.h"
51 
52 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
56 
/*
 * Conversion state: the bytes of a multibyte character collected so far.
 * A character is at most 4 bytes long; chlen == 0 is the initial state.
 */
typedef struct _GBK2KState {
	char	ch[4];		/* bytes accumulated for the current character */
	int	chlen;		/* number of valid bytes in ch[] */
} _GBK2KState;
61 
/*
 * Per-encoding settings.  mb_cur_max is set by the module init routine
 * to 4, or to 2 when the "2byte" option is given; the converters only
 * accept four-byte sequences when it is 4.
 */
typedef struct {
	int	mb_cur_max;	/* longest multibyte character, in bytes */
} _GBK2KEncodingInfo;
65 
66 typedef struct {
67 	_GBK2KEncodingInfo	ei;
68 	struct {
69 		/* for future multi-locale facility */
70 		_GBK2KState	s_mblen;
71 		_GBK2KState	s_mbrlen;
72 		_GBK2KState	s_mbrtowc;
73 		_GBK2KState	s_mbtowc;
74 		_GBK2KState	s_mbsrtowcs;
75 		_GBK2KState	s_mbsnrtowcs;
76 		_GBK2KState	s_wcrtomb;
77 		_GBK2KState	s_wcsrtombs;
78 		_GBK2KState	s_wcsnrtombs;
79 		_GBK2KState	s_wctomb;
80 	} states;
81 } _GBK2KCTypeInfo;
82 
/* Address of the encoding info embedded in a ctype info structure. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* The private state named s_<_func_> inside a ctype info structure. */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and properties through which this module describes itself;
 * presumably consumed by the citrus template headers included at the
 * end of this file (confirm against the templates).
 */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _CTYPE_INFO			_GBK2KCTypeInfo
#define _ENCODING_STATE			_GBK2KState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
93 
94 static __inline void
95 /*ARGSUSED*/
_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,_GBK2KState * __restrict s)96 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,
97 			 _GBK2KState * __restrict s)
98 {
99 	memset(s, 0, sizeof(*s));
100 }
101 
102 static __inline void
103 /*ARGSUSED*/
_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,void * __restrict pspriv,const _GBK2KState * __restrict s)104 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,
105 			 void * __restrict pspriv,
106 			 const _GBK2KState * __restrict s)
107 {
108 	memcpy(pspriv, (const void *)s, sizeof(*s));
109 }
110 
111 static __inline void
112 /*ARGSUSED*/
_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,_GBK2KState * __restrict s,const void * __restrict pspriv)113 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,
114 			   _GBK2KState * __restrict s,
115 			   const void * __restrict pspriv)
116 {
117 	memcpy((void *)s, pspriv, sizeof(*s));
118 }
119 
/*
 * Return non-zero if the low 8 bits of `c' form a single-byte character,
 * i.e. a value in 0x00..0x7f (bit 7 clear).
 */
static __inline int
_mb_singlebyte(int c)
{

	return ((c & 0x80) == 0);
}
126 
/*
 * Return non-zero if the low 8 bits of `c' are in 0x81..0xfe, the range
 * accepted as the first byte of a multibyte character (and as the third
 * byte of a four-byte one).
 */
static __inline int
_mb_leadbyte(int c)
{
	const int b = c & 0xff;

	if (b < 0x81)
		return (0);
	return (b <= 0xfe);
}
133 
/*
 * Return non-zero if the low 8 bits of `c' can end a two-byte character:
 * any value in 0x40..0xfe except 0x7f.
 */
static __inline int
_mb_trailbyte(int c)
{
	const int b = c & 0xff;

	if (b < 0x40 || b > 0xfe)
		return (0);
	return (b != 0x7f);
}
140 
/*
 * Return non-zero if the low 8 bits of `c' are in 0x30..0x39, the range
 * accepted as the second and fourth byte of a four-byte character.
 */
static __inline int
_mb_surrogate(int c)
{
	const int b = c & 0xff;

	if (b < 0x30)
		return (0);
	return (b <= 0x39);
}
147 
/*
 * Return the number of bytes used to encode the wide character `v':
 * 1 if it fits in 8 bits, 2 if it fits in 16 bits, otherwise 4.
 * Only the magnitude is examined; the bytes are not validated here.
 */
static __inline int
_mb_count(wchar_t v)
{
	const u_int32_t code = (u_int32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	if (code <= 0xffffU)
		return (2);
	return (4);
}
160 
/*
 * Helpers operating on a variable named `psenc' in the enclosing scope.
 * _PSENC is the most recently stored byte (only meaningful when
 * psenc->chlen > 0); _PUSH_PSENC(c) appends a byte, bumps chlen and
 * yields the stored value.
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
163 
164 static int
_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,wchar_t * __restrict pwc,const char ** __restrict s,size_t n,_GBK2KState * __restrict psenc,size_t * __restrict nresult)165 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
166 			   wchar_t * __restrict pwc,
167 			   const char ** __restrict s, size_t n,
168 			   _GBK2KState * __restrict psenc,
169 			   size_t * __restrict nresult)
170 {
171 	int chlenbak, len;
172 	const char *s0, *s1;
173 	wchar_t wc;
174 
175 	_DIAGASSERT(ei != NULL);
176 	/* pwc may be NULL */
177 	_DIAGASSERT(s != NULL);
178 	_DIAGASSERT(psenc != NULL);
179 
180 	s0 = *s;
181 
182 	if (s0 == NULL) {
183 		/* _citrus_GBK2K_init_state(ei, psenc); */
184 		psenc->chlen = 0;
185 		*nresult = 0;
186 		return (0);
187 	}
188 
189 	chlenbak = psenc->chlen;
190 
191 	switch (psenc->chlen) {
192 	case 3:
193 		if (!_mb_leadbyte (_PSENC))
194 			goto invalid;
195 	/* FALLTHROUGH */
196 	case 2:
197 		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
198 			goto invalid;
199 	/* FALLTHROUGH */
200 	case 1:
201 		if (!_mb_leadbyte (_PSENC))
202 			goto invalid;
203 	/* FALLTHOROUGH */
204 	case 0:
205 		break;
206 	default:
207 		goto invalid;
208 	}
209 
210 	for (;;) {
211 		if (n-- < 1)
212 			goto restart;
213 
214 		_PUSH_PSENC(*s0++);
215 
216 		switch (psenc->chlen) {
217 		case 1:
218 			if (_mb_singlebyte(_PSENC))
219 				goto convert;
220 			if (_mb_leadbyte  (_PSENC))
221 				continue;
222 			goto ilseq;
223 		case 2:
224 			if (_mb_trailbyte (_PSENC))
225 				goto convert;
226 			if (ei->mb_cur_max == 4 &&
227 			    _mb_surrogate (_PSENC))
228 				continue;
229 			goto ilseq;
230 		case 3:
231 			if (_mb_leadbyte  (_PSENC))
232 				continue;
233 			goto ilseq;
234 		case 4:
235 			if (_mb_surrogate (_PSENC))
236 				goto convert;
237 			goto ilseq;
238 		}
239 	}
240 
241 convert:
242 	len = psenc->chlen;
243 	s1  = &psenc->ch[0];
244 	wc  = 0;
245 	while (len-- > 0)
246 		wc = (wc << 8) | (*s1++ & 0xff);
247 
248 	if (pwc != NULL)
249 		*pwc = wc;
250 	*s = s0;
251 	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
252 	/* _citrus_GBK2K_init_state(ei, psenc); */
253 	psenc->chlen = 0;
254 
255 	return (0);
256 
257 restart:
258 	*s = s0;
259 	*nresult = (size_t)-2;
260 
261 	return (0);
262 
263 invalid:
264 	return (EINVAL);
265 
266 ilseq:
267 	*nresult = (size_t)-1;
268 	return (EILSEQ);
269 }
270 
271 static int
_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_GBK2KState * __restrict psenc,size_t * __restrict nresult)272 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
273 			   char * __restrict s, size_t n, wchar_t wc,
274 			   _GBK2KState * __restrict psenc,
275 			   size_t * __restrict nresult)
276 {
277 	int len, ret;
278 
279 	_DIAGASSERT(ei != NULL);
280 	_DIAGASSERT(s != NULL);
281 	_DIAGASSERT(psenc != NULL);
282 
283 	if (psenc->chlen != 0) {
284 		ret = EINVAL;
285 		goto err;
286 	}
287 
288 	len = _mb_count(wc);
289 	if (n < len) {
290 		ret = E2BIG;
291 		goto err;
292 	}
293 
294 	switch (len) {
295 	case 1:
296 		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
297 			ret = EILSEQ;
298 			goto err;
299 		}
300 		break;
301 	case 2:
302 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
303 		    !_mb_trailbyte (_PUSH_PSENC(wc     ))) {
304 			ret = EILSEQ;
305 			goto err;
306 		}
307 		break;
308 	case 4:
309 		if (ei->mb_cur_max != 4 ||
310 		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
311 		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
312 		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
313 		    !_mb_surrogate (_PUSH_PSENC(wc      ))) {
314 			ret = EILSEQ;
315 			goto err;
316 		}
317 		break;
318 	}
319 
320 	_DIAGASSERT(len == psenc->chlen);
321 
322 	memcpy(s, psenc->ch, psenc->chlen);
323 	*nresult = psenc->chlen;
324 	/* _citrus_GBK2K_init_state(ei, psenc); */
325 	psenc->chlen = 0;
326 
327 	return (0);
328 
329 err:
330 	*nresult = (size_t)-1;
331 	return ret;
332 }
333 
334 static __inline int
335 /*ARGSUSED*/
_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)336 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei,
337 			    _csid_t * __restrict csid,
338 			    _index_t * __restrict idx, wchar_t wc)
339 {
340 	u_int8_t ch, cl;
341 
342 	_DIAGASSERT(csid != NULL && idx != NULL);
343 
344 	if ((u_int32_t)wc<0x80) {
345 		/* ISO646 */
346 		*csid = 0;
347 		*idx = (_index_t)wc;
348 	} else if ((u_int32_t)wc>=0x10000) {
349 		/* GBKUCS : XXX */
350 		*csid = 3;
351 		*idx = (_index_t)wc;
352 	} else {
353 		ch = (u_int8_t)(wc >> 8);
354 		cl = (u_int8_t)wc;
355 		if (ch>=0xA1 && cl>=0xA1) {
356 			/* EUC G1 */
357 			*csid = 1;
358 			*idx = (_index_t)wc & 0x7F7FU;
359 		} else {
360 			/* extended area (0x8140-) */
361 			*csid = 2;
362 			*idx = (_index_t)wc;
363 		}
364 	}
365 
366 	return 0;
367 }
368 
369 static __inline int
370 /*ARGSUSED*/
_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,wchar_t * __restrict wc,_csid_t csid,_index_t idx)371 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
372 			    wchar_t * __restrict wc,
373 			    _csid_t csid, _index_t idx)
374 {
375 
376 	_DIAGASSERT(wc != NULL);
377 
378 	switch (csid) {
379 	case 0:
380 		/* ISO646 */
381 		*wc = (wchar_t)idx;
382 		break;
383 	case 1:
384 		/* EUC G1 */
385 		*wc = (wchar_t)idx | 0x8080U;
386 		break;
387 	case 2:
388 		/* extended area */
389 		*wc = (wchar_t)idx;
390 		break;
391 	case 3:
392 		/* GBKUCS : XXX */
393 		if (ei->mb_cur_max != 4)
394 			return EINVAL;
395 		*wc = (wchar_t)idx;
396 		break;
397 	default:
398 		return EILSEQ;
399 	}
400 
401 	return 0;
402 }
403 
404 static __inline int
405 /*ARGSUSED*/
_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei,_GBK2KState * __restrict psenc,int * __restrict rstate)406 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei,
407 					    _GBK2KState * __restrict psenc,
408 					    int * __restrict rstate)
409 {
410 
411 	if (psenc->chlen == 0)
412 		*rstate = _STDENC_SDGEN_INITIAL;
413 	else
414 		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
415 
416 	return 0;
417 }
418 
419 static int
420 /*ARGSUSED*/
_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)421 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
422 				   const void * __restrict var, size_t lenvar)
423 {
424 	const char *p;
425 
426 	_DIAGASSERT(ei != NULL);
427 
428 	p = var;
429 #define MATCH(x, act)                                           \
430 do {                                                            \
431         if (lenvar >= (sizeof(#x)-1) &&                         \
432             _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {       \
433                 act;                                            \
434                 lenvar -= sizeof(#x)-1;                         \
435                 p += sizeof(#x)-1;                              \
436         }                                                       \
437 } while (0)
438 	memset((void *)ei, 0, sizeof(*ei));
439 	ei->mb_cur_max = 4;
440 	while (lenvar>0) {
441 		switch (_bcs_tolower(*p)) {
442 		case '2':
443 			MATCH("2byte", ei->mb_cur_max = 2);
444 			break;
445 		}
446 		p++;
447 		lenvar--;
448 	}
449 
450 	return (0);
451 }
452 
453 static void
454 /*ARGSUSED*/
_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo * ei)455 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei)
456 {
457 }
458 
459 
460 /* ----------------------------------------------------------------------
461  * public interface for ctype
462  */
463 
/*
 * Declarations and operations table for the GBK2K ctype module.
 * NOTE(review): both macros are defined outside this file (presumably
 * in citrus_ctype.h); see there for what exactly they expand to.
 */
_CITRUS_CTYPE_DECLS(GBK2K);
_CITRUS_CTYPE_DEF_OPS(GBK2K);
466 
467 #include "citrus_ctype_template.h"
468 
469 /* ----------------------------------------------------------------------
470  * public interface for stdenc
471  */
472 
/*
 * Declarations and operations table for the GBK2K stdenc module.
 * NOTE(review): both macros are defined outside this file (presumably
 * in citrus_stdenc.h); see there for what exactly they expand to.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);
475 
476 #include "citrus_stdenc_template.h"
477