1 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c)2003 Citrus Project,
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/types.h>
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdbool.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wchar.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_stdenc.h"
49 #include "citrus_gbk2k.h"
50 
51 
52 /* ----------------------------------------------------------------------
 * private definitions used by the templates
54  */
55 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * read so far but do not yet form a complete character.
 */
struct _GBK2KState {
	int	 chlen;		/* number of bytes currently stored in ch[] */
	char	 ch[4];		/* stored bytes, oldest first */
};
typedef struct _GBK2KState _GBK2KState;
60 
/*
 * Per-encoding configuration.  mb_cur_max is set by the module init
 * routine in this file to 4, or to 2 when the "2byte" variable is given;
 * the converters accept four-byte sequences only when it equals 4.
 */
typedef struct {
	int	 mb_cur_max;
} _GBK2KEncodingInfo;
64 
/*
 * Names supplied for the template code (see the section comment above;
 * citrus_stdenc_template.h is included at the end of this file).
 *
 * _CEI_TO_EI(cei)		address of the `ei' member of *cei
 * _CEI_TO_STATE(cei, f)	the member `s_<f>' of cei->states
 * _FUNCNAME(m)			m prefixed with _citrus_GBK2K_
 * _ENCODING_INFO/_STATE	the two types defined above
 * _ENCODING_MB_CUR_MAX(ei)	the mb_cur_max member of *ei
 * _ENCODING_IS_STATE_DEPENDENT	constant 0
 * _STATE_NEEDS_EXPLICIT_INIT	constant 0 for every state
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _ENCODING_STATE			_GBK2KState
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
74 
75 static __inline void
76 /*ARGSUSED*/
77 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused,
78     _GBK2KState * __restrict s)
79 {
80 
81 	memset(s, 0, sizeof(*s));
82 }
83 
/*
 * The two helpers below are compiled out (#if 0).  As written they copy a
 * whole _GBK2KState object into, respectively out of, an opaque buffer
 * with memcpy; the encoding info argument is unused in both.
 */
#if 0
static __inline void
/*ARGSUSED*/
_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _GBK2KState * __restrict s)
{

	memcpy(pspriv, (const void *)s, sizeof(*s));
}

static __inline void
/*ARGSUSED*/
_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    _GBK2KState * __restrict s, const void * __restrict pspriv)
{

	memcpy((void *)s, pspriv, sizeof(*s));
}
#endif
103 
/*
 * True when the low eight bits of c are in 0x00..0x7f, i.e. the byte is
 * a complete one-byte character.  Higher bits of c are ignored.
 */
static __inline bool
_mb_singlebyte(int c)
{

	return ((c & 0x80) == 0);
}
110 
/*
 * True when the low eight bits of c are in 0x81..0xfe, the range this
 * file accepts for the first byte of a two- or four-byte character and
 * for the third byte of a four-byte character.
 */
static __inline bool
_mb_leadbyte(int c)
{
	const int octet = c & 0xff;

	return (!(octet < 0x81 || octet > 0xfe));
}
118 
/*
 * True when the low eight bits of c are a valid second byte of a
 * two-byte character: 0x40..0x7e or 0x80..0xfe.  Put differently, any
 * value from 0x40 upwards except 0x7f and 0xff.
 */
static __inline bool
_mb_trailbyte(int c)
{
	const int octet = c & 0xff;

	if (octet < 0x40)
		return (false);
	return (octet != 0x7f && octet != 0xff);
}
126 
/*
 * True when the low eight bits of c are in 0x30..0x39, the range this
 * file requires for the second and fourth byte of a four-byte character.
 */
static __inline bool
_mb_surrogate(int c)
{
	const int octet = c & 0xff;

	return (octet >= 0x30 && octet < 0x3a);
}
134 
/*
 * Number of bytes used to encode the wide character value v, judged only
 * by its magnitude when viewed as an unsigned 32-bit quantity:
 * up to 0xff -> 1, up to 0xffff -> 2, anything larger -> 4.
 * No validity check of the individual bytes is done here.
 */
static __inline int
_mb_count(wchar_t v)
{
	const uint32_t code = (uint32_t)v; /* XXX */

	if (code > 0xffffU)
		return (4);
	if (code > 0xffU)
		return (2);
	return (1);
}
147 
/*
 * Shorthands for the byte buffer of the conversion state.  Both rely on
 * a variable named `psenc' being in scope at the point of use.
 *   _PSENC		the byte stored last (only meaningful if chlen > 0)
 *   _PUSH_PSENC(c)	append c, increment chlen, yield the stored value
 * Neither macro checks chlen against the size of ch[].
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
150 
151 static int
152 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
153     wchar_t * __restrict pwc, char ** __restrict s, size_t n,
154     _GBK2KState * __restrict psenc, size_t * __restrict nresult)
155 {
156 	char *s0, *s1;
157 	wchar_t wc;
158 	int chlenbak, len;
159 
160 	s0 = *s;
161 
162 	if (s0 == NULL) {
163 		/* _citrus_GBK2K_init_state(ei, psenc); */
164 		psenc->chlen = 0;
165 		*nresult = 0;
166 		return (0);
167 	}
168 
169 	chlenbak = psenc->chlen;
170 
171 	switch (psenc->chlen) {
172 	case 3:
173 		if (!_mb_leadbyte (_PSENC))
174 			goto invalid;
175 	/* FALLTHROUGH */
176 	case 2:
177 		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
178 			goto invalid;
179 	/* FALLTHROUGH */
180 	case 1:
181 		if (!_mb_leadbyte (_PSENC))
182 			goto invalid;
183 	/* FALLTHOROUGH */
184 	case 0:
185 		break;
186 	default:
187 		goto invalid;
188 	}
189 
190 	for (;;) {
191 		if (n-- < 1)
192 			goto restart;
193 
194 		_PUSH_PSENC(*s0++);
195 
196 		switch (psenc->chlen) {
197 		case 1:
198 			if (_mb_singlebyte(_PSENC))
199 				goto convert;
200 			if (_mb_leadbyte  (_PSENC))
201 				continue;
202 			goto ilseq;
203 		case 2:
204 			if (_mb_trailbyte (_PSENC))
205 				goto convert;
206 			if (ei->mb_cur_max == 4 &&
207 			    _mb_surrogate (_PSENC))
208 				continue;
209 			goto ilseq;
210 		case 3:
211 			if (_mb_leadbyte  (_PSENC))
212 				continue;
213 			goto ilseq;
214 		case 4:
215 			if (_mb_surrogate (_PSENC))
216 				goto convert;
217 			goto ilseq;
218 		}
219 	}
220 
221 convert:
222 	len = psenc->chlen;
223 	s1  = &psenc->ch[0];
224 	wc  = 0;
225 	while (len-- > 0)
226 		wc = (wc << 8) | (*s1++ & 0xff);
227 
228 	if (pwc != NULL)
229 		*pwc = wc;
230 	*s = s0;
231 	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
232 	/* _citrus_GBK2K_init_state(ei, psenc); */
233 	psenc->chlen = 0;
234 
235 	return (0);
236 
237 restart:
238 	*s = s0;
239 	*nresult = (size_t)-2;
240 
241 	return (0);
242 
243 invalid:
244 	return (EINVAL);
245 
246 ilseq:
247 	*nresult = (size_t)-1;
248 	return (EILSEQ);
249 }
250 
251 static int
252 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
253     char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc,
254     size_t * __restrict nresult)
255 {
256 	size_t len;
257 	int ret;
258 
259 	if (psenc->chlen != 0) {
260 		ret = EINVAL;
261 		goto err;
262 	}
263 
264 	len = _mb_count(wc);
265 	if (n < len) {
266 		ret = E2BIG;
267 		goto err;
268 	}
269 
270 	switch (len) {
271 	case 1:
272 		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
273 			ret = EILSEQ;
274 			goto err;
275 		}
276 		break;
277 	case 2:
278 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
279 		    !_mb_trailbyte (_PUSH_PSENC(wc))) {
280 			ret = EILSEQ;
281 			goto err;
282 		}
283 		break;
284 	case 4:
285 		if (ei->mb_cur_max != 4 ||
286 		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
287 		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
288 		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
289 		    !_mb_surrogate (_PUSH_PSENC(wc))) {
290 			ret = EILSEQ;
291 			goto err;
292 		}
293 		break;
294 	}
295 
296 	memcpy(s, psenc->ch, psenc->chlen);
297 	*nresult = psenc->chlen;
298 	/* _citrus_GBK2K_init_state(ei, psenc); */
299 	psenc->chlen = 0;
300 
301 	return (0);
302 
303 err:
304 	*nresult = (size_t)-1;
305 	return (ret);
306 }
307 
308 static __inline int
309 /*ARGSUSED*/
310 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused,
311     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
312 {
313 	uint8_t ch, cl;
314 
315 	if ((uint32_t)wc < 0x80) {
316 		/* ISO646 */
317 		*csid = 0;
318 		*idx = (_index_t)wc;
319 	} else if ((uint32_t)wc >= 0x10000) {
320 		/* GBKUCS : XXX */
321 		*csid = 3;
322 		*idx = (_index_t)wc;
323 	} else {
324 		ch = (uint8_t)(wc >> 8);
325 		cl = (uint8_t)wc;
326 		if (ch >= 0xA1 && cl >= 0xA1) {
327 			/* EUC G1 */
328 			*csid = 1;
329 			*idx = (_index_t)wc & 0x7F7FU;
330 		} else {
331 			/* extended area (0x8140-) */
332 			*csid = 2;
333 			*idx = (_index_t)wc;
334 		}
335 	}
336 
337 	return (0);
338 }
339 
340 static __inline int
341 /*ARGSUSED*/
342 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
343     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
344 {
345 
346 	switch (csid) {
347 	case 0:
348 		/* ISO646 */
349 		*wc = (wchar_t)idx;
350 		break;
351 	case 1:
352 		/* EUC G1 */
353 		*wc = (wchar_t)idx | 0x8080U;
354 		break;
355 	case 2:
356 		/* extended area */
357 		*wc = (wchar_t)idx;
358 		break;
359 	case 3:
360 		/* GBKUCS : XXX */
361 		if (ei->mb_cur_max != 4)
362 			return (EINVAL);
363 		*wc = (wchar_t)idx;
364 		break;
365 	default:
366 		return (EILSEQ);
367 	}
368 
369 	return (0);
370 }
371 
372 static __inline int
373 /*ARGSUSED*/
374 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused,
375     _GBK2KState * __restrict psenc, int * __restrict rstate)
376 {
377 
378 	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
379 	    _STDENC_SDGEN_INCOMPLETE_CHAR;
380 	return (0);
381 }
382 
383 static int
384 /*ARGSUSED*/
385 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
386     const void * __restrict var, size_t lenvar)
387 {
388 	const char *p;
389 
390 	p = var;
391 	memset((void *)ei, 0, sizeof(*ei));
392 	ei->mb_cur_max = 4;
393 	while (lenvar > 0) {
394 		switch (_bcs_tolower(*p)) {
395 		case '2':
396 			MATCH("2byte", ei->mb_cur_max = 2);
397 			break;
398 		}
399 		p++;
400 		lenvar--;
401 	}
402 
403 	return (0);
404 }
405 
406 static void
407 /*ARGSUSED*/
408 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused)
409 {
410 
411 }
412 
413 /* ----------------------------------------------------------------------
414  * public interface for stdenc
415  */
416 
/*
 * Both macros are defined outside this file; presumably they declare the
 * stdenc entry points for GBK2K and define its operations table --
 * TODO confirm against citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

/*
 * Included last, after the _FUNCNAME/_ENCODING_* macros and the
 * _citrus_GBK2K_* functions above have been defined.
 */
#include "citrus_stdenc_template.h"
421