1 /* $FreeBSD$ */
2 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */
3 
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c)2003 Citrus Project,
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/types.h>
34 
35 #include <assert.h>
36 #include <errno.h>
37 #include <limits.h>
38 #include <stdbool.h>
39 #include <stddef.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44 
45 #include "citrus_namespace.h"
46 #include "citrus_types.h"
47 #include "citrus_bcs.h"
48 #include "citrus_module.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_gbk2k.h"
51 
52 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
56 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * collected so far.  chlen is the number of valid bytes in ch[]; a value
 * of 0 means that no partial character is pending.
 */
struct _GBK2KState {
	int	 chlen;		/* bytes currently stored in ch[] */
	char	 ch[4];		/* pending bytes, oldest first */
};
typedef struct _GBK2KState _GBK2KState;
61 
/*
 * Per-encoding parameters.  mb_cur_max is set by
 * _citrus_GBK2K_encoding_module_init() (4 by default); the converters
 * accept four-byte sequences only when it equals 4.
 */
typedef struct {
	int	 mb_cur_max;
} _GBK2KEncodingInfo;
65 
/*
 * Glue macros.  NOTE(review): these are presumably consumed by the template
 * header included at the end of this file -- confirm against that header.
 */
/* address of the `ei' member of a ctype-encoding-info object */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* the `states.s_<func>' member of a ctype-encoding-info object */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* builds the module-prefixed name _citrus_GBK2K_<m> */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
/* types that play the "encoding info" and "encoding state" roles */
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _ENCODING_STATE			_GBK2KState
/* reads mb_cur_max from an encoding-info pointer */
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
/* both flags are constant 0 for this encoding */
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
75 
76 static __inline void
77 /*ARGSUSED*/
78 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused,
79     _GBK2KState * __restrict s)
80 {
81 
82 	memset(s, 0, sizeof(*s));
83 }
84 
#if 0
/*
 * Disabled helpers: copy a conversion state to and from an opaque buffer
 * of at least sizeof(_GBK2KState) bytes.  ei is not consulted.
 */
static __inline void
/*ARGSUSED*/
_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _GBK2KState * __restrict s)
{

	(void)memcpy(pspriv, s, sizeof(_GBK2KState));
}

static __inline void
/*ARGSUSED*/
_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused,
    _GBK2KState * __restrict s, const void * __restrict pspriv)
{

	(void)memcpy(s, pspriv, sizeof(_GBK2KState));
}
#endif
104 
/*
 * True when the low 8 bits of c are in 0x00..0x7f, i.e. bit 7 is clear.
 * Bits above the low byte are ignored.
 */
static __inline bool
_mb_singlebyte(int c)
{

	return ((c & 0x80) == 0);
}
111 
/*
 * True when the low 8 bits of c are in 0x81..0xfe.  Bits above the low
 * byte are ignored.
 */
static __inline bool
_mb_leadbyte(int c)
{
	const int b = c & 0xff;

	if (b < 0x81)
		return (false);
	return (b <= 0xfe);
}
119 
/*
 * True when the low 8 bits of c are in 0x40..0x7e or 0x80..0xfe, that is
 * anywhere in 0x40..0xfe except 0x7f.  Bits above the low byte are ignored.
 */
static __inline bool
_mb_trailbyte(int c)
{
	const int b = c & 0xff;

	if (b < 0x40 || b > 0xfe)
		return (false);
	return (b != 0x7f);
}
127 
/*
 * True when the low 8 bits of c are in 0x30..0x39, the range this module
 * requires for the second and fourth bytes of a four-byte sequence.  Bits
 * above the low byte are ignored.
 */
static __inline bool
_mb_surrogate(int c)
{
	const int b = c & 0xff;

	return (!(b < 0x30 || b > 0x39));
}
135 
/*
 * Number of bytes used to encode the wide character v, judged only by its
 * magnitude: values up to 0xff take 1 byte, values up to 0xffff take 2,
 * and anything larger takes 4.  No validity check is made here.
 */
static __inline int
_mb_count(wchar_t v)
{
	const uint32_t code = (uint32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	if (code <= 0xffffU)
		return (2);
	return (4);
}
148 
/*
 * Accessors for the pending-byte buffer; both expect a variable named
 * `psenc' to be in scope.
 *
 * _PSENC         - the most recently stored byte, ch[chlen - 1]; only
 *                  meaningful while chlen >= 1.
 * _PUSH_PSENC(c) - stores c at ch[chlen] and then increments chlen; the
 *                  expression's value is the stored char.  No bounds check
 *                  is made, so callers must keep chlen below sizeof(ch).
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
151 
152 static int
153 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
154     wchar_t * __restrict pwc, char ** __restrict s, size_t n,
155     _GBK2KState * __restrict psenc, size_t * __restrict nresult)
156 {
157 	char *s0, *s1;
158 	wchar_t wc;
159 	int chlenbak, len;
160 
161 	s0 = *s;
162 
163 	if (s0 == NULL) {
164 		/* _citrus_GBK2K_init_state(ei, psenc); */
165 		psenc->chlen = 0;
166 		*nresult = 0;
167 		return (0);
168 	}
169 
170 	chlenbak = psenc->chlen;
171 
172 	switch (psenc->chlen) {
173 	case 3:
174 		if (!_mb_leadbyte (_PSENC))
175 			goto invalid;
176 	/* FALLTHROUGH */
177 	case 2:
178 		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
179 			goto invalid;
180 	/* FALLTHROUGH */
181 	case 1:
182 		if (!_mb_leadbyte (_PSENC))
183 			goto invalid;
184 	/* FALLTHOROUGH */
185 	case 0:
186 		break;
187 	default:
188 		goto invalid;
189 	}
190 
191 	for (;;) {
192 		if (n-- < 1)
193 			goto restart;
194 
195 		_PUSH_PSENC(*s0++);
196 
197 		switch (psenc->chlen) {
198 		case 1:
199 			if (_mb_singlebyte(_PSENC))
200 				goto convert;
201 			if (_mb_leadbyte  (_PSENC))
202 				continue;
203 			goto ilseq;
204 		case 2:
205 			if (_mb_trailbyte (_PSENC))
206 				goto convert;
207 			if (ei->mb_cur_max == 4 &&
208 			    _mb_surrogate (_PSENC))
209 				continue;
210 			goto ilseq;
211 		case 3:
212 			if (_mb_leadbyte  (_PSENC))
213 				continue;
214 			goto ilseq;
215 		case 4:
216 			if (_mb_surrogate (_PSENC))
217 				goto convert;
218 			goto ilseq;
219 		}
220 	}
221 
222 convert:
223 	len = psenc->chlen;
224 	s1  = &psenc->ch[0];
225 	wc  = 0;
226 	while (len-- > 0)
227 		wc = (wc << 8) | (*s1++ & 0xff);
228 
229 	if (pwc != NULL)
230 		*pwc = wc;
231 	*s = s0;
232 	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
233 	/* _citrus_GBK2K_init_state(ei, psenc); */
234 	psenc->chlen = 0;
235 
236 	return (0);
237 
238 restart:
239 	*s = s0;
240 	*nresult = (size_t)-2;
241 
242 	return (0);
243 
244 invalid:
245 	return (EINVAL);
246 
247 ilseq:
248 	*nresult = (size_t)-1;
249 	return (EILSEQ);
250 }
251 
252 static int
253 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
254     char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc,
255     size_t * __restrict nresult)
256 {
257 	size_t len;
258 	int ret;
259 
260 	if (psenc->chlen != 0) {
261 		ret = EINVAL;
262 		goto err;
263 	}
264 
265 	len = _mb_count(wc);
266 	if (n < len) {
267 		ret = E2BIG;
268 		goto err;
269 	}
270 
271 	switch (len) {
272 	case 1:
273 		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
274 			ret = EILSEQ;
275 			goto err;
276 		}
277 		break;
278 	case 2:
279 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
280 		    !_mb_trailbyte (_PUSH_PSENC(wc))) {
281 			ret = EILSEQ;
282 			goto err;
283 		}
284 		break;
285 	case 4:
286 		if (ei->mb_cur_max != 4 ||
287 		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
288 		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
289 		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
290 		    !_mb_surrogate (_PUSH_PSENC(wc))) {
291 			ret = EILSEQ;
292 			goto err;
293 		}
294 		break;
295 	}
296 
297 	memcpy(s, psenc->ch, psenc->chlen);
298 	*nresult = psenc->chlen;
299 	/* _citrus_GBK2K_init_state(ei, psenc); */
300 	psenc->chlen = 0;
301 
302 	return (0);
303 
304 err:
305 	*nresult = (size_t)-1;
306 	return (ret);
307 }
308 
309 static __inline int
310 /*ARGSUSED*/
311 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused,
312     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
313 {
314 	uint8_t ch, cl;
315 
316 	if ((uint32_t)wc < 0x80) {
317 		/* ISO646 */
318 		*csid = 0;
319 		*idx = (_index_t)wc;
320 	} else if ((uint32_t)wc >= 0x10000) {
321 		/* GBKUCS : XXX */
322 		*csid = 3;
323 		*idx = (_index_t)wc;
324 	} else {
325 		ch = (uint8_t)(wc >> 8);
326 		cl = (uint8_t)wc;
327 		if (ch >= 0xA1 && cl >= 0xA1) {
328 			/* EUC G1 */
329 			*csid = 1;
330 			*idx = (_index_t)wc & 0x7F7FU;
331 		} else {
332 			/* extended area (0x8140-) */
333 			*csid = 2;
334 			*idx = (_index_t)wc;
335 		}
336 	}
337 
338 	return (0);
339 }
340 
341 static __inline int
342 /*ARGSUSED*/
343 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
344     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
345 {
346 
347 	switch (csid) {
348 	case 0:
349 		/* ISO646 */
350 		*wc = (wchar_t)idx;
351 		break;
352 	case 1:
353 		/* EUC G1 */
354 		*wc = (wchar_t)idx | 0x8080U;
355 		break;
356 	case 2:
357 		/* extended area */
358 		*wc = (wchar_t)idx;
359 		break;
360 	case 3:
361 		/* GBKUCS : XXX */
362 		if (ei->mb_cur_max != 4)
363 			return (EINVAL);
364 		*wc = (wchar_t)idx;
365 		break;
366 	default:
367 		return (EILSEQ);
368 	}
369 
370 	return (0);
371 }
372 
373 static __inline int
374 /*ARGSUSED*/
375 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused,
376     _GBK2KState * __restrict psenc, int * __restrict rstate)
377 {
378 
379 	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
380 	    _STDENC_SDGEN_INCOMPLETE_CHAR;
381 	return (0);
382 }
383 
/*
 * Initialise the encoding parameters from the module variable string.
 *
 *   ei     - encoding info to fill in; it is zeroed first
 *   var    - variable string, lenvar bytes long
 *   lenvar - length of var in bytes
 *
 * mb_cur_max starts at 4; the "2byte" option sets it to 2.  Characters
 * that do not start an option are skipped.  Always returns 0.
 */
static int
/*ARGSUSED*/
_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
    const void * __restrict var, size_t lenvar)
{
	const char *p;

	p = var;
	memset((void *)ei, 0, sizeof(*ei));
	ei->mb_cur_max = 4;
	while (lenvar > 0) {
		switch (_bcs_tolower(*p)) {
		case '2':
			/*
			 * MATCH is defined outside this file.
			 * NOTE(review): presumably it compares the text at p
			 * with "2byte" and runs the assignment on a match --
			 * confirm.  If it also advances p / reduces lenvar,
			 * the unconditional p++ / lenvar-- below could step
			 * past the end or wrap lenvar; verify.
			 */
			MATCH("2byte", ei->mb_cur_max = 2);
			break;
		}
		p++;
		lenvar--;
	}

	return (0);
}
406 
/*
 * Counterpart of _citrus_GBK2K_encoding_module_init().  Intentionally
 * empty: init only fills in the caller's structure, so nothing has to be
 * released here.
 */
static void
/*ARGSUSED*/
_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused)
{

}
413 
414 /* ----------------------------------------------------------------------
415  * public interface for stdenc
416  */
417 
/*
 * NOTE(review): both macros are defined outside this file; presumably they
 * declare the stdenc entry points for GBK2K and define its operations
 * table -- confirm against citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

/*
 * Must come after the _FUNCNAME / _ENCODING_* macros and the private
 * functions above.  NOTE(review): presumably the template expands the
 * public stdenc functions in terms of them -- confirm.
 */
#include "citrus_stdenc_template.h"
422