1 /* $NetBSD: citrus_dechanyu.c,v 1.5 2013/05/28 16:57:56 joerg Exp $ */
2 
3 /*-
4  * Copyright (c)2007 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 #include <sys/cdefs.h>
29 #if defined(LIBC_SCCS) && !defined(lint)
30 __RCSID("$NetBSD: citrus_dechanyu.c,v 1.5 2013/05/28 16:57:56 joerg Exp $");
31 #endif /* LIBC_SCCS and not lint */
32 
33 #include <sys/types.h>
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <stddef.h>
41 #include <wchar.h>
42 #include <limits.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_ctype.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_dechanyu.h"
51 
52 /* ----------------------------------------------------------------------
 * private definitions used by the templates
54  */
55 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * consumed so far but do not yet form a complete character.
 */
typedef struct {
	int	chlen;	/* number of valid bytes held in ch[]; 0 = initial state */
	char	ch[4];	/* buffered bytes of the character being assembled */
} _DECHanyuState;
60 
/*
 * Per-encoding configuration.  Nothing in this file reads the member;
 * it only keeps the structure non-empty.
 */
typedef struct {
	int	dummy;	/* placeholder, never consulted here */
} _DECHanyuEncodingInfo;
64 
65 typedef struct {
66 	_DECHanyuEncodingInfo	ei;
67 	struct {
68 		/* for future multi-locale facility */
69 		_DECHanyuState	s_mblen;
70 		_DECHanyuState	s_mbrlen;
71 		_DECHanyuState	s_mbrtowc;
72 		_DECHanyuState	s_mbtowc;
73 		_DECHanyuState	s_mbsrtowcs;
74 		_DECHanyuState	s_mbsnrtowcs;
75 		_DECHanyuState	s_wcrtomb;
76 		_DECHanyuState	s_wcsrtombs;
77 		_DECHanyuState	s_wcsnrtombs;
78 		_DECHanyuState	s_wctomb;
79 	} states;
80 } _DECHanyuCTypeInfo;
81 
/* Accessors into _DECHanyuCTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Names and types handed to the included templates. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState

/* Longest encoded character is four bytes; the state needs no explicit init. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
92 
93 static __inline void
94 /*ARGSUSED*/
_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,_DECHanyuState * __restrict psenc)95 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
96 	_DECHanyuState * __restrict psenc)
97 {
98 	/* ei may be null */
99 	_DIAGASSERT(psenc != NULL);
100 
101 	psenc->chlen = 0;
102 }
103 
104 static __inline void
105 /*ARGSUSED*/
_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,void * __restrict pspriv,const _DECHanyuState * __restrict psenc)106 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
107 	void * __restrict pspriv,
108 	const _DECHanyuState * __restrict psenc)
109 {
110 	/* ei may be null */
111 	_DIAGASSERT(pspriv != NULL);
112 	_DIAGASSERT(psenc != NULL);
113 
114 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
115 }
116 
117 static __inline void
118 /*ARGSUSED*/
_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,_DECHanyuState * __restrict psenc,const void * __restrict pspriv)119 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
120 	_DECHanyuState * __restrict psenc,
121 	const void * __restrict pspriv)
122 {
123 	/* ei may be null */
124 	_DIAGASSERT(psenc != NULL);
125 	_DIAGASSERT(pspriv != NULL);
126 
127 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
128 }
129 
130 static void
131 /*ARGSUSED*/
_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo * ei)132 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
133 {
134 	/* ei may be null */
135 }
136 
137 static int
138 /*ARGSUSED*/
_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)139 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
140 	const void * __restrict var, size_t lenvar)
141 {
142 	/* ei may be null */
143 	return 0;
144 }
145 
146 static __inline int
is_singlebyte(int c)147 is_singlebyte(int c)
148 {
149 	return c <= 0x7F;
150 }
151 
/* Non-zero when c lies in 0xA1..0xFE, the range accepted as a lead byte. */
static __inline int
is_leadbyte(int c)
{
	return !(c < 0xA1 || c > 0xFE);
}
157 
/*
 * Non-zero when c is acceptable as a trail byte: after clearing bit 7
 * the value must lie in 0x21..0x7E, so both 0x21..0x7E and 0xA1..0xFE
 * qualify.
 */
static __inline int
is_trailbyte(int c)
{
	const int low = c & ~0x80;

	return !(low < 0x21 || low > 0x7E);
}
164 
/* Non-zero when c is 0xC2, the first byte of the two-byte Hanyu prefix. */
static __inline int
is_hanyu1(int c)
{
	return (c == 0xC2) ? 1 : 0;
}
170 
/* Non-zero when c is 0xCB, the second byte of the two-byte Hanyu prefix. */
static __inline int
is_hanyu2(int c)
{
	return (c == 0xCB) ? 1 : 0;
}
176 
/*
 * Upper 16 bits carried by the wide-character form of a four-byte
 * character: the prefix bytes 0xC2 0xCB placed in bits 31..16.
 */
#define HANYUBIT	0xC2CB0000U
178 
/* Non-zero when c lies in 0x21..0x7E, one coordinate of a 94-character set. */
static __inline int
is_94charset(int c)
{
	return !(c < 0x21 || c > 0x7E);
}
184 
185 static int
186 /*ARGSUSED*/
_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,wchar_t * __restrict pwc,const char ** __restrict s,size_t n,_DECHanyuState * __restrict psenc,size_t * __restrict nresult)187 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
188 	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
189 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
190 {
191 	const char *s0;
192 	int ch;
193 	wchar_t wc;
194 
195 	/* ei may be unused */
196 	_DIAGASSERT(s != NULL);
197 	_DIAGASSERT(psenc != NULL);
198 	_DIAGASSERT(nresult != NULL);
199 
200 	if (*s == NULL) {
201 		_citrus_DECHanyu_init_state(ei, psenc);
202 		*nresult = _ENCODING_IS_STATE_DEPENDENT;
203 		return 0;
204 	}
205 	s0 = *s;
206 
207 	wc = (wchar_t)0;
208 	switch (psenc->chlen) {
209 	case 0:
210 		if (n-- < 1)
211 			goto restart;
212 		ch = *s0++ & 0xFF;
213 		if (is_singlebyte(ch) != 0) {
214 			if (pwc != NULL)
215 				*pwc = (wchar_t)ch;
216 			*nresult = (size_t)((ch == 0) ? 0 : 1);
217 			*s = s0;
218 			return 0;
219 		}
220 		if (is_leadbyte(ch) == 0)
221 			goto ilseq;
222 		psenc->ch[psenc->chlen++] = ch;
223 		break;
224 	case 1:
225 		ch = psenc->ch[0] & 0xFF;
226 		if (is_leadbyte(ch) == 0)
227 			return EINVAL;
228 		break;
229 	case 2: case 3:
230 		ch = psenc->ch[0] & 0xFF;
231 		if (is_hanyu1(ch) != 0) {
232 			ch = psenc->ch[1] & 0xFF;
233 			if (is_hanyu2(ch) != 0) {
234 				wc |= (wchar_t)HANYUBIT;
235 				break;
236 			}
237 		}
238 	/*FALLTHROUGH*/
239 	default:
240 		return EINVAL;
241 	}
242 
243 	switch (psenc->chlen) {
244 	case 1:
245 		if (is_hanyu1(ch) != 0) {
246 			if (n-- < 1)
247 				goto restart;
248 			ch = *s0++ & 0xFF;
249 			if (is_hanyu2(ch) == 0)
250 				goto ilseq;
251 			psenc->ch[psenc->chlen++] = ch;
252 			wc |= (wchar_t)HANYUBIT;
253 			if (n-- < 1)
254 				goto restart;
255 			ch = *s0++ & 0xFF;
256 			if (is_leadbyte(ch) == 0)
257 				goto ilseq;
258 			psenc->ch[psenc->chlen++] = ch;
259 		}
260 		break;
261 	case 2:
262 		if (n-- < 1)
263 			goto restart;
264 		ch = *s0++ & 0xFF;
265 		if (is_leadbyte(ch) == 0)
266 			goto ilseq;
267 		psenc->ch[psenc->chlen++] = ch;
268 		break;
269 	case 3:
270 		ch = psenc->ch[2] & 0xFF;
271 		if (is_leadbyte(ch) == 0)
272 			return EINVAL;
273 	}
274 	if (n-- < 1)
275 		goto restart;
276 	wc |= (wchar_t)(ch << 8);
277 	ch = *s0++ & 0xFF;
278 	if (is_trailbyte(ch) == 0)
279 		goto ilseq;
280 	wc |= (wchar_t)ch;
281 	if (pwc != NULL)
282 		*pwc = wc;
283 	*nresult = (size_t)(s0 - *s);
284 	*s = s0;
285 	psenc->chlen = 0;
286 
287 	return 0;
288 
289 restart:
290 	*nresult = (size_t)-2;
291 	*s = s0;
292 	return 0;
293 
294 ilseq:
295 	*nresult = (size_t)-1;
296 	return EILSEQ;
297 }
298 
299 static int
300 /*ARGSUSED*/
_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_DECHanyuState * __restrict psenc,size_t * __restrict nresult)301 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
302 	char * __restrict s, size_t n, wchar_t wc,
303 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
304 {
305 	int ch;
306 
307 	/* ei may be unused */
308 	_DIAGASSERT(s != NULL);
309 	_DIAGASSERT(psenc != NULL);
310 	_DIAGASSERT(nresult != NULL);
311 
312 	if (psenc->chlen != 0)
313 		return EINVAL;
314 
315 	/* XXX: assume wchar_t as int */
316 	if ((uint32_t)wc <= 0x7F) {
317 		ch = wc & 0xFF;
318 	} else {
319 		if ((uint32_t)wc > 0xFFFF) {
320 			if ((wc & ~0xFFFF) != HANYUBIT)
321 				goto ilseq;
322 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
323 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
324 			wc &= 0xFFFF;
325 		}
326 		ch = (wc >> 8) & 0xFF;
327 		if (!is_leadbyte(ch))
328 			goto ilseq;
329 		psenc->ch[psenc->chlen++] = ch;
330 		ch = wc & 0xFF;
331 		if (is_trailbyte(ch) == 0)
332 			goto ilseq;
333 	}
334 	psenc->ch[psenc->chlen++] = ch;
335 	if (n < psenc->chlen) {
336 		*nresult = (size_t)-1;
337 		return E2BIG;
338 	}
339 	memcpy(s, psenc->ch, psenc->chlen);
340 	*nresult = psenc->chlen;
341 	psenc->chlen = 0;
342 
343 	return 0;
344 
345 ilseq:
346 	*nresult = (size_t)-1;
347 	return EILSEQ;
348 }
349 
350 static __inline int
351 /*ARGSUSED*/
_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)352 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
353 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
354 {
355 	int plane;
356 	wchar_t mask;
357 
358 	/* ei may be unused */
359 	_DIAGASSERT(csid != NULL);
360 	_DIAGASSERT(idx != NULL);
361 
362 	plane = 0;
363 	mask = 0x7F;
364 	/* XXX: assume wchar_t as int */
365 	if ((uint32_t)wc > 0x7F) {
366 		if ((uint32_t)wc > 0xFFFF) {
367 			if ((wc & ~0xFFFF) != HANYUBIT)
368 				return EILSEQ;
369 			plane += 2;
370 		}
371 		if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
372 		    is_trailbyte(wc & 0xFF) == 0)
373 			return EILSEQ;
374 		plane += (wc & 0x80) ? 1 : 2;
375 		mask |= 0x7F00;
376 	}
377 	*csid = plane;
378 	*idx = (_index_t)(wc & mask);
379 
380 	return 0;
381 }
382 
383 static __inline int
384 /*ARGSUSED*/
_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,wchar_t * __restrict wc,_csid_t csid,_index_t idx)385 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
386 	wchar_t * __restrict wc, _csid_t csid, _index_t idx)
387 {
388 	/* ei may be unused */
389 	_DIAGASSERT(wc != NULL);
390 
391 	if (csid == 0) {
392 		if (idx > 0x7F)
393 			return EILSEQ;
394 	} else if (csid <= 4) {
395 		if (is_94charset(idx >> 8) == 0)
396 			return EILSEQ;
397 		if (is_94charset(idx & 0xFF) == 0)
398 			return EILSEQ;
399 		if (csid % 2)
400 			idx |= 0x80;
401 		idx |= 0x8000;
402 		if (csid > 2)
403 			idx |= HANYUBIT;
404 	} else
405 		return EILSEQ;
406 	*wc = (wchar_t)idx;
407 	return 0;
408 }
409 
410 static __inline int
411 /*ARGSUSED*/
_citrus_DECHanyu_stdenc_get_state_desc_generic(_DECHanyuEncodingInfo * __restrict ei,_DECHanyuState * __restrict psenc,int * __restrict rstate)412 _citrus_DECHanyu_stdenc_get_state_desc_generic(
413 	_DECHanyuEncodingInfo * __restrict ei,
414 	_DECHanyuState * __restrict psenc, int * __restrict rstate)
415 {
416 	/* ei may be unused */
417 	_DIAGASSERT(psenc != NULL);
418 	_DIAGASSERT(rstate != NULL);
419 
420 	*rstate = (psenc->chlen == 0)
421 	    ? _STDENC_SDGEN_INITIAL
422 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
423 	return 0;
424 }
425 
426 /* ----------------------------------------------------------------------
427  * public interface for ctype
428  */
429 
430 _CITRUS_CTYPE_DECLS(DECHanyu);
431 _CITRUS_CTYPE_DEF_OPS(DECHanyu);
432 
433 #include "citrus_ctype_template.h"
434 
435 
436 /* ----------------------------------------------------------------------
437  * public interface for stdenc
438  */
439 
440 _CITRUS_STDENC_DECLS(DECHanyu);
441 _CITRUS_STDENC_DEF_OPS(DECHanyu);
442 
443 #include "citrus_stdenc_template.h"
444