1 /* $FreeBSD$ */
2 /* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */
3 
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c)2007 Citrus Project,
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 #include <sys/cdefs.h>
32 #include <sys/types.h>
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stddef.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wchar.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_stdenc.h"
49 #include "citrus_dechanyu.h"
50 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
54 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * consumed from the input but not yet turned into a wide character.
 */
typedef struct {
	size_t	 chlen;		/* number of valid bytes in ch[]; 0 is the initial state */
	char	 ch[4];		/* pending bytes (the longest sequence is 4 bytes) */
} _DECHanyuState;
59 
/*
 * Per-encoding information.  Nothing in this file reads or writes it; the
 * single member only keeps the structure non-empty.
 */
typedef struct {
	int	 dummy;		/* placeholder, never accessed in this file */
} _DECHanyuEncodingInfo;
63 
/*
 * Glue macros.
 * NOTE(review): these are presumably consumed by citrus_stdenc_template.h,
 * which is included at the end of this file -- confirm against the template.
 */
/* Address of the encoding-info member of a composite object. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* State slot named s_<func> inside the composite object's "states" member. */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Prefix a method name with this module's symbol prefix. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _ENCODING_STATE			_DECHanyuState
/* The longest encoded character is the 4-byte (prefixed) form. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
73 
74 static __inline void
75 /*ARGSUSED*/
76 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,
77     _DECHanyuState * __restrict psenc)
78 {
79 
80 	psenc->chlen = 0;
81 }
82 
/*
 * Compiled out: helpers that copy a whole _DECHanyuState to and from an
 * opaque buffer of at least sizeof(_DECHanyuState) bytes.
 */
#if 0
static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _DECHanyuState * __restrict psenc)
{

	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
}

static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    _DECHanyuState * __restrict psenc,
    const void * __restrict pspriv)
{

	memcpy((void *)psenc, pspriv, sizeof(*psenc));
}
#endif
103 
104 static void
105 /*ARGSUSED*/
106 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused)
107 {
108 
109 	/* ei may be null */
110 }
111 
112 static int
113 /*ARGSUSED*/
114 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,
115     const void * __restrict var __unused, size_t lenvar __unused)
116 {
117 
118 	/* ei may be null */
119 	return (0);
120 }
121 
/* True when c is at most 0x7F, i.e. it encodes a character in one byte. */
static __inline bool
is_singlebyte(int c)
{

	return (!(c > 0x7F));
}
128 
/* True when c can start a double-byte character (0xA1 through 0xFE). */
static __inline bool
is_leadbyte(int c)
{

	return (!(c < 0xA1 || c > 0xFE));
}
135 
/*
 * True when c can end a double-byte character: with bit 7 cleared it must
 * lie in 0x21 through 0x7E, so both 0x21-0x7E and 0xA1-0xFE qualify.
 */
static __inline bool
is_trailbyte(int c)
{
	const int low = c & ~0x80;

	return (!(low < 0x21 || low > 0x7E));
}
143 
/* True when c is 0xC2, the first byte of the two-byte 0xC2 0xCB prefix. */
static __inline bool
is_hanyu1(int c)
{
	const int prefix_first = 0xC2;

	return (c == prefix_first);
}
150 
/* True when c is 0xCB, the second byte of the two-byte 0xC2 0xCB prefix. */
static __inline bool
is_hanyu2(int c)
{
	const int prefix_second = 0xCB;

	return (c == prefix_second);
}
157 
/*
 * Tag kept in the upper 16 bits of a wide character that was encoded with
 * the 0xC2 0xCB prefix (the two prefix bytes themselves, shifted up).
 */
#define HANYUBIT	0xC2CB0000
159 
/* True when c is a valid byte of a 94-character set (0x21 through 0x7E). */
static __inline bool
is_94charset(int c)
{

	return (!(c < 0x21 || c > 0x7E));
}
166 
167 static int
168 /*ARGSUSED*/
169 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
170     wchar_t * __restrict pwc, char ** __restrict s, size_t n,
171     _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
172 {
173 	char *s0;
174 	wchar_t wc;
175 	int ch;
176 
177 	if (*s == NULL) {
178 		_citrus_DECHanyu_init_state(ei, psenc);
179 		*nresult = _ENCODING_IS_STATE_DEPENDENT;
180 		return (0);
181 	}
182 	s0 = *s;
183 
184 	wc = (wchar_t)0;
185 	switch (psenc->chlen) {
186 	case 0:
187 		if (n-- < 1)
188 			goto restart;
189 		ch = *s0++ & 0xFF;
190 		if (is_singlebyte(ch)) {
191 			if (pwc != NULL)
192 				*pwc = (wchar_t)ch;
193 			*nresult = (size_t)((ch == 0) ? 0 : 1);
194 			*s = s0;
195 			return (0);
196 		}
197 		if (!is_leadbyte(ch))
198 			goto ilseq;
199 		psenc->ch[psenc->chlen++] = ch;
200 		break;
201 	case 1:
202 		ch = psenc->ch[0] & 0xFF;
203 		if (!is_leadbyte(ch))
204 			return (EINVAL);
205 		break;
206 	case 2: case 3:
207 		ch = psenc->ch[0] & 0xFF;
208 		if (is_hanyu1(ch)) {
209 			ch = psenc->ch[1] & 0xFF;
210 			if (is_hanyu2(ch)) {
211 				wc |= (wchar_t)HANYUBIT;
212 				break;
213 			}
214 		}
215 	/*FALLTHROUGH*/
216 	default:
217 		return (EINVAL);
218 	}
219 
220 	switch (psenc->chlen) {
221 	case 1:
222 		if (is_hanyu1(ch)) {
223 			if (n-- < 1)
224 				goto restart;
225 			ch = *s0++ & 0xFF;
226 			if (!is_hanyu2(ch))
227 				goto ilseq;
228 			psenc->ch[psenc->chlen++] = ch;
229 			wc |= (wchar_t)HANYUBIT;
230 			if (n-- < 1)
231 				goto restart;
232 			ch = *s0++ & 0xFF;
233 			if (!is_leadbyte(ch))
234 				goto ilseq;
235 			psenc->ch[psenc->chlen++] = ch;
236 		}
237 		break;
238 	case 2:
239 		if (n-- < 1)
240 			goto restart;
241 		ch = *s0++ & 0xFF;
242 		if (!is_leadbyte(ch))
243 			goto ilseq;
244 		psenc->ch[psenc->chlen++] = ch;
245 		break;
246 	case 3:
247 		ch = psenc->ch[2] & 0xFF;
248 		if (!is_leadbyte(ch))
249 			return (EINVAL);
250 	}
251 	if (n-- < 1)
252 		goto restart;
253 	wc |= (wchar_t)(ch << 8);
254 	ch = *s0++ & 0xFF;
255 	if (!is_trailbyte(ch))
256 		goto ilseq;
257 	wc |= (wchar_t)ch;
258 	if (pwc != NULL)
259 		*pwc = wc;
260 	*nresult = (size_t)(s0 - *s);
261 	*s = s0;
262 	psenc->chlen = 0;
263 
264 	return (0);
265 
266 restart:
267 	*nresult = (size_t)-2;
268 	*s = s0;
269 	return (0);
270 
271 ilseq:
272 	*nresult = (size_t)-1;
273 	return (EILSEQ);
274 }
275 
276 static int
277 /*ARGSUSED*/
278 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,
279     char * __restrict s, size_t n, wchar_t wc,
280     _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
281 {
282 	int ch;
283 
284 	if (psenc->chlen != 0)
285 		return (EINVAL);
286 
287 	/* XXX: assume wchar_t as int */
288 	if ((uint32_t)wc <= 0x7F) {
289 		ch = wc & 0xFF;
290 	} else {
291 		if ((uint32_t)wc > 0xFFFF) {
292 			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
293 				goto ilseq;
294 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
295 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
296 			wc &= 0xFFFF;
297 		}
298 		ch = (wc >> 8) & 0xFF;
299 		if (!is_leadbyte(ch))
300 			goto ilseq;
301 		psenc->ch[psenc->chlen++] = ch;
302 		ch = wc & 0xFF;
303 		if (!is_trailbyte(ch))
304 			goto ilseq;
305 	}
306 	psenc->ch[psenc->chlen++] = ch;
307 	if (n < psenc->chlen) {
308 		*nresult = (size_t)-1;
309 		return (E2BIG);
310 	}
311 	memcpy(s, psenc->ch, psenc->chlen);
312 	*nresult = psenc->chlen;
313 	psenc->chlen = 0;
314 
315 	return (0);
316 
317 ilseq:
318 	*nresult = (size_t)-1;
319 	return (EILSEQ);
320 }
321 
322 static __inline int
323 /*ARGSUSED*/
324 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,
325     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
326 {
327 	wchar_t mask;
328 	int plane;
329 
330 	plane = 0;
331 	mask = 0x7F;
332 	/* XXX: assume wchar_t as int */
333 	if ((uint32_t)wc > 0x7F) {
334 		if ((uint32_t)wc > 0xFFFF) {
335 			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
336 				return (EILSEQ);
337 			plane += 2;
338 		}
339 		if (!is_leadbyte((wc >> 8) & 0xFF) ||
340 		    !is_trailbyte(wc & 0xFF))
341 			return (EILSEQ);
342 		plane += (wc & 0x80) ? 1 : 2;
343 		mask |= 0x7F00;
344 	}
345 	*csid = plane;
346 	*idx = (_index_t)(wc & mask);
347 
348 	return (0);
349 }
350 
351 static __inline int
352 /*ARGSUSED*/
353 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,
354     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
355 {
356 
357 	if (csid == 0) {
358 		if (idx > 0x7F)
359 			return (EILSEQ);
360 	} else if (csid <= 4) {
361 		if (!is_94charset(idx >> 8))
362 			return (EILSEQ);
363 		if (!is_94charset(idx & 0xFF))
364 			return (EILSEQ);
365 		if (csid % 2)
366 			idx |= 0x80;
367 		idx |= 0x8000;
368 		if (csid > 2)
369 			idx |= HANYUBIT;
370 	} else
371 		return (EILSEQ);
372 	*wc = (wchar_t)idx;
373 	return (0);
374 }
375 
376 static __inline int
377 /*ARGSUSED*/
378 _citrus_DECHanyu_stdenc_get_state_desc_generic(
379     _DECHanyuEncodingInfo * __restrict ei __unused,
380     _DECHanyuState * __restrict psenc, int * __restrict rstate)
381 {
382 
383 	*rstate = (psenc->chlen == 0)
384 	    ? _STDENC_SDGEN_INITIAL
385 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
386 	return (0);
387 }
388 
389 /* ----------------------------------------------------------------------
390  * public interface for stdenc
391  */
392 
/*
 * NOTE(review): both macros are defined outside this file (presumably in
 * citrus_stdenc.h) and appear to declare and define the stdenc operations
 * for this module -- confirm against that header.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

/*
 * Included last, after the _FUNCNAME/_ENCODING_* macros and the static
 * functions above have been defined.
 */
#include "citrus_stdenc_template.h"
397