1 /* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c)2007 Citrus Project,
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/types.h>
32 
33 #include <assert.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stddef.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_bcs.h"
46 #include "citrus_module.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_dechanyu.h"
49 
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
53 
/*
 * Conversion state: the bytes of a multibyte character that have already
 * been consumed but do not yet form a complete character.
 */
typedef struct {
	size_t	 chlen;		/* number of valid bytes in ch[]; 0 is the initial state */
	char	 ch[4];		/* pending bytes; the longest sequence handled here is 4 bytes */
} _DECHanyuState;
58 
/*
 * Per-encoding information.  The only member is a placeholder; no code
 * visible in this file reads or writes it.
 */
typedef struct {
	int	 dummy;
} _DECHanyuEncodingInfo;
62 
/*
 * Accessors for a containing object: its `ei' member and the per-function
 * state member `states.s_<func>'.
 * NOTE(review): presumably the container type is supplied by the included
 * template header -- confirm.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/*
 * Encoding parameters (presumably consumed by citrus_stdenc_template.h):
 *   _FUNCNAME(m)       builds the identifier _citrus_DECHanyu_<m>.
 *   _ENCODING_INFO     type holding the encoding information.
 *   _ENCODING_STATE    type holding the conversion state.
 *   _ENCODING_MB_CUR_MAX          longest byte sequence per character (4).
 *   _ENCODING_IS_STATE_DEPENDENT  0; also the value stored in *nresult by
 *                                 mbrtowc_priv when it is asked to reset.
 *   _STATE_NEEDS_EXPLICIT_INIT    always 0 for this encoding.
 */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _ENCODING_STATE			_DECHanyuState
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
72 
73 static __inline void
74 /*ARGSUSED*/
75 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,
76     _DECHanyuState * __restrict psenc)
77 {
78 
79 	psenc->chlen = 0;
80 }
81 
/*
 * Disabled (compiled out) helpers that copy a whole _DECHanyuState to and
 * from an opaque buffer with memcpy().  Nothing in this file references
 * them while the #if 0 guard is in place.
 */
#if 0
static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _DECHanyuState * __restrict psenc)
{

	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
}

static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    _DECHanyuState * __restrict psenc,
    const void * __restrict pspriv)
{

	memcpy((void *)psenc, pspriv, sizeof(*psenc));
}
#endif
102 
103 static void
104 /*ARGSUSED*/
105 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused)
106 {
107 
108 	/* ei may be null */
109 }
110 
111 static int
112 /*ARGSUSED*/
113 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,
114     const void * __restrict var __unused, size_t lenvar __unused)
115 {
116 
117 	/* ei may be null */
118 	return (0);
119 }
120 
/*
 * True when c is a complete character on its own, i.e. not above 0x7F.
 * Callers in this file pass a value already masked to 0..0xFF.
 */
static __inline bool
is_singlebyte(int c)
{
	const int highest_single = 0x7F;

	return (!(c > highest_single));
}
127 
/*
 * True when c can start a two-byte character: 0xA1 through 0xFE inclusive.
 */
static __inline bool
is_leadbyte(int c)
{
	if (c < 0xA1)
		return (false);
	if (c > 0xFE)
		return (false);
	return (true);
}
134 
/*
 * True when c can end a two-byte character.  Bit 7 is ignored, so both
 * 0x21-0x7E and 0xA1-0xFE qualify; every other bit must already be in
 * range.
 */
static __inline bool
is_trailbyte(int c)
{
	const int without_bit7 = c & ~0x80;

	return (!(without_bit7 < 0x21 || without_bit7 > 0x7E));
}
142 
/*
 * True when c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline bool
is_hanyu1(int c)
{
	const int prefix_first = 0xC2;

	return (c == prefix_first);
}
149 
/*
 * True when c is 0xCB, the second byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline bool
is_hanyu2(int c)
{
	const int prefix_second = 0xCB;

	return (c == prefix_second);
}
156 
/*
 * Upper 16 bits of the wide character for a four-byte sequence: the prefix
 * bytes 0xC2 and 0xCB (see is_hanyu1()/is_hanyu2()) placed above the
 * 16-bit code formed by the lead and trail bytes.
 */
#define HANYUBIT	0xC2CB0000
158 
/*
 * True when c lies in 0x21-0x7E inclusive, the range accepted for each
 * byte of a character-set index.
 */
static __inline bool
is_94charset(int c)
{
	return (!(c < 0x21 || c > 0x7E));
}
165 
/*
 * Decode one character from the bytes at *s into a wide character,
 * resuming from any partial character saved in psenc by an earlier call.
 *
 * Wide-character values produced:
 *   - a byte <= 0x7F yields that byte value;
 *   - lead byte (0xA1-0xFE) + trail byte (0x21-0x7E or 0xA1-0xFE) yields
 *     (lead << 8) | trail;
 *   - the prefix 0xC2 0xCB followed by such a lead/trail pair yields
 *     HANYUBIT | (lead << 8) | trail.
 *
 * Parameters:
 *   ei      - encoding info; only handed to _citrus_DECHanyu_init_state().
 *   pwc     - receives the decoded character; may be NULL.
 *   s       - in/out input pointer.  If *s is NULL the state is reset and
 *             *nresult is set to _ENCODING_IS_STATE_DEPENDENT.
 *   n       - number of bytes available at *s.
 *   psenc   - conversion state (bytes consumed so far).
 *   nresult - on success: 0 for a NUL byte, otherwise the number of bytes
 *             consumed from *s by this call; (size_t)-2 when the input
 *             ended inside a character; (size_t)-1 on EILSEQ.
 *
 * Returns 0 on success and on incomplete input, EILSEQ for an invalid
 * byte (*s is not advanced and psenc is not reset on that path), and
 * EINVAL when the saved state is inconsistent (*nresult is not written).
 *
 * NOTE(review): a lead byte 0xC2 followed by anything other than 0xCB is
 * rejected with EILSEQ, so a two-byte character whose lead byte is 0xC2
 * cannot be decoded here even though _citrus_DECHanyu_wcrtomb_priv() will
 * emit one -- confirm this is intended.
 */
static int
/*ARGSUSED*/
_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
    wchar_t * __restrict pwc, char ** __restrict s, size_t n,
    _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
{
	char *s0;
	wchar_t wc;
	int ch;

	/* A NULL input pointer is a request to reset the state. */
	if (*s == NULL) {
		_citrus_DECHanyu_init_state(ei, psenc);
		*nresult = _ENCODING_IS_STATE_DEPENDENT;
		return (0);
	}
	s0 = *s;

	wc = (wchar_t)0;
	/*
	 * Stage 1: establish the first byte of the character, either by
	 * reading it from the input (chlen == 0) or by validating what an
	 * earlier call saved.  On leaving this switch by `break', chlen is
	 * at least 1.
	 */
	switch (psenc->chlen) {
	case 0:
		/* `n-- < 1' tests for n == 0, then counts the byte read below. */
		if (n-- < 1)
			goto restart;
		ch = *s0++ & 0xFF;
		if (is_singlebyte(ch)) {
			/* Complete single-byte character; state stays initial. */
			if (pwc != NULL)
				*pwc = (wchar_t)ch;
			*nresult = (size_t)((ch == 0) ? 0 : 1);
			*s = s0;
			return (0);
		}
		if (!is_leadbyte(ch))
			goto ilseq;
		psenc->ch[psenc->chlen++] = ch;
		break;
	case 1:
		/* Resuming after a saved first byte; it must be a lead byte. */
		ch = psenc->ch[0] & 0xFF;
		if (!is_leadbyte(ch))
			return (EINVAL);
		break;
	case 2: case 3:
		/*
		 * Two or three saved bytes are only possible inside a
		 * four-byte character, so the first two must be the
		 * 0xC2 0xCB prefix.
		 */
		ch = psenc->ch[0] & 0xFF;
		if (is_hanyu1(ch)) {
			ch = psenc->ch[1] & 0xFF;
			if (is_hanyu2(ch)) {
				wc |= (wchar_t)HANYUBIT;
				break;
			}
		}
	/*FALLTHROUGH*/
	default:
		return (EINVAL);
	}

	/*
	 * Stage 2: make sure `ch' holds the lead byte of the final
	 * lead/trail pair, reading the rest of the prefix and/or the lead
	 * byte from the input when they have not been seen yet.
	 */
	switch (psenc->chlen) {
	case 1:
		if (is_hanyu1(ch)) {
			/* 0xC2 seen: the next byte must complete the prefix. */
			if (n-- < 1)
				goto restart;
			ch = *s0++ & 0xFF;
			if (!is_hanyu2(ch))
				goto ilseq;
			psenc->ch[psenc->chlen++] = ch;
			wc |= (wchar_t)HANYUBIT;
			/* Then the lead byte of the pair after the prefix. */
			if (n-- < 1)
				goto restart;
			ch = *s0++ & 0xFF;
			if (!is_leadbyte(ch))
				goto ilseq;
			psenc->ch[psenc->chlen++] = ch;
		}
		/* Otherwise ch already is the lead byte of a two-byte pair. */
		break;
	case 2:
		/* Prefix saved earlier; read the lead byte that follows it. */
		if (n-- < 1)
			goto restart;
		ch = *s0++ & 0xFF;
		if (!is_leadbyte(ch))
			goto ilseq;
		psenc->ch[psenc->chlen++] = ch;
		break;
	case 3:
		/* Prefix and lead byte were both saved by an earlier call. */
		ch = psenc->ch[2] & 0xFF;
		if (!is_leadbyte(ch))
			return (EINVAL);
	}
	/* Final step: one more byte, the trail byte, completes the character. */
	if (n-- < 1)
		goto restart;
	wc |= (wchar_t)(ch << 8);
	ch = *s0++ & 0xFF;
	if (!is_trailbyte(ch))
		goto ilseq;
	wc |= (wchar_t)ch;
	if (pwc != NULL)
		*pwc = wc;
	/* Count only the bytes taken from the input during this call. */
	*nresult = (size_t)(s0 - *s);
	*s = s0;
	psenc->chlen = 0;

	return (0);

restart:
	/*
	 * Input ran out inside a character.  Every byte read so far has
	 * been stored in psenc->ch, so advance *s past them and let the
	 * next call resume from the saved state.
	 */
	*nresult = (size_t)-2;
	*s = s0;
	return (0);

ilseq:
	/* Invalid byte: *s is left where it was. */
	*nresult = (size_t)-1;
	return (EILSEQ);
}
274 
275 static int
276 /*ARGSUSED*/
277 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,
278     char * __restrict s, size_t n, wchar_t wc,
279     _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
280 {
281 	int ch;
282 
283 	if (psenc->chlen != 0)
284 		return (EINVAL);
285 
286 	/* XXX: assume wchar_t as int */
287 	if ((uint32_t)wc <= 0x7F) {
288 		ch = wc & 0xFF;
289 	} else {
290 		if ((uint32_t)wc > 0xFFFF) {
291 			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
292 				goto ilseq;
293 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
294 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
295 			wc &= 0xFFFF;
296 		}
297 		ch = (wc >> 8) & 0xFF;
298 		if (!is_leadbyte(ch))
299 			goto ilseq;
300 		psenc->ch[psenc->chlen++] = ch;
301 		ch = wc & 0xFF;
302 		if (!is_trailbyte(ch))
303 			goto ilseq;
304 	}
305 	psenc->ch[psenc->chlen++] = ch;
306 	if (n < psenc->chlen) {
307 		*nresult = (size_t)-1;
308 		return (E2BIG);
309 	}
310 	memcpy(s, psenc->ch, psenc->chlen);
311 	*nresult = psenc->chlen;
312 	psenc->chlen = 0;
313 
314 	return (0);
315 
316 ilseq:
317 	*nresult = (size_t)-1;
318 	return (EILSEQ);
319 }
320 
321 static __inline int
322 /*ARGSUSED*/
323 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,
324     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
325 {
326 	wchar_t mask;
327 	int plane;
328 
329 	plane = 0;
330 	mask = 0x7F;
331 	/* XXX: assume wchar_t as int */
332 	if ((uint32_t)wc > 0x7F) {
333 		if ((uint32_t)wc > 0xFFFF) {
334 			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
335 				return (EILSEQ);
336 			plane += 2;
337 		}
338 		if (!is_leadbyte((wc >> 8) & 0xFF) ||
339 		    !is_trailbyte(wc & 0xFF))
340 			return (EILSEQ);
341 		plane += (wc & 0x80) ? 1 : 2;
342 		mask |= 0x7F00;
343 	}
344 	*csid = plane;
345 	*idx = (_index_t)(wc & mask);
346 
347 	return (0);
348 }
349 
350 static __inline int
351 /*ARGSUSED*/
352 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,
353     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
354 {
355 
356 	if (csid == 0) {
357 		if (idx > 0x7F)
358 			return (EILSEQ);
359 	} else if (csid <= 4) {
360 		if (!is_94charset(idx >> 8))
361 			return (EILSEQ);
362 		if (!is_94charset(idx & 0xFF))
363 			return (EILSEQ);
364 		if (csid % 2)
365 			idx |= 0x80;
366 		idx |= 0x8000;
367 		if (csid > 2)
368 			idx |= HANYUBIT;
369 	} else
370 		return (EILSEQ);
371 	*wc = (wchar_t)idx;
372 	return (0);
373 }
374 
375 static __inline int
376 /*ARGSUSED*/
377 _citrus_DECHanyu_stdenc_get_state_desc_generic(
378     _DECHanyuEncodingInfo * __restrict ei __unused,
379     _DECHanyuState * __restrict psenc, int * __restrict rstate)
380 {
381 
382 	*rstate = (psenc->chlen == 0)
383 	    ? _STDENC_SDGEN_INITIAL
384 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
385 	return (0);
386 }
387 
388 /* ----------------------------------------------------------------------
389  * public interface for stdenc
390  */
391 
/*
 * NOTE(review): these macros are defined outside this file; presumably they
 * declare the stdenc entry points and define the operations table for the
 * DECHanyu encoding -- confirm against citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

/*
 * NOTE(review): presumably supplies the generic stdenc implementation built
 * on the _FUNCNAME()/_ENCODING_* macros and the static functions defined
 * above, which is why it is included last -- confirm.
 */
#include "citrus_stdenc_template.h"
396