xref: /netbsd/lib/libc/citrus/modules/citrus_zw.c (revision 6550d01e)
1 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */
2 
3 /*-
4  * Copyright (c)2004, 2006 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 #if defined(LIB_SCCS) && !defined(lint)
32 __RCSID("$NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $");
33 #endif /* LIB_SCCS and not lint */
34 
35 #include <sys/types.h>
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <stddef.h>
43 #include <wchar.h>
44 #include <limits.h>
45 
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_zw.h"
52 
53 /* ----------------------------------------------------------------------
54  * private definitions used by the templates
55  */
56 
/*
 * Per-encoding information record.  Nothing in this file reads or writes
 * the single member; it only gives the struct a body.
 */
57 typedef struct {
58 	int dummy;
59 } _ZWEncodingInfo;
60 
/*
 * Shift states tracked while converting:
 *   NONE      - initial state, nothing selected yet.
 *   AMBIGIOUS - a 'z' has been read; the next byte decides whether it
 *               starts the "zW" marker or was a plain character.
 *   ASCII     - passing single bytes through until '\n' or '\0'.
 *   GB2312    - after "zW": input is taken as byte pairs until '\n'
 *               or '\0'.
 */
61 typedef enum {
62 	NONE, AMBIGIOUS, ASCII, GB2312
63 } _ZWCharset;
64 
/*
 * Conversion state carried between calls.
 */
65 typedef struct {
	/* number of bytes currently held in ch[] */
66 	int		chlen;
	/* bytes collected so far for the character being processed */
67 	char		ch[4];
	/* current shift state */
68 	_ZWCharset	charset;
69 } _ZWState;
70 
/*
 * Encoding information plus one private conversion state per stateful
 * multibyte/wide-character function; the members are selected by name
 * through _CEI_TO_STATE().
 */
71 typedef struct {
72 	_ZWEncodingInfo	ei;
73 	struct {
74 		/* for future multi-locale facility */
75 		_ZWState	s_mblen;
76 		_ZWState	s_mbrlen;
77 		_ZWState	s_mbrtowc;
78 		_ZWState	s_mbtowc;
79 		_ZWState	s_mbsrtowcs;
80 		_ZWState	s_wcrtomb;
81 		_ZWState	s_wcsrtombs;
82 		_ZWState	s_wctomb;
83 	} states;
84 } _ZWCTypeInfo;
85 
/*
 * Configuration macros.
 * NOTE(review): presumably consumed by citrus_ctype_template.h and
 * citrus_stdenc_template.h, which are included at the end of this file;
 * confirm against those headers.
 */
/* address of the encoding info embedded in a _ZWCTypeInfo */
86 #define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* the per-function state member s_<func> of a _ZWCTypeInfo */
87 #define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
88 
/* builds the _citrus_ZW_ prefixed name for m */
89 #define _FUNCNAME(m)			_citrus_ZW_##m
90 #define _ENCODING_INFO			_ZWEncodingInfo
91 #define _CTYPE_INFO			_ZWCTypeInfo
92 #define _ENCODING_STATE			_ZWState
/* always MB_LEN_MAX; the argument is ignored */
93 #define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
94 #define _ENCODING_IS_STATE_DEPENDENT		1
/* true whenever the state is not in the initial NONE charset */
95 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
96 
97 static __inline void
98 /*ARGSUSED*/
99 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei,
100 	_ZWState * __restrict psenc)
101 {
102 	/* ei my be unused */
103 	_DIAGASSERT(psenc != NULL);
104 
105 	psenc->chlen = 0;
106 	psenc->charset = NONE;
107 }
108 
109 static __inline void
110 /*ARGSUSED*/
111 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei,
112 	void *__restrict pspriv, const _ZWState * __restrict psenc)
113 {
114 	/* ei may be unused */
115 	_DIAGASSERT(pspriv != NULL);
116 	_DIAGASSERT(psenc != NULL);
117 
118 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
119 }
120 
121 static __inline void
122 /*ARGSUSED*/
123 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei,
124 	_ZWState * __restrict psenc, const void * __restrict pspriv)
125 {
126 	/* ei may be unused */
127 	_DIAGASSERT(psenc != NULL);
128 	_DIAGASSERT(pspriv != NULL);
129 
130 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
131 }
132 
/*
 * Decode at most one wide character from the zW byte stream at *s.
 *
 * ei      - only handed on to _citrus_ZW_init_state().
 * pwc     - receives the decoded character; may be NULL.
 * s       - in/out input pointer.  *s == NULL asks for a state reset.
 *           Otherwise *s is advanced past the bytes consumed, both on
 *           success and when the input runs out.
 * n       - number of input bytes available.
 * psenc   - shift state and byte buffer, updated as input is consumed.
 * nresult - receives _ENCODING_IS_STATE_DEPENDENT for a reset request,
 *           (size_t)-2 when the input ran out, (size_t)-1 on EILSEQ,
 *           0 when the decoded character is 0, and otherwise the number
 *           of bytes consumed by this call.
 *
 * Returns 0, EILSEQ for an invalid byte sequence, or EINVAL when *psenc
 * holds a chlen/charset combination this function never produces.
 */
133 static int
134 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
135 	wchar_t * __restrict pwc, const char **__restrict s, size_t n,
136 	_ZWState * __restrict psenc, size_t * __restrict nresult)
137 {
138 	const char *s0;
139 	int ch, len;
140 	wchar_t	 wc;
141 
142 	/* ei may be unused */
143 	/* pwc may be null */
144 	_DIAGASSERT(s != NULL);
145 	_DIAGASSERT(psenc != NULL);
146 	_DIAGASSERT(nresult != NULL);
147 
	/* a NULL input pointer resets the state and reports state dependence */
148 	if (*s == NULL) {
149 		_citrus_ZW_init_state(ei, psenc);
150 		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
151 		return 0;
152 	}
153 	s0 = *s;
154 	len = 0;
155 
/*
 * STORE fetches the next input byte into ch and appends it to psenc->ch.
 * If no input is left it sets *nresult to (size_t)-2, publishes the bytes
 * consumed so far through *s and returns 0 from the enclosing function,
 * leaving the partial progress in *psenc.  A byte above 0x7F, or too many
 * bytes in a single call, jumps to ilseq.
 */
156 #define	STORE				\
157 do {					\
158 	if (n-- < 1) {			\
159 		*nresult = (size_t)-2;	\
160 		*s = s0;		\
161 		return 0;		\
162 	}				\
163 	ch = (unsigned char)*s0++;	\
164 	if (len++ > MB_LEN_MAX || ch > 0x7F)\
165 		goto ilseq;		\
166 	psenc->ch[psenc->chlen++] = ch;	\
167 } while (/*CONSTCOND*/0)
168 
169 loop:
170 	switch (psenc->charset) {
171 	case ASCII:
172 		switch (psenc->chlen) {
173 		case 0:
174 			STORE;
			/* end of line or end of string leaves ASCII mode */
175 			switch (psenc->ch[0]) {
176 			case '\0': case '\n':
177 				psenc->charset = NONE;
178 			}
179 		/*FALLTHROUGH*/
		/* chlen == 1: a byte buffered by the AMBIGIOUS case is pending */
180 		case 1:
181 			break;
182 		default:
183 			return EINVAL;
184 		}
185 		ch = (unsigned char)psenc->ch[0];
186 		if (ch > 0x7F)
187 			goto ilseq;
188 		wc = (wchar_t)ch;
189 		psenc->chlen = 0;
190 		break;
191 	case NONE:
192 		if (psenc->chlen != 0)
193 			return EINVAL;
194 		STORE;
195 		ch = (unsigned char)psenc->ch[0];
		/*
		 * Anything but 'z' is returned as is, and selects ASCII mode
		 * unless it is '\n' or '\0'.
		 */
196 		if (ch != 'z') {
197 			if (ch != '\n' && ch != '\0')
198 				psenc->charset = ASCII;
199 			wc = (wchar_t)ch;
200 			psenc->chlen = 0;
201 			break;
202 		}
		/* 'z' may start the "zW" marker; the next byte decides */
203 		psenc->charset = AMBIGIOUS;
204 		psenc->chlen = 0;
205 	/* FALLTHROUGH */
206 	case AMBIGIOUS:
207 		if (psenc->chlen != 0)
208 			return EINVAL;
209 		STORE;
		/*
		 * Not "zW": return the 'z' now.  The byte just stored stays in
		 * ch[0] with chlen == 1 and is returned by the next call
		 * through `case 1' of the ASCII branch.
		 * NOTE(review): that follow-up call consumes no input, so it
		 * sets *nresult to 0 even for a non-null character -- confirm
		 * the callers cope with this.
		 */
210 		if (psenc->ch[0] != 'W') {
211 			psenc->charset = ASCII;
212 			wc = L'z';
213 			break;
214 		}
215 		psenc->charset = GB2312;
216 		psenc->chlen = 0;
217 	/* FALLTHROUGH */
218 	case GB2312:
219 		switch (psenc->chlen) {
220 		case 0:
221 			STORE;
222 			ch = (unsigned char)psenc->ch[0];
			/* '\0' ends GB2312 mode and is returned itself */
223 			if (ch == '\0') {
224 				psenc->charset = NONE;
225 				wc = (wchar_t)ch;
226 				psenc->chlen = 0;
227 				break;
			/* '\n' ends GB2312 mode; decoding restarts in NONE */
228 			} else if (ch == '\n') {
229 				psenc->charset = NONE;
230 				psenc->chlen = 0;
231 				goto loop;
232 			}
233 		/*FALLTHROUGH*/
234 		case 1:
235 			STORE;
			/* ' ' followed by a byte yields that byte unchanged */
236 			if (psenc->ch[0] == ' ') {
237 				ch = (unsigned char)psenc->ch[1];
238 				wc = (wchar_t)ch;
239 				psenc->chlen = 0;
240 				break;
			/*
			 * "#\n" yields a newline and leaves GB2312 mode;
			 * "# " yields a space.
			 */
241 			} else if (psenc->ch[0] == '#') {
242 				ch = (unsigned char)psenc->ch[1];
243 				if (ch == '\n') {
244 					psenc->charset = NONE;
245 					wc = (wchar_t)ch;
246 					psenc->chlen = 0;
247 					break;
248 				} else if (ch == ' ') {
249 					wc = (wchar_t)ch;
250 					psenc->chlen = 0;
251 					break;
252 				}
253 			}
			/*
			 * Otherwise both bytes must lie in 0x21..0x7E; the
			 * result is (first << 8) | second.
			 */
254 			ch = (unsigned char)psenc->ch[0];
255 			if (ch < 0x21 || ch > 0x7E)
256 				goto ilseq;
257 			wc = (wchar_t)(ch << 8);
258 			ch = (unsigned char)psenc->ch[1];
259 			if (ch < 0x21 || ch > 0x7E) {
/* shared error exit, also the target of STORE and the checks above */
260 ilseq:
261 				*nresult = (size_t)-1;
262 				return EILSEQ;
263 			}
264 			wc |= (wchar_t)ch;
265 			psenc->chlen = 0;
266 			break;
267 		default:
268 			return EINVAL;
269 		}
270 		break;
271 	default:
272 		return EINVAL;
273 	}
274 	if (pwc != NULL)
275 		*pwc = wc;
276 
	/* a decoded null character is reported as length 0 */
277 	*nresult = (size_t)(wc == 0 ? 0 : len);
278 	*s = s0;
279 
280 	return 0;
281 }
282 
283 static int
284 /*ARGSUSED*/
285 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei,
286 	char *__restrict s, size_t n, wchar_t wc,
287 	_ZWState * __restrict psenc, size_t * __restrict nresult)
288 {
289 	int ch;
290 
291 	/* ei may be null */
292 	_DIAGASSERT(s != NULL);
293 	_DIAGASSERT(psenc != NULL);
294 	_DIAGASSERT(nresult != NULL);
295 
296 	if (psenc->chlen != 0)
297 		return EINVAL;
298 	if ((uint32_t)wc <= 0x7F) {
299 		ch = (unsigned char)wc;
300 		switch (psenc->charset) {
301 		case NONE:
302 			if (ch == '\0' || ch == '\n') {
303 				psenc->ch[psenc->chlen++] = ch;
304 			} else {
305 				if (n < 4)
306 					return E2BIG;
307 				n -= 4;
308 				psenc->ch[psenc->chlen++] = 'z';
309 				psenc->ch[psenc->chlen++] = 'W';
310 				psenc->ch[psenc->chlen++] = ' ';
311 				psenc->ch[psenc->chlen++] = ch;
312 				psenc->charset = GB2312;
313 			}
314 			break;
315 		case GB2312:
316 			if (n < 2)
317 				return E2BIG;
318 			n -= 2;
319 			if (ch == '\0') {
320 				psenc->ch[psenc->chlen++] = '\n';
321 				psenc->ch[psenc->chlen++] = '\0';
322 				psenc->charset = NONE;
323 			} else if (ch == '\n') {
324 				psenc->ch[psenc->chlen++] = '#';
325 				psenc->ch[psenc->chlen++] = '\n';
326 				psenc->charset = NONE;
327 			} else {
328 				psenc->ch[psenc->chlen++] = ' ';
329 				psenc->ch[psenc->chlen++] = ch;
330 			}
331 			break;
332 		default:
333 			return EINVAL;
334 		}
335 	} else if ((uint32_t)wc <= 0x7E7E) {
336 		switch (psenc->charset) {
337 		case NONE:
338 			if (n < 2)
339 				return E2BIG;
340 			n -= 2;
341 			psenc->ch[psenc->chlen++] = 'z';
342 			psenc->ch[psenc->chlen++] = 'W';
343 			psenc->charset = GB2312;
344 		/* FALLTHROUGH*/
345 		case GB2312:
346 			if (n < 2)
347 				return E2BIG;
348 			n -= 2;
349 			ch = (wc >> 8) & 0xFF;
350 			if (ch < 0x21 || ch > 0x7E)
351 				goto ilseq;
352 			psenc->ch[psenc->chlen++] = ch;
353 			ch = wc & 0xFF;
354 			if (ch < 0x21 || ch > 0x7E)
355 				goto ilseq;
356 			psenc->ch[psenc->chlen++] = ch;
357 			break;
358 		default:
359 			return EINVAL;
360 		}
361 	} else {
362 ilseq:
363 		*nresult = (size_t)-1;
364 		return EILSEQ;
365 	}
366 	memcpy(s, psenc->ch, psenc->chlen);
367 	*nresult = psenc->chlen;
368 	psenc->chlen = 0;
369 
370 	return 0;
371 }
372 
373 static int
374 /*ARGSUSED*/
375 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei,
376 	char * __restrict s, size_t n,
377 	_ZWState * __restrict psenc, size_t * __restrict nresult)
378 {
379 	/* ei may be unused */
380 	_DIAGASSERT(s != NULL);
381 	_DIAGASSERT(psenc != NULL);
382 	_DIAGASSERT(nresult != NULL);
383 
384 	if (psenc->chlen != 0)
385 		return EINVAL;
386 	switch (psenc->charset) {
387 	case GB2312:
388 		if (n-- < 1)
389 			return E2BIG;
390 		psenc->ch[psenc->chlen++] = '\n';
391 		psenc->charset = NONE;
392 	/*FALLTHROUGH*/
393 	case NONE:
394 		*nresult = psenc->chlen;
395 		if (psenc->chlen > 0) {
396 			memcpy(s, psenc->ch, psenc->chlen);
397 			psenc->chlen = 0;
398 		}
399 		break;
400 	default:
401 		return EINVAL;
402 	}
403 
404 	return 0;
405 }
406 
407 static __inline int
408 /*ARGSUSED*/
409 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei,
410 	_ZWState * __restrict psenc, int * __restrict rstate)
411 {
412 	/* ei may be unused */
413 	_DIAGASSERT(psenc != NULL);
414 	_DIAGASSERT(rstate != NULL);
415 
416 	switch (psenc->charset) {
417 	case NONE:
418 		if (psenc->chlen != 0)
419 			return EINVAL;
420 		*rstate = _STDENC_SDGEN_INITIAL;
421 		break;
422 	case AMBIGIOUS:
423 		if (psenc->chlen != 0)
424 			return EINVAL;
425 		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
426 		break;
427 	case ASCII:
428 	case GB2312:
429 		switch (psenc->chlen) {
430 		case 0:
431 			*rstate = _STDENC_SDGEN_STABLE;
432 			break;
433 		case 1:
434 			*rstate = (psenc->ch[0] == '#')
435 			    ? _STDENC_SDGEN_INCOMPLETE_SHIFT
436 			    : _STDENC_SDGEN_INCOMPLETE_CHAR;
437 			break;
438 		default:
439 			return EINVAL;
440 		}
441 		break;
442 	default:
443 		return EINVAL;
444 	}
445 	return 0;
446 }
447 
448 static __inline int
449 /*ARGSUSED*/
450 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei,
451 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
452 {
453 	/* ei seems to be unused */
454 	_DIAGASSERT(csid != NULL);
455 	_DIAGASSERT(idx != NULL);
456 
457 	*csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1;
458 	*idx = (_index_t)wc;
459 
460 	return 0;
461 }
462 
463 static __inline int
464 /*ARGSUSED*/
465 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei,
466 	 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
467 {
468 	/* ei seems to be unused */
469 	_DIAGASSERT(wc != NULL);
470 
471 	switch (csid) {
472 	case 0: case 1:
473 		break;
474 	default:
475 		return EINVAL;
476 	}
477 	*wc = (wchar_t)idx;
478 
479 	return 0;
480 }
481 
482 static void
483 /*ARGSUSED*/
484 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei)
485 {
486 }
487 
488 static int
489 /*ARGSUSED*/
490 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei,
491 	const void *__restrict var, size_t lenvar)
492 {
493 	return 0;
494 }
495 
496 /* ----------------------------------------------------------------------
497  * public interface for ctype
498  */
499 
/*
 * NOTE(review): these macros are not defined in this file; presumably
 * they come from citrus_ctype.h and declare the ZW ctype operations that
 * the template included below implements on top of the _citrus_ZW_*
 * helpers -- confirm against the headers.
 */
500 _CITRUS_CTYPE_DECLS(ZW);
501 _CITRUS_CTYPE_DEF_OPS(ZW);
502 
503 #include "citrus_ctype_template.h"
504 
505 /* ----------------------------------------------------------------------
506  * public interface for stdenc
507  */
508 
/*
 * NOTE(review): these macros are not defined in this file; presumably
 * they come from citrus_stdenc.h and declare the ZW stdenc operations
 * that the template included below implements on top of the _citrus_ZW_*
 * helpers -- confirm against the headers.
 */
509 _CITRUS_STDENC_DECLS(ZW);
510 _CITRUS_STDENC_DEF_OPS(ZW);
511 
512 #include "citrus_stdenc_template.h"
513