xref: /netbsd/lib/libc/citrus/modules/citrus_zw.c (revision abd1934e)
1 /* $NetBSD: citrus_zw.c,v 1.6 2022/04/19 20:32:14 rillig Exp $ */
2 
3 /*-
4  * Copyright (c)2004, 2006 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 #if defined(LIB_SCCS) && !defined(lint)
32 __RCSID("$NetBSD: citrus_zw.c,v 1.6 2022/04/19 20:32:14 rillig Exp $");
33 #endif /* LIB_SCCS and not lint */
34 
35 #include <sys/types.h>
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <stddef.h>
43 #include <wchar.h>
44 #include <limits.h>
45 
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_zw.h"
52 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
56 
/*
 * Per-encoding information.  Nothing in this file reads or writes the
 * single member; it only acts as a placeholder.
 */
typedef struct {
	int dummy;
} _ZWEncodingInfo;
60 
/*
 * Shift state of the converter.  The order of the enumerators is kept:
 * NONE must stay 0 and the others follow consecutively.
 */
typedef enum {
	NONE,		/* charset of the current line not decided yet */
	AMBIGIOUS,	/* a leading 'z' was read; is 'W' next? */
	ASCII,		/* remainder of the line is plain ASCII */
	GB2312		/* line was introduced by "zW" */
} _ZWCharset;
64 
65 typedef struct {
66 	int		chlen;
67 	char		ch[4];
68 	_ZWCharset	charset;
69 } _ZWState;
70 
71 typedef struct {
72 	_ZWEncodingInfo	ei;
73 	struct {
74 		/* for future multi-locale facility */
75 		_ZWState	s_mblen;
76 		_ZWState	s_mbrlen;
77 		_ZWState	s_mbrtowc;
78 		_ZWState	s_mbtowc;
79 		_ZWState	s_mbsrtowcs;
80 		_ZWState	s_mbsnrtowcs;
81 		_ZWState	s_wcrtomb;
82 		_ZWState	s_wcsrtombs;
83 		_ZWState	s_wcsnrtombs;
84 		_ZWState	s_wctomb;
85 	} states;
86 } _ZWCTypeInfo;
87 
/* Accessors for the members of a _ZWCTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and properties consumed by the citrus templates included at the
 * end of this file.
 */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _CTYPE_INFO			_ZWCTypeInfo
#define _ENCODING_STATE			_ZWState
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT		1
/* A state whose charset is not NONE is not in the initial shift state. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
98 
99 static __inline void
100 /*ARGSUSED*/
_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei,_ZWState * __restrict psenc)101 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei,
102 	_ZWState * __restrict psenc)
103 {
104 	/* ei my be unused */
105 	_DIAGASSERT(psenc != NULL);
106 
107 	psenc->chlen = 0;
108 	psenc->charset = NONE;
109 }
110 
111 static __inline void
112 /*ARGSUSED*/
_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei,void * __restrict pspriv,const _ZWState * __restrict psenc)113 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei,
114 	void *__restrict pspriv, const _ZWState * __restrict psenc)
115 {
116 	/* ei may be unused */
117 	_DIAGASSERT(pspriv != NULL);
118 	_DIAGASSERT(psenc != NULL);
119 
120 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
121 }
122 
123 static __inline void
124 /*ARGSUSED*/
_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei,_ZWState * __restrict psenc,const void * __restrict pspriv)125 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei,
126 	_ZWState * __restrict psenc, const void * __restrict pspriv)
127 {
128 	/* ei may be unused */
129 	_DIAGASSERT(psenc != NULL);
130 	_DIAGASSERT(pspriv != NULL);
131 
132 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
133 }
134 
135 static int
_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,wchar_t * __restrict pwc,const char ** __restrict s,size_t n,_ZWState * __restrict psenc,size_t * __restrict nresult)136 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
137 	wchar_t * __restrict pwc, const char **__restrict s, size_t n,
138 	_ZWState * __restrict psenc, size_t * __restrict nresult)
139 {
140 	const char *s0;
141 	int ch, len;
142 	wchar_t	 wc;
143 
144 	/* ei may be unused */
145 	/* pwc may be null */
146 	_DIAGASSERT(s != NULL);
147 	_DIAGASSERT(psenc != NULL);
148 	_DIAGASSERT(nresult != NULL);
149 
150 	if (*s == NULL) {
151 		_citrus_ZW_init_state(ei, psenc);
152 		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
153 		return 0;
154 	}
155 	s0 = *s;
156 	len = 0;
157 
158 #define	STORE				\
159 do {					\
160 	if (n-- < 1) {			\
161 		*nresult = (size_t)-2;	\
162 		*s = s0;		\
163 		return 0;		\
164 	}				\
165 	ch = (unsigned char)*s0++;	\
166 	if (len++ > MB_LEN_MAX || ch > 0x7F)\
167 		goto ilseq;		\
168 	psenc->ch[psenc->chlen++] = ch;	\
169 } while (0)
170 
171 loop:
172 	switch (psenc->charset) {
173 	case ASCII:
174 		switch (psenc->chlen) {
175 		case 0:
176 			STORE;
177 			switch (psenc->ch[0]) {
178 			case '\0': case '\n':
179 				psenc->charset = NONE;
180 			}
181 		/*FALLTHROUGH*/
182 		case 1:
183 			break;
184 		default:
185 			return EINVAL;
186 		}
187 		ch = (unsigned char)psenc->ch[0];
188 		if (ch > 0x7F)
189 			goto ilseq;
190 		wc = (wchar_t)ch;
191 		psenc->chlen = 0;
192 		break;
193 	case NONE:
194 		if (psenc->chlen != 0)
195 			return EINVAL;
196 		STORE;
197 		ch = (unsigned char)psenc->ch[0];
198 		if (ch != 'z') {
199 			if (ch != '\n' && ch != '\0')
200 				psenc->charset = ASCII;
201 			wc = (wchar_t)ch;
202 			psenc->chlen = 0;
203 			break;
204 		}
205 		psenc->charset = AMBIGIOUS;
206 		psenc->chlen = 0;
207 	/* FALLTHROUGH */
208 	case AMBIGIOUS:
209 		if (psenc->chlen != 0)
210 			return EINVAL;
211 		STORE;
212 		if (psenc->ch[0] != 'W') {
213 			psenc->charset = ASCII;
214 			wc = L'z';
215 			break;
216 		}
217 		psenc->charset = GB2312;
218 		psenc->chlen = 0;
219 	/* FALLTHROUGH */
220 	case GB2312:
221 		switch (psenc->chlen) {
222 		case 0:
223 			STORE;
224 			ch = (unsigned char)psenc->ch[0];
225 			if (ch == '\0') {
226 				psenc->charset = NONE;
227 				wc = (wchar_t)ch;
228 				psenc->chlen = 0;
229 				break;
230 			} else if (ch == '\n') {
231 				psenc->charset = NONE;
232 				psenc->chlen = 0;
233 				goto loop;
234 			}
235 		/*FALLTHROUGH*/
236 		case 1:
237 			STORE;
238 			if (psenc->ch[0] == ' ') {
239 				ch = (unsigned char)psenc->ch[1];
240 				wc = (wchar_t)ch;
241 				psenc->chlen = 0;
242 				break;
243 			} else if (psenc->ch[0] == '#') {
244 				ch = (unsigned char)psenc->ch[1];
245 				if (ch == '\n') {
246 					psenc->charset = NONE;
247 					wc = (wchar_t)ch;
248 					psenc->chlen = 0;
249 					break;
250 				} else if (ch == ' ') {
251 					wc = (wchar_t)ch;
252 					psenc->chlen = 0;
253 					break;
254 				}
255 			}
256 			ch = (unsigned char)psenc->ch[0];
257 			if (ch < 0x21 || ch > 0x7E)
258 				goto ilseq;
259 			wc = (wchar_t)(ch << 8);
260 			ch = (unsigned char)psenc->ch[1];
261 			if (ch < 0x21 || ch > 0x7E) {
262 ilseq:
263 				*nresult = (size_t)-1;
264 				return EILSEQ;
265 			}
266 			wc |= (wchar_t)ch;
267 			psenc->chlen = 0;
268 			break;
269 		default:
270 			return EINVAL;
271 		}
272 		break;
273 	default:
274 		return EINVAL;
275 	}
276 	if (pwc != NULL)
277 		*pwc = wc;
278 
279 	*nresult = (size_t)(wc == 0 ? 0 : len);
280 	*s = s0;
281 
282 	return 0;
283 }
284 
285 static int
286 /*ARGSUSED*/
_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_ZWState * __restrict psenc,size_t * __restrict nresult)287 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei,
288 	char *__restrict s, size_t n, wchar_t wc,
289 	_ZWState * __restrict psenc, size_t * __restrict nresult)
290 {
291 	int ch;
292 
293 	/* ei may be null */
294 	_DIAGASSERT(s != NULL);
295 	_DIAGASSERT(psenc != NULL);
296 	_DIAGASSERT(nresult != NULL);
297 
298 	if (psenc->chlen != 0)
299 		return EINVAL;
300 	if ((uint32_t)wc <= 0x7F) {
301 		ch = (unsigned char)wc;
302 		switch (psenc->charset) {
303 		case NONE:
304 			if (ch == '\0' || ch == '\n') {
305 				psenc->ch[psenc->chlen++] = ch;
306 			} else {
307 				if (n < 4)
308 					return E2BIG;
309 				n -= 4;
310 				psenc->ch[psenc->chlen++] = 'z';
311 				psenc->ch[psenc->chlen++] = 'W';
312 				psenc->ch[psenc->chlen++] = ' ';
313 				psenc->ch[psenc->chlen++] = ch;
314 				psenc->charset = GB2312;
315 			}
316 			break;
317 		case GB2312:
318 			if (n < 2)
319 				return E2BIG;
320 			n -= 2;
321 			if (ch == '\0') {
322 				psenc->ch[psenc->chlen++] = '\n';
323 				psenc->ch[psenc->chlen++] = '\0';
324 				psenc->charset = NONE;
325 			} else if (ch == '\n') {
326 				psenc->ch[psenc->chlen++] = '#';
327 				psenc->ch[psenc->chlen++] = '\n';
328 				psenc->charset = NONE;
329 			} else {
330 				psenc->ch[psenc->chlen++] = ' ';
331 				psenc->ch[psenc->chlen++] = ch;
332 			}
333 			break;
334 		default:
335 			return EINVAL;
336 		}
337 	} else if ((uint32_t)wc <= 0x7E7E) {
338 		switch (psenc->charset) {
339 		case NONE:
340 			if (n < 2)
341 				return E2BIG;
342 			n -= 2;
343 			psenc->ch[psenc->chlen++] = 'z';
344 			psenc->ch[psenc->chlen++] = 'W';
345 			psenc->charset = GB2312;
346 		/* FALLTHROUGH*/
347 		case GB2312:
348 			if (n < 2)
349 				return E2BIG;
350 			n -= 2;
351 			ch = (wc >> 8) & 0xFF;
352 			if (ch < 0x21 || ch > 0x7E)
353 				goto ilseq;
354 			psenc->ch[psenc->chlen++] = ch;
355 			ch = wc & 0xFF;
356 			if (ch < 0x21 || ch > 0x7E)
357 				goto ilseq;
358 			psenc->ch[psenc->chlen++] = ch;
359 			break;
360 		default:
361 			return EINVAL;
362 		}
363 	} else {
364 ilseq:
365 		*nresult = (size_t)-1;
366 		return EILSEQ;
367 	}
368 	memcpy(s, psenc->ch, psenc->chlen);
369 	*nresult = psenc->chlen;
370 	psenc->chlen = 0;
371 
372 	return 0;
373 }
374 
375 static int
376 /*ARGSUSED*/
_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei,char * __restrict s,size_t n,_ZWState * __restrict psenc,size_t * __restrict nresult)377 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei,
378 	char * __restrict s, size_t n,
379 	_ZWState * __restrict psenc, size_t * __restrict nresult)
380 {
381 	/* ei may be unused */
382 	_DIAGASSERT(s != NULL);
383 	_DIAGASSERT(psenc != NULL);
384 	_DIAGASSERT(nresult != NULL);
385 
386 	if (psenc->chlen != 0)
387 		return EINVAL;
388 	switch (psenc->charset) {
389 	case GB2312:
390 		if (n-- < 1)
391 			return E2BIG;
392 		psenc->ch[psenc->chlen++] = '\n';
393 		psenc->charset = NONE;
394 	/*FALLTHROUGH*/
395 	case NONE:
396 		*nresult = psenc->chlen;
397 		if (psenc->chlen > 0) {
398 			memcpy(s, psenc->ch, psenc->chlen);
399 			psenc->chlen = 0;
400 		}
401 		break;
402 	default:
403 		return EINVAL;
404 	}
405 
406 	return 0;
407 }
408 
409 static __inline int
410 /*ARGSUSED*/
_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei,_ZWState * __restrict psenc,int * __restrict rstate)411 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei,
412 	_ZWState * __restrict psenc, int * __restrict rstate)
413 {
414 	/* ei may be unused */
415 	_DIAGASSERT(psenc != NULL);
416 	_DIAGASSERT(rstate != NULL);
417 
418 	switch (psenc->charset) {
419 	case NONE:
420 		if (psenc->chlen != 0)
421 			return EINVAL;
422 		*rstate = _STDENC_SDGEN_INITIAL;
423 		break;
424 	case AMBIGIOUS:
425 		if (psenc->chlen != 0)
426 			return EINVAL;
427 		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
428 		break;
429 	case ASCII:
430 	case GB2312:
431 		switch (psenc->chlen) {
432 		case 0:
433 			*rstate = _STDENC_SDGEN_STABLE;
434 			break;
435 		case 1:
436 			*rstate = (psenc->ch[0] == '#')
437 			    ? _STDENC_SDGEN_INCOMPLETE_SHIFT
438 			    : _STDENC_SDGEN_INCOMPLETE_CHAR;
439 			break;
440 		default:
441 			return EINVAL;
442 		}
443 		break;
444 	default:
445 		return EINVAL;
446 	}
447 	return 0;
448 }
449 
450 static __inline int
451 /*ARGSUSED*/
_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)452 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei,
453 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
454 {
455 	/* ei seems to be unused */
456 	_DIAGASSERT(csid != NULL);
457 	_DIAGASSERT(idx != NULL);
458 
459 	*csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1;
460 	*idx = (_index_t)wc;
461 
462 	return 0;
463 }
464 
465 static __inline int
466 /*ARGSUSED*/
_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei,wchar_t * __restrict wc,_csid_t csid,_index_t idx)467 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei,
468 	 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
469 {
470 	/* ei seems to be unused */
471 	_DIAGASSERT(wc != NULL);
472 
473 	switch (csid) {
474 	case 0: case 1:
475 		break;
476 	default:
477 		return EINVAL;
478 	}
479 	*wc = (wchar_t)idx;
480 
481 	return 0;
482 }
483 
484 static void
485 /*ARGSUSED*/
_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo * ei)486 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei)
487 {
488 }
489 
490 static int
491 /*ARGSUSED*/
_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)492 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei,
493 	const void *__restrict var, size_t lenvar)
494 {
495 	return 0;
496 }
497 
498 /* ----------------------------------------------------------------------
499  * public interface for ctype
500  */
501 
502 _CITRUS_CTYPE_DECLS(ZW);
503 _CITRUS_CTYPE_DEF_OPS(ZW);
504 
505 #include "citrus_ctype_template.h"
506 
507 /* ----------------------------------------------------------------------
508  * public interface for stdenc
509  */
510 
511 _CITRUS_STDENC_DECLS(ZW);
512 _CITRUS_STDENC_DEF_OPS(ZW);
513 
514 #include "citrus_stdenc_template.h"
515