1 /* $NetBSD: citrus_zw.c,v 1.6 2022/04/19 20:32:14 rillig Exp $ */
2
3 /*-
4 * Copyright (c)2004, 2006 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30 #include <sys/cdefs.h>
31 #if defined(LIB_SCCS) && !defined(lint)
32 __RCSID("$NetBSD: citrus_zw.c,v 1.6 2022/04/19 20:32:14 rillig Exp $");
33 #endif /* LIB_SCCS and not lint */
34
35 #include <sys/types.h>
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <stddef.h>
43 #include <wchar.h>
44 #include <limits.h>
45
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_zw.h"
52
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
56
/*
 * Per-encoding configuration.  This module has no tunables, so the
 * structure carries only a placeholder member.
 */
typedef struct {
	int	dummy;		/* placeholder; never read in this file */
} _ZWEncodingInfo;
60
/*
 * Shift state of a ZW stream as tracked by the converters below.
 */
typedef enum {
	NONE,		/* initial state: no charset selected for the line */
	AMBIGIOUS,	/* a 'z' was read; waiting to see whether 'W' follows */
	ASCII,		/* bytes are passed through as ASCII */
	GB2312		/* after "zW": two-byte GB2312 sequences */
} _ZWCharset;
64
65 typedef struct {
66 int chlen;
67 char ch[4];
68 _ZWCharset charset;
69 } _ZWState;
70
71 typedef struct {
72 _ZWEncodingInfo ei;
73 struct {
74 /* for future multi-locale facility */
75 _ZWState s_mblen;
76 _ZWState s_mbrlen;
77 _ZWState s_mbrtowc;
78 _ZWState s_mbtowc;
79 _ZWState s_mbsrtowcs;
80 _ZWState s_mbsnrtowcs;
81 _ZWState s_wcrtomb;
82 _ZWState s_wcsrtombs;
83 _ZWState s_wcsnrtombs;
84 _ZWState s_wctomb;
85 } states;
86 } _ZWCTypeInfo;
87
/*
 * Accessors into a _ZWCTypeInfo: the embedded encoding info, and the
 * private state slot named after a given function (s_<func>).
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Configuration names for this encoding.
 * NOTE(review): presumably consumed by citrus_ctype_template.h and
 * citrus_stdenc_template.h, which are included at the end of this file --
 * confirm against those headers.
 */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _CTYPE_INFO			_ZWCTypeInfo
#define _ENCODING_STATE			_ZWState
/* Expands to MB_LEN_MAX regardless of the argument. */
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT		1
/* True when the state's charset is anything other than NONE. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
98
99 static __inline void
100 /*ARGSUSED*/
_citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei,_ZWState * __restrict psenc)101 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei,
102 _ZWState * __restrict psenc)
103 {
104 /* ei my be unused */
105 _DIAGASSERT(psenc != NULL);
106
107 psenc->chlen = 0;
108 psenc->charset = NONE;
109 }
110
111 static __inline void
112 /*ARGSUSED*/
_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei,void * __restrict pspriv,const _ZWState * __restrict psenc)113 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei,
114 void *__restrict pspriv, const _ZWState * __restrict psenc)
115 {
116 /* ei may be unused */
117 _DIAGASSERT(pspriv != NULL);
118 _DIAGASSERT(psenc != NULL);
119
120 memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
121 }
122
123 static __inline void
124 /*ARGSUSED*/
_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei,_ZWState * __restrict psenc,const void * __restrict pspriv)125 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei,
126 _ZWState * __restrict psenc, const void * __restrict pspriv)
127 {
128 /* ei may be unused */
129 _DIAGASSERT(psenc != NULL);
130 _DIAGASSERT(pspriv != NULL);
131
132 memcpy((void *)psenc, pspriv, sizeof(*psenc));
133 }
134
135 static int
_citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,wchar_t * __restrict pwc,const char ** __restrict s,size_t n,_ZWState * __restrict psenc,size_t * __restrict nresult)136 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
137 wchar_t * __restrict pwc, const char **__restrict s, size_t n,
138 _ZWState * __restrict psenc, size_t * __restrict nresult)
139 {
140 const char *s0;
141 int ch, len;
142 wchar_t wc;
143
144 /* ei may be unused */
145 /* pwc may be null */
146 _DIAGASSERT(s != NULL);
147 _DIAGASSERT(psenc != NULL);
148 _DIAGASSERT(nresult != NULL);
149
150 if (*s == NULL) {
151 _citrus_ZW_init_state(ei, psenc);
152 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
153 return 0;
154 }
155 s0 = *s;
156 len = 0;
157
158 #define STORE \
159 do { \
160 if (n-- < 1) { \
161 *nresult = (size_t)-2; \
162 *s = s0; \
163 return 0; \
164 } \
165 ch = (unsigned char)*s0++; \
166 if (len++ > MB_LEN_MAX || ch > 0x7F)\
167 goto ilseq; \
168 psenc->ch[psenc->chlen++] = ch; \
169 } while (0)
170
171 loop:
172 switch (psenc->charset) {
173 case ASCII:
174 switch (psenc->chlen) {
175 case 0:
176 STORE;
177 switch (psenc->ch[0]) {
178 case '\0': case '\n':
179 psenc->charset = NONE;
180 }
181 /*FALLTHROUGH*/
182 case 1:
183 break;
184 default:
185 return EINVAL;
186 }
187 ch = (unsigned char)psenc->ch[0];
188 if (ch > 0x7F)
189 goto ilseq;
190 wc = (wchar_t)ch;
191 psenc->chlen = 0;
192 break;
193 case NONE:
194 if (psenc->chlen != 0)
195 return EINVAL;
196 STORE;
197 ch = (unsigned char)psenc->ch[0];
198 if (ch != 'z') {
199 if (ch != '\n' && ch != '\0')
200 psenc->charset = ASCII;
201 wc = (wchar_t)ch;
202 psenc->chlen = 0;
203 break;
204 }
205 psenc->charset = AMBIGIOUS;
206 psenc->chlen = 0;
207 /* FALLTHROUGH */
208 case AMBIGIOUS:
209 if (psenc->chlen != 0)
210 return EINVAL;
211 STORE;
212 if (psenc->ch[0] != 'W') {
213 psenc->charset = ASCII;
214 wc = L'z';
215 break;
216 }
217 psenc->charset = GB2312;
218 psenc->chlen = 0;
219 /* FALLTHROUGH */
220 case GB2312:
221 switch (psenc->chlen) {
222 case 0:
223 STORE;
224 ch = (unsigned char)psenc->ch[0];
225 if (ch == '\0') {
226 psenc->charset = NONE;
227 wc = (wchar_t)ch;
228 psenc->chlen = 0;
229 break;
230 } else if (ch == '\n') {
231 psenc->charset = NONE;
232 psenc->chlen = 0;
233 goto loop;
234 }
235 /*FALLTHROUGH*/
236 case 1:
237 STORE;
238 if (psenc->ch[0] == ' ') {
239 ch = (unsigned char)psenc->ch[1];
240 wc = (wchar_t)ch;
241 psenc->chlen = 0;
242 break;
243 } else if (psenc->ch[0] == '#') {
244 ch = (unsigned char)psenc->ch[1];
245 if (ch == '\n') {
246 psenc->charset = NONE;
247 wc = (wchar_t)ch;
248 psenc->chlen = 0;
249 break;
250 } else if (ch == ' ') {
251 wc = (wchar_t)ch;
252 psenc->chlen = 0;
253 break;
254 }
255 }
256 ch = (unsigned char)psenc->ch[0];
257 if (ch < 0x21 || ch > 0x7E)
258 goto ilseq;
259 wc = (wchar_t)(ch << 8);
260 ch = (unsigned char)psenc->ch[1];
261 if (ch < 0x21 || ch > 0x7E) {
262 ilseq:
263 *nresult = (size_t)-1;
264 return EILSEQ;
265 }
266 wc |= (wchar_t)ch;
267 psenc->chlen = 0;
268 break;
269 default:
270 return EINVAL;
271 }
272 break;
273 default:
274 return EINVAL;
275 }
276 if (pwc != NULL)
277 *pwc = wc;
278
279 *nresult = (size_t)(wc == 0 ? 0 : len);
280 *s = s0;
281
282 return 0;
283 }
284
285 static int
286 /*ARGSUSED*/
_citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_ZWState * __restrict psenc,size_t * __restrict nresult)287 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei,
288 char *__restrict s, size_t n, wchar_t wc,
289 _ZWState * __restrict psenc, size_t * __restrict nresult)
290 {
291 int ch;
292
293 /* ei may be null */
294 _DIAGASSERT(s != NULL);
295 _DIAGASSERT(psenc != NULL);
296 _DIAGASSERT(nresult != NULL);
297
298 if (psenc->chlen != 0)
299 return EINVAL;
300 if ((uint32_t)wc <= 0x7F) {
301 ch = (unsigned char)wc;
302 switch (psenc->charset) {
303 case NONE:
304 if (ch == '\0' || ch == '\n') {
305 psenc->ch[psenc->chlen++] = ch;
306 } else {
307 if (n < 4)
308 return E2BIG;
309 n -= 4;
310 psenc->ch[psenc->chlen++] = 'z';
311 psenc->ch[psenc->chlen++] = 'W';
312 psenc->ch[psenc->chlen++] = ' ';
313 psenc->ch[psenc->chlen++] = ch;
314 psenc->charset = GB2312;
315 }
316 break;
317 case GB2312:
318 if (n < 2)
319 return E2BIG;
320 n -= 2;
321 if (ch == '\0') {
322 psenc->ch[psenc->chlen++] = '\n';
323 psenc->ch[psenc->chlen++] = '\0';
324 psenc->charset = NONE;
325 } else if (ch == '\n') {
326 psenc->ch[psenc->chlen++] = '#';
327 psenc->ch[psenc->chlen++] = '\n';
328 psenc->charset = NONE;
329 } else {
330 psenc->ch[psenc->chlen++] = ' ';
331 psenc->ch[psenc->chlen++] = ch;
332 }
333 break;
334 default:
335 return EINVAL;
336 }
337 } else if ((uint32_t)wc <= 0x7E7E) {
338 switch (psenc->charset) {
339 case NONE:
340 if (n < 2)
341 return E2BIG;
342 n -= 2;
343 psenc->ch[psenc->chlen++] = 'z';
344 psenc->ch[psenc->chlen++] = 'W';
345 psenc->charset = GB2312;
346 /* FALLTHROUGH*/
347 case GB2312:
348 if (n < 2)
349 return E2BIG;
350 n -= 2;
351 ch = (wc >> 8) & 0xFF;
352 if (ch < 0x21 || ch > 0x7E)
353 goto ilseq;
354 psenc->ch[psenc->chlen++] = ch;
355 ch = wc & 0xFF;
356 if (ch < 0x21 || ch > 0x7E)
357 goto ilseq;
358 psenc->ch[psenc->chlen++] = ch;
359 break;
360 default:
361 return EINVAL;
362 }
363 } else {
364 ilseq:
365 *nresult = (size_t)-1;
366 return EILSEQ;
367 }
368 memcpy(s, psenc->ch, psenc->chlen);
369 *nresult = psenc->chlen;
370 psenc->chlen = 0;
371
372 return 0;
373 }
374
375 static int
376 /*ARGSUSED*/
_citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei,char * __restrict s,size_t n,_ZWState * __restrict psenc,size_t * __restrict nresult)377 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei,
378 char * __restrict s, size_t n,
379 _ZWState * __restrict psenc, size_t * __restrict nresult)
380 {
381 /* ei may be unused */
382 _DIAGASSERT(s != NULL);
383 _DIAGASSERT(psenc != NULL);
384 _DIAGASSERT(nresult != NULL);
385
386 if (psenc->chlen != 0)
387 return EINVAL;
388 switch (psenc->charset) {
389 case GB2312:
390 if (n-- < 1)
391 return E2BIG;
392 psenc->ch[psenc->chlen++] = '\n';
393 psenc->charset = NONE;
394 /*FALLTHROUGH*/
395 case NONE:
396 *nresult = psenc->chlen;
397 if (psenc->chlen > 0) {
398 memcpy(s, psenc->ch, psenc->chlen);
399 psenc->chlen = 0;
400 }
401 break;
402 default:
403 return EINVAL;
404 }
405
406 return 0;
407 }
408
409 static __inline int
410 /*ARGSUSED*/
_citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei,_ZWState * __restrict psenc,int * __restrict rstate)411 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei,
412 _ZWState * __restrict psenc, int * __restrict rstate)
413 {
414 /* ei may be unused */
415 _DIAGASSERT(psenc != NULL);
416 _DIAGASSERT(rstate != NULL);
417
418 switch (psenc->charset) {
419 case NONE:
420 if (psenc->chlen != 0)
421 return EINVAL;
422 *rstate = _STDENC_SDGEN_INITIAL;
423 break;
424 case AMBIGIOUS:
425 if (psenc->chlen != 0)
426 return EINVAL;
427 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
428 break;
429 case ASCII:
430 case GB2312:
431 switch (psenc->chlen) {
432 case 0:
433 *rstate = _STDENC_SDGEN_STABLE;
434 break;
435 case 1:
436 *rstate = (psenc->ch[0] == '#')
437 ? _STDENC_SDGEN_INCOMPLETE_SHIFT
438 : _STDENC_SDGEN_INCOMPLETE_CHAR;
439 break;
440 default:
441 return EINVAL;
442 }
443 break;
444 default:
445 return EINVAL;
446 }
447 return 0;
448 }
449
450 static __inline int
451 /*ARGSUSED*/
_citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)452 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei,
453 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
454 {
455 /* ei seems to be unused */
456 _DIAGASSERT(csid != NULL);
457 _DIAGASSERT(idx != NULL);
458
459 *csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1;
460 *idx = (_index_t)wc;
461
462 return 0;
463 }
464
465 static __inline int
466 /*ARGSUSED*/
_citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei,wchar_t * __restrict wc,_csid_t csid,_index_t idx)467 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei,
468 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
469 {
470 /* ei seems to be unused */
471 _DIAGASSERT(wc != NULL);
472
473 switch (csid) {
474 case 0: case 1:
475 break;
476 default:
477 return EINVAL;
478 }
479 *wc = (wchar_t)idx;
480
481 return 0;
482 }
483
484 static void
485 /*ARGSUSED*/
_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo * ei)486 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei)
487 {
488 }
489
490 static int
491 /*ARGSUSED*/
_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)492 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei,
493 const void *__restrict var, size_t lenvar)
494 {
495 return 0;
496 }
497
/* ----------------------------------------------------------------------
 * public interface for ctype
 */

/*
 * NOTE(review): these macros and the template header presumably expand to
 * the ctype entry points built on the private functions and _ZW* macros
 * defined above -- confirm against citrus_ctype.h and the template.
 */
_CITRUS_CTYPE_DECLS(ZW);
_CITRUS_CTYPE_DEF_OPS(ZW);

#include "citrus_ctype_template.h"

/* ----------------------------------------------------------------------
 * public interface for stdenc
 */

/*
 * NOTE(review): same arrangement for the stdenc interface -- confirm
 * against citrus_stdenc.h and the template.
 */
_CITRUS_STDENC_DECLS(ZW);
_CITRUS_STDENC_DEF_OPS(ZW);

#include "citrus_stdenc_template.h"
515