1 /* $NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $ */
2
3 /*-
4 * Copyright (c)2003 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_gbk2k.c,v 1.9 2022/04/19 20:32:14 rillig Exp $");
32 #endif /* LIBC_SCCS and not lint */
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <wchar.h>
41 #include <sys/types.h>
42 #include <limits.h>
43
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_ctype.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_gbk2k.h"
51
52
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
56
57 typedef struct _GBK2KState {
58 char ch[4];
59 int chlen;
60 } _GBK2KState;
61
62 typedef struct {
63 int mb_cur_max;
64 } _GBK2KEncodingInfo;
65
66 typedef struct {
67 _GBK2KEncodingInfo ei;
68 struct {
69 /* for future multi-locale facility */
70 _GBK2KState s_mblen;
71 _GBK2KState s_mbrlen;
72 _GBK2KState s_mbrtowc;
73 _GBK2KState s_mbtowc;
74 _GBK2KState s_mbsrtowcs;
75 _GBK2KState s_mbsnrtowcs;
76 _GBK2KState s_wcrtomb;
77 _GBK2KState s_wcsrtombs;
78 _GBK2KState s_wcsnrtombs;
79 _GBK2KState s_wctomb;
80 } states;
81 } _GBK2KCTypeInfo;
82
83 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
84 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
85
86 #define _FUNCNAME(m) _citrus_GBK2K_##m
87 #define _ENCODING_INFO _GBK2KEncodingInfo
88 #define _CTYPE_INFO _GBK2KCTypeInfo
89 #define _ENCODING_STATE _GBK2KState
90 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max
91 #define _ENCODING_IS_STATE_DEPENDENT 0
92 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0
93
94 static __inline void
95 /*ARGSUSED*/
_citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,_GBK2KState * __restrict s)96 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,
97 _GBK2KState * __restrict s)
98 {
99 memset(s, 0, sizeof(*s));
100 }
101
102 static __inline void
103 /*ARGSUSED*/
_citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,void * __restrict pspriv,const _GBK2KState * __restrict s)104 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,
105 void * __restrict pspriv,
106 const _GBK2KState * __restrict s)
107 {
108 memcpy(pspriv, (const void *)s, sizeof(*s));
109 }
110
111 static __inline void
112 /*ARGSUSED*/
_citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,_GBK2KState * __restrict s,const void * __restrict pspriv)113 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,
114 _GBK2KState * __restrict s,
115 const void * __restrict pspriv)
116 {
117 memcpy((void *)s, pspriv, sizeof(*s));
118 }
119
120 static __inline int
_mb_singlebyte(int c)121 _mb_singlebyte(int c)
122 {
123 c &= 0xff;
124 return (c <= 0x7f);
125 }
126
127 static __inline int
_mb_leadbyte(int c)128 _mb_leadbyte(int c)
129 {
130 c &= 0xff;
131 return (0x81 <= c && c <= 0xfe);
132 }
133
134 static __inline int
_mb_trailbyte(int c)135 _mb_trailbyte(int c)
136 {
137 c &= 0xff;
138 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe));
139 }
140
141 static __inline int
_mb_surrogate(int c)142 _mb_surrogate(int c)
143 {
144 c &= 0xff;
145 return (0x30 <= c && c <= 0x39);
146 }
147
148 static __inline int
_mb_count(wchar_t v)149 _mb_count(wchar_t v)
150 {
151 u_int32_t c;
152
153 c = (u_int32_t)v; /* XXX */
154 if (!(c & 0xffffff00))
155 return (1);
156 if (!(c & 0xffff0000))
157 return (2);
158 return (4);
159 }
160
161 #define _PSENC (psenc->ch[psenc->chlen - 1])
162 #define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c))
163
164 static int
_citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,wchar_t * __restrict pwc,const char ** __restrict s,size_t n,_GBK2KState * __restrict psenc,size_t * __restrict nresult)165 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
166 wchar_t * __restrict pwc,
167 const char ** __restrict s, size_t n,
168 _GBK2KState * __restrict psenc,
169 size_t * __restrict nresult)
170 {
171 int chlenbak, len;
172 const char *s0, *s1;
173 wchar_t wc;
174
175 _DIAGASSERT(ei != NULL);
176 /* pwc may be NULL */
177 _DIAGASSERT(s != NULL);
178 _DIAGASSERT(psenc != NULL);
179
180 s0 = *s;
181
182 if (s0 == NULL) {
183 /* _citrus_GBK2K_init_state(ei, psenc); */
184 psenc->chlen = 0;
185 *nresult = 0;
186 return (0);
187 }
188
189 chlenbak = psenc->chlen;
190
191 switch (psenc->chlen) {
192 case 3:
193 if (!_mb_leadbyte (_PSENC))
194 goto invalid;
195 /* FALLTHROUGH */
196 case 2:
197 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
198 goto invalid;
199 /* FALLTHROUGH */
200 case 1:
201 if (!_mb_leadbyte (_PSENC))
202 goto invalid;
203 /* FALLTHOROUGH */
204 case 0:
205 break;
206 default:
207 goto invalid;
208 }
209
210 for (;;) {
211 if (n-- < 1)
212 goto restart;
213
214 _PUSH_PSENC(*s0++);
215
216 switch (psenc->chlen) {
217 case 1:
218 if (_mb_singlebyte(_PSENC))
219 goto convert;
220 if (_mb_leadbyte (_PSENC))
221 continue;
222 goto ilseq;
223 case 2:
224 if (_mb_trailbyte (_PSENC))
225 goto convert;
226 if (ei->mb_cur_max == 4 &&
227 _mb_surrogate (_PSENC))
228 continue;
229 goto ilseq;
230 case 3:
231 if (_mb_leadbyte (_PSENC))
232 continue;
233 goto ilseq;
234 case 4:
235 if (_mb_surrogate (_PSENC))
236 goto convert;
237 goto ilseq;
238 }
239 }
240
241 convert:
242 len = psenc->chlen;
243 s1 = &psenc->ch[0];
244 wc = 0;
245 while (len-- > 0)
246 wc = (wc << 8) | (*s1++ & 0xff);
247
248 if (pwc != NULL)
249 *pwc = wc;
250 *s = s0;
251 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
252 /* _citrus_GBK2K_init_state(ei, psenc); */
253 psenc->chlen = 0;
254
255 return (0);
256
257 restart:
258 *s = s0;
259 *nresult = (size_t)-2;
260
261 return (0);
262
263 invalid:
264 return (EINVAL);
265
266 ilseq:
267 *nresult = (size_t)-1;
268 return (EILSEQ);
269 }
270
271 static int
_citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,char * __restrict s,size_t n,wchar_t wc,_GBK2KState * __restrict psenc,size_t * __restrict nresult)272 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
273 char * __restrict s, size_t n, wchar_t wc,
274 _GBK2KState * __restrict psenc,
275 size_t * __restrict nresult)
276 {
277 int len, ret;
278
279 _DIAGASSERT(ei != NULL);
280 _DIAGASSERT(s != NULL);
281 _DIAGASSERT(psenc != NULL);
282
283 if (psenc->chlen != 0) {
284 ret = EINVAL;
285 goto err;
286 }
287
288 len = _mb_count(wc);
289 if (n < len) {
290 ret = E2BIG;
291 goto err;
292 }
293
294 switch (len) {
295 case 1:
296 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) {
297 ret = EILSEQ;
298 goto err;
299 }
300 break;
301 case 2:
302 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) ||
303 !_mb_trailbyte (_PUSH_PSENC(wc ))) {
304 ret = EILSEQ;
305 goto err;
306 }
307 break;
308 case 4:
309 if (ei->mb_cur_max != 4 ||
310 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) ||
311 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
312 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) ||
313 !_mb_surrogate (_PUSH_PSENC(wc ))) {
314 ret = EILSEQ;
315 goto err;
316 }
317 break;
318 }
319
320 _DIAGASSERT(len == psenc->chlen);
321
322 memcpy(s, psenc->ch, psenc->chlen);
323 *nresult = psenc->chlen;
324 /* _citrus_GBK2K_init_state(ei, psenc); */
325 psenc->chlen = 0;
326
327 return (0);
328
329 err:
330 *nresult = (size_t)-1;
331 return ret;
332 }
333
334 static __inline int
335 /*ARGSUSED*/
_citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)336 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei,
337 _csid_t * __restrict csid,
338 _index_t * __restrict idx, wchar_t wc)
339 {
340 u_int8_t ch, cl;
341
342 _DIAGASSERT(csid != NULL && idx != NULL);
343
344 if ((u_int32_t)wc<0x80) {
345 /* ISO646 */
346 *csid = 0;
347 *idx = (_index_t)wc;
348 } else if ((u_int32_t)wc>=0x10000) {
349 /* GBKUCS : XXX */
350 *csid = 3;
351 *idx = (_index_t)wc;
352 } else {
353 ch = (u_int8_t)(wc >> 8);
354 cl = (u_int8_t)wc;
355 if (ch>=0xA1 && cl>=0xA1) {
356 /* EUC G1 */
357 *csid = 1;
358 *idx = (_index_t)wc & 0x7F7FU;
359 } else {
360 /* extended area (0x8140-) */
361 *csid = 2;
362 *idx = (_index_t)wc;
363 }
364 }
365
366 return 0;
367 }
368
369 static __inline int
370 /*ARGSUSED*/
_citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,wchar_t * __restrict wc,_csid_t csid,_index_t idx)371 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
372 wchar_t * __restrict wc,
373 _csid_t csid, _index_t idx)
374 {
375
376 _DIAGASSERT(wc != NULL);
377
378 switch (csid) {
379 case 0:
380 /* ISO646 */
381 *wc = (wchar_t)idx;
382 break;
383 case 1:
384 /* EUC G1 */
385 *wc = (wchar_t)idx | 0x8080U;
386 break;
387 case 2:
388 /* extended area */
389 *wc = (wchar_t)idx;
390 break;
391 case 3:
392 /* GBKUCS : XXX */
393 if (ei->mb_cur_max != 4)
394 return EINVAL;
395 *wc = (wchar_t)idx;
396 break;
397 default:
398 return EILSEQ;
399 }
400
401 return 0;
402 }
403
404 static __inline int
405 /*ARGSUSED*/
_citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei,_GBK2KState * __restrict psenc,int * __restrict rstate)406 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei,
407 _GBK2KState * __restrict psenc,
408 int * __restrict rstate)
409 {
410
411 if (psenc->chlen == 0)
412 *rstate = _STDENC_SDGEN_INITIAL;
413 else
414 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
415
416 return 0;
417 }
418
419 static int
420 /*ARGSUSED*/
_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)421 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
422 const void * __restrict var, size_t lenvar)
423 {
424 const char *p;
425
426 _DIAGASSERT(ei != NULL);
427
428 p = var;
429 #define MATCH(x, act) \
430 do { \
431 if (lenvar >= (sizeof(#x)-1) && \
432 _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \
433 act; \
434 lenvar -= sizeof(#x)-1; \
435 p += sizeof(#x)-1; \
436 } \
437 } while (0)
438 memset((void *)ei, 0, sizeof(*ei));
439 ei->mb_cur_max = 4;
440 while (lenvar>0) {
441 switch (_bcs_tolower(*p)) {
442 case '2':
443 MATCH("2byte", ei->mb_cur_max = 2);
444 break;
445 }
446 p++;
447 lenvar--;
448 }
449
450 return (0);
451 }
452
453 static void
454 /*ARGSUSED*/
_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo * ei)455 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei)
456 {
457 }
458
459
460 /* ----------------------------------------------------------------------
461 * public interface for ctype
462 */
463
464 _CITRUS_CTYPE_DECLS(GBK2K);
465 _CITRUS_CTYPE_DEF_OPS(GBK2K);
466
467 #include "citrus_ctype_template.h"
468
469 /* ----------------------------------------------------------------------
470 * public interface for stdenc
471 */
472
473 _CITRUS_STDENC_DECLS(GBK2K);
474 _CITRUS_STDENC_DEF_OPS(GBK2K);
475
476 #include "citrus_stdenc_template.h"
477