1 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */ 2 3 /*- 4 * Copyright (c)2003 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <string.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <stddef.h> 40 #include <wchar.h> 41 #include <sys/types.h> 42 #include <limits.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_bcs.h" 47 #include "citrus_module.h" 48 #include "citrus_ctype.h" 49 #include "citrus_stdenc.h" 50 #include "citrus_gbk2k.h" 51 52 53 /* ---------------------------------------------------------------------- 54 * private stuffs used by templates 55 */ 56 57 typedef struct _GBK2KState { 58 char ch[4]; 59 int chlen; 60 } _GBK2KState; 61 62 typedef struct { 63 int mb_cur_max; 64 } _GBK2KEncodingInfo; 65 66 typedef struct { 67 _GBK2KEncodingInfo ei; 68 struct { 69 /* for future multi-locale facility */ 70 _GBK2KState s_mblen; 71 _GBK2KState s_mbrlen; 72 _GBK2KState s_mbrtowc; 73 _GBK2KState s_mbtowc; 74 _GBK2KState s_mbsrtowcs; 75 _GBK2KState s_wcrtomb; 76 _GBK2KState s_wcsrtombs; 77 _GBK2KState s_wctomb; 78 } states; 79 } _GBK2KCTypeInfo; 80 81 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 82 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 83 84 #define _FUNCNAME(m) _citrus_GBK2K_##m 85 #define _ENCODING_INFO _GBK2KEncodingInfo 86 #define _CTYPE_INFO _GBK2KCTypeInfo 87 #define _ENCODING_STATE _GBK2KState 88 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 89 #define _ENCODING_IS_STATE_DEPENDENT 0 90 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 91 92 static __inline void 93 /*ARGSUSED*/ 94 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei, 95 _GBK2KState * __restrict s) 96 { 97 memset(s, 0, sizeof(*s)); 98 } 99 100 static __inline void 101 /*ARGSUSED*/ 102 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei, 103 void * __restrict pspriv, 104 const _GBK2KState * __restrict s) 105 { 106 memcpy(pspriv, (const void *)s, sizeof(*s)); 107 } 108 109 static __inline void 110 /*ARGSUSED*/ 111 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei, 112 _GBK2KState * __restrict s, 113 const void * __restrict pspriv) 114 { 115 memcpy((void *)s, pspriv, sizeof(*s)); 116 } 117 118 static __inline int 119 _mb_singlebyte(int c) 120 { 121 c &= 0xff; 122 return (c <= 0x7f); 123 } 124 125 static __inline int 126 _mb_leadbyte(int c) 127 { 128 c &= 0xff; 129 return (0x81 <= c && c <= 0xfe); 130 } 131 132 static __inline int 133 _mb_trailbyte(int c) 134 { 135 c &= 0xff; 136 return ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfe)); 137 } 138 139 static __inline int 140 _mb_surrogate(int c) 141 { 142 c &= 0xff; 143 return (0x30 <= c && c <= 0x39); 144 } 145 146 static __inline int 147 _mb_count(wchar_t v) 148 { 149 u_int32_t c; 150 151 c = (u_int32_t)v; /* XXX */ 152 if (!(c & 0xffffff00)) 153 return (1); 154 if (!(c & 0xffff0000)) 155 return (2); 156 return (4); 157 } 158 159 #define _PSENC (psenc->ch[psenc->chlen - 1]) 160 #define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (c)) 161 162 static int 163 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei, 164 wchar_t * __restrict pwc, 165 const char ** __restrict s, size_t n, 166 _GBK2KState * __restrict psenc, 167 size_t * __restrict nresult) 168 { 169 int chlenbak, len; 170 const char *s0, *s1; 171 wchar_t wc; 172 173 _DIAGASSERT(ei != NULL); 174 /* pwc may be NULL */ 175 _DIAGASSERT(s != NULL); 176 _DIAGASSERT(psenc != NULL); 177 178 s0 = *s; 179 180 if (s0 == NULL) { 181 /* _citrus_GBK2K_init_state(ei, psenc); */ 182 psenc->chlen = 0; 183 *nresult = 0; 184 return (0); 185 } 186 187 chlenbak = psenc->chlen; 188 189 switch (psenc->chlen) { 190 case 3: 191 if (!_mb_leadbyte (_PSENC)) 192 goto invalid; 193 /* FALLTHROUGH */ 194 case 2: 195 if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) 196 goto invalid; 197 /* FALLTHROUGH */ 198 case 1: 199 if (!_mb_leadbyte (_PSENC)) 200 goto invalid; 201 /* FALLTHOROUGH */ 202 case 0: 203 break; 204 default: 205 goto invalid; 206 } 207 208 for (;;) { 209 if (n-- < 1) 210 goto restart; 211 212 _PUSH_PSENC(*s0++); 213 214 switch (psenc->chlen) { 215 case 1: 216 if (_mb_singlebyte(_PSENC)) 217 goto convert; 218 if (_mb_leadbyte (_PSENC)) 219 continue; 220 goto ilseq; 221 case 2: 222 if (_mb_trailbyte (_PSENC)) 223 goto convert; 224 if (ei->mb_cur_max == 4 && 225 _mb_surrogate (_PSENC)) 226 continue; 227 goto ilseq; 228 case 3: 229 if (_mb_leadbyte (_PSENC)) 230 continue; 231 goto ilseq; 232 case 4: 233 if (_mb_surrogate (_PSENC)) 234 goto convert; 235 goto ilseq; 236 } 237 } 238 239 convert: 240 len = psenc->chlen; 241 s1 = &psenc->ch[0]; 242 wc = 0; 243 while (len-- > 0) 244 wc = (wc << 8) | (*s1++ & 0xff); 245 246 if (pwc != NULL) 247 *pwc = wc; 248 *s = s0; 249 *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; 250 /* _citrus_GBK2K_init_state(ei, psenc); */ 251 psenc->chlen = 0; 252 253 return (0); 254 255 restart: 256 *s = s0; 257 *nresult = (size_t)-2; 258 259 return (0); 260 261 invalid: 262 return (EINVAL); 263 264 ilseq: 265 *nresult = (size_t)-1; 266 return (EILSEQ); 267 } 268 269 static int 270 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei, 271 char * __restrict s, size_t n, wchar_t wc, 272 _GBK2KState * __restrict psenc, 273 size_t * __restrict nresult) 274 { 275 int len, ret; 276 277 _DIAGASSERT(ei != NULL); 278 _DIAGASSERT(s != NULL); 279 _DIAGASSERT(psenc != NULL); 280 281 if (psenc->chlen != 0) { 282 ret = EINVAL; 283 goto err; 284 } 285 286 len = _mb_count(wc); 287 if (n < len) { 288 ret = E2BIG; 289 goto err; 290 } 291 292 switch (len) { 293 case 1: 294 if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { 295 ret = EILSEQ; 296 goto err; 297 } 298 break; 299 case 2: 300 if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 301 !_mb_trailbyte (_PUSH_PSENC(wc ))) { 302 ret = EILSEQ; 303 goto err; 304 } 305 break; 306 case 4: 307 if (ei->mb_cur_max != 4 || 308 !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || 309 !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || 310 !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || 311 !_mb_surrogate (_PUSH_PSENC(wc ))) { 312 ret = EILSEQ; 313 goto err; 314 } 315 break; 316 } 317 318 _DIAGASSERT(len == psenc->chlen); 319 320 memcpy(s, psenc->ch, psenc->chlen); 321 *nresult = psenc->chlen; 322 /* _citrus_GBK2K_init_state(ei, psenc); */ 323 psenc->chlen = 0; 324 325 return (0); 326 327 err: 328 *nresult = (size_t)-1; 329 return ret; 330 } 331 332 static __inline int 333 /*ARGSUSED*/ 334 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei, 335 _csid_t * __restrict csid, 336 _index_t * __restrict idx, wchar_t wc) 337 { 338 u_int8_t ch, cl; 339 340 _DIAGASSERT(csid != NULL && idx != NULL); 341 342 if ((u_int32_t)wc<0x80) { 343 /* ISO646 */ 344 *csid = 0; 345 *idx = (_index_t)wc; 346 } else if ((u_int32_t)wc>=0x10000) { 347 /* GBKUCS : XXX */ 348 *csid = 3; 349 *idx = (_index_t)wc; 350 } else { 351 ch = (u_int8_t)(wc >> 8); 352 cl = (u_int8_t)wc; 353 if (ch>=0xA1 && cl>=0xA1) { 354 /* EUC G1 */ 355 *csid = 1; 356 *idx = (_index_t)wc & 0x7F7FU; 357 } else { 358 /* extended area (0x8140-) */ 359 *csid = 2; 360 *idx = (_index_t)wc; 361 } 362 } 363 364 return 0; 365 } 366 367 static __inline int 368 /*ARGSUSED*/ 369 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei, 370 wchar_t * __restrict wc, 371 _csid_t csid, _index_t idx) 372 { 373 374 _DIAGASSERT(wc != NULL); 375 376 switch (csid) { 377 case 0: 378 /* ISO646 */ 379 *wc = (wchar_t)idx; 380 break; 381 case 1: 382 /* EUC G1 */ 383 *wc = (wchar_t)idx | 0x8080U; 384 break; 385 case 2: 386 /* extended area */ 387 *wc = (wchar_t)idx; 388 break; 389 case 3: 390 /* GBKUCS : XXX */ 391 if (ei->mb_cur_max != 4) 392 return EINVAL; 393 *wc = (wchar_t)idx; 394 break; 395 default: 396 return EILSEQ; 397 } 398 399 return 0; 400 } 401 402 static __inline int 403 /*ARGSUSED*/ 404 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei, 405 _GBK2KState * __restrict psenc, 406 int * __restrict rstate) 407 { 408 409 if (psenc->chlen == 0) 410 *rstate = _STDENC_SDGEN_INITIAL; 411 else 412 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 413 414 return 0; 415 } 416 417 static int 418 /*ARGSUSED*/ 419 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei, 420 const void * __restrict var, size_t lenvar) 421 { 422 const char *p; 423 424 _DIAGASSERT(ei != NULL); 425 426 p = var; 427 #define MATCH(x, act) \ 428 do { \ 429 if (lenvar >= (sizeof(#x)-1) && \ 430 _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) { \ 431 act; \ 432 lenvar -= sizeof(#x)-1; \ 433 p += sizeof(#x)-1; \ 434 } \ 435 } while (/*CONSTCOND*/0) 436 memset((void *)ei, 0, sizeof(*ei)); 437 ei->mb_cur_max = 4; 438 while (lenvar>0) { 439 switch (_bcs_tolower(*p)) { 440 case '2': 441 MATCH("2byte", ei->mb_cur_max = 2); 442 break; 443 } 444 p++; 445 lenvar--; 446 } 447 448 return (0); 449 } 450 451 static void 452 /*ARGSUSED*/ 453 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei) 454 { 455 } 456 457 458 /* ---------------------------------------------------------------------- 459 * public interface for ctype 460 */ 461 462 _CITRUS_CTYPE_DECLS(GBK2K); 463 _CITRUS_CTYPE_DEF_OPS(GBK2K); 464 465 #include "citrus_ctype_template.h" 466 467 /* ---------------------------------------------------------------------- 468 * public interface for stdenc 469 */ 470 471 _CITRUS_STDENC_DECLS(GBK2K); 472 _CITRUS_STDENC_DEF_OPS(GBK2K); 473 474 #include "citrus_stdenc_template.h" 475