1 /* $NetBSD: citrus_big5.c,v 1.14 2013/05/28 16:57:56 joerg Exp $ */ 2 3 /*- 4 * Copyright (c)2002, 2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61 #include <sys/cdefs.h> 62 #if defined(LIBC_SCCS) && !defined(lint) 63 __RCSID("$NetBSD: citrus_big5.c,v 1.14 2013/05/28 16:57:56 joerg Exp $"); 64 #endif /* LIBC_SCCS and not lint */ 65 66 #include <sys/queue.h> 67 #include <sys/types.h> 68 #include <assert.h> 69 #include <errno.h> 70 #include <string.h> 71 #include <stdint.h> 72 #include <stdio.h> 73 #include <stdlib.h> 74 #include <stddef.h> 75 #include <wchar.h> 76 #include <limits.h> 77 78 #include "citrus_namespace.h" 79 #include "citrus_types.h" 80 #include "citrus_bcs.h" 81 #include "citrus_module.h" 82 #include "citrus_ctype.h" 83 #include "citrus_stdenc.h" 84 #include "citrus_big5.h" 85 86 #include "citrus_prop.h" 87 88 /* ---------------------------------------------------------------------- 89 * private stuffs used by templates 90 */ 91 92 typedef struct { 93 char ch[2]; 94 int chlen; 95 } _BIG5State; 96 97 typedef struct _BIG5Exclude { 98 TAILQ_ENTRY(_BIG5Exclude) entry; 99 wint_t start, end; 100 } _BIG5Exclude; 101 102 typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList; 103 104 typedef struct { 105 int cell[0x100]; 106 _BIG5ExcludeList excludes; 107 } _BIG5EncodingInfo; 108 109 typedef struct { 110 _BIG5EncodingInfo ei; 111 struct { 112 /* for future multi-locale facility */ 113 _BIG5State s_mblen; 114 _BIG5State s_mbrlen; 115 _BIG5State s_mbrtowc; 116 _BIG5State s_mbtowc; 117 _BIG5State s_mbsrtowcs; 118 _BIG5State s_mbsnrtowcs; 119 _BIG5State s_wcrtomb; 120 _BIG5State s_wcsrtombs; 121 _BIG5State s_wcsnrtombs; 122 _BIG5State s_wctomb; 123 } states; 124 } _BIG5CTypeInfo; 125 126 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 127 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 128 129 #define _FUNCNAME(m) _citrus_BIG5_##m 130 #define _ENCODING_INFO _BIG5EncodingInfo 131 #define _CTYPE_INFO _BIG5CTypeInfo 132 #define _ENCODING_STATE _BIG5State 133 #define _ENCODING_MB_CUR_MAX(_ei_) 2 134 #define _ENCODING_IS_STATE_DEPENDENT 0 135 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 136 137 138 static __inline void 139 /*ARGSUSED*/ 140 _citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei, 141 _BIG5State * __restrict s) 142 { 143 memset(s, 0, sizeof(*s)); 144 } 145 146 static __inline void 147 /*ARGSUSED*/ 148 _citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei, 149 void * __restrict pspriv, 150 const _BIG5State * __restrict s) 151 { 152 memcpy(pspriv, (const void *)s, sizeof(*s)); 153 } 154 155 static __inline void 156 /*ARGSUSED*/ 157 _citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei, 158 _BIG5State * __restrict s, 159 const void * __restrict pspriv) 160 { 161 memcpy((void *)s, pspriv, sizeof(*s)); 162 } 163 164 static __inline int 165 _citrus_BIG5_check(_BIG5EncodingInfo *ei, u_int c) 166 { 167 _DIAGASSERT(ei != NULL); 168 169 return (ei->cell[c & 0xFF] & 0x1) ? 2 : 1; 170 } 171 172 static __inline int 173 _citrus_BIG5_check2(_BIG5EncodingInfo *ei, u_int c) 174 { 175 _DIAGASSERT(ei != NULL); 176 177 return (ei->cell[c & 0xFF] & 0x2) ? 1 : 0; 178 } 179 180 static __inline int 181 _citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c) 182 { 183 _BIG5Exclude *exclude; 184 185 _DIAGASSERT(ei != NULL); 186 187 TAILQ_FOREACH(exclude, &ei->excludes, entry) { 188 if (c >= exclude->start && c <= exclude->end) 189 return EILSEQ; 190 } 191 return 0; 192 } 193 194 static int 195 _citrus_BIG5_fill_rowcol(void ** __restrict ctx, const char * __restrict s, 196 uint64_t start, uint64_t end) 197 { 198 _BIG5EncodingInfo *ei; 199 int i; 200 uint64_t n; 201 202 _DIAGASSERT(ctx != NULL && *ctx != NULL); 203 204 if (start > 0xFF || end > 0xFF) 205 return EINVAL; 206 ei = (_BIG5EncodingInfo *)*ctx; 207 i = strcmp("row", s) ? 1 : 0; 208 i = 1 << i; 209 for (n = start; n <= end; ++n) 210 ei->cell[n & 0xFF] |= i; 211 return 0; 212 } 213 214 static int 215 /*ARGSUSED*/ 216 _citrus_BIG5_fill_excludes(void ** __restrict ctx, const char * __restrict s, 217 uint64_t start, uint64_t end) 218 { 219 _BIG5EncodingInfo *ei; 220 _BIG5Exclude *exclude; 221 222 _DIAGASSERT(ctx != NULL && *ctx != NULL); 223 224 if (start > 0xFFFF || end > 0xFFFF) 225 return EINVAL; 226 ei = (_BIG5EncodingInfo *)*ctx; 227 exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList); 228 if (exclude != NULL && (wint_t)start <= exclude->end) 229 return EINVAL; 230 exclude = (void *)malloc(sizeof(*exclude)); 231 if (exclude == NULL) 232 return ENOMEM; 233 exclude->start = (wint_t)start; 234 exclude->end = (wint_t)end; 235 TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry); 236 237 return 0; 238 } 239 240 static const _citrus_prop_hint_t root_hints[] = { 241 _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol), 242 _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol), 243 _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes), 244 _CITRUS_PROP_HINT_END 245 }; 246 247 static void 248 /*ARGSUSED*/ 249 _citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei) 250 { 251 _BIG5Exclude *exclude; 252 253 _DIAGASSERT(ei != NULL); 254 255 while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) { 256 TAILQ_REMOVE(&ei->excludes, exclude, entry); 257 free(exclude); 258 } 259 } 260 261 static int 262 /*ARGSUSED*/ 263 _citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei, 264 const void * __restrict var, size_t lenvar) 265 { 266 int err; 267 const char *s; 268 269 _DIAGASSERT(ei != NULL); 270 271 memset((void *)ei, 0, sizeof(*ei)); 272 TAILQ_INIT(&ei->excludes); 273 274 if (lenvar > 0 && var != NULL) { 275 s = _bcs_skip_ws_len((const char *)var, &lenvar); 276 if (lenvar > 0 && *s != '\0') { 277 err = _citrus_prop_parse_variable( 278 root_hints, (void *)ei, s, lenvar); 279 if (err == 0) 280 return 0; 281 282 _citrus_BIG5_encoding_module_uninit(ei); 283 memset((void *)ei, 0, sizeof(*ei)); 284 TAILQ_INIT(&ei->excludes); 285 } 286 } 287 288 /* fallback Big5-1984, for backward compatibility. */ 289 _citrus_BIG5_fill_rowcol((void **)&ei, "row", 0xA1, 0xFE); 290 _citrus_BIG5_fill_rowcol((void **)&ei, "col", 0x40, 0x7E); 291 _citrus_BIG5_fill_rowcol((void **)&ei, "col", 0xA1, 0xFE); 292 293 return 0; 294 } 295 296 static int 297 /*ARGSUSED*/ 298 _citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei, 299 wchar_t * __restrict pwc, 300 const char ** __restrict s, size_t n, 301 _BIG5State * __restrict psenc, 302 size_t * __restrict nresult) 303 { 304 wchar_t wchar; 305 int c; 306 int chlenbak; 307 const char *s0; 308 309 _DIAGASSERT(nresult != 0); 310 _DIAGASSERT(ei != NULL); 311 _DIAGASSERT(psenc != NULL); 312 _DIAGASSERT(s != NULL && *s != NULL); 313 314 s0 = *s; 315 316 if (s0 == NULL) { 317 _citrus_BIG5_init_state(ei, psenc); 318 *nresult = 0; 319 return (0); 320 } 321 322 chlenbak = psenc->chlen; 323 324 /* make sure we have the first byte in the buffer */ 325 switch (psenc->chlen) { 326 case 0: 327 if (n < 1) 328 goto restart; 329 psenc->ch[0] = *s0++; 330 psenc->chlen = 1; 331 n--; 332 break; 333 case 1: 334 break; 335 default: 336 /* illegal state */ 337 goto ilseq; 338 } 339 340 c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff); 341 if (c == 0) 342 goto ilseq; 343 while (psenc->chlen < c) { 344 if (n < 1) { 345 goto restart; 346 } 347 psenc->ch[psenc->chlen] = *s0++; 348 psenc->chlen++; 349 n--; 350 } 351 352 switch (c) { 353 case 1: 354 wchar = psenc->ch[0] & 0xff; 355 break; 356 case 2: 357 if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff)) 358 goto ilseq; 359 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 360 break; 361 default: 362 /* illegal state */ 363 goto ilseq; 364 } 365 366 if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0) 367 goto ilseq; 368 369 *s = s0; 370 psenc->chlen = 0; 371 if (pwc) 372 *pwc = wchar; 373 if (!wchar) 374 *nresult = 0; 375 else 376 *nresult = c - chlenbak; 377 378 return (0); 379 380 ilseq: 381 psenc->chlen = 0; 382 *nresult = (size_t)-1; 383 return (EILSEQ); 384 385 restart: 386 *s = s0; 387 *nresult = (size_t)-2; 388 return (0); 389 } 390 391 static int 392 /*ARGSUSED*/ 393 _citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei, 394 char * __restrict s, 395 size_t n, wchar_t wc, _BIG5State * __restrict psenc, 396 size_t * __restrict nresult) 397 { 398 size_t l, ret; 399 400 _DIAGASSERT(ei != NULL); 401 _DIAGASSERT(nresult != 0); 402 _DIAGASSERT(s != NULL); 403 404 /* check invalid sequence */ 405 if (wc & ~0xffff || 406 _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) { 407 ret = EILSEQ; 408 goto err; 409 } 410 411 if (wc & 0x8000) { 412 if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 || 413 !_citrus_BIG5_check2(ei, wc & 0xff)) { 414 ret = EILSEQ; 415 goto err; 416 } 417 l = 2; 418 } else { 419 if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) { 420 ret = EILSEQ; 421 goto err; 422 } 423 l = 1; 424 } 425 426 if (n < l) { 427 /* bound check failure */ 428 ret = E2BIG; 429 goto err; 430 } 431 432 if (l == 2) { 433 s[0] = (wc >> 8) & 0xff; 434 s[1] = wc & 0xff; 435 } else 436 s[0] = wc & 0xff; 437 438 *nresult = l; 439 440 return 0; 441 442 err: 443 *nresult = (size_t)-1; 444 return ret; 445 } 446 447 static __inline int 448 /*ARGSUSED*/ 449 _citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei, 450 _csid_t * __restrict csid, 451 _index_t * __restrict idx, wchar_t wc) 452 { 453 454 _DIAGASSERT(csid != NULL && idx != NULL); 455 456 *csid = (wc < 0x100) ? 0 : 1; 457 *idx = (_index_t)wc; 458 459 return 0; 460 } 461 462 static __inline int 463 /*ARGSUSED*/ 464 _citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei, 465 wchar_t * __restrict wc, 466 _csid_t csid, _index_t idx) 467 { 468 _DIAGASSERT(wc != NULL); 469 470 switch (csid) { 471 case 0: 472 case 1: 473 *wc = (wchar_t)idx; 474 break; 475 default: 476 return EILSEQ; 477 } 478 479 return 0; 480 } 481 482 static __inline int 483 /*ARGSUSED*/ 484 _citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei, 485 _BIG5State * __restrict psenc, 486 int * __restrict rstate) 487 { 488 489 if (psenc->chlen == 0) 490 *rstate = _STDENC_SDGEN_INITIAL; 491 else 492 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 493 494 return 0; 495 } 496 497 /* ---------------------------------------------------------------------- 498 * public interface for ctype 499 */ 500 501 _CITRUS_CTYPE_DECLS(BIG5); 502 _CITRUS_CTYPE_DEF_OPS(BIG5); 503 504 #include "citrus_ctype_template.h" 505 506 507 /* ---------------------------------------------------------------------- 508 * public interface for stdenc 509 */ 510 511 _CITRUS_STDENC_DECLS(BIG5); 512 _CITRUS_STDENC_DEF_OPS(BIG5); 513 514 #include "citrus_stdenc_template.h" 515