1 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */ 2 3 /*- 4 * Copyright (c)2004, 2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 #if defined(LIB_SCCS) && !defined(lint) 32 __RCSID("$NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $"); 33 #endif /* LIB_SCCS and not lint */ 34 35 #include <sys/types.h> 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <stddef.h> 43 #include <wchar.h> 44 #include <limits.h> 45 46 #include "citrus_namespace.h" 47 #include "citrus_types.h" 48 #include "citrus_module.h" 49 #include "citrus_ctype.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_zw.h" 52 53 /* ---------------------------------------------------------------------- 54 * private stuffs used by templates 55 */ 56 57 typedef struct { 58 int dummy; 59 } _ZWEncodingInfo; 60 61 typedef enum { 62 NONE, AMBIGIOUS, ASCII, GB2312 63 } _ZWCharset; 64 65 typedef struct { 66 int chlen; 67 char ch[4]; 68 _ZWCharset charset; 69 } _ZWState; 70 71 typedef struct { 72 _ZWEncodingInfo ei; 73 struct { 74 /* for future multi-locale facility */ 75 _ZWState s_mblen; 76 _ZWState s_mbrlen; 77 _ZWState s_mbrtowc; 78 _ZWState s_mbtowc; 79 _ZWState s_mbsrtowcs; 80 _ZWState s_wcrtomb; 81 _ZWState s_wcsrtombs; 82 _ZWState s_wctomb; 83 } states; 84 } _ZWCTypeInfo; 85 86 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 87 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 88 89 #define _FUNCNAME(m) _citrus_ZW_##m 90 #define _ENCODING_INFO _ZWEncodingInfo 91 #define _CTYPE_INFO _ZWCTypeInfo 92 #define _ENCODING_STATE _ZWState 93 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 94 #define _ENCODING_IS_STATE_DEPENDENT 1 95 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 96 97 static __inline void 98 /*ARGSUSED*/ 99 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei, 100 _ZWState * __restrict psenc) 101 { 102 /* ei my be unused */ 103 _DIAGASSERT(psenc != NULL); 104 105 psenc->chlen = 0; 106 psenc->charset = NONE; 107 } 108 109 static __inline void 110 /*ARGSUSED*/ 111 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei, 112 void *__restrict pspriv, const _ZWState * __restrict psenc) 113 { 114 /* ei may be unused */ 115 _DIAGASSERT(pspriv != NULL); 116 _DIAGASSERT(psenc != NULL); 117 118 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 119 } 120 121 static __inline void 122 /*ARGSUSED*/ 123 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei, 124 _ZWState * __restrict psenc, const void * __restrict pspriv) 125 { 126 /* ei may be unused */ 127 _DIAGASSERT(psenc != NULL); 128 _DIAGASSERT(pspriv != NULL); 129 130 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 131 } 132 133 static int 134 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 135 wchar_t * __restrict pwc, const char **__restrict s, size_t n, 136 _ZWState * __restrict psenc, size_t * __restrict nresult) 137 { 138 const char *s0; 139 int ch, len; 140 wchar_t wc; 141 142 /* ei may be unused */ 143 /* pwc may be null */ 144 _DIAGASSERT(s != NULL); 145 _DIAGASSERT(psenc != NULL); 146 _DIAGASSERT(nresult != NULL); 147 148 if (*s == NULL) { 149 _citrus_ZW_init_state(ei, psenc); 150 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 151 return 0; 152 } 153 s0 = *s; 154 len = 0; 155 156 #define STORE \ 157 do { \ 158 if (n-- < 1) { \ 159 *nresult = (size_t)-2; \ 160 *s = s0; \ 161 return 0; \ 162 } \ 163 ch = (unsigned char)*s0++; \ 164 if (len++ > MB_LEN_MAX || ch > 0x7F)\ 165 goto ilseq; \ 166 psenc->ch[psenc->chlen++] = ch; \ 167 } while (/*CONSTCOND*/0) 168 169 loop: 170 switch (psenc->charset) { 171 case ASCII: 172 switch (psenc->chlen) { 173 case 0: 174 STORE; 175 switch (psenc->ch[0]) { 176 case '\0': case '\n': 177 psenc->charset = NONE; 178 } 179 /*FALLTHROUGH*/ 180 case 1: 181 break; 182 default: 183 return EINVAL; 184 } 185 ch = (unsigned char)psenc->ch[0]; 186 if (ch > 0x7F) 187 goto ilseq; 188 wc = (wchar_t)ch; 189 psenc->chlen = 0; 190 break; 191 case NONE: 192 if (psenc->chlen != 0) 193 return EINVAL; 194 STORE; 195 ch = (unsigned char)psenc->ch[0]; 196 if (ch != 'z') { 197 if (ch != '\n' && ch != '\0') 198 psenc->charset = ASCII; 199 wc = (wchar_t)ch; 200 psenc->chlen = 0; 201 break; 202 } 203 psenc->charset = AMBIGIOUS; 204 psenc->chlen = 0; 205 /* FALLTHROUGH */ 206 case AMBIGIOUS: 207 if (psenc->chlen != 0) 208 return EINVAL; 209 STORE; 210 if (psenc->ch[0] != 'W') { 211 psenc->charset = ASCII; 212 wc = L'z'; 213 break; 214 } 215 psenc->charset = GB2312; 216 psenc->chlen = 0; 217 /* FALLTHROUGH */ 218 case GB2312: 219 switch (psenc->chlen) { 220 case 0: 221 STORE; 222 ch = (unsigned char)psenc->ch[0]; 223 if (ch == '\0') { 224 psenc->charset = NONE; 225 wc = (wchar_t)ch; 226 psenc->chlen = 0; 227 break; 228 } else if (ch == '\n') { 229 psenc->charset = NONE; 230 psenc->chlen = 0; 231 goto loop; 232 } 233 /*FALLTHROUGH*/ 234 case 1: 235 STORE; 236 if (psenc->ch[0] == ' ') { 237 ch = (unsigned char)psenc->ch[1]; 238 wc = (wchar_t)ch; 239 psenc->chlen = 0; 240 break; 241 } else if (psenc->ch[0] == '#') { 242 ch = (unsigned char)psenc->ch[1]; 243 if (ch == '\n') { 244 psenc->charset = NONE; 245 wc = (wchar_t)ch; 246 psenc->chlen = 0; 247 break; 248 } else if (ch == ' ') { 249 wc = (wchar_t)ch; 250 psenc->chlen = 0; 251 break; 252 } 253 } 254 ch = (unsigned char)psenc->ch[0]; 255 if (ch < 0x21 || ch > 0x7E) 256 goto ilseq; 257 wc = (wchar_t)(ch << 8); 258 ch = (unsigned char)psenc->ch[1]; 259 if (ch < 0x21 || ch > 0x7E) { 260 ilseq: 261 *nresult = (size_t)-1; 262 return EILSEQ; 263 } 264 wc |= (wchar_t)ch; 265 psenc->chlen = 0; 266 break; 267 default: 268 return EINVAL; 269 } 270 break; 271 default: 272 return EINVAL; 273 } 274 if (pwc != NULL) 275 *pwc = wc; 276 277 *nresult = (size_t)(wc == 0 ? 0 : len); 278 *s = s0; 279 280 return 0; 281 } 282 283 static int 284 /*ARGSUSED*/ 285 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei, 286 char *__restrict s, size_t n, wchar_t wc, 287 _ZWState * __restrict psenc, size_t * __restrict nresult) 288 { 289 int ch; 290 291 /* ei may be null */ 292 _DIAGASSERT(s != NULL); 293 _DIAGASSERT(psenc != NULL); 294 _DIAGASSERT(nresult != NULL); 295 296 if (psenc->chlen != 0) 297 return EINVAL; 298 if ((uint32_t)wc <= 0x7F) { 299 ch = (unsigned char)wc; 300 switch (psenc->charset) { 301 case NONE: 302 if (ch == '\0' || ch == '\n') { 303 psenc->ch[psenc->chlen++] = ch; 304 } else { 305 if (n < 4) 306 return E2BIG; 307 n -= 4; 308 psenc->ch[psenc->chlen++] = 'z'; 309 psenc->ch[psenc->chlen++] = 'W'; 310 psenc->ch[psenc->chlen++] = ' '; 311 psenc->ch[psenc->chlen++] = ch; 312 psenc->charset = GB2312; 313 } 314 break; 315 case GB2312: 316 if (n < 2) 317 return E2BIG; 318 n -= 2; 319 if (ch == '\0') { 320 psenc->ch[psenc->chlen++] = '\n'; 321 psenc->ch[psenc->chlen++] = '\0'; 322 psenc->charset = NONE; 323 } else if (ch == '\n') { 324 psenc->ch[psenc->chlen++] = '#'; 325 psenc->ch[psenc->chlen++] = '\n'; 326 psenc->charset = NONE; 327 } else { 328 psenc->ch[psenc->chlen++] = ' '; 329 psenc->ch[psenc->chlen++] = ch; 330 } 331 break; 332 default: 333 return EINVAL; 334 } 335 } else if ((uint32_t)wc <= 0x7E7E) { 336 switch (psenc->charset) { 337 case NONE: 338 if (n < 2) 339 return E2BIG; 340 n -= 2; 341 psenc->ch[psenc->chlen++] = 'z'; 342 psenc->ch[psenc->chlen++] = 'W'; 343 psenc->charset = GB2312; 344 /* FALLTHROUGH*/ 345 case GB2312: 346 if (n < 2) 347 return E2BIG; 348 n -= 2; 349 ch = (wc >> 8) & 0xFF; 350 if (ch < 0x21 || ch > 0x7E) 351 goto ilseq; 352 psenc->ch[psenc->chlen++] = ch; 353 ch = wc & 0xFF; 354 if (ch < 0x21 || ch > 0x7E) 355 goto ilseq; 356 psenc->ch[psenc->chlen++] = ch; 357 break; 358 default: 359 return EINVAL; 360 } 361 } else { 362 ilseq: 363 *nresult = (size_t)-1; 364 return EILSEQ; 365 } 366 memcpy(s, psenc->ch, psenc->chlen); 367 *nresult = psenc->chlen; 368 psenc->chlen = 0; 369 370 return 0; 371 } 372 373 static int 374 /*ARGSUSED*/ 375 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei, 376 char * __restrict s, size_t n, 377 _ZWState * __restrict psenc, size_t * __restrict nresult) 378 { 379 /* ei may be unused */ 380 _DIAGASSERT(s != NULL); 381 _DIAGASSERT(psenc != NULL); 382 _DIAGASSERT(nresult != NULL); 383 384 if (psenc->chlen != 0) 385 return EINVAL; 386 switch (psenc->charset) { 387 case GB2312: 388 if (n-- < 1) 389 return E2BIG; 390 psenc->ch[psenc->chlen++] = '\n'; 391 psenc->charset = NONE; 392 /*FALLTHROUGH*/ 393 case NONE: 394 *nresult = psenc->chlen; 395 if (psenc->chlen > 0) { 396 memcpy(s, psenc->ch, psenc->chlen); 397 psenc->chlen = 0; 398 } 399 break; 400 default: 401 return EINVAL; 402 } 403 404 return 0; 405 } 406 407 static __inline int 408 /*ARGSUSED*/ 409 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei, 410 _ZWState * __restrict psenc, int * __restrict rstate) 411 { 412 /* ei may be unused */ 413 _DIAGASSERT(psenc != NULL); 414 _DIAGASSERT(rstate != NULL); 415 416 switch (psenc->charset) { 417 case NONE: 418 if (psenc->chlen != 0) 419 return EINVAL; 420 *rstate = _STDENC_SDGEN_INITIAL; 421 break; 422 case AMBIGIOUS: 423 if (psenc->chlen != 0) 424 return EINVAL; 425 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 426 break; 427 case ASCII: 428 case GB2312: 429 switch (psenc->chlen) { 430 case 0: 431 *rstate = _STDENC_SDGEN_STABLE; 432 break; 433 case 1: 434 *rstate = (psenc->ch[0] == '#') 435 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 436 : _STDENC_SDGEN_INCOMPLETE_CHAR; 437 break; 438 default: 439 return EINVAL; 440 } 441 break; 442 default: 443 return EINVAL; 444 } 445 return 0; 446 } 447 448 static __inline int 449 /*ARGSUSED*/ 450 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei, 451 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 452 { 453 /* ei seems to be unused */ 454 _DIAGASSERT(csid != NULL); 455 _DIAGASSERT(idx != NULL); 456 457 *csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1; 458 *idx = (_index_t)wc; 459 460 return 0; 461 } 462 463 static __inline int 464 /*ARGSUSED*/ 465 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei, 466 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 467 { 468 /* ei seems to be unused */ 469 _DIAGASSERT(wc != NULL); 470 471 switch (csid) { 472 case 0: case 1: 473 break; 474 default: 475 return EINVAL; 476 } 477 *wc = (wchar_t)idx; 478 479 return 0; 480 } 481 482 static void 483 /*ARGSUSED*/ 484 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei) 485 { 486 } 487 488 static int 489 /*ARGSUSED*/ 490 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei, 491 const void *__restrict var, size_t lenvar) 492 { 493 return 0; 494 } 495 496 /* ---------------------------------------------------------------------- 497 * public interface for ctype 498 */ 499 500 _CITRUS_CTYPE_DECLS(ZW); 501 _CITRUS_CTYPE_DEF_OPS(ZW); 502 503 #include "citrus_ctype_template.h" 504 505 /* ---------------------------------------------------------------------- 506 * public interface for stdenc 507 */ 508 509 _CITRUS_STDENC_DECLS(ZW); 510 _CITRUS_STDENC_DEF_OPS(ZW); 511 512 #include "citrus_stdenc_template.h" 513