1 /* $NetBSD: citrus_ctype_template.h,v 1.14 2002/05/24 04:04:30 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c)2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 */ 64 65 66 /* 67 * CAUTION: THIS IS NOT STANDALONE FILE 68 * 69 * function templates of ctype encoding handler for each encodings. 70 * 71 * you need to define the macros below: 72 * 73 * _FUNCNAME(method) : 74 * It should convine the real function name for the method. 75 * e.g. _FUNCNAME(mbrtowc) should be expanded to 76 * _EUC_ctype_mbrtowc 77 * for EUC locale. 78 * 79 * _CEI_TO_STATE(cei, method) : 80 * It should be expanded to the pointer of the method-internal state 81 * structures. 82 * e.g. _CEI_TO_STATE(cei, mbrtowc) might be expanded to 83 * (cei)->states.s_mbrtowc 84 * This structure may use if the function is called as 85 * mbrtowc(&wc, s, n, NULL); 86 * Such individual structures are needed by: 87 * mblen 88 * mbrlen 89 * mbrtowc 90 * mbtowc 91 * mbsrtowcs 92 * wcrtomb 93 * wcsrtombs 94 * wctomb 95 * These need to be keeped in the ctype encoding information structure, 96 * pointed by "cei". 97 * 98 * _ENCODING_INFO : 99 * It should be expanded to the name of the encoding information structure. 100 * e.g. For EUC encoding, this macro is expanded to _EUCInfo. 101 * Encoding information structure need to contain the common informations 102 * for the codeset. 103 * 104 * _ENCODING_STATE : 105 * It should be expanded to the name of the encoding state structure. 106 * e.g. For EUC encoding, this macro is expanded to _EUCState. 107 * Encoding state structure need to contain the context-dependent states, 108 * which are "unpacked-form" of mbstate_t type and keeped during sequent 109 * calls of mb/wc functions, 110 * 111 * _ENCODING_IS_STATE_DEPENDENT : 112 * If the encoding is state dependent, this should be expanded to 113 * non-zero integral value. Otherwise, 0. 114 * 115 * _STATE_NEEDS_EXPLICIT_INIT(ps) : 116 * If the encoding state pointed by "ps" needs to be initialized 117 * explicitly, return non-zero. Otherwize, 0. 118 * 119 */ 120 121 122 /* prototypes */ 123 124 __BEGIN_DECLS 125 static void _FUNCNAME(init_state)(_ENCODING_INFO * __restrict, 126 _ENCODING_STATE * __restrict); 127 static void _FUNCNAME(pack_state)(_ENCODING_INFO * __restrict, 128 void * __restrict, 129 const _ENCODING_STATE * __restrict); 130 static void _FUNCNAME(unpack_state)(_ENCODING_INFO * __restrict, 131 _ENCODING_STATE * __restrict, 132 const void * __restrict); 133 134 135 /* 136 * standard form of mbrtowc_priv. 137 * 138 * note (differences from real mbrtowc): 139 * - 3rd parameter is not "const char *s" but "const char **s". 140 * after the call of the function, *s will point the first byte of 141 * the next character. 142 * - additional 4th parameter is the size of src buffer. 143 * - 5th parameter is unpacked encoding-dependent state structure. 144 * - additional 6th parameter is the storage to be stored 145 * the return value in the real mbrtowc context. 146 * - return value means "errno" in the real mbrtowc context. 147 */ 148 149 static int _FUNCNAME(mbrtowc_priv)(_ENCODING_INFO * __restrict, 150 wchar_t * __restrict, 151 const char ** __restrict, 152 size_t, _ENCODING_STATE * __restrict, 153 size_t * __restrict); 154 155 /* 156 * standard form of wcrtomb_priv. 157 * 158 * note (differences from real wcrtomb): 159 * - additional 3th parameter is the size of src buffer. 160 * - 5th parameter is unpacked encoding-dependent state structure. 161 * - additional 6th parameter is the storage to be stored 162 * the return value in the real mbrtowc context. 163 * - return value means "errno" in the real wcrtomb context. 164 */ 165 166 static int _FUNCNAME(wcrtomb_priv)(_ENCODING_INFO * __restrict, 167 char * __restrict, size_t, wchar_t, 168 _ENCODING_STATE * __restrict, 169 size_t * __restrict); 170 __END_DECLS 171 172 173 /* 174 * macros 175 */ 176 177 #define _TO_CEI(_cl_) ((_CTYPE_INFO*)(_cl_)) 178 179 180 /* 181 * templates 182 */ 183 184 /* internal routines */ 185 186 static __inline int 187 _FUNCNAME(mbtowc_priv)(_ENCODING_INFO * __restrict ei, 188 wchar_t * __restrict pwc, const char * __restrict s, 189 size_t n, _ENCODING_STATE * __restrict psenc, 190 int * __restrict nresult) 191 { 192 _ENCODING_STATE state; 193 size_t nr; 194 int err = 0; 195 196 _DIAGASSERT(ei != NULL); 197 _DIAGASSERT(psenc != NULL); 198 199 if (s == NULL) { 200 *nresult = _ENCODING_IS_STATE_DEPENDENT; 201 return (0); 202 } 203 204 state = *psenc; 205 err = _FUNCNAME(mbrtowc_priv)(ei, pwc, (const char **)&s, n, psenc, &nr); 206 if (err) { 207 *nresult = -1; 208 return (err); 209 } 210 if (nr==(size_t)-2) { 211 *psenc = state; 212 *nresult = -1; 213 return (EILSEQ); 214 } 215 216 *nresult = (int)nr; 217 218 return (0); 219 } 220 221 static int 222 _FUNCNAME(mbsrtowcs_priv)(_ENCODING_INFO * __restrict ei, 223 wchar_t * __restrict pwcs, 224 const char ** __restrict s, 225 size_t n, _ENCODING_STATE * __restrict psenc, 226 size_t * __restrict nresult) 227 { 228 int err, cnt; 229 size_t siz; 230 const char *s0; 231 size_t mbcurmax; 232 233 _DIAGASSERT(nresult != 0); 234 _DIAGASSERT(ei != NULL); 235 _DIAGASSERT(psenc != NULL); 236 237 if (s == NULL || *s == NULL || n==0) { 238 *nresult = (size_t)-1; 239 return EILSEQ; 240 } 241 242 if (!pwcs) 243 n = 1; 244 245 cnt = 0; 246 s0 = *s; /* to keep *s unchanged for now, use copy instead. */ 247 mbcurmax = _ENCODING_MB_CUR_MAX(ei); 248 while (n > 0) { 249 err = _FUNCNAME(mbrtowc_priv)(ei, pwcs, &s0, mbcurmax, 250 psenc, &siz); 251 if (siz == (size_t)-2) 252 err = EILSEQ; 253 if (err) { 254 cnt = -1; 255 goto bye; 256 } 257 switch (siz) { 258 case 0: 259 if (pwcs) { 260 _FUNCNAME(init_state)(ei, psenc); 261 } 262 s0 = 0; 263 goto bye; 264 default: 265 if (pwcs) { 266 pwcs++; 267 n--; 268 } 269 cnt++; 270 break; 271 } 272 } 273 bye: 274 if (pwcs) 275 *s = s0; 276 277 *nresult = (size_t)cnt; 278 279 return err; 280 } 281 282 283 static int 284 _FUNCNAME(wcsrtombs_priv)(_ENCODING_INFO * __restrict ei, char * __restrict s, 285 const wchar_t ** __restrict pwcs, 286 size_t n, _ENCODING_STATE * __restrict psenc, 287 size_t * __restrict nresult) 288 { 289 int cnt = 0, err; 290 char buf[MB_LEN_MAX]; 291 size_t siz; 292 const wchar_t* pwcs0; 293 #if _ENCODING_IS_STATE_DEPENDENT 294 _ENCODING_STATE state; 295 #endif 296 297 pwcs0 = *pwcs; 298 299 if (!s) 300 n = 1; 301 302 while (n > 0) { 303 #if _ENCODING_IS_STATE_DEPENDENT 304 state = *psenc; 305 #endif 306 err = _FUNCNAME(wcrtomb_priv)(ei, buf, sizeof(buf), 307 *pwcs0, psenc, &siz); 308 if (siz == (size_t)-1) { 309 *nresult = siz; 310 return (err); 311 } 312 313 if (s) { 314 if (n < siz) { 315 #if _ENCODING_IS_STATE_DEPENDENT 316 *psenc = state; 317 #endif 318 break; 319 } 320 memcpy(s, buf, siz); 321 s += siz; 322 n -= siz; 323 } 324 cnt += siz; 325 if (!*pwcs0) { 326 if (s) { 327 _FUNCNAME(init_state)(ei, psenc); 328 } 329 pwcs0 = 0; 330 cnt--; /* don't include terminating null */ 331 break; 332 } 333 pwcs0++; 334 } 335 if (s) 336 *pwcs = pwcs0; 337 338 *nresult = (size_t)cnt; 339 return (0); 340 } 341 342 343 /* ---------------------------------------------------------------------- 344 * templates for public functions 345 */ 346 347 #define _RESTART_BEGIN(_func_, _cei_, _pspriv_, _pse_) \ 348 do { \ 349 _ENCODING_STATE _state; \ 350 do { \ 351 if (_pspriv_ == NULL) { \ 352 _pse_ = &_CEI_TO_STATE(_cei_, _func_); \ 353 if (_STATE_NEEDS_EXPLICIT_INIT(_pse_)) \ 354 _FUNCNAME(init_state)(_CEI_TO_EI(_cei_), \ 355 psenc); \ 356 } else { \ 357 _pse_ = &_state; \ 358 _FUNCNAME(unpack_state)(_CEI_TO_EI(_cei_), \ 359 _pse_, _pspriv_); \ 360 } \ 361 } while (/*CONSTCOND*/0) 362 363 #define _RESTART_END(_func_, _cei_, _pspriv_, _pse_) \ 364 if (_pspriv_ != NULL) { \ 365 _FUNCNAME(pack_state)(_CEI_TO_EI(_cei_), _pspriv_, \ 366 _pse_); \ 367 } \ 368 } while (/*CONSTCOND*/0) 369 370 int 371 _FUNCNAME(ctype_getops)(_citrus_ctype_ops_rec_t *ops, size_t lenops, 372 u_int32_t expected_version) 373 { 374 if (expected_version<_CITRUS_CTYPE_ABI_VERSION || lenops<sizeof(*ops)) 375 return (EINVAL); 376 377 memcpy(ops, &_FUNCNAME(ctype_ops), sizeof(_FUNCNAME(ctype_ops))); 378 379 return (0); 380 } 381 382 static int 383 _FUNCNAME(ctype_init)(void ** __restrict cl, 384 void * __restrict var, size_t lenvar, size_t lenps) 385 { 386 _CTYPE_INFO *cei; 387 388 _DIAGASSERT(cl != NULL); 389 390 /* sanity check to avoid overruns */ 391 if (sizeof(_ENCODING_STATE) > lenps) 392 return (EINVAL); 393 394 cei = calloc(1, sizeof(_CTYPE_INFO)); 395 if (cei == NULL) 396 return (ENOMEM); 397 398 *cl = (void *)cei; 399 400 return _FUNCNAME(stdencoding_init)(_CEI_TO_EI(cei), var, lenvar); 401 } 402 403 static void 404 _FUNCNAME(ctype_uninit)(void *cl) 405 { 406 if (cl) { 407 _FUNCNAME(stdencoding_uninit)(_CEI_TO_EI(_TO_CEI(cl))); 408 free(cl); 409 } 410 } 411 412 static unsigned 413 /*ARGSUSED*/ 414 _FUNCNAME(ctype_get_mb_cur_max)(void *cl) 415 { 416 return _ENCODING_MB_CUR_MAX(_CEI_TO_EI(_TO_CEI(cl))); 417 } 418 419 static int 420 _FUNCNAME(ctype_mblen)(void * __restrict cl, 421 const char * __restrict s, size_t n, 422 int * __restrict nresult) 423 { 424 425 _DIAGASSERT(cl != NULL); 426 427 return _FUNCNAME(mbtowc_priv)(_CEI_TO_EI(_TO_CEI(cl)), NULL, s, n, 428 &_CEI_TO_STATE(_TO_CEI(cl), mblen), 429 nresult); 430 } 431 432 static int 433 _FUNCNAME(ctype_mbrlen)(void * __restrict cl, const char * __restrict s, 434 size_t n, void * __restrict pspriv, 435 size_t * __restrict nresult) 436 { 437 _ENCODING_STATE *psenc; 438 int err = 0; 439 440 _DIAGASSERT(cl != NULL); 441 442 _RESTART_BEGIN(mbrlen, _TO_CEI(cl), pspriv, psenc); 443 if (s == NULL) { 444 _FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), psenc); 445 *nresult = 0; 446 } else { 447 err = _FUNCNAME(mbrtowc_priv)( 448 cl, NULL, (const char **)&s, n, (void *)psenc, nresult); 449 } 450 _RESTART_END(mbrlen, _TO_CEI(cl), pspriv, psenc); 451 452 return (err); 453 } 454 455 static int 456 _FUNCNAME(ctype_mbrtowc)(void * __restrict cl, wchar_t * __restrict pwc, 457 const char * __restrict s, size_t n, 458 void * __restrict pspriv, size_t * __restrict nresult) 459 { 460 _ENCODING_STATE *psenc; 461 int err = 0; 462 463 _DIAGASSERT(cl != NULL); 464 465 _RESTART_BEGIN(mbrtowc, _TO_CEI(cl), pspriv, psenc); 466 if (s == NULL) { 467 _FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), psenc); 468 *nresult = 0; 469 } else { 470 err = _FUNCNAME(mbrtowc_priv)( 471 cl, pwc, (const char **)&s, n, (void *)psenc, nresult); 472 } 473 _RESTART_END(mbrtowc, _TO_CEI(cl), pspriv, psenc); 474 475 return (err); 476 } 477 478 static int 479 /*ARGSUSED*/ 480 _FUNCNAME(ctype_mbsinit)(void * __restrict cl, const void * __restrict pspriv, 481 int * __restrict nresult) 482 { 483 _ENCODING_STATE state; 484 485 if (pspriv == NULL) { 486 *nresult = 1; 487 return (0); 488 } 489 490 _FUNCNAME(unpack_state)(_CEI_TO_EI(_TO_CEI(cl)), &state, pspriv); 491 492 *nresult = (state.chlen == 0); /* XXX: FIXME */ 493 494 return (0); 495 } 496 497 static int 498 _FUNCNAME(ctype_mbsrtowcs)(void * __restrict cl, wchar_t * __restrict pwcs, 499 const char ** __restrict s, size_t n, 500 void * __restrict pspriv, 501 size_t * __restrict nresult) 502 { 503 _ENCODING_STATE *psenc; 504 int err = 0; 505 506 _DIAGASSERT(cl != NULL); 507 508 _RESTART_BEGIN(mbsrtowcs, _TO_CEI(cl), pspriv, psenc); 509 err = _FUNCNAME(mbsrtowcs_priv)(cl, pwcs, s, n, psenc, nresult); 510 _RESTART_END(mbsrtowcs, _TO_CEI(cl), pspriv, psenc); 511 512 return (err); 513 } 514 515 static int 516 _FUNCNAME(ctype_mbstowcs)(void * __restrict cl, wchar_t * __restrict pwcs, 517 const char * __restrict s, size_t n, 518 size_t * __restrict nresult) 519 { 520 int err; 521 _ENCODING_STATE state; 522 523 _DIAGASSERT(cl != NULL); 524 525 _FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), &state); 526 err = _FUNCNAME(mbsrtowcs_priv)(cl, pwcs, (const char **)&s, n, &state, nresult); 527 if (*nresult == (size_t)-2) { 528 err = EILSEQ; 529 *nresult = (size_t)-1; 530 } 531 532 return (err); 533 } 534 535 static int 536 _FUNCNAME(ctype_mbtowc)(void * __restrict cl, wchar_t * __restrict pwc, 537 const char * __restrict s, size_t n, 538 int * __restrict nresult) 539 { 540 541 _DIAGASSERT(cl != NULL); 542 543 return _FUNCNAME(mbtowc_priv)(cl, pwc, s, n, 544 &_CEI_TO_STATE(_TO_CEI(cl), mbtowc), 545 nresult); 546 } 547 548 static int 549 _FUNCNAME(ctype_wcrtomb)(void * __restrict cl, char * __restrict s, wchar_t wc, 550 void * __restrict pspriv, size_t * __restrict nresult) 551 { 552 _ENCODING_STATE *psenc; 553 int err = 0; 554 555 _DIAGASSERT(cl != NULL); 556 557 _RESTART_BEGIN(wcrtomb, _TO_CEI(cl), pspriv, psenc); 558 err = _FUNCNAME(wcrtomb_priv)(_CEI_TO_EI(_TO_CEI(cl)), s, 559 _ENCODING_MB_CUR_MAX(_CEI_TO_EI(_TO_CEI(cl))), 560 wc, psenc, nresult); 561 _RESTART_END(wcrtomb, _TO_CEI(cl), pspriv, psenc); 562 563 return err; 564 } 565 566 static int 567 /*ARGSUSED*/ 568 _FUNCNAME(ctype_wcsrtombs)(void * __restrict cl, char * __restrict s, 569 const wchar_t ** __restrict pwcs, size_t n, 570 void * __restrict pspriv, 571 size_t * __restrict nresult) 572 { 573 _ENCODING_STATE *psenc; 574 int err = 0; 575 576 _DIAGASSERT(cl != NULL); 577 578 _RESTART_BEGIN(wcsrtombs, _TO_CEI(cl), pspriv, psenc); 579 err = _FUNCNAME(wcsrtombs_priv)(cl, s, pwcs, n, psenc, nresult); 580 _RESTART_END(wcsrtombs, _TO_CEI(cl), pspriv, psenc); 581 582 return err; 583 } 584 585 static int 586 /*ARGSUSED*/ 587 _FUNCNAME(ctype_wcstombs)(void * __restrict cl, char * __restrict s, 588 const wchar_t * __restrict pwcs, size_t n, 589 size_t * __restrict nresult) 590 { 591 _ENCODING_STATE state; 592 int err; 593 594 _DIAGASSERT(cl != NULL); 595 596 _FUNCNAME(init_state)(_CEI_TO_EI(_TO_CEI(cl)), &state); 597 err = _FUNCNAME(wcsrtombs_priv)(cl, s, (const wchar_t **)&pwcs, n, 598 &state, nresult); 599 600 return err; 601 } 602 603 static int 604 _FUNCNAME(ctype_wctomb)(void * __restrict cl, char * __restrict s, wchar_t wc, 605 int * __restrict nresult) 606 { 607 size_t nr; 608 int err = 0; 609 char s0[MB_LEN_MAX]; 610 611 _DIAGASSERT(cl != NULL); 612 613 if (s==NULL) 614 s = s0; 615 616 err = _FUNCNAME(wcrtomb_priv)(cl, s, 617 _ENCODING_MB_CUR_MAX(_CEI_TO_EI(_TO_CEI(cl))), 618 wc, &_CEI_TO_STATE(_TO_CEI(cl), wctomb), &nr); 619 *nresult = (int)nr; 620 621 return 0; 622 } 623