1 /* $NetBSD: citrus_iso2022.c,v 1.6 2002/03/28 10:53:49 yamt Exp $ */ 2 3 /*- 4 * Copyright (c)1999, 2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31 #include <sys/cdefs.h> 32 #if defined(LIBC_SCCS) && !defined(lint) 33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.6 2002/03/28 10:53:49 yamt Exp $"); 34 #endif /* LIBC_SCCS and not lint */ 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stddef.h> 42 #include <locale.h> 43 #include <wchar.h> 44 #include <sys/types.h> 45 #include <limits.h> 46 #include "citrus_module.h" 47 #include "citrus_ctype.h" 48 #include "citrus_iso2022.h" 49 50 51 /* ---------------------------------------------------------------------- 52 * private stuffs used by templates 53 */ 54 55 56 /* 57 * wchar_t mappings: 58 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 59 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 60 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 61 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 62 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 63 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 64 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 65 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 66 * 94x94 charset (ESC & V ESC $ ( F) 67 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 68 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 69 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 70 */ 71 72 typedef struct { 73 u_char type; 74 #define CS94 (0U) 75 #define CS96 (1U) 76 #define CS94MULTI (2U) 77 #define CS96MULTI (3U) 78 79 u_char final; 80 u_char interm; 81 u_char vers; 82 } _ISO2022Charset; 83 84 typedef struct { 85 _ISO2022Charset g[4]; 86 /* need 3 bits to hold -1, 0, ..., 3 */ 87 int gl:3, 88 gr:3, 89 singlegl:3, 90 singlegr:3; 91 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 92 int chlen; 93 int flags; 94 #define _ISO2022STATE_FLAG_INITIALIZED 1 95 } _ISO2022State; 96 97 typedef struct { 98 _ISO2022Charset *recommend[4]; 99 size_t recommendsize[4]; 100 _ISO2022Charset initg[4]; 101 int maxcharset; 102 int flags; 103 #define F_8BIT 0x0001 104 #define F_NOOLD 0x0002 105 #define F_SI 0x0010 /*0F*/ 106 #define F_SO 0x0020 /*0E*/ 107 #define F_LS0 0x0010 /*0F*/ 108 #define F_LS1 0x0020 /*0E*/ 109 #define F_LS2 0x0040 /*ESC n*/ 110 #define F_LS3 0x0080 /*ESC o*/ 111 #define F_LS1R 0x0100 /*ESC ~*/ 112 #define F_LS2R 0x0200 /*ESC }*/ 113 #define F_LS3R 0x0400 /*ESC |*/ 114 #define F_SS2 0x0800 /*ESC N*/ 115 #define F_SS3 0x1000 /*ESC O*/ 116 #define F_SS2R 0x2000 /*8E*/ 117 #define F_SS3R 0x4000 /*8F*/ 118 } _ISO2022EncodingInfo; 119 typedef struct { 120 _ISO2022EncodingInfo ei; 121 struct { 122 /* for future multi-locale facility */ 123 _ISO2022State s_mblen; 124 _ISO2022State s_mbrlen; 125 _ISO2022State s_mbrtowc; 126 _ISO2022State s_mbtowc; 127 _ISO2022State s_mbsrtowcs; 128 _ISO2022State s_wcrtomb; 129 _ISO2022State s_wcsrtombs; 130 _ISO2022State s_wctomb; 131 } states; 132 } _ISO2022CTypeInfo; 133 134 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 135 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 136 137 #define _FUNCNAME(m) _citrus_ISO2022_##m 138 #define _ENCODING_INFO _ISO2022EncodingInfo 139 #define _CTYPE_INFO _ISO2022CTypeInfo 140 #define _ENCODING_STATE _ISO2022State 141 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 142 #define _ENCODING_IS_STATE_DEPENDENT 1 143 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 144 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 145 146 147 #define _ISO2022INVALID (wchar_t)-1 148 149 static __inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 150 static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 151 static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 152 static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 153 static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 154 static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 155 static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 156 static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 157 158 static __inline int 159 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 160 { 161 162 _DIAGASSERT(p != NULL); 163 _DIAGASSERT(cs != NULL); 164 165 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 166 cs->final = (u_char)(p[3] & 0xff); 167 cs->interm = '\0'; 168 cs->vers = '\0'; 169 cs->type = CS94MULTI; 170 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 171 cs->final = (u_char)(p[3] & 0xff); 172 cs->interm = '\0'; 173 cs->vers = '\0'; 174 cs->type = CS96MULTI; 175 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 176 cs->final = (u_char)(p[2] & 0xff); 177 cs->interm = '\0'; 178 cs->vers = '\0'; 179 cs->type = CS94; 180 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 181 cs->final = (u_char )(p[2] & 0xff); 182 cs->interm = '\0'; 183 cs->vers = '\0'; 184 cs->type = CS96; 185 } else { 186 return 1; 187 } 188 189 return 0; 190 } 191 192 193 #define _NOTMATCH 0 194 #define _MATCH 1 195 #define _PARSEFAIL 2 196 197 static __inline int 198 get_recommend(_ISO2022EncodingInfo * __restrict ei, 199 const char * __restrict token) 200 { 201 int i; 202 _ISO2022Charset cs; 203 204 if (!strchr("0123", token[0]) || token[1] != '=') 205 return (_NOTMATCH); 206 207 if (getcs(&token[2], &cs) == 0) 208 ; 209 else if (!strcmp(&token[2], "94")) { 210 cs.final = (u_char)(token[4]); 211 cs.interm = '\0'; 212 cs.vers = '\0'; 213 cs.type = CS94; 214 } else if (!strcmp(&token[2], "96")) { 215 cs.final = (u_char)(token[4]); 216 cs.interm = '\0'; 217 cs.vers = '\0'; 218 cs.type = CS96; 219 } else if (!strcmp(&token[2], "94$")) { 220 cs.final = (u_char)(token[5]); 221 cs.interm = '\0'; 222 cs.vers = '\0'; 223 cs.type = CS94MULTI; 224 } else if (!strcmp(&token[2], "96$")) { 225 cs.final = (u_char)(token[5]); 226 cs.interm = '\0'; 227 cs.vers = '\0'; 228 cs.type = CS96MULTI; 229 } else { 230 return (_PARSEFAIL); 231 } 232 233 i = token[0] - '0'; 234 ei->recommendsize[i] += 1; 235 if (!ei->recommend[i]) { 236 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 237 } else { 238 ei->recommend[i] = 239 realloc(ei->recommend[i], 240 sizeof(_ISO2022Charset)* (ei->recommendsize[i])); 241 } 242 if (!ei->recommend[i]) 243 return (_PARSEFAIL); 244 245 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 246 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 247 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 248 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 249 250 return (_MATCH); 251 } 252 253 static __inline int 254 get_initg(_ISO2022EncodingInfo * __restrict ei, 255 const char * __restrict token) 256 { 257 _ISO2022Charset cs; 258 259 if (strncmp("INIT", &token[0], 4) || 260 !strchr("0123", token[4]) || 261 token[5] != '=') 262 return (_NOTMATCH); 263 264 if (getcs(&token[6], &cs) != 0) 265 return (_PARSEFAIL); 266 267 ei->initg[token[4] - '0'].type = cs.type; 268 ei->initg[token[4] - '0'].final = cs.final; 269 ei->initg[token[4] - '0'].interm = cs.interm; 270 ei->initg[token[4] - '0'].vers = cs.vers; 271 272 return (_MATCH); 273 } 274 275 static __inline int 276 get_max(_ISO2022EncodingInfo * __restrict ei, 277 const char * __restrict token) 278 { 279 if (!strcmp(token, "MAX1")) { 280 ei->maxcharset = 1; 281 } else if (!strcmp(token, "MAX2")) { 282 ei->maxcharset = 2; 283 } else if (!strcmp(token, "MAX3")) { 284 ei->maxcharset = 3; 285 } else 286 return (_NOTMATCH); 287 288 return (_MATCH); 289 } 290 291 292 static __inline int 293 get_flags(_ISO2022EncodingInfo * __restrict ei, 294 const char * __restrict token) 295 { 296 int i; 297 static struct { 298 const char *tag; 299 int flag; 300 } const tags[] = { 301 { "DUMMY", 0 }, 302 { "8BIT", F_8BIT }, 303 { "NOOLD", F_NOOLD }, 304 { "SI", F_SI }, 305 { "SO", F_SO }, 306 { "LS0", F_LS0 }, 307 { "LS1", F_LS1 }, 308 { "LS2", F_LS2 }, 309 { "LS3", F_LS3 }, 310 { "LS1R", F_LS1R }, 311 { "LS2R", F_LS2R }, 312 { "LS3R", F_LS3R }, 313 { "SS2", F_SS2 }, 314 { "SS3", F_SS3 }, 315 { "SS2R", F_SS2R }, 316 { "SS3R", F_SS3R }, 317 { NULL, 0 } 318 }; 319 320 for (i = 0; tags[i].tag; i++) { 321 if (!strcmp(token, tags[i].tag)) { 322 ei->flags |= tags[i].flag; 323 return (_MATCH); 324 } 325 } 326 327 return (_NOTMATCH); 328 } 329 330 331 static __inline int 332 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 333 const void * __restrict var, size_t lenvar) 334 { 335 char const *v, *e; 336 char buf[20]; 337 int i, len, ret; 338 339 _DIAGASSERT(ei != NULL); 340 341 342 /* 343 * parse VARIABLE section. 344 */ 345 346 if (!var) 347 return (EFTYPE); 348 349 v = (const char *) var; 350 351 /* initialize structure */ 352 ei->maxcharset = 0; 353 for (i = 0; i < 4; i++) { 354 ei->recommend[i] = NULL; 355 ei->recommendsize[i] = 0; 356 } 357 ei->flags = 0; 358 359 while (*v) { 360 while (*v == ' ' || *v == '\t') 361 ++v; 362 363 /* find the token */ 364 e = v; 365 while (*e && *e != ' ' && *e != '\t') 366 ++e; 367 if (*e) { 368 len = e-v; 369 if (len>=sizeof(buf)) 370 goto parsefail; 371 sprintf(buf, "%.*s", len, v); 372 ++e; 373 } 374 375 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 376 ; 377 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 378 ; 379 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 380 ; 381 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 382 ; 383 else 384 ret = _PARSEFAIL; 385 if (ret==_PARSEFAIL) 386 goto parsefail; 387 v = e; 388 389 } 390 391 return (0); 392 393 parsefail: 394 free(ei->recommend[0]); 395 free(ei->recommend[1]); 396 free(ei->recommend[2]); 397 free(ei->recommend[3]); 398 399 return (EFTYPE); 400 } 401 402 static __inline void 403 /*ARGSUSED*/ 404 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 405 _ISO2022State * __restrict s) 406 { 407 int i; 408 409 memset(s, 0, sizeof(*s)); 410 s->gl = 0; 411 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 412 413 for (i = 0; i < 4; i++) { 414 if (ei->initg[i].final) { 415 s->g[i].type = ei->initg[i].type; 416 s->g[i].final = ei->initg[i].final; 417 s->g[i].interm = ei->initg[i].interm; 418 } 419 } 420 s->singlegl = s->singlegr = -1; 421 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 422 } 423 424 static __inline void 425 /*ARGSUSED*/ 426 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 427 void * __restrict pspriv, 428 const _ISO2022State * __restrict s) 429 { 430 memcpy(pspriv, (const void *)s, sizeof(*s)); 431 } 432 433 static __inline void 434 /*ARGSUSED*/ 435 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 436 _ISO2022State * __restrict s, 437 const void * __restrict pspriv) 438 { 439 memcpy((void *)s, pspriv, sizeof(*s)); 440 } 441 442 static int 443 /*ARGSUSED*/ 444 _citrus_ISO2022_stdencoding_init(_ISO2022EncodingInfo * __restrict ei, 445 const void * __restrict var, size_t lenvar) 446 { 447 448 _DIAGASSERT(ei != NULL); 449 450 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 451 } 452 453 static void 454 /*ARGSUSED*/ 455 _citrus_ISO2022_stdencoding_uninit(_ISO2022EncodingInfo *ei) 456 { 457 } 458 459 #define ESC '\033' 460 #define ECMA -1 461 #define INTERM -2 462 #define OECMA -3 463 static struct seqtable { 464 int type; 465 int csoff; 466 int finaloff; 467 int intermoff; 468 int versoff; 469 int len; 470 int chars[10]; 471 } seqtable[] = { 472 /* G0 94MULTI special */ 473 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 474 /* G0 94MULTI special with version identification */ 475 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 476 /* G? 94 */ 477 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 478 /* G? 94 with 2nd intermediate char */ 479 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 480 /* G? 96 */ 481 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 482 /* G? 96 with 2nd intermediate char */ 483 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 484 /* G? 94MULTI */ 485 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 486 /* G? 96MULTI */ 487 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 488 /* G? 94MULTI with version specification */ 489 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 490 /* LS2/3 */ 491 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 492 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 493 /* LS1/2/3R */ 494 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 495 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 496 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 497 /* SS2/3 */ 498 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 499 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 500 /* end of records */ 501 { 0, } 502 }; 503 504 static int 505 seqmatch(const char * __restrict s, size_t n, 506 const struct seqtable * __restrict sp) 507 { 508 const int *p; 509 510 _DIAGASSERT(s != NULL); 511 _DIAGASSERT(sp != NULL); 512 513 p = sp->chars; 514 while (p - sp->chars < n && p - sp->chars < sp->len) { 515 switch (*p) { 516 case ECMA: 517 if (!isecma(*s)) 518 goto terminate; 519 break; 520 case OECMA: 521 if (*s && strchr("@AB", *s)) 522 break; 523 else 524 goto terminate; 525 case INTERM: 526 if (!isinterm(*s)) 527 goto terminate; 528 break; 529 case CS94: 530 if (*s && strchr("()*+", *s)) 531 break; 532 else 533 goto terminate; 534 case CS96: 535 if (*s && strchr(",-./", *s)) 536 break; 537 else 538 goto terminate; 539 default: 540 if (*s != *p) 541 goto terminate; 542 break; 543 } 544 545 p++; 546 s++; 547 } 548 549 terminate: 550 return p - sp->chars; 551 } 552 553 static wchar_t 554 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 555 const char * __restrict string, size_t n, 556 const char ** __restrict result, 557 _ISO2022State * __restrict psenc) 558 { 559 wchar_t wchar = 0; 560 int cur; 561 struct seqtable *sp; 562 int nmatch; 563 int i; 564 565 _DIAGASSERT(ei != NULL); 566 _DIAGASSERT(state != NULL); 567 _DIAGASSERT(string != NULL); 568 /* result may be NULL */ 569 570 while (1) { 571 /* SI/SO */ 572 if (1 <= n && string[0] == '\017') { 573 psenc->gl = 0; 574 string++; 575 n--; 576 continue; 577 } 578 if (1 <= n && string[0] == '\016') { 579 psenc->gl = 1; 580 string++; 581 n--; 582 continue; 583 } 584 585 /* SS2/3R */ 586 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 587 psenc->singlegl = psenc->singlegr = 588 (string[0] - '\216') + 2; 589 string++; 590 n--; 591 continue; 592 } 593 594 /* eat the letter if this is not ESC */ 595 if (1 <= n && string[0] != '\033') 596 break; 597 598 /* look for a perfect match from escape sequences */ 599 for (sp = &seqtable[0]; sp->len; sp++) { 600 nmatch = seqmatch(string, n, sp); 601 if (sp->len == nmatch && n >= sp->len) 602 break; 603 } 604 605 if (!sp->len) 606 goto notseq; 607 608 if (sp->type != -1) { 609 if (sp->csoff == -1) 610 i = 0; 611 else { 612 switch (sp->type) { 613 case CS94: 614 case CS94MULTI: 615 i = string[sp->csoff] - '('; 616 break; 617 case CS96: 618 case CS96MULTI: 619 i = string[sp->csoff] - ','; 620 break; 621 } 622 } 623 psenc->g[i].type = sp->type; 624 psenc->g[i].final = '\0'; 625 psenc->g[i].interm = '\0'; 626 psenc->g[i].vers = '\0'; 627 /* sp->finaloff must not be -1 */ 628 if (sp->finaloff != -1) 629 psenc->g[i].final = string[sp->finaloff]; 630 if (sp->intermoff != -1) 631 psenc->g[i].interm = string[sp->intermoff]; 632 if (sp->versoff != -1) 633 psenc->g[i].vers = string[sp->versoff]; 634 635 string += sp->len; 636 n -= sp->len; 637 continue; 638 } 639 640 /* LS2/3 */ 641 if (2 <= n && string[0] == '\033' 642 && string[1] && strchr("no", string[1])) { 643 psenc->gl = string[1] - 'n' + 2; 644 string += 2; 645 n -= 2; 646 continue; 647 } 648 649 /* LS1/2/3R */ 650 /* XXX: { for vi showmatch */ 651 if (2 <= n && string[0] == '\033' 652 && string[1] && strchr("~}|", string[1])) { 653 psenc->gr = 3 - (string[1] - '|'); 654 string += 2; 655 n -= 2; 656 continue; 657 } 658 659 /* SS2/3 */ 660 if (2 <= n && string[0] == '\033' 661 && string[1] && strchr("NO", string[1])) { 662 psenc->singlegl = (string[1] - 'N') + 2; 663 string += 2; 664 n -= 2; 665 continue; 666 } 667 668 notseq: 669 /* 670 * if we've got an unknown escape sequence, eat the ESC at the 671 * head. otherwise, wait till full escape sequence comes. 672 */ 673 for (sp = &seqtable[0]; sp->len; sp++) { 674 nmatch = seqmatch(string, n, sp); 675 if (!nmatch) 676 continue; 677 678 /* 679 * if we are in the middle of escape sequence, 680 * we still need to wait for more characters to come 681 */ 682 if (n < sp->len) { 683 if (nmatch == n) { 684 if (result) 685 *result = string; 686 return (_ISO2022INVALID); 687 } 688 } else { 689 if (nmatch == sp->len) { 690 /* this case should not happen */ 691 goto eat; 692 } 693 } 694 } 695 696 break; 697 } 698 699 eat: 700 /* no letter to eat */ 701 if (n < 1) { 702 if (result) 703 *result = string; 704 return (_ISO2022INVALID); 705 } 706 707 /* normal chars. always eat C0/C1 as is. */ 708 if (iscntl(*string & 0xff)) 709 cur = -1; 710 else if (*string & 0x80) { 711 cur = (psenc->singlegr == -1) 712 ? psenc->gr : psenc->singlegr; 713 } else { 714 cur = (psenc->singlegl == -1) 715 ? psenc->gl : psenc->singlegl; 716 } 717 718 if (cur == -1) { 719 asis: 720 wchar = *string++ & 0xff; 721 if (result) 722 *result = string; 723 /* reset single shift state */ 724 psenc->singlegr = psenc->singlegl = -1; 725 return wchar; 726 } 727 728 /* length error check */ 729 switch (psenc->g[cur].type) { 730 case CS94MULTI: 731 case CS96MULTI: 732 if (!isthree(psenc->g[cur].final)) { 733 if (2 <= n 734 && (string[0] & 0x80) == (string[1] & 0x80)) 735 break; 736 } else { 737 if (3 <= n 738 && (string[0] & 0x80) == (string[1] & 0x80) 739 && (string[0] & 0x80) == (string[2] & 0x80)) 740 break; 741 } 742 743 /* we still need to wait for more characters to come */ 744 if (result) 745 *result = string; 746 return (_ISO2022INVALID); 747 748 case CS94: 749 case CS96: 750 if (1 <= n) 751 break; 752 753 /* we still need to wait for more characters to come */ 754 if (result) 755 *result = string; 756 return (_ISO2022INVALID); 757 } 758 759 /* range check */ 760 switch (psenc->g[cur].type) { 761 case CS94: 762 if (!(is94(string[0] & 0x7f))) 763 goto asis; 764 case CS96: 765 if (!(is96(string[0] & 0x7f))) 766 goto asis; 767 break; 768 case CS94MULTI: 769 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 770 goto asis; 771 break; 772 case CS96MULTI: 773 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 774 goto asis; 775 break; 776 } 777 778 /* extract the character. */ 779 switch (psenc->g[cur].type) { 780 case CS94: 781 /* special case for ASCII. */ 782 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 783 wchar = *string++; 784 wchar &= 0x7f; 785 break; 786 } 787 wchar = psenc->g[cur].final; 788 wchar = (wchar << 8); 789 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 790 wchar = (wchar << 8); 791 wchar = (wchar << 8) | (*string++ & 0x7f); 792 break; 793 case CS96: 794 /* special case for ISO-8859-1. */ 795 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 796 wchar = *string++; 797 wchar &= 0x7f; 798 wchar |= 0x80; 799 break; 800 } 801 wchar = psenc->g[cur].final; 802 wchar = (wchar << 8); 803 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 804 wchar = (wchar << 8); 805 wchar = (wchar << 8) | (*string++ & 0x7f); 806 wchar |= 0x80; 807 break; 808 case CS94MULTI: 809 case CS96MULTI: 810 wchar = psenc->g[cur].final; 811 wchar = (wchar << 8); 812 if (isthree(psenc->g[cur].final)) 813 wchar |= (*string++ & 0x7f); 814 wchar = (wchar << 8) | (*string++ & 0x7f); 815 wchar = (wchar << 8) | (*string++ & 0x7f); 816 if (psenc->g[cur].type == CS96MULTI) 817 wchar |= 0x80; 818 break; 819 } 820 821 if (result) 822 *result = string; 823 /* reset single shift state */ 824 psenc->singlegr = psenc->singlegl = -1; 825 return wchar; 826 } 827 828 829 830 static int 831 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 832 wchar_t * __restrict pwc, 833 const char ** __restrict s, 834 size_t n, _ISO2022State * __restrict psenc, 835 size_t * __restrict nresult) 836 { 837 wchar_t wchar; 838 const char *s0, *p, *result; 839 int c; 840 int chlenbak; 841 842 _DIAGASSERT(nresult != 0); 843 _DIAGASSERT(ei != NULL); 844 _DIAGASSERT(psenc != NULL); 845 _DIAGASSERT(s != NULL); 846 847 s0 = *s; 848 c = 0; 849 chlenbak = psenc->chlen; 850 851 /* 852 * if we have something in buffer, use that. 853 * otherwise, skip here 854 */ 855 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 856 /* illgeal state */ 857 _citrus_ISO2022_init_state(ei, psenc); 858 goto encoding_error; 859 } 860 if (psenc->chlen == 0) 861 goto emptybuf; 862 863 /* buffer is not empty */ 864 p = psenc->ch; 865 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) { 866 if (n > 0) { 867 psenc->ch[psenc->chlen++] = *s0++; 868 n--; 869 } 870 871 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 872 &result, psenc); 873 if (wchar != _ISO2022INVALID) { 874 c += result - p; 875 if (psenc->chlen > c) 876 memmove(psenc->ch, result, psenc->chlen - c); 877 if (psenc->chlen < c) 878 psenc->chlen = 0; 879 else 880 psenc->chlen -= c; 881 goto output; 882 } 883 884 c += result - p; 885 p = result; 886 887 if (n == 0) 888 goto restart; 889 } 890 891 /* escape sequence too long? */ 892 goto encoding_error; 893 894 emptybuf: 895 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 896 if (wchar != _ISO2022INVALID) { 897 c += result - s0; 898 psenc->chlen = 0; 899 s0 = result; 900 goto output; 901 } 902 if (result > s0 && n > result - s0) { 903 c += (result - s0); 904 n -= (result - s0); 905 s0 = result; 906 goto emptybuf; 907 } 908 n += c; 909 if (n < sizeof(psenc->ch)) { 910 memcpy(psenc->ch, s0 - c, n); 911 psenc->chlen = n; 912 s0 = result; 913 goto restart; 914 } 915 916 /* escape sequence too long? */ 917 918 encoding_error: 919 psenc->chlen = 0; 920 *nresult = (size_t)-1; 921 return (EILSEQ); 922 923 output: 924 *s = s0; 925 if (pwc) 926 *pwc = wchar; 927 928 if (!wchar) 929 *nresult = 0; 930 else 931 *nresult = c - chlenbak; 932 933 return (0); 934 935 restart: 936 *s = s0; 937 *nresult = (size_t)-2; 938 939 return (0); 940 } 941 942 static int 943 recommendation(_ISO2022EncodingInfo * __restrict ei, 944 _ISO2022Charset * __restrict cs) 945 { 946 int i, j; 947 _ISO2022Charset *recommend; 948 949 _DIAGASSERT(ei != NULL); 950 _DIAGASSERT(cs != NULL); 951 952 /* first, try a exact match. */ 953 for (i = 0; i < 4; i++) { 954 recommend = ei->recommend[i]; 955 for (j = 0; j < ei->recommendsize[i]; j++) { 956 if (cs->type != recommend[j].type) 957 continue; 958 if (cs->final != recommend[j].final) 959 continue; 960 if (cs->interm != recommend[j].interm) 961 continue; 962 963 return i; 964 } 965 } 966 967 /* then, try a wildcard match over final char. */ 968 for (i = 0; i < 4; i++) { 969 recommend = ei->recommend[i]; 970 for (j = 0; j < ei->recommendsize[i]; j++) { 971 if (cs->type != recommend[j].type) 972 continue; 973 if (cs->final && (cs->final != recommend[j].final)) 974 continue; 975 if (cs->interm && (cs->interm != recommend[j].interm)) 976 continue; 977 978 return i; 979 } 980 } 981 982 /* there's no recommendation. make a guess. */ 983 if (ei->maxcharset == 0) { 984 return 0; 985 } else { 986 switch (cs->type) { 987 case CS94: 988 case CS94MULTI: 989 return 0; 990 case CS96: 991 case CS96MULTI: 992 return 1; 993 } 994 } 995 return 0; 996 } 997 998 static int 999 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t c, 1000 char * __restrict string, size_t n, 1001 char ** __restrict result, 1002 _ISO2022State * __restrict psenc) 1003 { 1004 int i = 0, len; 1005 _ISO2022Charset cs; 1006 char *p; 1007 char tmp[MB_LEN_MAX]; 1008 int target; 1009 u_char mask; 1010 int bit8; 1011 1012 _DIAGASSERT(ei != NULL); 1013 _DIAGASSERT(string != NULL); 1014 /* result may be NULL */ 1015 /* state appears to be unused */ 1016 1017 if (iscntl(c & 0xff)) { 1018 /* go back to ASCII on control chars */ 1019 cs.type = CS94; 1020 cs.final = 'B'; 1021 cs.interm = '\0'; 1022 } else if (!(c & ~0xff)) { 1023 if (c & 0x80) { 1024 /* special treatment for ISO-8859-1 */ 1025 cs.type = CS96; 1026 cs.final = 'A'; 1027 cs.interm = '\0'; 1028 } else { 1029 /* special treatment for ASCII */ 1030 cs.type = CS94; 1031 cs.final = 'B'; 1032 cs.interm = '\0'; 1033 } 1034 } else { 1035 cs.final = (c >> 24) & 0x7f; 1036 if ((c >> 16) & 0x80) 1037 cs.interm = (c >> 16) & 0x7f; 1038 else 1039 cs.interm = '\0'; 1040 if (c & 0x80) 1041 cs.type = (c & 0x00007f00) ? CS96MULTI : CS96; 1042 else 1043 cs.type = (c & 0x00007f00) ? CS94MULTI : CS94; 1044 } 1045 target = recommendation(ei, &cs); 1046 p = tmp; 1047 bit8 = ei->flags & F_8BIT; 1048 1049 /* designate the charset onto the target plane(G0/1/2/3). */ 1050 if (psenc->g[target].type == cs.type 1051 && psenc->g[target].final == cs.final 1052 && psenc->g[target].interm == cs.interm) 1053 goto planeok; 1054 1055 *p++ = '\033'; 1056 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1057 *p++ = '$'; 1058 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1059 && !cs.interm && !(ei->flags & F_NOOLD)) 1060 ; 1061 else if (cs.type == CS94 || cs.type == CS94MULTI) 1062 *p++ = "()*+"[target]; 1063 else 1064 *p++ = ",-./"[target]; 1065 if (cs.interm) 1066 *p++ = cs.interm; 1067 *p++ = cs.final; 1068 1069 psenc->g[target].type = cs.type; 1070 psenc->g[target].final = cs.final; 1071 psenc->g[target].interm = cs.interm; 1072 1073 planeok: 1074 1075 /* invoke the plane onto GL or GR. */ 1076 if (psenc->gl == target) 1077 goto sideok; 1078 if (bit8 && psenc->gr == target) 1079 goto sideok; 1080 1081 if (target == 0 && (ei->flags & F_LS0)) { 1082 *p++ = '\017'; 1083 psenc->gl = 0; 1084 } else if (target == 1 && (ei->flags & F_LS1)) { 1085 *p++ = '\016'; 1086 psenc->gl = 1; 1087 } else if (target == 2 && (ei->flags & F_LS2)) { 1088 *p++ = '\033'; 1089 *p++ = 'n'; 1090 psenc->gl = 2; 1091 } else if (target == 3 && (ei->flags & F_LS3)) { 1092 *p++ = '\033'; 1093 *p++ = 'o'; 1094 psenc->gl = 3; 1095 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1096 *p++ = '\033'; 1097 *p++ = '~'; 1098 psenc->gr = 1; 1099 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1100 *p++ = '\033'; 1101 /*{*/ 1102 *p++ = '}'; 1103 psenc->gr = 2; 1104 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1105 *p++ = '\033'; 1106 *p++ = '|'; 1107 psenc->gr = 3; 1108 } else if (target == 2 && (ei->flags & F_SS2)) { 1109 *p++ = '\033'; 1110 *p++ = 'N'; 1111 psenc->singlegl = 2; 1112 } else if (target == 3 && (ei->flags & F_SS3)) { 1113 *p++ = '\033'; 1114 *p++ = 'O'; 1115 psenc->singlegl = 3; 1116 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1117 *p++ = '\216'; 1118 *p++ = 'N'; 1119 psenc->singlegl = psenc->singlegr = 2; 1120 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1121 *p++ = '\217'; 1122 *p++ = 'O'; 1123 psenc->singlegl = psenc->singlegr = 3; 1124 } else 1125 abort(); 1126 1127 sideok: 1128 if (psenc->singlegl == target) 1129 mask = 0x00; 1130 else if (psenc->singlegr == target) 1131 mask = 0x80; 1132 else if (psenc->gl == target) 1133 mask = 0x00; 1134 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1135 mask = 0x80; 1136 else 1137 abort(); 1138 1139 switch (cs.type) { 1140 case CS94: 1141 case CS96: 1142 i = 1; 1143 break; 1144 case CS94MULTI: 1145 case CS96MULTI: 1146 i = isthree(cs.final) ? 3 : 2; 1147 break; 1148 } 1149 while (i-- > 0) 1150 *p++ = ((c >> (i << 3)) & 0x7f) | mask; 1151 1152 /* reset single shift state */ 1153 psenc->singlegl = psenc->singlegr = -1; 1154 1155 len = p - tmp; 1156 if (n < len) { 1157 if (result) 1158 *result = (char *)0; 1159 } else { 1160 if (result) 1161 *result = string + len; 1162 memcpy(string, tmp, len); 1163 } 1164 return len; 1165 } 1166 1167 static int 1168 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1169 char * __restrict s, size_t n, wchar_t wc, 1170 _ISO2022State * __restrict psenc, 1171 size_t * __restrict nresult) 1172 { 1173 char buf[MB_LEN_MAX]; 1174 char *result; 1175 int len; 1176 1177 _DIAGASSERT(ei != NULL); 1178 _DIAGASSERT(nresult != 0); 1179 _DIAGASSERT(s != NULL); 1180 1181 /* XXX state will be modified after this operation... */ 1182 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc); 1183 if (sizeof(buf) < len || n < len) { 1184 /* XXX should recover state? */ 1185 goto ilseq; 1186 } 1187 1188 memcpy(s, buf, len); 1189 *nresult = (size_t)len; 1190 return (0); 1191 1192 ilseq: 1193 /* bound check failure */ 1194 *nresult = (size_t)-1; 1195 return (EILSEQ); 1196 } 1197 1198 /* ---------------------------------------------------------------------- 1199 * public interface for ctype 1200 */ 1201 1202 _CITRUS_CTYPE_DECLS(ISO2022); 1203 _CITRUS_CTYPE_DEF_OPS(ISO2022); 1204 1205 #include "citrus_ctype_template.h" 1206