1 /* $OpenBSD: wsemul_subr.c,v 1.1 2013/10/18 22:06:41 miod Exp $ */ 2 3 /* 4 * Copyright (c) 2007, 2013 Miodrag Vallat. 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice, this permission notice, and the disclaimer below 9 * appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/errno.h> 23 24 #include <dev/wscons/wscons_features.h> 25 #include <dev/wscons/wsconsio.h> 26 #include <dev/wscons/wsdisplayvar.h> 27 #include <dev/wscons/wsemulvar.h> 28 #include <dev/wscons/wsksymdef.h> 29 30 int wsemul_local_translate(u_int32_t, kbd_t, u_char *); 31 32 /* 33 * Get characters from an input stream and update the input state. 34 * Processing stops when the stream is empty, or a complete character 35 * sequence has been recognized, in which case it returns zero. 36 */ 37 int 38 wsemul_getchar(const u_char **inbuf, u_int *inlen, 39 struct wsemul_inputstate *state, int allow_utf8) 40 { 41 #ifndef HAVE_UTF8_SUPPORT 42 u_int len = *inlen; 43 const u_char *buf = *inbuf; 44 45 if (len == 0) 46 return (EAGAIN); 47 48 state->inchar = *buf++; 49 state->mbleft = 0; 50 len--; 51 *inlen = len; 52 *inbuf = buf; 53 return (0); 54 #else 55 u_int len = *inlen; 56 const u_char *buf = *inbuf; 57 int rc = EAGAIN; 58 u_int32_t tmpchar; 59 u_int mbleft; 60 61 if (len == 0) 62 return (rc); 63 64 /* 65 * If we do not allow multibyte sequences, process as quickly 66 * as possible. 67 */ 68 if (!allow_utf8) { 69 state->inchar = *buf++; 70 state->mbleft = 0; 71 len--; 72 *inlen = len; 73 *inbuf = buf; 74 return (0); 75 } 76 77 tmpchar = state->inchar; 78 mbleft = state->mbleft; 79 80 while (len != 0) { 81 u_int32_t frag = (u_int32_t)*buf++; 82 len--; 83 84 /* 85 * If we are in the middle of a multibyte sequence, try 86 * to complete it. 87 */ 88 89 if (mbleft != 0) { 90 if ((frag & 0xc0) != 0x80) { 91 /* Abort the sequence and continue */ 92 mbleft = 0; 93 tmpchar = 0; 94 rc = EILSEQ; 95 } else { 96 tmpchar = (tmpchar << 6) | (frag & 0x3f); 97 mbleft--; 98 if (mbleft == 0) { 99 rc = 0; 100 break; 101 } 102 } 103 } 104 105 /* 106 * Otherwise let's decide if this is the start of a new 107 * multibyte sequence, or a 7-bit character. 108 */ 109 110 if ((frag & 0x80) == 0) { 111 tmpchar = frag; 112 rc = 0; 113 break; 114 } 115 116 if (frag == 0xfe || frag == 0xff || (frag & 0x40) == 0) { 117 /* Abort the sequence and continue */ 118 mbleft = 0; 119 tmpchar = 0; 120 rc = EILSEQ; 121 } else { 122 frag &= ~(0x80 | 0x40); 123 mbleft = 1; 124 125 if (frag & 0x20) { 126 frag &= ~0x20; 127 mbleft++; 128 } 129 if (frag & 0x10) { 130 frag &= ~0x10; 131 mbleft++; 132 } 133 if (frag & 0x08) { 134 frag &= ~0x08; 135 mbleft++; 136 } 137 if (frag & 0x04) { 138 frag &= ~0x04; 139 mbleft++; 140 } 141 142 tmpchar = frag; 143 } 144 } 145 146 state->inchar = tmpchar; 147 state->mbleft = mbleft; 148 *inlen = len; 149 *inbuf = buf; 150 return (rc); 151 #endif 152 } 153 154 /* 155 * Unicode Cyrillic to KOI8 translation table (starts at U+0400), 156 * from RFC 2319. 157 */ 158 const u_int8_t cyrillic_to_koi8[] = { 159 0x00, /* IE grave */ /* 0400 */ 160 0xb3, /* IO */ 161 0x00, /* DJE */ 162 0x00, /* GJE */ 163 0xb4, /* UKR IE */ 164 0x00, /* DZE */ 165 0xb6, /* BYE/UKR I */ 166 0xb7, /* YI */ 167 0x00, /* JE */ 168 0x00, /* LJE */ 169 0x00, /* NJE */ 170 0x00, /* TSHE */ 171 0x00, /* KJE */ 172 0x00, /* I grave */ 173 0x00, /* short U */ 174 0x00, /* DZHE */ 175 0xe1, /* A */ /* 0410 */ 176 0xe2, /* BE */ 177 0xf7, /* VE */ 178 0xe7, /* GHE */ 179 0xe4, /* DE */ 180 0xe5, /* IE */ 181 0xf6, /* ZHE */ 182 0xfa, /* ZE */ 183 0xe9, /* I */ 184 0xea, /* short I */ 185 0xeb, /* KA */ 186 0xec, /* EL */ 187 0xed, /* EM */ 188 0xee, /* EN */ 189 0xef, /* O */ 190 0xf0, /* PE */ 191 0xf2, /* ER */ /* 0420 */ 192 0xf3, /* ES */ 193 0xf4, /* TE */ 194 0xf5, /* U */ 195 0xe6, /* EF */ 196 0xe8, /* HA */ 197 0xe3, /* TSE */ 198 0xfe, /* CHE */ 199 0xfb, /* SHA */ 200 0xfd, /* SHCHA */ 201 0xff, /* HARD SIGN */ 202 0xf9, /* YERU */ 203 0xf8, /* SOFT SIGN */ 204 0xfc, /* E */ 205 0xe0, /* YU */ 206 0xf1, /* YA */ 207 0xc1, /* a */ /* 0430 */ 208 0xc2, /* be */ 209 0xd7, /* ve */ 210 0xc7, /* ghe */ 211 0xc4, /* de */ 212 0xc5, /* ie */ 213 0xd6, /* zhe */ 214 0xda, /* ze */ 215 0xc9, /* i */ 216 0xca, /* short i */ 217 0xcb, /* ka */ 218 0xcc, /* el */ 219 0xcd, /* em */ 220 0xce, /* en */ 221 0xcf, /* o */ 222 0xd0, /* pe */ 223 0xd2, /* er */ /* 0440 */ 224 0xd3, /* es */ 225 0xd4, /* te */ 226 0xd5, /* u */ 227 0xc6, /* ef */ 228 0xc8, /* ha */ 229 0xc3, /* tse */ 230 0xde, /* che */ 231 0xdb, /* sha */ 232 0xdd, /* shcha */ 233 0xdf, /* hard sign */ 234 0xd9, /* yeru */ 235 0xd8, /* soft sign */ 236 0xdc, /* e */ 237 0xc0, /* yu */ 238 0xd1, /* ya */ 239 0x00, /* ie grave */ /* 0450 */ 240 0xa3, /* io */ 241 0x00, /* dje */ 242 0x00, /* GJE */ 243 0xa4, /* UKR ie */ 244 0x00, /* DZE */ 245 0xa6, /* BYE/UKR I */ 246 0xa7, /* YI */ 247 0x00, /* JE */ 248 0x00, /* LJE */ 249 0x00, /* NJE */ 250 0x00, /* TSHE */ 251 0x00, /* KJE */ 252 0x00, /* I grave */ 253 0x00, /* short U */ 254 0x00 /* DZHE */ 255 }; 256 257 /* 258 * Europe to Latin-2 translation table (starts at U+0100). 259 */ 260 const u_int8_t unicode_to_latin2[] = { 261 0x00, /* A macron */ /* 0100 */ 262 0x00, /* a macron */ 263 0xc3, /* A breve */ 264 0xe3, /* a breve */ 265 0xa1, /* A ogonek */ 266 0xb1, /* a ogonek */ 267 0xc6, /* C acute */ 268 0xe6, /* c acute */ 269 0x00, /* C circumflex */ 270 0x00, /* c circumflex */ 271 0x00, /* C abovering */ 272 0x00, /* c abovering */ 273 0xc8, /* C caron */ 274 0xe8, /* c caron */ 275 0xcf, /* D caron */ 276 0xef, /* d caron */ 277 0xd0, /* D stroke */ /* 0110 */ 278 0xf0, /* d stroke */ 279 0x00, /* E macron */ 280 0x00, /* e macron */ 281 0x00, /* E breve */ 282 0x00, /* e breve */ 283 0x00, /* E abovering */ 284 0x00, /* e abovering */ 285 0xca, /* E ogonek */ 286 0xea, /* e ogonek */ 287 0xcc, /* E caron */ 288 0xec, /* e caron */ 289 0x00, /* G circumflex */ 290 0x00, /* g circumflex */ 291 0x00, /* G breve */ 292 0x00, /* g breve */ 293 0x00, /* G abovering */ /* 0120 */ 294 0x00, /* g abovering */ 295 0x00, /* G cedilla */ 296 0x00, /* g cedilla */ 297 0x00, /* H circumflex */ 298 0x00, /* h circumflex */ 299 0x00, /* H stroke */ 300 0x00, /* h stroke */ 301 0x00, /* I tilde */ 302 0x00, /* i tilde */ 303 0x00, /* I macron */ 304 0x00, /* i macron */ 305 0x00, /* I breve */ 306 0x00, /* i breve */ 307 0x00, /* I ogonek */ 308 0x00, /* i ogonek */ 309 0x00, /* dotted I */ /* 0130 */ 310 0x00, /* non-dotted i */ 311 0x00, /* ligature IJ */ 312 0x00, /* ligature ij */ 313 0x00, /* J circumflex */ 314 0x00, /* j circumflex */ 315 0x00, /* K cedilla */ 316 0x00, /* k cedilla */ 317 0x00, /* kra */ 318 0xc5, /* L acute */ 319 0xe5, /* l acute */ 320 0x00, /* L cedilla */ 321 0x00, /* l cedilla */ 322 0xa5, /* L caron */ 323 0xb5, /* l caron */ 324 0x00, /* L middle dot */ 325 0x00, /* l middle dot */ /* 0140 */ 326 0xa3, /* L stroke */ 327 0xb3, /* l stroke */ 328 0xd1, /* N acute */ 329 0xf1, /* n acute */ 330 0x00, /* N cedilla */ 331 0x00, /* n cedilla */ 332 0xd2, /* N caron */ 333 0xf2, /* n caron */ 334 0x00, /* N preceded by apostrophe */ 335 0x00, /* ENG */ 336 0x00, /* eng */ 337 0x00, /* O macron */ 338 0x00, /* o macron */ 339 0x00, /* O breve */ 340 0x00, /* o breve */ 341 0xd5, /* O double acute */ /* 0150 */ 342 0xf5, /* o double acute */ 343 0x00, /* ligature OE */ 344 0x00, /* ligature oe */ 345 0xc0, /* R acute */ 346 0xe0, /* r acute */ 347 0x00, /* R cedilla */ 348 0x00, /* r cedilla */ 349 0xd8, /* R caron */ 350 0xf8, /* r caron */ 351 0xa6, /* S acute */ 352 0xb6, /* s acute */ 353 0x00, /* S circumflex */ 354 0x00, /* s circumflex */ 355 0xaa, /* S cedilla */ 356 0xba, /* s cedilla */ 357 0xa9, /* S caron */ /* 0160 */ 358 0xb9, /* s caron */ 359 0xde, /* T cedilla */ 360 0xfe, /* t cedilla */ 361 0xab, /* T caron */ 362 0xbb, /* t caron */ 363 0x00, /* T stroke */ 364 0x00, /* t stroke */ 365 0x00, /* U tilde */ 366 0x00, /* u tilde */ 367 0x00, /* U macron */ 368 0x00, /* u macron */ 369 0x00, /* U breve */ 370 0x00, /* u breve */ 371 0xd9, /* U abovering */ 372 0xf9, /* u abovering */ 373 0xdb, /* U double acute */ /* 0170 */ 374 0xfb, /* u double acute */ 375 0x00, /* U ogonek */ 376 0x00, /* u ogonek */ 377 0x00, /* W circumflex */ 378 0x00, /* w circumflex */ 379 0x00, /* Y circumflex */ 380 0x00, /* y circumflex */ 381 0x00, /* Y diaeresis */ 382 0xac, /* Z acute */ 383 0xbc, /* z acute */ 384 0xaf, /* Z abovering */ 385 0xbf, /* z abovering */ 386 0xae, /* Z caron */ 387 0xbe, /* z caron */ 388 0x00 /* long s */ 389 }; 390 391 /* 392 * Baltic to Latin-7 translation table. 393 */ 394 const u_int8_t unicode_to_latin7[] = { 395 0xc2, /* A macron */ /* 0100 */ 396 0xe2, /* a macron */ 397 0x00, /* A breve */ 398 0x00, /* a breve */ 399 0xc0, /* A ogonek */ 400 0xe0, /* a ogonek */ 401 0xc3, /* C acute */ 402 0xe3, /* c acute */ 403 0x00, /* C circumflex */ 404 0x00, /* c circumflex */ 405 0x00, /* C abovering */ 406 0x00, /* c abovering */ 407 0xc8, /* C caron */ 408 0xe8, /* c caron */ 409 0x00, /* D caron */ 410 0x00, /* d caron */ 411 0x00, /* D stroke */ /* 0110 */ 412 0x00, /* d stroke */ 413 0xc7, /* E macron */ 414 0xe7, /* e macron */ 415 0x00, /* E breve */ 416 0x00, /* e breve */ 417 0xcb, /* E abovering */ 418 0xeb, /* e abovering */ 419 0xc6, /* E ogonek */ 420 0xe6, /* e ogonek */ 421 0x00, /* E caron */ 422 0x00, /* e caron */ 423 0x00, /* G circumflex */ 424 0x00, /* g circumflex */ 425 0x00, /* G breve */ 426 0x00, /* g breve */ 427 0x00, /* G abovering */ /* 0120 */ 428 0x00, /* g abovering */ 429 0xcc, /* G cedilla */ 430 0xec, /* g cedilla */ 431 0x00, /* H circumflex */ 432 0x00, /* h circumflex */ 433 0x00, /* H stroke */ 434 0x00, /* h stroke */ 435 0x00, /* I tilde */ 436 0x00, /* i tilde */ 437 0xce, /* I macron */ 438 0xee, /* i macron */ 439 0x00, /* I breve */ 440 0x00, /* i breve */ 441 0xc1, /* I ogonek */ 442 0xe1, /* i ogonek */ 443 0x00, /* dotted I */ /* 0130 */ 444 0x00, /* non-dotted I */ 445 0x00, /* ligature IJ */ 446 0x00, /* ligature ij */ 447 0x00, /* J circumflex */ 448 0x00, /* j circumflex */ 449 0xcd, /* K cedilla */ 450 0xed, /* k cedilla */ 451 0x00, /* kra */ 452 0x00, /* L acute */ 453 0x00, /* l acute */ 454 0xcf, /* L cedilla */ 455 0xef, /* l cedilla */ 456 0x00, /* L caron */ 457 0x00, /* l caron */ 458 0x00, /* L middle dot */ 459 0x00, /* l middle dot */ /* 0140 */ 460 0xd9, /* L stroke */ 461 0xf9, /* l stroke */ 462 0xd1, /* N acute */ 463 0xf1, /* n acute */ 464 0xd2, /* N cedilla */ 465 0xf2, /* n cedilla */ 466 0x00, /* N caron */ 467 0x00, /* n caron */ 468 0x00, /* N preceded by apostrophe */ 469 0x00, /* ENG */ 470 0x00, /* eng */ 471 0xd4, /* O macron */ 472 0xf4, /* o macron */ 473 0x00, /* O breve */ 474 0x00, /* o breve */ 475 0x00, /* O double acute */ /* 0150 */ 476 0x00, /* o double acute */ 477 0x00, /* ligature OE */ 478 0x00, /* ligature oe */ 479 0x00, /* R acute */ 480 0x00, /* r acute */ 481 0xaa, /* R cedilla */ 482 0xba, /* r cedilla */ 483 0x00, /* R caron */ 484 0x00, /* r caron */ 485 0xda, /* S acute */ 486 0xfa, /* s acute */ 487 0x00, /* S circumflex */ 488 0x00, /* s circumflex */ 489 0x00, /* S cedilla */ 490 0x00, /* s cedilla */ 491 0xd0, /* S caron */ /* 0160 */ 492 0xf0, /* s caron */ 493 0x00, /* T cedilla */ 494 0x00, /* t cedilla */ 495 0x00, /* T caron */ 496 0x00, /* t caron */ 497 0x00, /* T stroke */ 498 0x00, /* t stroke */ 499 0x00, /* U tilde */ 500 0x00, /* u tilde */ 501 0xdb, /* U macron */ 502 0xfb, /* u macron */ 503 0x00, /* U breve */ 504 0x00, /* u breve */ 505 0x00, /* U abovering */ 506 0x00, /* u abovering */ 507 0x00, /* U double acute */ /* 0170 */ 508 0x00, /* u double acute */ 509 0xd8, /* U ogonek */ 510 0xf8, /* u ogonek */ 511 0x00, /* W circumflex */ 512 0x00, /* w circumflex */ 513 0x00, /* Y circumflex */ 514 0x00, /* y circumflex */ 515 0x00, /* Y diaeresis */ 516 0xca, /* Z acute */ 517 0xea, /* z acute */ 518 0xdd, /* Z abovering */ 519 0xfd, /* z abovering */ 520 0xde, /* Z caron */ 521 0xfe, /* z caron */ 522 0x00 /* long s */ 523 }; 524 525 /* 526 * Keysym to local 8-bit charset sequence translation function. 527 * The out buffer is at least one character long. 528 * The keyboard layout is used as a hint to decide which latin charset to 529 * assume. 530 */ 531 int 532 wsemul_local_translate(u_int32_t unisym, kbd_t layout, u_char *out) 533 { 534 switch (unisym >> 7) { 535 case 0x0080 >> 7: 536 switch (KB_ENCODING(layout)) { 537 case KB_LT: 538 case KB_LV: 539 switch (unisym) { 540 case KS_L7_AE: 541 unisym = 0xaf; 542 break; 543 case KS_L7_Ostroke: 544 unisym = 0xa8; 545 break; 546 case KS_L7_ae: 547 unisym = 0xbf; 548 break; 549 case KS_L7_ostroke: 550 unisym = 0xb8; 551 break; 552 } 553 } 554 break; 555 556 case 0x0100 >> 7: 557 switch (KB_ENCODING(layout)) { 558 case KB_LT: 559 case KB_LV: 560 if (unisym < 0x100 + nitems(unicode_to_latin7) && 561 unicode_to_latin7[unisym - 0x100] != 0) 562 unisym = unicode_to_latin7[unisym - 0x100]; 563 break; 564 case KB_TR: 565 switch (unisym) { 566 case KS_L5_Gbreve: 567 unisym = 0xd0; 568 break; 569 case KS_L5_gbreve: 570 unisym = 0xf0; 571 break; 572 case KS_L5_Idotabove: 573 unisym = 0xdd; 574 break; 575 case KS_L5_idotless: 576 unisym = 0xfd; 577 break; 578 case KS_L5_Scedilla: 579 unisym = 0xde; 580 break; 581 case KS_L5_scedilla: 582 unisym = 0xfe; 583 break; 584 } 585 break; 586 case KB_PL: 587 case KB_SI: 588 if (unisym < 0x100 + nitems(unicode_to_latin2) && 589 unicode_to_latin2[unisym - 0x100] != 0) 590 unisym = unicode_to_latin2[unisym - 0x100]; 591 break; 592 } 593 break; 594 595 case 0x0280 >> 7: 596 switch (KB_ENCODING(layout)) { 597 case KB_PL: 598 case KB_SI: 599 switch (unisym) { 600 case KS_L2_caron: 601 unisym = 0xb7; 602 break; 603 case KS_L2_breve: 604 unisym = 0xa2; 605 break; 606 case KS_L2_dotabove: 607 unisym = 0xff; 608 break; 609 case KS_L2_ogonek: 610 unisym = 0xb2; 611 break; 612 case KS_L2_dblacute: 613 unisym = 0xbd; 614 break; 615 } 616 break; 617 } 618 break; 619 620 case 0x0400 >> 7: 621 if (unisym < 0x400 + 622 sizeof(cyrillic_to_koi8) / sizeof(cyrillic_to_koi8[0]) && 623 cyrillic_to_koi8[unisym - 0x400] != 0) 624 unisym = cyrillic_to_koi8[unisym - 0x400]; 625 break; 626 case 0x0480 >> 7: 627 if (unisym == KS_Cyrillic_GHEUKR) 628 unisym = 0xbd; /* ukrainian GHE */ 629 else if (unisym == KS_Cyrillic_gheukr) 630 unisym = 0xad; /* ukrainian ghe */ 631 break; 632 633 case 0x2000 >> 7: 634 switch (KB_ENCODING(layout)) { 635 case KB_LT: 636 case KB_LV: 637 switch (unisym) { 638 case KS_L7_rightsnglquot: 639 unisym = 0xff; 640 break; 641 case KS_L7_leftdblquot: 642 unisym = 0xb4; 643 break; 644 case KS_L7_rightdblquot: 645 unisym = 0xa1; 646 break; 647 case KS_L7_dbllow9quot: 648 unisym = 0xa5; 649 break; 650 } 651 } 652 break; 653 654 } 655 656 out[0] = unisym & 0xff; 657 return (1); 658 } 659 660 /* 661 * Keysym to UTF-8 sequence translation function. 662 * The out buffer is at least 6 characters long. 663 */ 664 int 665 wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out, 666 int allow_utf8) 667 { 668 #ifndef HAVE_UTF8_SUPPORT 669 return (wsemul_local_translate(unisym, layout, out)); 670 #else 671 u_int pos, length, headpat; 672 673 if (!allow_utf8) 674 return (wsemul_local_translate(unisym, layout, out)); 675 676 if (unisym >= 0x80000000) { 677 return (0); 678 } else if (unisym > 0x04000000) { 679 headpat = 0xfc; 680 length = 6; 681 } else if (unisym > 0x00200000) { 682 headpat = 0xf8; 683 length = 5; 684 } else if (unisym > 0x00010000) { 685 headpat = 0xf0; 686 length = 4; 687 } else if (unisym > 0x00000800) { 688 headpat = 0xe0; 689 length = 3; 690 } else if (unisym > 0x00000080) { 691 headpat = 0xc0; 692 length = 2; 693 } else { 694 headpat = 0x00; 695 length = 1; 696 } 697 698 for (pos = length - 1; pos > 0; pos--) { 699 out[pos] = 0x80 | (unisym & 0x3f); 700 unisym >>= 6; 701 } 702 out[0] = headpat | unisym; 703 704 return (length); 705 #endif 706 } 707