1 /* $OpenBSD: wsemul_subr.c,v 1.2 2023/03/06 17:14:44 miod Exp $ */
2
3 /*
4 * Copyright (c) 2007, 2013 Miodrag Vallat.
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice, this permission notice, and the disclaimer below
9 * appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 /*
21 * Part of the UTF-8 state machine logic borrowed from citrus_utf8.c
22 * under the following licence:
23 */
24 /*-
25 * Copyright (c) 2002-2004 Tim J. Robbins
26 * All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/errno.h>
53
54 #include <dev/wscons/wscons_features.h>
55 #include <dev/wscons/wsconsio.h>
56 #include <dev/wscons/wsdisplayvar.h>
57 #include <dev/wscons/wsemulvar.h>
58 #include <dev/wscons/wsksymdef.h>
59
60 int wsemul_local_translate(u_int32_t, kbd_t, u_char *);
61
62 /*
63 * Get characters from an input stream and update the input state.
64 * Processing stops when the stream is empty, or a complete character
65 * sequence has been recognized, in which case it returns zero.
66 */
67 int
wsemul_getchar(const u_char ** inbuf,u_int * inlen,struct wsemul_inputstate * state,int allow_utf8)68 wsemul_getchar(const u_char **inbuf, u_int *inlen,
69 struct wsemul_inputstate *state, int allow_utf8)
70 {
71 u_int len = *inlen;
72 const u_char *buf = *inbuf;
73 #ifdef HAVE_UTF8_SUPPORT
74 int rc;
75 u_int32_t tmpchar, lbound;
76 u_int mbleft;
77 #endif
78
79 if (len == 0)
80 return EAGAIN;
81
82 #ifndef HAVE_UTF8_SUPPORT
83 state->inchar = *buf++;
84 state->mbleft = 0;
85 len--;
86 *inlen = len;
87 *inbuf = buf;
88 return 0;
89 #else
90 /*
91 * If we do not allow multibyte sequences, process as quickly
92 * as possible.
93 */
94 if (!allow_utf8) {
95 state->inchar = *buf++;
96 state->mbleft = 0;
97 len--;
98 *inlen = len;
99 *inbuf = buf;
100 return 0;
101 }
102
103 rc = EAGAIN;
104 tmpchar = state->inchar;
105 lbound = state->lbound;
106 mbleft = state->mbleft;
107
108 while (len != 0) {
109 u_int32_t frag = (u_int32_t)*buf++;
110 len--;
111
112 /*
113 * If we are in the middle of a multibyte sequence, try
114 * to complete it.
115 */
116
117 if (mbleft != 0) {
118 if ((frag & 0xc0) != 0x80)
119 goto invalid;
120
121 tmpchar = (tmpchar << 6) | (frag & 0x3f);
122 mbleft--;
123 if (mbleft == 0) {
124 if (tmpchar < lbound)
125 goto invalid;
126 if (tmpchar >= 0xd800 && tmpchar < 0xe000)
127 goto invalid;
128 if (tmpchar >= 0x110000)
129 goto invalid;
130 rc = 0;
131 break;
132 }
133 continue;
134 }
135
136 /*
137 * Otherwise let's decide if this is the start of a new
138 * multibyte sequence, or a 7-bit character.
139 */
140
141 if ((frag & 0x80) == 0) {
142 tmpchar = frag;
143 rc = 0;
144 break;
145 }
146
147 if ((frag & 0xe0) == 0xc0) {
148 frag &= 0x1f;
149 mbleft = 1;
150 lbound = 0x80;
151 } else if ((frag & 0xf0) == 0xe0) {
152 frag &= 0x0f;
153 mbleft = 2;
154 lbound = 0x800;
155 } else if ((frag & 0xf8) == 0xf0) {
156 frag &= 0x07;
157 mbleft = 3;
158 lbound = 0x10000;
159 } else {
160 goto invalid;
161 }
162
163 tmpchar = frag;
164 state->lbound = lbound;
165 continue;
166
167 invalid:
168 /* Abort the ill-formed sequence and continue */
169 mbleft = 0;
170 tmpchar = 0;
171 rc = EILSEQ;
172 }
173
174 state->inchar = tmpchar;
175 state->mbleft = mbleft;
176 *inlen = len;
177 *inbuf = buf;
178 return rc;
179 #endif
180 }
181
/*
 * Unicode Cyrillic to KOI8 translation table, from RFC 2319.
 *
 * The entry at index N is the 8-bit code for U+0400 + N, or zero if that
 * character has no 8-bit equivalent.  Indices which are not listed below
 * are zero.  The table covers U+0400 to U+045F.
 */
const u_int8_t cyrillic_to_koi8[] = {
	/* U+0400 - U+040F */
	[0x01] = 0xb3,	/* IO */
	[0x04] = 0xb4,	/* UKR IE */
	[0x06] = 0xb6,	/* BYE/UKR I */
	[0x07] = 0xb7,	/* YI */

	/* U+0410 - U+041F */
	[0x10] = 0xe1,	/* A */
	[0x11] = 0xe2,	/* BE */
	[0x12] = 0xf7,	/* VE */
	[0x13] = 0xe7,	/* GHE */
	[0x14] = 0xe4,	/* DE */
	[0x15] = 0xe5,	/* IE */
	[0x16] = 0xf6,	/* ZHE */
	[0x17] = 0xfa,	/* ZE */
	[0x18] = 0xe9,	/* I */
	[0x19] = 0xea,	/* short I */
	[0x1a] = 0xeb,	/* KA */
	[0x1b] = 0xec,	/* EL */
	[0x1c] = 0xed,	/* EM */
	[0x1d] = 0xee,	/* EN */
	[0x1e] = 0xef,	/* O */
	[0x1f] = 0xf0,	/* PE */

	/* U+0420 - U+042F */
	[0x20] = 0xf2,	/* ER */
	[0x21] = 0xf3,	/* ES */
	[0x22] = 0xf4,	/* TE */
	[0x23] = 0xf5,	/* U */
	[0x24] = 0xe6,	/* EF */
	[0x25] = 0xe8,	/* HA */
	[0x26] = 0xe3,	/* TSE */
	[0x27] = 0xfe,	/* CHE */
	[0x28] = 0xfb,	/* SHA */
	[0x29] = 0xfd,	/* SHCHA */
	[0x2a] = 0xff,	/* HARD SIGN */
	[0x2b] = 0xf9,	/* YERU */
	[0x2c] = 0xf8,	/* SOFT SIGN */
	[0x2d] = 0xfc,	/* E */
	[0x2e] = 0xe0,	/* YU */
	[0x2f] = 0xf1,	/* YA */

	/* U+0430 - U+043F */
	[0x30] = 0xc1,	/* a */
	[0x31] = 0xc2,	/* be */
	[0x32] = 0xd7,	/* ve */
	[0x33] = 0xc7,	/* ghe */
	[0x34] = 0xc4,	/* de */
	[0x35] = 0xc5,	/* ie */
	[0x36] = 0xd6,	/* zhe */
	[0x37] = 0xda,	/* ze */
	[0x38] = 0xc9,	/* i */
	[0x39] = 0xca,	/* short i */
	[0x3a] = 0xcb,	/* ka */
	[0x3b] = 0xcc,	/* el */
	[0x3c] = 0xcd,	/* em */
	[0x3d] = 0xce,	/* en */
	[0x3e] = 0xcf,	/* o */
	[0x3f] = 0xd0,	/* pe */

	/* U+0440 - U+044F */
	[0x40] = 0xd2,	/* er */
	[0x41] = 0xd3,	/* es */
	[0x42] = 0xd4,	/* te */
	[0x43] = 0xd5,	/* u */
	[0x44] = 0xc6,	/* ef */
	[0x45] = 0xc8,	/* ha */
	[0x46] = 0xc3,	/* tse */
	[0x47] = 0xde,	/* che */
	[0x48] = 0xdb,	/* sha */
	[0x49] = 0xdd,	/* shcha */
	[0x4a] = 0xdf,	/* hard sign */
	[0x4b] = 0xd9,	/* yeru */
	[0x4c] = 0xd8,	/* soft sign */
	[0x4d] = 0xdc,	/* e */
	[0x4e] = 0xc0,	/* yu */
	[0x4f] = 0xd1,	/* ya */

	/* U+0450 - U+045F */
	[0x51] = 0xa3,	/* io */
	[0x54] = 0xa4,	/* ukr ie */
	[0x56] = 0xa6,	/* bye/ukr i */
	[0x57] = 0xa7,	/* yi */

	/* Last entry, spelled out to keep the table 0x60 entries long. */
	[0x5f] = 0x00	/* dzhe */
};
284
/*
 * Europe to Latin-2 translation table.
 *
 * The entry at index N is the 8-bit code for U+0100 + N, or zero if that
 * character has no Latin-2 equivalent.  Indices which are not listed below
 * are zero.  The table covers U+0100 to U+017F.
 */
const u_int8_t unicode_to_latin2[] = {
	/* U+0100 - U+010F */
	[0x02] = 0xc3,	/* A breve */
	[0x03] = 0xe3,	/* a breve */
	[0x04] = 0xa1,	/* A ogonek */
	[0x05] = 0xb1,	/* a ogonek */
	[0x06] = 0xc6,	/* C acute */
	[0x07] = 0xe6,	/* c acute */
	[0x0c] = 0xc8,	/* C caron */
	[0x0d] = 0xe8,	/* c caron */
	[0x0e] = 0xcf,	/* D caron */
	[0x0f] = 0xef,	/* d caron */

	/* U+0110 - U+011F */
	[0x10] = 0xd0,	/* D stroke */
	[0x11] = 0xf0,	/* d stroke */
	[0x18] = 0xca,	/* E ogonek */
	[0x19] = 0xea,	/* e ogonek */
	[0x1a] = 0xcc,	/* E caron */
	[0x1b] = 0xec,	/* e caron */

	/* U+0120 - U+012F: nothing */

	/* U+0130 - U+013F */
	[0x39] = 0xc5,	/* L acute */
	[0x3a] = 0xe5,	/* l acute */
	[0x3d] = 0xa5,	/* L caron */
	[0x3e] = 0xb5,	/* l caron */

	/* U+0140 - U+014F */
	[0x41] = 0xa3,	/* L stroke */
	[0x42] = 0xb3,	/* l stroke */
	[0x43] = 0xd1,	/* N acute */
	[0x44] = 0xf1,	/* n acute */
	[0x47] = 0xd2,	/* N caron */
	[0x48] = 0xf2,	/* n caron */

	/* U+0150 - U+015F */
	[0x50] = 0xd5,	/* O double acute */
	[0x51] = 0xf5,	/* o double acute */
	[0x54] = 0xc0,	/* R acute */
	[0x55] = 0xe0,	/* r acute */
	[0x58] = 0xd8,	/* R caron */
	[0x59] = 0xf8,	/* r caron */
	[0x5a] = 0xa6,	/* S acute */
	[0x5b] = 0xb6,	/* s acute */
	[0x5e] = 0xaa,	/* S cedilla */
	[0x5f] = 0xba,	/* s cedilla */

	/* U+0160 - U+016F */
	[0x60] = 0xa9,	/* S caron */
	[0x61] = 0xb9,	/* s caron */
	[0x62] = 0xde,	/* T cedilla */
	[0x63] = 0xfe,	/* t cedilla */
	[0x64] = 0xab,	/* T caron */
	[0x65] = 0xbb,	/* t caron */
	[0x6e] = 0xd9,	/* U ring above */
	[0x6f] = 0xf9,	/* u ring above */

	/* U+0170 - U+017F */
	[0x70] = 0xdb,	/* U double acute */
	[0x71] = 0xfb,	/* u double acute */
	[0x79] = 0xac,	/* Z acute */
	[0x7a] = 0xbc,	/* z acute */
	[0x7b] = 0xaf,	/* Z dot above */
	[0x7c] = 0xbf,	/* z dot above */
	[0x7d] = 0xae,	/* Z caron */
	[0x7e] = 0xbe,	/* z caron */

	/* Last entry, spelled out to keep the table 0x80 entries long. */
	[0x7f] = 0x00	/* long s */
};
418
/*
 * Baltic to Latin-7 translation table.
 *
 * The entry at index N is the 8-bit code for U+0100 + N, or zero if that
 * character has no Latin-7 equivalent.  Indices which are not listed below
 * are zero.  The table covers U+0100 to U+017F.
 */
const u_int8_t unicode_to_latin7[] = {
	/* U+0100 - U+010F */
	[0x00] = 0xc2,	/* A macron */
	[0x01] = 0xe2,	/* a macron */
	[0x04] = 0xc0,	/* A ogonek */
	[0x05] = 0xe0,	/* a ogonek */
	[0x06] = 0xc3,	/* C acute */
	[0x07] = 0xe3,	/* c acute */
	[0x0c] = 0xc8,	/* C caron */
	[0x0d] = 0xe8,	/* c caron */

	/* U+0110 - U+011F */
	[0x12] = 0xc7,	/* E macron */
	[0x13] = 0xe7,	/* e macron */
	[0x16] = 0xcb,	/* E dot above */
	[0x17] = 0xeb,	/* e dot above */
	[0x18] = 0xc6,	/* E ogonek */
	[0x19] = 0xe6,	/* e ogonek */

	/* U+0120 - U+012F */
	[0x22] = 0xcc,	/* G cedilla */
	[0x23] = 0xec,	/* g cedilla */
	[0x2a] = 0xce,	/* I macron */
	[0x2b] = 0xee,	/* i macron */
	[0x2e] = 0xc1,	/* I ogonek */
	[0x2f] = 0xe1,	/* i ogonek */

	/* U+0130 - U+013F */
	[0x36] = 0xcd,	/* K cedilla */
	[0x37] = 0xed,	/* k cedilla */
	[0x3b] = 0xcf,	/* L cedilla */
	[0x3c] = 0xef,	/* l cedilla */

	/* U+0140 - U+014F */
	[0x41] = 0xd9,	/* L stroke */
	[0x42] = 0xf9,	/* l stroke */
	[0x43] = 0xd1,	/* N acute */
	[0x44] = 0xf1,	/* n acute */
	[0x45] = 0xd2,	/* N cedilla */
	[0x46] = 0xf2,	/* n cedilla */
	[0x4c] = 0xd4,	/* O macron */
	[0x4d] = 0xf4,	/* o macron */

	/* U+0150 - U+015F */
	[0x56] = 0xaa,	/* R cedilla */
	[0x57] = 0xba,	/* r cedilla */
	[0x5a] = 0xda,	/* S acute */
	[0x5b] = 0xfa,	/* s acute */

	/* U+0160 - U+016F */
	[0x60] = 0xd0,	/* S caron */
	[0x61] = 0xf0,	/* s caron */
	[0x6a] = 0xdb,	/* U macron */
	[0x6b] = 0xfb,	/* u macron */

	/* U+0170 - U+017F */
	[0x72] = 0xd8,	/* U ogonek */
	[0x73] = 0xf8,	/* u ogonek */
	[0x79] = 0xca,	/* Z acute */
	[0x7a] = 0xea,	/* z acute */
	[0x7b] = 0xdd,	/* Z dot above */
	[0x7c] = 0xfd,	/* z dot above */
	[0x7d] = 0xde,	/* Z caron */
	[0x7e] = 0xfe,	/* z caron */

	/* Last entry, spelled out to keep the table 0x80 entries long. */
	[0x7f] = 0x00	/* long s */
};
552
553 /*
554 * Keysym to local 8-bit charset sequence translation function.
555 * The out buffer is at least one character long.
556 * The keyboard layout is used as a hint to decide which latin charset to
557 * assume.
558 */
559 int
wsemul_local_translate(u_int32_t unisym,kbd_t layout,u_char * out)560 wsemul_local_translate(u_int32_t unisym, kbd_t layout, u_char *out)
561 {
562 switch (unisym >> 7) {
563 case 0x0080 >> 7:
564 switch (KB_ENCODING(layout)) {
565 case KB_LT:
566 case KB_LV:
567 switch (unisym) {
568 case KS_L7_AE:
569 unisym = 0xaf;
570 break;
571 case KS_L7_Ostroke:
572 unisym = 0xa8;
573 break;
574 case KS_L7_ae:
575 unisym = 0xbf;
576 break;
577 case KS_L7_ostroke:
578 unisym = 0xb8;
579 break;
580 }
581 }
582 break;
583
584 case 0x0100 >> 7:
585 switch (KB_ENCODING(layout)) {
586 case KB_LT:
587 case KB_LV:
588 if (unisym < 0x100 + nitems(unicode_to_latin7) &&
589 unicode_to_latin7[unisym - 0x100] != 0)
590 unisym = unicode_to_latin7[unisym - 0x100];
591 break;
592 case KB_TR:
593 switch (unisym) {
594 case KS_L5_Gbreve:
595 unisym = 0xd0;
596 break;
597 case KS_L5_gbreve:
598 unisym = 0xf0;
599 break;
600 case KS_L5_Idotabove:
601 unisym = 0xdd;
602 break;
603 case KS_L5_idotless:
604 unisym = 0xfd;
605 break;
606 case KS_L5_Scedilla:
607 unisym = 0xde;
608 break;
609 case KS_L5_scedilla:
610 unisym = 0xfe;
611 break;
612 }
613 break;
614 case KB_PL:
615 case KB_SI:
616 if (unisym < 0x100 + nitems(unicode_to_latin2) &&
617 unicode_to_latin2[unisym - 0x100] != 0)
618 unisym = unicode_to_latin2[unisym - 0x100];
619 break;
620 }
621 break;
622
623 case 0x0280 >> 7:
624 switch (KB_ENCODING(layout)) {
625 case KB_PL:
626 case KB_SI:
627 switch (unisym) {
628 case KS_L2_caron:
629 unisym = 0xb7;
630 break;
631 case KS_L2_breve:
632 unisym = 0xa2;
633 break;
634 case KS_L2_dotabove:
635 unisym = 0xff;
636 break;
637 case KS_L2_ogonek:
638 unisym = 0xb2;
639 break;
640 case KS_L2_dblacute:
641 unisym = 0xbd;
642 break;
643 }
644 break;
645 }
646 break;
647
648 case 0x0400 >> 7:
649 if (unisym < 0x400 +
650 sizeof(cyrillic_to_koi8) / sizeof(cyrillic_to_koi8[0]) &&
651 cyrillic_to_koi8[unisym - 0x400] != 0)
652 unisym = cyrillic_to_koi8[unisym - 0x400];
653 break;
654 case 0x0480 >> 7:
655 if (unisym == KS_Cyrillic_GHEUKR)
656 unisym = 0xbd; /* ukrainian GHE */
657 else if (unisym == KS_Cyrillic_gheukr)
658 unisym = 0xad; /* ukrainian ghe */
659 break;
660
661 case 0x2000 >> 7:
662 switch (KB_ENCODING(layout)) {
663 case KB_LT:
664 case KB_LV:
665 switch (unisym) {
666 case KS_L7_rightsnglquot:
667 unisym = 0xff;
668 break;
669 case KS_L7_leftdblquot:
670 unisym = 0xb4;
671 break;
672 case KS_L7_rightdblquot:
673 unisym = 0xa1;
674 break;
675 case KS_L7_dbllow9quot:
676 unisym = 0xa5;
677 break;
678 }
679 }
680 break;
681
682 }
683
684 out[0] = unisym & 0xff;
685 return (1);
686 }
687
688 /*
689 * Keysym to UTF-8 sequence translation function.
690 * The out buffer is at least 4 characters long.
691 */
692 int
wsemul_utf8_translate(u_int32_t unisym,kbd_t layout,u_char * out,int allow_utf8)693 wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out,
694 int allow_utf8)
695 {
696 #ifndef HAVE_UTF8_SUPPORT
697 return (wsemul_local_translate(unisym, layout, out));
698 #else
699 u_int pos, length, headpat;
700
701 if (!allow_utf8)
702 return wsemul_local_translate(unisym, layout, out);
703
704 if (unisym < 0x80) {
705 /* Fast path for plain ASCII characters. */
706 *out = (u_char)unisym;
707 return 1;
708 }
709
710 if (unisym < 0x800) {
711 headpat = 0xc0;
712 length = 2;
713 } else if (unisym < 0x10000) {
714 if (unisym >= 0xd800 && unisym < 0xe000)
715 return 0;
716 headpat = 0xe0;
717 length = 3;
718 } else {
719 if (unisym >= 0x110000)
720 return 0;
721 headpat = 0xf0;
722 length = 4;
723 }
724
725 for (pos = length - 1; pos > 0; pos--) {
726 out[pos] = 0x80 | (unisym & 0x3f);
727 unisym >>= 6;
728 }
729 out[0] = headpat | unisym;
730
731 return length;
732 #endif
733 }
734