xref: /openbsd/sys/dev/wscons/wsemul_subr.c (revision ca22e28b)
1 /*	$OpenBSD: wsemul_subr.c,v 1.2 2023/03/06 17:14:44 miod Exp $	*/
2 
3 /*
4  * Copyright (c) 2007, 2013 Miodrag Vallat.
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice, this permission notice, and the disclaimer below
9  * appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*
21  * Part of the UTF-8 state machine logic borrowed from citrus_utf8.c
22  * under the following licence:
23  */
24 /*-
25  * Copyright (c) 2002-2004 Tim J. Robbins
26  * All rights reserved.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/errno.h>
53 
54 #include <dev/wscons/wscons_features.h>
55 #include <dev/wscons/wsconsio.h>
56 #include <dev/wscons/wsdisplayvar.h>
57 #include <dev/wscons/wsemulvar.h>
58 #include <dev/wscons/wsksymdef.h>
59 
60 int	wsemul_local_translate(u_int32_t, kbd_t, u_char *);
61 
62 /*
63  * Get characters from an input stream and update the input state.
64  * Processing stops when the stream is empty, or a complete character
65  * sequence has been recognized, in which case it returns zero.
66  */
67 int
wsemul_getchar(const u_char ** inbuf,u_int * inlen,struct wsemul_inputstate * state,int allow_utf8)68 wsemul_getchar(const u_char **inbuf, u_int *inlen,
69     struct wsemul_inputstate *state, int allow_utf8)
70 {
71 	u_int len = *inlen;
72 	const u_char *buf = *inbuf;
73 #ifdef HAVE_UTF8_SUPPORT
74 	int rc;
75 	u_int32_t tmpchar, lbound;
76 	u_int mbleft;
77 #endif
78 
79 	if (len == 0)
80 		return EAGAIN;
81 
82 #ifndef HAVE_UTF8_SUPPORT
83 	state->inchar = *buf++;
84 	state->mbleft = 0;
85 	len--;
86 	*inlen = len;
87 	*inbuf = buf;
88 	return 0;
89 #else
90 	/*
91 	 * If we do not allow multibyte sequences, process as quickly
92 	 * as possible.
93 	 */
94 	if (!allow_utf8) {
95 		state->inchar = *buf++;
96 		state->mbleft = 0;
97 		len--;
98 		*inlen = len;
99 		*inbuf = buf;
100 		return 0;
101 	}
102 
103 	rc = EAGAIN;
104 	tmpchar = state->inchar;
105 	lbound = state->lbound;
106 	mbleft = state->mbleft;
107 
108 	while (len != 0) {
109 		u_int32_t frag = (u_int32_t)*buf++;
110 		len--;
111 
112 		/*
113 		 * If we are in the middle of a multibyte sequence, try
114 		 * to complete it.
115 		 */
116 
117 		if (mbleft != 0) {
118 			if ((frag & 0xc0) != 0x80)
119 				goto invalid;
120 
121 			tmpchar = (tmpchar << 6) | (frag & 0x3f);
122 			mbleft--;
123 			if (mbleft == 0) {
124 				if (tmpchar < lbound)
125 					goto invalid;
126 				if (tmpchar >= 0xd800 && tmpchar < 0xe000)
127 					goto invalid;
128 				if (tmpchar >= 0x110000)
129 					goto invalid;
130 				rc = 0;
131 				break;
132 			}
133 			continue;
134 		}
135 
136 		/*
137 		 * Otherwise let's decide if this is the start of a new
138 		 * multibyte sequence, or a 7-bit character.
139 		 */
140 
141 		if ((frag & 0x80) == 0) {
142 			tmpchar = frag;
143 			rc = 0;
144 			break;
145 		}
146 
147 		if ((frag & 0xe0) == 0xc0) {
148 			frag &= 0x1f;
149 			mbleft = 1;
150 			lbound = 0x80;
151 		} else if ((frag & 0xf0) == 0xe0) {
152 			frag &= 0x0f;
153 			mbleft = 2;
154 			lbound = 0x800;
155 		} else if ((frag & 0xf8) == 0xf0) {
156 			frag &= 0x07;
157 			mbleft = 3;
158 			lbound = 0x10000;
159 		} else {
160 			goto invalid;
161 		}
162 
163 		tmpchar = frag;
164 		state->lbound = lbound;
165 		continue;
166 
167 invalid:
168 		/* Abort the ill-formed sequence and continue */
169 		mbleft = 0;
170 		tmpchar = 0;
171 		rc = EILSEQ;
172 	}
173 
174 	state->inchar = tmpchar;
175 	state->mbleft = mbleft;
176 	*inlen = len;
177 	*inbuf = buf;
178 	return rc;
179 #endif
180 }
181 
/*
 * Unicode Cyrillic to KOI8 translation table, from RFC 2319.
 *
 * The table is indexed by (code point - 0x0400) and spans U+0400 to
 * U+045F.  Only the code points which have a KOI8 equivalent are listed;
 * every other slot is left at zero.
 */
const u_int8_t cyrillic_to_koi8[0x60] = {
	[0x01] = 0xb3,	/* U+0401 IO */
	[0x04] = 0xb4,	/* U+0404 UKR IE */
	[0x06] = 0xb6,	/* U+0406 BYE/UKR I */
	[0x07] = 0xb7,	/* U+0407 YI */

	[0x10] = 0xe1,	/* U+0410 A */
	[0x11] = 0xe2,	/* U+0411 BE */
	[0x12] = 0xf7,	/* U+0412 VE */
	[0x13] = 0xe7,	/* U+0413 GHE */
	[0x14] = 0xe4,	/* U+0414 DE */
	[0x15] = 0xe5,	/* U+0415 IE */
	[0x16] = 0xf6,	/* U+0416 ZHE */
	[0x17] = 0xfa,	/* U+0417 ZE */
	[0x18] = 0xe9,	/* U+0418 I */
	[0x19] = 0xea,	/* U+0419 short I */
	[0x1a] = 0xeb,	/* U+041A KA */
	[0x1b] = 0xec,	/* U+041B EL */
	[0x1c] = 0xed,	/* U+041C EM */
	[0x1d] = 0xee,	/* U+041D EN */
	[0x1e] = 0xef,	/* U+041E O */
	[0x1f] = 0xf0,	/* U+041F PE */
	[0x20] = 0xf2,	/* U+0420 ER */
	[0x21] = 0xf3,	/* U+0421 ES */
	[0x22] = 0xf4,	/* U+0422 TE */
	[0x23] = 0xf5,	/* U+0423 U */
	[0x24] = 0xe6,	/* U+0424 EF */
	[0x25] = 0xe8,	/* U+0425 HA */
	[0x26] = 0xe3,	/* U+0426 TSE */
	[0x27] = 0xfe,	/* U+0427 CHE */
	[0x28] = 0xfb,	/* U+0428 SHA */
	[0x29] = 0xfd,	/* U+0429 SHCHA */
	[0x2a] = 0xff,	/* U+042A HARD SIGN */
	[0x2b] = 0xf9,	/* U+042B YERU */
	[0x2c] = 0xf8,	/* U+042C SOFT SIGN */
	[0x2d] = 0xfc,	/* U+042D E */
	[0x2e] = 0xe0,	/* U+042E YU */
	[0x2f] = 0xf1,	/* U+042F YA */

	[0x30] = 0xc1,	/* U+0430 a */
	[0x31] = 0xc2,	/* U+0431 be */
	[0x32] = 0xd7,	/* U+0432 ve */
	[0x33] = 0xc7,	/* U+0433 ghe */
	[0x34] = 0xc4,	/* U+0434 de */
	[0x35] = 0xc5,	/* U+0435 ie */
	[0x36] = 0xd6,	/* U+0436 zhe */
	[0x37] = 0xda,	/* U+0437 ze */
	[0x38] = 0xc9,	/* U+0438 i */
	[0x39] = 0xca,	/* U+0439 short i */
	[0x3a] = 0xcb,	/* U+043A ka */
	[0x3b] = 0xcc,	/* U+043B el */
	[0x3c] = 0xcd,	/* U+043C em */
	[0x3d] = 0xce,	/* U+043D en */
	[0x3e] = 0xcf,	/* U+043E o */
	[0x3f] = 0xd0,	/* U+043F pe */
	[0x40] = 0xd2,	/* U+0440 er */
	[0x41] = 0xd3,	/* U+0441 es */
	[0x42] = 0xd4,	/* U+0442 te */
	[0x43] = 0xd5,	/* U+0443 u */
	[0x44] = 0xc6,	/* U+0444 ef */
	[0x45] = 0xc8,	/* U+0445 ha */
	[0x46] = 0xc3,	/* U+0446 tse */
	[0x47] = 0xde,	/* U+0447 che */
	[0x48] = 0xdb,	/* U+0448 sha */
	[0x49] = 0xdd,	/* U+0449 shcha */
	[0x4a] = 0xdf,	/* U+044A hard sign */
	[0x4b] = 0xd9,	/* U+044B yeru */
	[0x4c] = 0xd8,	/* U+044C soft sign */
	[0x4d] = 0xdc,	/* U+044D e */
	[0x4e] = 0xc0,	/* U+044E yu */
	[0x4f] = 0xd1,	/* U+044F ya */

	[0x51] = 0xa3,	/* U+0451 io */
	[0x54] = 0xa4,	/* U+0454 ukr ie */
	[0x56] = 0xa6,	/* U+0456 bye/ukr i */
	[0x57] = 0xa7	/* U+0457 yi */
};
284 
/*
 * Europe to Latin-2 translation table.
 *
 * The table is indexed by (code point - 0x0100) and spans U+0100 to
 * U+017F.  Only the code points which have a Latin-2 equivalent are
 * listed; every other slot is left at zero.
 */
const u_int8_t unicode_to_latin2[0x80] = {
	[0x02] = 0xc3,	/* U+0102 A breve */
	[0x03] = 0xe3,	/* U+0103 a breve */
	[0x04] = 0xa1,	/* U+0104 A ogonek */
	[0x05] = 0xb1,	/* U+0105 a ogonek */
	[0x06] = 0xc6,	/* U+0106 C acute */
	[0x07] = 0xe6,	/* U+0107 c acute */
	[0x0c] = 0xc8,	/* U+010C C caron */
	[0x0d] = 0xe8,	/* U+010D c caron */
	[0x0e] = 0xcf,	/* U+010E D caron */
	[0x0f] = 0xef,	/* U+010F d caron */

	[0x10] = 0xd0,	/* U+0110 D stroke */
	[0x11] = 0xf0,	/* U+0111 d stroke */
	[0x18] = 0xca,	/* U+0118 E ogonek */
	[0x19] = 0xea,	/* U+0119 e ogonek */
	[0x1a] = 0xcc,	/* U+011A E caron */
	[0x1b] = 0xec,	/* U+011B e caron */

	[0x39] = 0xc5,	/* U+0139 L acute */
	[0x3a] = 0xe5,	/* U+013A l acute */
	[0x3d] = 0xa5,	/* U+013D L caron */
	[0x3e] = 0xb5,	/* U+013E l caron */

	[0x41] = 0xa3,	/* U+0141 L stroke */
	[0x42] = 0xb3,	/* U+0142 l stroke */
	[0x43] = 0xd1,	/* U+0143 N acute */
	[0x44] = 0xf1,	/* U+0144 n acute */
	[0x47] = 0xd2,	/* U+0147 N caron */
	[0x48] = 0xf2,	/* U+0148 n caron */

	[0x50] = 0xd5,	/* U+0150 O double acute */
	[0x51] = 0xf5,	/* U+0151 o double acute */
	[0x54] = 0xc0,	/* U+0154 R acute */
	[0x55] = 0xe0,	/* U+0155 r acute */
	[0x58] = 0xd8,	/* U+0158 R caron */
	[0x59] = 0xf8,	/* U+0159 r caron */
	[0x5a] = 0xa6,	/* U+015A S acute */
	[0x5b] = 0xb6,	/* U+015B s acute */
	[0x5e] = 0xaa,	/* U+015E S cedilla */
	[0x5f] = 0xba,	/* U+015F s cedilla */

	[0x60] = 0xa9,	/* U+0160 S caron */
	[0x61] = 0xb9,	/* U+0161 s caron */
	[0x62] = 0xde,	/* U+0162 T cedilla */
	[0x63] = 0xfe,	/* U+0163 t cedilla */
	[0x64] = 0xab,	/* U+0164 T caron */
	[0x65] = 0xbb,	/* U+0165 t caron */
	[0x6e] = 0xd9,	/* U+016E U ring above */
	[0x6f] = 0xf9,	/* U+016F u ring above */

	[0x70] = 0xdb,	/* U+0170 U double acute */
	[0x71] = 0xfb,	/* U+0171 u double acute */
	[0x79] = 0xac,	/* U+0179 Z acute */
	[0x7a] = 0xbc,	/* U+017A z acute */
	[0x7b] = 0xaf,	/* U+017B Z dot above */
	[0x7c] = 0xbf,	/* U+017C z dot above */
	[0x7d] = 0xae,	/* U+017D Z caron */
	[0x7e] = 0xbe	/* U+017E z caron */
};
418 
/*
 * Baltic to Latin-7 translation table.
 *
 * The table is indexed by (code point - 0x0100) and spans U+0100 to
 * U+017F.  Only the code points which have a Latin-7 equivalent are
 * listed; every other slot is left at zero.
 */
const u_int8_t unicode_to_latin7[0x80] = {
	[0x00] = 0xc2,	/* U+0100 A macron */
	[0x01] = 0xe2,	/* U+0101 a macron */
	[0x04] = 0xc0,	/* U+0104 A ogonek */
	[0x05] = 0xe0,	/* U+0105 a ogonek */
	[0x06] = 0xc3,	/* U+0106 C acute */
	[0x07] = 0xe3,	/* U+0107 c acute */
	[0x0c] = 0xc8,	/* U+010C C caron */
	[0x0d] = 0xe8,	/* U+010D c caron */

	[0x12] = 0xc7,	/* U+0112 E macron */
	[0x13] = 0xe7,	/* U+0113 e macron */
	[0x16] = 0xcb,	/* U+0116 E dot above */
	[0x17] = 0xeb,	/* U+0117 e dot above */
	[0x18] = 0xc6,	/* U+0118 E ogonek */
	[0x19] = 0xe6,	/* U+0119 e ogonek */

	[0x22] = 0xcc,	/* U+0122 G cedilla */
	[0x23] = 0xec,	/* U+0123 g cedilla */
	[0x2a] = 0xce,	/* U+012A I macron */
	[0x2b] = 0xee,	/* U+012B i macron */
	[0x2e] = 0xc1,	/* U+012E I ogonek */
	[0x2f] = 0xe1,	/* U+012F i ogonek */

	[0x36] = 0xcd,	/* U+0136 K cedilla */
	[0x37] = 0xed,	/* U+0137 k cedilla */
	[0x3b] = 0xcf,	/* U+013B L cedilla */
	[0x3c] = 0xef,	/* U+013C l cedilla */

	[0x41] = 0xd9,	/* U+0141 L stroke */
	[0x42] = 0xf9,	/* U+0142 l stroke */
	[0x43] = 0xd1,	/* U+0143 N acute */
	[0x44] = 0xf1,	/* U+0144 n acute */
	[0x45] = 0xd2,	/* U+0145 N cedilla */
	[0x46] = 0xf2,	/* U+0146 n cedilla */
	[0x4c] = 0xd4,	/* U+014C O macron */
	[0x4d] = 0xf4,	/* U+014D o macron */

	[0x56] = 0xaa,	/* U+0156 R cedilla */
	[0x57] = 0xba,	/* U+0157 r cedilla */
	[0x5a] = 0xda,	/* U+015A S acute */
	[0x5b] = 0xfa,	/* U+015B s acute */

	[0x60] = 0xd0,	/* U+0160 S caron */
	[0x61] = 0xf0,	/* U+0161 s caron */
	[0x6a] = 0xdb,	/* U+016A U macron */
	[0x6b] = 0xfb,	/* U+016B u macron */

	[0x72] = 0xd8,	/* U+0172 U ogonek */
	[0x73] = 0xf8,	/* U+0173 u ogonek */
	[0x79] = 0xca,	/* U+0179 Z acute */
	[0x7a] = 0xea,	/* U+017A z acute */
	[0x7b] = 0xdd,	/* U+017B Z dot above */
	[0x7c] = 0xfd,	/* U+017C z dot above */
	[0x7d] = 0xde,	/* U+017D Z caron */
	[0x7e] = 0xfe	/* U+017E z caron */
};
552 
553 /*
554  * Keysym to local 8-bit charset sequence translation function.
555  * The out buffer is at least one character long.
556  * The keyboard layout is used as a hint to decide which latin charset to
557  * assume.
558  */
559 int
wsemul_local_translate(u_int32_t unisym,kbd_t layout,u_char * out)560 wsemul_local_translate(u_int32_t unisym, kbd_t layout, u_char *out)
561 {
562 	switch (unisym >> 7) {
563 	case 0x0080 >> 7:
564 		switch (KB_ENCODING(layout)) {
565 		case KB_LT:
566 		case KB_LV:
567 			switch (unisym) {
568 			case KS_L7_AE:
569 				unisym = 0xaf;
570 				break;
571 			case KS_L7_Ostroke:
572 				unisym = 0xa8;
573 				break;
574 			case KS_L7_ae:
575 				unisym = 0xbf;
576 				break;
577 			case KS_L7_ostroke:
578 				unisym = 0xb8;
579 				break;
580 			}
581 		}
582 		break;
583 
584 	case 0x0100 >> 7:
585 		switch (KB_ENCODING(layout)) {
586 		case KB_LT:
587 		case KB_LV:
588 			if (unisym < 0x100 + nitems(unicode_to_latin7) &&
589 			    unicode_to_latin7[unisym - 0x100] != 0)
590 				unisym = unicode_to_latin7[unisym - 0x100];
591 			break;
592 		case KB_TR:
593 			switch (unisym) {
594 			case KS_L5_Gbreve:
595 				unisym = 0xd0;
596 				break;
597 			case KS_L5_gbreve:
598 				unisym = 0xf0;
599 				break;
600 			case KS_L5_Idotabove:
601 				unisym = 0xdd;
602 				break;
603 			case KS_L5_idotless:
604 				unisym = 0xfd;
605 				break;
606 			case KS_L5_Scedilla:
607 				unisym = 0xde;
608 				break;
609 			case KS_L5_scedilla:
610 				unisym = 0xfe;
611 				break;
612 			}
613 			break;
614 		case KB_PL:
615 		case KB_SI:
616 			if (unisym < 0x100 + nitems(unicode_to_latin2) &&
617 			    unicode_to_latin2[unisym - 0x100] != 0)
618 				unisym = unicode_to_latin2[unisym - 0x100];
619 			break;
620 		}
621 		break;
622 
623 	case 0x0280 >> 7:
624 		switch (KB_ENCODING(layout)) {
625 		case KB_PL:
626 		case KB_SI:
627 			switch (unisym) {
628 			case KS_L2_caron:
629 				unisym = 0xb7;
630 				break;
631 			case KS_L2_breve:
632 				unisym = 0xa2;
633 				break;
634 			case KS_L2_dotabove:
635 				unisym = 0xff;
636 				break;
637 			case KS_L2_ogonek:
638 				unisym = 0xb2;
639 				break;
640 			case KS_L2_dblacute:
641 				unisym = 0xbd;
642 				break;
643 			}
644 			break;
645 		}
646 		break;
647 
648 	case 0x0400 >> 7:
649 		if (unisym < 0x400 +
650 		    sizeof(cyrillic_to_koi8) / sizeof(cyrillic_to_koi8[0]) &&
651 		    cyrillic_to_koi8[unisym - 0x400] != 0)
652 			unisym = cyrillic_to_koi8[unisym - 0x400];
653 		break;
654 	case 0x0480 >> 7:
655 		if (unisym == KS_Cyrillic_GHEUKR)
656 			unisym = 0xbd;	/* ukrainian GHE */
657 		else if (unisym == KS_Cyrillic_gheukr)
658 			unisym = 0xad;	/* ukrainian ghe */
659 		break;
660 
661 	case 0x2000 >> 7:
662 		switch (KB_ENCODING(layout)) {
663 		case KB_LT:
664 		case KB_LV:
665 			switch (unisym) {
666 			case KS_L7_rightsnglquot:
667 				unisym = 0xff;
668 				break;
669 			case KS_L7_leftdblquot:
670 				unisym = 0xb4;
671 				break;
672 			case KS_L7_rightdblquot:
673 				unisym = 0xa1;
674 				break;
675 			case KS_L7_dbllow9quot:
676 				unisym = 0xa5;
677 				break;
678 			}
679 		}
680 		break;
681 
682 	}
683 
684 	out[0] = unisym & 0xff;
685 	return (1);
686 }
687 
688 /*
689  * Keysym to UTF-8 sequence translation function.
690  * The out buffer is at least 4 characters long.
691  */
692 int
wsemul_utf8_translate(u_int32_t unisym,kbd_t layout,u_char * out,int allow_utf8)693 wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out,
694     int allow_utf8)
695 {
696 #ifndef HAVE_UTF8_SUPPORT
697 	return (wsemul_local_translate(unisym, layout, out));
698 #else
699 	u_int pos, length, headpat;
700 
701 	if (!allow_utf8)
702 		return wsemul_local_translate(unisym, layout, out);
703 
704 	if (unisym < 0x80) {
705 		/* Fast path for plain ASCII characters. */
706 		*out = (u_char)unisym;
707 		return 1;
708 	}
709 
710 	if (unisym < 0x800) {
711 		headpat = 0xc0;
712 		length = 2;
713 	} else if (unisym < 0x10000) {
714 		if (unisym >= 0xd800 && unisym < 0xe000)
715 			return 0;
716 		headpat = 0xe0;
717 		length = 3;
718 	} else {
719 		if (unisym >= 0x110000)
720 			return 0;
721 		headpat = 0xf0;
722 		length = 4;
723 	}
724 
725 	for (pos = length - 1; pos > 0; pos--) {
726 		out[pos] = 0x80 | (unisym & 0x3f);
727 		unisym >>= 6;
728 	}
729 	out[0] = headpat | unisym;
730 
731 	return length;
732 #endif
733 }
734