xref: /dragonfly/lib/libc/locale/utf8.c (revision da0d35cf)
1 /*
2  * Copyright 2015 Matthew Dillon <dillon@backplane.com> (mbintowcr, wcrtombin)
3  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
4  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
5  * Copyright (c) 2002-2004 Tim J. Robbins
6  * All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * WCSBIN_EOF -		Indicate EOF on input buffer.
37  *
38  * WCSBIN_SURRO -	Pass-through surrogate space (typically if the UTF-8
39  *			has already been escaped), on bytes-to-wchars and
40  *			wchars-to-bytes.  Escaping of other illegal codes will
41  *			still occur on input but de-escaping will not occur
42  *			on output (they will remain in the surrogate space).
43  *
 * WCSBIN_LONGCODES -	Allow 4-byte codes > 0x10FFFF, 5-byte and 6-byte
 *			sequences (normally illegal), otherwise escape them
 *			on input and fail on output.
47  *
48  * WCSBIN_STRICT -	Allow byte-to-wide conversions to fail.
49  */
50 
51 #include <sys/param.h>
52 
53 #include <errno.h>
54 #include <limits.h>
55 #include <runetype.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <wchar.h>
59 #include "mblocal.h"
60 
/*
 * Forward declarations of the UTF-8 conversion methods.  _UTF8_init()
 * installs each of them on the locale it is given.
 */
static size_t	_UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static int	_UTF8_mbsinit(const mbstate_t *);
static size_t	_UTF8_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_UTF8_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
		    size_t, size_t, mbstate_t * __restrict);
static size_t	_UTF8_mbintowcr(wchar_t * __restrict dst,
		    const char * __restrict src,
		    size_t dlen, size_t *slen, int flags);
static size_t	_UTF8_wcrtombin(char * __restrict dst,
		    const wchar_t * __restrict src,
		    size_t dlen, size_t *slen, int flags);
77 
/*
 * Conversion state overlaid on the caller's mbstate_t.  It records a
 * multibyte sequence that was split across calls to _UTF8_mbrtowc().
 */
typedef struct {
	wchar_t	ch;		/* bits of the character accumulated so far */
	int	want;		/* bytes still needed; 0 means initial state */
	wchar_t	lbound;		/* lowest value legal for this sequence length */
} _UTF8State;
83 
84 int
85 _UTF8_init(struct xlocale_ctype *l, _RuneLocale *rl)
86 {
87 
88 	l->__mbrtowc = _UTF8_mbrtowc;
89 	l->__wcrtomb = _UTF8_wcrtomb;
90 	l->__mbsinit = _UTF8_mbsinit;
91 	l->__mbsnrtowcs = _UTF8_mbsnrtowcs;
92 	l->__wcsnrtombs = _UTF8_wcsnrtombs;
93 	l->__mbintowcr = _UTF8_mbintowcr;
94 	l->__wcrtombin = _UTF8_wcrtombin;
95 	l->runes = rl;
96 	l->__mb_cur_max = 4;
97 	/*
98 	 * UCS-4 encoding used as the internal representation, so
99 	 * slots 0x0080-0x00FF are occuped and must be excluded
100 	 * from the single byte ctype by setting the limit.
101 	 */
102 	l->__mb_sb_limit = 128;
103 
104 	return (0);
105 }
106 
107 static int
108 _UTF8_mbsinit(const mbstate_t *ps)
109 {
110 
111 	return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
112 }
113 
114 static size_t
115 _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
116     mbstate_t * __restrict ps)
117 {
118 	_UTF8State *us;
119 	int ch, i, mask, want;
120 	wchar_t lbound, wch;
121 
122 	us = (_UTF8State *)ps;
123 
124 	if (us->want < 0 || us->want > 4) {
125 		errno = EINVAL;
126 		return ((size_t)-1);
127 	}
128 
129 	if (s == NULL) {
130 		s = "";
131 		n = 1;
132 		pwc = NULL;
133 	}
134 
135 	if (n == 0)
136 		/* Incomplete multibyte sequence */
137 		return ((size_t)-2);
138 
139 	if (us->want == 0) {
140 		/*
141 		 * Determine the number of octets that make up this character
142 		 * from the first octet, and a mask that extracts the
143 		 * interesting bits of the first octet. We already know
144 		 * the character is at least two bytes long.
145 		 *
146 		 * We also specify a lower bound for the character code to
147 		 * detect redundant, non-"shortest form" encodings. For
148 		 * example, the sequence C0 80 is _not_ a legal representation
149 		 * of the null character. This enforces a 1-to-1 mapping
150 		 * between character codes and their multibyte representations.
151 		 */
152 		ch = (unsigned char)*s;
153 		if ((ch & 0x80) == 0) {
154 			/* Fast path for plain ASCII characters. */
155 			if (pwc != NULL)
156 				*pwc = ch;
157 			return (ch != '\0' ? 1 : 0);
158 		}
159 		if ((ch & 0xe0) == 0xc0) {
160 			mask = 0x1f;
161 			want = 2;
162 			lbound = 0x80;
163 		} else if ((ch & 0xf0) == 0xe0) {
164 			mask = 0x0f;
165 			want = 3;
166 			lbound = 0x800;
167 		} else if ((ch & 0xf8) == 0xf0) {
168 			mask = 0x07;
169 			want = 4;
170 			lbound = 0x10000;
171 		} else {
172 			/*
173 			 * Malformed input; input is not UTF-8.
174 			 */
175 			errno = EILSEQ;
176 			return ((size_t)-1);
177 		}
178 	} else {
179 		want = us->want;
180 		lbound = us->lbound;
181 	}
182 
183 	/*
184 	 * Decode the octet sequence representing the character in chunks
185 	 * of 6 bits, most significant first.
186 	 */
187 	if (us->want == 0)
188 		wch = (unsigned char)*s++ & mask;
189 	else
190 		wch = us->ch;
191 
192 	for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
193 		if ((*s & 0xc0) != 0x80) {
194 			/*
195 			 * Malformed input; bad characters in the middle
196 			 * of a character.
197 			 */
198 			errno = EILSEQ;
199 			return ((size_t)-1);
200 		}
201 		wch <<= 6;
202 		wch |= *s++ & 0x3f;
203 	}
204 	if (i < want) {
205 		/* Incomplete multibyte sequence. */
206 		us->want = want - i;
207 		us->lbound = lbound;
208 		us->ch = wch;
209 		return ((size_t)-2);
210 	}
211 	if (wch < lbound || wch > 0x10ffff) {
212 		/*
213 		 * Malformed input; redundant encoding or illegal
214 		 *		    code sequence.
215 		 */
216 		errno = EILSEQ;
217 		return ((size_t)-1);
218 	}
219 	if (pwc != NULL)
220 		*pwc = wch;
221 	us->want = 0;
222 	return (wch == L'\0' ? 0 : want);
223 }
224 
225 static size_t
226 _UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
227     size_t nms, size_t len, mbstate_t * __restrict ps)
228 {
229 	_UTF8State *us;
230 	const char *s;
231 	size_t nchr;
232 	wchar_t wc;
233 	size_t nb;
234 
235 	us = (_UTF8State *)ps;
236 
237 	s = *src;
238 	nchr = 0;
239 
240 	if (dst == NULL) {
241 		/*
242 		 * The fast path in the loop below is not safe if an ASCII
243 		 * character appears as anything but the first byte of a
244 		 * multibyte sequence. Check now to avoid doing it in the loop.
245 		 */
246 		if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
247 			errno = EILSEQ;
248 			return ((size_t)-1);
249 		}
250 		for (;;) {
251 			if (nms > 0 && (signed char)*s > 0)
252 				/*
253 				 * Fast path for plain ASCII characters
254 				 * excluding NUL.
255 				 */
256 				nb = 1;
257 			else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
258 			    (size_t)-1)
259 				/* Invalid sequence - mbrtowc() sets errno. */
260 				return ((size_t)-1);
261 			else if (nb == 0 || nb == (size_t)-2)
262 				return (nchr);
263 			s += nb;
264 			nms -= nb;
265 			nchr++;
266 		}
267 		/*NOTREACHED*/
268 	}
269 
270 	/*
271 	 * The fast path in the loop below is not safe if an ASCII
272 	 * character appears as anything but the first byte of a
273 	 * multibyte sequence. Check now to avoid doing it in the loop.
274 	 */
275 	if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
276 		errno = EILSEQ;
277 		return ((size_t)-1);
278 	}
279 	while (len-- > 0) {
280 		if (nms > 0 && (signed char)*s > 0) {
281 			/*
282 			 * Fast path for plain ASCII characters
283 			 * excluding NUL.
284 			 */
285 			*dst = (wchar_t)*s;
286 			nb = 1;
287 		} else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
288 		    (size_t)-1) {
289 			*src = s;
290 			return ((size_t)-1);
291 		} else if (nb == (size_t)-2) {
292 			*src = s + nms;
293 			return (nchr);
294 		} else if (nb == 0) {
295 			*src = NULL;
296 			return (nchr);
297 		}
298 		s += nb;
299 		nms -= nb;
300 		nchr++;
301 		dst++;
302 	}
303 	*src = s;
304 	return (nchr);
305 }
306 
307 static size_t
308 _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
309 {
310 	_UTF8State *us;
311 	unsigned char lead;
312 	int i, len;
313 
314 	us = (_UTF8State *)ps;
315 
316 	if (us->want != 0) {
317 		errno = EINVAL;
318 		return ((size_t)-1);
319 	}
320 
321 	if (s == NULL)
322 		/* Reset to initial shift state (no-op) */
323 		return (1);
324 
325 	/*
326 	 * Determine the number of octets needed to represent this character.
327 	 * We always output the shortest sequence possible. Also specify the
328 	 * first few bits of the first octet, which contains the information
329 	 * about the sequence length.
330 	 */
331 	if ((wc & ~0x7f) == 0) {
332 		/* Fast path for plain ASCII characters. */
333 		*s = (char)wc;
334 		return (1);
335 	} else if ((wc & ~0x7ff) == 0) {
336 		lead = 0xc0;
337 		len = 2;
338 	} else if ((wc & ~0xffff) == 0) {
339 		lead = 0xe0;
340 		len = 3;
341 	} else if (wc <= 0x10ffff) {
342 		lead = 0xf0;
343 		len = 4;
344 	} else {
345 		errno = EILSEQ;
346 		return ((size_t)-1);
347 	}
348 
349 	/*
350 	 * Output the octets representing the character in chunks
351 	 * of 6 bits, least significant last. The first octet is
352 	 * a special case because it contains the sequence length
353 	 * information.
354 	 */
355 	for (i = len - 1; i > 0; i--) {
356 		s[i] = (wc & 0x3f) | 0x80;
357 		wc >>= 6;
358 	}
359 	*s = (wc & 0xff) | lead;
360 
361 	return (len);
362 }
363 
364 static size_t
365 _UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
366     size_t nwc, size_t len, mbstate_t * __restrict ps)
367 {
368 	_UTF8State *us;
369 	char buf[MB_LEN_MAX];
370 	const wchar_t *s;
371 	size_t nbytes;
372 	size_t nb;
373 
374 	us = (_UTF8State *)ps;
375 
376 	if (us->want != 0) {
377 		errno = EINVAL;
378 		return ((size_t)-1);
379 	}
380 
381 	s = *src;
382 	nbytes = 0;
383 
384 	if (dst == NULL) {
385 		while (nwc-- > 0) {
386 			if (0 <= *s && *s < 0x80)
387 				/* Fast path for plain ASCII characters. */
388 				nb = 1;
389 			else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
390 			    (size_t)-1)
391 				/* Invalid character - wcrtomb() sets errno. */
392 				return ((size_t)-1);
393 			if (*s == L'\0')
394 				return (nbytes + nb - 1);
395 			s++;
396 			nbytes += nb;
397 		}
398 		return (nbytes);
399 	}
400 
401 	while (len > 0 && nwc-- > 0) {
402 		if (0 <= *s && *s < 0x80) {
403 			/* Fast path for plain ASCII characters. */
404 			nb = 1;
405 			*dst = *s;
406 		} else if (len > (size_t)MB_CUR_MAX) {
407 			/* Enough space to translate in-place. */
408 			if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
409 				*src = s;
410 				return ((size_t)-1);
411 			}
412 		} else {
413 			/*
414 			 * May not be enough space; use temp. buffer.
415 			 */
416 			if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
417 				*src = s;
418 				return ((size_t)-1);
419 			}
420 			if (nb > (int)len)
421 				/* MB sequence for character won't fit. */
422 				break;
423 			(void) memcpy(dst, buf, nb);
424 		}
425 		if (*s == L'\0') {
426 			*src = NULL;
427 			return (nbytes + nb - 1);
428 		}
429 		s++;
430 		dst += nb;
431 		len -= nb;
432 		nbytes += nb;
433 	}
434 	*src = s;
435 	return (nbytes);
436 }
437 
438 /*
439  * Clean binary to wchar buffer conversions.  This is basically like a normal
440  * buffer conversion but with a sane argument API and escaping.  See none.c
441  * for a more complete description.
442  */
443 static size_t
444 _UTF8_mbintowcr(wchar_t * __restrict dst, const char * __restrict src,
445 		size_t dlen, size_t *slen, int flags)
446 {
447 	size_t i;
448 	size_t j;
449 	size_t k;
450 	size_t n = *slen;
451 	int ch, mask, want;
452 	wchar_t lbound, wch;
453 
454 	for (i = j = 0; i < n; ++i) {
455 		if (j == dlen)
456 			break;
457 		ch = (unsigned char)src[i];
458 
459 		if ((ch & 0x80) == 0) {
460 			/* Fast path for plain ASCII characters. */
461 			if (dst)
462 				dst[j] = ch;
463 			++j;
464 			continue;
465 		}
466 		if ((ch & 0xe0) == 0xc0) {
467 			mask = 0x1f;
468 			want = 2;
469 			lbound = 0x80;
470 		} else if ((ch & 0xf0) == 0xe0) {
471 			mask = 0x0f;
472 			want = 3;
473 			lbound = 0x800;
474 		} else if ((ch & 0xf8) == 0xf0) {
475 			mask = 0x07;
476 			want = 4;
477 			lbound = 0x10000;
478 		} else if ((ch & 0xfc) == 0xf8) {
479 			/* normally illegal, handled down below */
480 			mask = 0x03;
481 			want = 5;
482 			lbound = 0x200000;
483 		} else if ((ch & 0xfe) == 0xfc) {
484 			/* normally illegal, handled down below */
485 			mask = 0x01;
486 			want = 6;
487 			lbound = 0x4000000;
488 		} else {
489 			/*
490 			 * Malformed input; input is not UTF-8, escape
491 			 * with UTF-8B.
492 			 */
493 			if (flags & WCSBIN_STRICT) {
494 				if (i == 0) {
495 					errno = EILSEQ;
496 					return ((size_t)-1);
497 				}
498 				break;
499 			}
500 			if (dst)
501 				dst[j] = 0xDC00 | ch;
502 			++j;
503 			continue;
504 		}
505 
506 		/*
507 		 * Construct wchar_t from multibyte sequence.
508 		 */
509 		wch = ch & mask;
510 		for (k = 1; k < want; ++k) {
511 			/*
512 			 * Stop if not enough input (don't do this early
513 			 * so we can detect illegal characters as they occur
514 			 * in the stream).
515 			 *
516 			 * If termination is requested force-escape all chars.
517 			 */
518 			if (i + k >= n)	{
519 				if (flags & WCSBIN_EOF) {
520 					want = n - i;
521 					goto forceesc;
522 				}
523 				goto breakout;
524 			}
525 
526 			ch = src[i+k];
527 			if ((ch & 0xc0) != 0x80) {
528 				/*
529 				 * Malformed input, bad characters in the
530 				 * middle of a multibyte sequence.  Escape
531 				 * with UTF-8B.
532 				 */
533 				if (flags & WCSBIN_STRICT) {
534 					if (i == 0) {
535 						errno = EILSEQ;
536 						return ((size_t)-1);
537 					}
538 					goto breakout;
539 				}
540 				if (dst)
541 					dst[j] = 0xDC00 | (unsigned char)src[i];
542 				++j;
543 				goto loopup;
544 			}
545 			wch <<= 6;
546 			wch |= ch & 0x3f;
547 		}
548 
549 		/*
550 		 * Check validity of the wchar.  If invalid we could escape
551 		 * just the first character and loop up, but it ought to be
552 		 * more readable if we escape all the chars in the sequence
553 		 * (since they are all >= 0x80 and might represent a legacy
554 		 * 5-byte or 6-byte code).
555 		 */
556 		if (wch < lbound ||
557 		    ((flags & WCSBIN_LONGCODES) == 0 && wch > 0x10ffff)) {
558 			goto forceesc;
559 		}
560 
561 		/*
562 		 * Check if wch is a surrogate code (which also encloses our
563 		 * UTF-8B escaping range).  This is normally illegal in UTF8.
564 		 * If it is, we need to escape each characer in the sequence.
565 		 * Breakout if there isn't enough output buffer space.
566 		 *
567 		 * If (flags & WCSBIN_SURRO) the caller wishes to accept
568 		 * surrogate codes, i.e. the input might potentially already
569 		 * be escaped UTF8-B or unchecked UTF-16 that was converted
570 		 * into UTF-8.
571 		 */
572 		if ((flags & WCSBIN_SURRO) == 0 &&
573 		    wch >= 0xD800 && wch <= 0xDFFF) {
574 forceesc:
575 			if (j + want > dlen)
576 				break;
577 			if (flags & WCSBIN_STRICT) {
578 				if (i == 0) {
579 					errno = EILSEQ;
580 					return ((size_t)-1);
581 				}
582 				break;
583 			}
584 			for (k = 0; k < want; ++k) {
585 				if (dst) {
586 					dst[j] = 0xDC00 |
587 						 (unsigned char)src[i+k];
588 				}
589 				++j;
590 			}
591 			i += k - 1;
592 		} else {
593 			i += k - 1;
594 			if (dst)
595 				dst[j] = wch;
596 			++j;
597 		}
598 loopup:
599 		;
600 	}
601 breakout:
602 	*slen = i;
603 
604 	return j;
605 }
606 
607 static size_t
608 _UTF8_wcrtombin(char * __restrict dst, const wchar_t * __restrict src,
609 		size_t dlen, size_t *slen, int flags)
610 {
611 	size_t i;
612 	size_t j;
613 	size_t k;
614 	size_t n = *slen;
615 	size_t len;
616 	unsigned char lead;
617 	wchar_t wc;
618 
619 	for (i = j = 0; i < n; ++i) {
620 		if (j == dlen)
621 			break;
622 		wc = src[i];
623 
624 		if ((wc & ~0x7f) == 0) {
625 			/* Fast path for plain ASCII characters. */
626 			if (dst)
627 				dst[j] = (unsigned char)wc;
628 			++j;
629 			continue;
630 		}
631 		if ((wc & ~0x7ff) == 0) {
632 			lead = 0xc0;
633 			len = 2;
634 		} else if (wc >= 0xDC80 && wc <= 0xDCFF &&
635 			   (flags & WCSBIN_SURRO) == 0) {
636 			if (flags & WCSBIN_STRICT) {
637 				/*
638 				 * STRICT without SURRO is an error for
639 				 * surrogates.
640 				 */
641 				if (i == 0) {
642 					errno = EILSEQ;
643 					return ((size_t)-1);
644 				}
645 				break;
646 			}
647 			if (dst)
648 				dst[j] = (unsigned char)wc;
649 			++j;
650 			continue;
651 		} else if ((wc & ~0xffff) == 0) {
652 			if (wc >= 0xD800 && wc <= 0xDFFF &&
653 			    (flags & (WCSBIN_SURRO | WCSBIN_STRICT)) ==
654 			    WCSBIN_STRICT) {
655 				/*
656 				 * Surrogates in general are an error
657 				 * if STRICT is specified and SURRO is not
658 				 * specified.
659 				 */
660 				if (i == 0) {
661 					errno = EILSEQ;
662 					return ((size_t)-1);
663 				}
664 				break;
665 			}
666 			lead = 0xe0;
667 			len = 3;
668 		} else if (wc <= 0x10ffff) {
669 			lead = 0xf0;
670 			len = 4;
671 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x200000) {
672 			/* normally illegal */
673 			lead = 0xf0;
674 			len = 4;
675 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x4000000) {
676 			/* normally illegal */
677 			lead = 0xf8;
678 			len = 5;
679 		} else if ((flags & WCSBIN_LONGCODES) &&
680 			   (uint32_t)wc < 0x80000000U) {
681 			/* normally illegal */
682 			lead = 0xfc;
683 			len = 6;
684 		} else {
685 			if (i == 0) {
686 				errno = EILSEQ;
687 				return ((size_t)-1);
688 			}
689 			/* stop here, process error on next loop */
690 			break;
691 		}
692 
693 		/*
694 		 * Output the octets representing the character in chunks
695 		 * of 6 bits, least significant last. The first octet is
696 		 * a special case because it contains the sequence length
697 		 * information.
698 		 */
699 		if (j + len > dlen)
700 			break;
701 		k = j;
702 		j += len;
703 		if (dst) {
704 			while (--len > 0) {
705 				dst[k + len] = (wc & 0x3f) | 0x80;
706 				wc >>= 6;
707 			}
708 			dst[k] = (wc & 0xff) | lead;
709 		}
710 	}
711 	*slen = i;
712 
713 	return j;
714 }
715 
/*
 * Exported wrapper around _UTF8_mbintowcr(): converts a byte buffer to
 * wide characters using the UTF-8 rules.  All arguments and the result
 * are passed through unchanged.
 */
size_t
utf8towcr(wchar_t * __restrict dst, const char * __restrict src,
		size_t dlen, size_t *slen, int flags)
{
	size_t produced;

	produced = _UTF8_mbintowcr(dst, src, dlen, slen, flags);
	return produced;
}
722 
/*
 * Exported wrapper around _UTF8_wcrtombin(): converts a wide character
 * buffer to bytes using the UTF-8 rules.  All arguments and the result
 * are passed through unchanged.
 */
size_t
wcrtoutf8(char * __restrict dst, const wchar_t * __restrict src,
	  size_t dlen, size_t *slen, int flags)
{
	size_t produced;

	produced = _UTF8_wcrtombin(dst, src, dlen, slen, flags);
	return produced;
}
729