xref: /dragonfly/lib/libc/locale/utf8.c (revision 6a3cbbc2)
1 /*
2  * Copyright 2015 Matthew Dillon <dillon@backplane.com> (mbintowcr, wcrtombin)
3  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
4  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
5  * Copyright (c) 2002-2004 Tim J. Robbins
6  * All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * WCSBIN_EOF -		Indicate EOF on input buffer.
37  *
38  * WCSBIN_SURRO -	Pass-through surrogate space (typically if the UTF-8
39  *			has already been escaped), on bytes-to-wchars and
40  *			wchars-to-bytes.  Escaping of other illegal codes will
41  *			still occur on input but de-escaping will not occur
42  *			on output (they will remain in the surrogate space).
43  *
44  * WCSBIN_LONGCODES -	Allow 4-byte codes > 0x10FFFF as well as 5-byte and
45  *			6-byte sequences (normally illegal); otherwise they
46  *			are escaped on input and fail on output.
47  *
48  * WCSBIN_STRICT -	Allow byte-to-wide conversions to fail.
49  */
50 
51 #include <sys/param.h>
52 
53 #include <errno.h>
54 #include <limits.h>
55 #include <runetype.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <wchar.h>
59 #include "mblocal.h"
60 
61 extern int __mb_sb_limit;
62 
63 static size_t	_UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
64 		    size_t, mbstate_t * __restrict);
65 static int	_UTF8_mbsinit(const mbstate_t *);
66 static size_t	_UTF8_mbsnrtowcs(wchar_t * __restrict,
67 		    const char ** __restrict, size_t, size_t,
68 		    mbstate_t * __restrict);
69 static size_t	_UTF8_wcrtomb(char * __restrict, wchar_t,
70 		    mbstate_t * __restrict);
71 static size_t	_UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
72 		    size_t, size_t, mbstate_t * __restrict);
73 static size_t	_UTF8_mbintowcr(wchar_t * __restrict dst,
74 		    const char * __restrict src,
75 		    size_t dlen, size_t *slen, int flags);
76 static size_t	_UTF8_wcrtombin(char * __restrict dst,
77 		    const wchar_t * __restrict src,
78 		    size_t dlen, size_t *slen, int flags);
79 
/*
 * Partial-conversion state for the restartable UTF-8 decoder.  The
 * conversion routines in this file access the caller's mbstate_t through
 * a pointer cast to this type.
 */
80 typedef struct {
81 	wchar_t	ch;		/* bits accumulated so far for the pending character */
82 	int	want;		/* bytes still needed to finish it; 0 = initial state */
83 	wchar_t	lbound;		/* smallest value legal for the pending sequence length */
84 } _UTF8State;
85 
86 int
87 _UTF8_init(struct xlocale_ctype *l, _RuneLocale *rl)
88 {
89 
90 	l->__mbrtowc = _UTF8_mbrtowc;
91 	l->__wcrtomb = _UTF8_wcrtomb;
92 	l->__mbsinit = _UTF8_mbsinit;
93 	l->__mbsnrtowcs = _UTF8_mbsnrtowcs;
94 	l->__wcsnrtombs = _UTF8_wcsnrtombs;
95 	l->__mbintowcr = _UTF8_mbintowcr;
96 	l->__wcrtombin = _UTF8_wcrtombin;
97 	l->runes = rl;
98 	l->__mb_cur_max = 4;
99 	/*
100 	 * UCS-4 encoding used as the internal representation, so
101 	 * slots 0x0080-0x00FF are occuped and must be excluded
102 	 * from the single byte ctype by setting the limit.
103 	 */
104 	l->__mb_sb_limit = 128;
105 
106 	return (0);
107 }
108 
109 static int
110 _UTF8_mbsinit(const mbstate_t *ps)
111 {
112 
113 	return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
114 }
115 
116 static size_t
117 _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
118     mbstate_t * __restrict ps)
119 {
120 	_UTF8State *us;
121 	int ch, i, mask, want;
122 	wchar_t lbound, wch;
123 
124 	us = (_UTF8State *)ps;
125 
126 	if (us->want < 0 || us->want > 4) {
127 		errno = EINVAL;
128 		return ((size_t)-1);
129 	}
130 
131 	if (s == NULL) {
132 		s = "";
133 		n = 1;
134 		pwc = NULL;
135 	}
136 
137 	if (n == 0)
138 		/* Incomplete multibyte sequence */
139 		return ((size_t)-2);
140 
141 	if (us->want == 0) {
142 		/*
143 		 * Determine the number of octets that make up this character
144 		 * from the first octet, and a mask that extracts the
145 		 * interesting bits of the first octet. We already know
146 		 * the character is at least two bytes long.
147 		 *
148 		 * We also specify a lower bound for the character code to
149 		 * detect redundant, non-"shortest form" encodings. For
150 		 * example, the sequence C0 80 is _not_ a legal representation
151 		 * of the null character. This enforces a 1-to-1 mapping
152 		 * between character codes and their multibyte representations.
153 		 */
154 		ch = (unsigned char)*s;
155 		if ((ch & 0x80) == 0) {
156 			/* Fast path for plain ASCII characters. */
157 			if (pwc != NULL)
158 				*pwc = ch;
159 			return (ch != '\0' ? 1 : 0);
160 		}
161 		if ((ch & 0xe0) == 0xc0) {
162 			mask = 0x1f;
163 			want = 2;
164 			lbound = 0x80;
165 		} else if ((ch & 0xf0) == 0xe0) {
166 			mask = 0x0f;
167 			want = 3;
168 			lbound = 0x800;
169 		} else if ((ch & 0xf8) == 0xf0) {
170 			mask = 0x07;
171 			want = 4;
172 			lbound = 0x10000;
173 		} else {
174 			/*
175 			 * Malformed input; input is not UTF-8.
176 			 */
177 			errno = EILSEQ;
178 			return ((size_t)-1);
179 		}
180 	} else {
181 		want = us->want;
182 		lbound = us->lbound;
183 	}
184 
185 	/*
186 	 * Decode the octet sequence representing the character in chunks
187 	 * of 6 bits, most significant first.
188 	 */
189 	if (us->want == 0)
190 		wch = (unsigned char)*s++ & mask;
191 	else
192 		wch = us->ch;
193 
194 	for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
195 		if ((*s & 0xc0) != 0x80) {
196 			/*
197 			 * Malformed input; bad characters in the middle
198 			 * of a character.
199 			 */
200 			errno = EILSEQ;
201 			return ((size_t)-1);
202 		}
203 		wch <<= 6;
204 		wch |= *s++ & 0x3f;
205 	}
206 	if (i < want) {
207 		/* Incomplete multibyte sequence. */
208 		us->want = want - i;
209 		us->lbound = lbound;
210 		us->ch = wch;
211 		return ((size_t)-2);
212 	}
213 	if (wch < lbound || wch > 0x10ffff) {
214 		/*
215 		 * Malformed input; redundant encoding or illegal
216 		 *		    code sequence.
217 		 */
218 		errno = EILSEQ;
219 		return ((size_t)-1);
220 	}
221 	if (pwc != NULL)
222 		*pwc = wch;
223 	us->want = 0;
224 	return (wch == L'\0' ? 0 : want);
225 }
226 
227 static size_t
228 _UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
229     size_t nms, size_t len, mbstate_t * __restrict ps)
230 {
231 	_UTF8State *us;
232 	const char *s;
233 	size_t nchr;
234 	wchar_t wc;
235 	size_t nb;
236 
237 	us = (_UTF8State *)ps;
238 
239 	s = *src;
240 	nchr = 0;
241 
242 	if (dst == NULL) {
243 		/*
244 		 * The fast path in the loop below is not safe if an ASCII
245 		 * character appears as anything but the first byte of a
246 		 * multibyte sequence. Check now to avoid doing it in the loop.
247 		 */
248 		if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
249 			errno = EILSEQ;
250 			return ((size_t)-1);
251 		}
252 		for (;;) {
253 			if (nms > 0 && (signed char)*s > 0)
254 				/*
255 				 * Fast path for plain ASCII characters
256 				 * excluding NUL.
257 				 */
258 				nb = 1;
259 			else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
260 			    (size_t)-1)
261 				/* Invalid sequence - mbrtowc() sets errno. */
262 				return ((size_t)-1);
263 			else if (nb == 0 || nb == (size_t)-2)
264 				return (nchr);
265 			s += nb;
266 			nms -= nb;
267 			nchr++;
268 		}
269 		/*NOTREACHED*/
270 	}
271 
272 	/*
273 	 * The fast path in the loop below is not safe if an ASCII
274 	 * character appears as anything but the first byte of a
275 	 * multibyte sequence. Check now to avoid doing it in the loop.
276 	 */
277 	if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
278 		errno = EILSEQ;
279 		return ((size_t)-1);
280 	}
281 	while (len-- > 0) {
282 		if (nms > 0 && (signed char)*s > 0) {
283 			/*
284 			 * Fast path for plain ASCII characters
285 			 * excluding NUL.
286 			 */
287 			*dst = (wchar_t)*s;
288 			nb = 1;
289 		} else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
290 		    (size_t)-1) {
291 			*src = s;
292 			return ((size_t)-1);
293 		} else if (nb == (size_t)-2) {
294 			*src = s + nms;
295 			return (nchr);
296 		} else if (nb == 0) {
297 			*src = NULL;
298 			return (nchr);
299 		}
300 		s += nb;
301 		nms -= nb;
302 		nchr++;
303 		dst++;
304 	}
305 	*src = s;
306 	return (nchr);
307 }
308 
309 static size_t
310 _UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
311 {
312 	_UTF8State *us;
313 	unsigned char lead;
314 	int i, len;
315 
316 	us = (_UTF8State *)ps;
317 
318 	if (us->want != 0) {
319 		errno = EINVAL;
320 		return ((size_t)-1);
321 	}
322 
323 	if (s == NULL)
324 		/* Reset to initial shift state (no-op) */
325 		return (1);
326 
327 	/*
328 	 * Determine the number of octets needed to represent this character.
329 	 * We always output the shortest sequence possible. Also specify the
330 	 * first few bits of the first octet, which contains the information
331 	 * about the sequence length.
332 	 */
333 	if ((wc & ~0x7f) == 0) {
334 		/* Fast path for plain ASCII characters. */
335 		*s = (char)wc;
336 		return (1);
337 	} else if ((wc & ~0x7ff) == 0) {
338 		lead = 0xc0;
339 		len = 2;
340 	} else if ((wc & ~0xffff) == 0) {
341 		lead = 0xe0;
342 		len = 3;
343 	} else if (wc <= 0x10ffff) {
344 		lead = 0xf0;
345 		len = 4;
346 	} else {
347 		errno = EILSEQ;
348 		return ((size_t)-1);
349 	}
350 
351 	/*
352 	 * Output the octets representing the character in chunks
353 	 * of 6 bits, least significant last. The first octet is
354 	 * a special case because it contains the sequence length
355 	 * information.
356 	 */
357 	for (i = len - 1; i > 0; i--) {
358 		s[i] = (wc & 0x3f) | 0x80;
359 		wc >>= 6;
360 	}
361 	*s = (wc & 0xff) | lead;
362 
363 	return (len);
364 }
365 
366 static size_t
367 _UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
368     size_t nwc, size_t len, mbstate_t * __restrict ps)
369 {
370 	_UTF8State *us;
371 	char buf[MB_LEN_MAX];
372 	const wchar_t *s;
373 	size_t nbytes;
374 	size_t nb;
375 
376 	us = (_UTF8State *)ps;
377 
378 	if (us->want != 0) {
379 		errno = EINVAL;
380 		return ((size_t)-1);
381 	}
382 
383 	s = *src;
384 	nbytes = 0;
385 
386 	if (dst == NULL) {
387 		while (nwc-- > 0) {
388 			if (0 <= *s && *s < 0x80)
389 				/* Fast path for plain ASCII characters. */
390 				nb = 1;
391 			else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
392 			    (size_t)-1)
393 				/* Invalid character - wcrtomb() sets errno. */
394 				return ((size_t)-1);
395 			if (*s == L'\0')
396 				return (nbytes + nb - 1);
397 			s++;
398 			nbytes += nb;
399 		}
400 		return (nbytes);
401 	}
402 
403 	while (len > 0 && nwc-- > 0) {
404 		if (0 <= *s && *s < 0x80) {
405 			/* Fast path for plain ASCII characters. */
406 			nb = 1;
407 			*dst = *s;
408 		} else if (len > (size_t)MB_CUR_MAX) {
409 			/* Enough space to translate in-place. */
410 			if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
411 				*src = s;
412 				return ((size_t)-1);
413 			}
414 		} else {
415 			/*
416 			 * May not be enough space; use temp. buffer.
417 			 */
418 			if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
419 				*src = s;
420 				return ((size_t)-1);
421 			}
422 			if (nb > (int)len)
423 				/* MB sequence for character won't fit. */
424 				break;
425 			(void) memcpy(dst, buf, nb);
426 		}
427 		if (*s == L'\0') {
428 			*src = NULL;
429 			return (nbytes + nb - 1);
430 		}
431 		s++;
432 		dst += nb;
433 		len -= nb;
434 		nbytes += nb;
435 	}
436 	*src = s;
437 	return (nbytes);
438 }
439 
440 /*
441  * Clean binary to wchar buffer conversions.  This is basically like a normal
442  * buffer conversion but with a sane argument API and escaping.  See none.c
443  * for a more complete description.
444  */
/*
 * Convert up to *slen bytes of `src' into at most `dlen' wide characters.
 *
 * dst   - output buffer; may be NULL, in which case nothing is stored
 *	   but the output count is still computed.
 * src   - input bytes; they need not be valid UTF-8.
 * dlen  - capacity of dst, in wide characters.
 * slen  - in: number of input bytes available; out: number of input
 *	   bytes consumed.
 * flags - WCSBIN_* bits, see the description at the top of the file.
 *
 * Bytes that do not form an acceptable sequence are escaped, each one
 * becoming the wide character 0xDC00 | byte.  With WCSBIN_STRICT no
 * escaping is done: conversion stops in front of the offending sequence,
 * and if that sequence starts at the very first input byte the function
 * fails with (size_t)-1 and errno = EILSEQ without updating *slen.
 *
 * Returns the number of wide characters produced.
 */
445 static size_t
446 _UTF8_mbintowcr(wchar_t * __restrict dst, const char * __restrict src,
447 		size_t dlen, size_t *slen, int flags)
448 {
449 	size_t i;		/* index of the current input byte */
450 	size_t j;		/* number of wide characters produced so far */
451 	size_t k;		/* offset inside the current multibyte sequence */
452 	size_t n = *slen;	/* number of input bytes available */
453 	int ch, mask, want;
454 	wchar_t lbound, wch;
455 
456 	for (i = j = 0; i < n; ++i) {
		/* Output buffer full. */
457 		if (j == dlen)
458 			break;
459 		ch = (unsigned char)src[i];
460 
461 		if ((ch & 0x80) == 0) {
462 			/* Fast path for plain ASCII characters. */
463 			if (dst)
464 				dst[j] = ch;
465 			++j;
466 			continue;
467 		}
		/*
		 * Classify the lead byte: payload mask, total sequence
		 * length and the smallest value that legitimately needs
		 * that length (used to reject redundant encodings).
		 */
468 		if ((ch & 0xe0) == 0xc0) {
469 			mask = 0x1f;
470 			want = 2;
471 			lbound = 0x80;
472 		} else if ((ch & 0xf0) == 0xe0) {
473 			mask = 0x0f;
474 			want = 3;
475 			lbound = 0x800;
476 		} else if ((ch & 0xf8) == 0xf0) {
477 			mask = 0x07;
478 			want = 4;
479 			lbound = 0x10000;
480 		} else if ((ch & 0xfc) == 0xf8) {
481 			/* normally illegal, handled down below */
482 			mask = 0x03;
483 			want = 5;
484 			lbound = 0x200000;
485 		} else if ((ch & 0xfe) == 0xfc) {
486 			/* normally illegal, handled down below */
487 			mask = 0x01;
488 			want = 6;
489 			lbound = 0x4000000;
490 		} else {
491 			/*
492 			 * Malformed input; input is not UTF-8, escape
493 			 * with UTF-8B.
494 			 */
495 			if (flags & WCSBIN_STRICT) {
496 				if (i == 0) {
497 					errno = EILSEQ;
498 					return ((size_t)-1);
499 				}
500 				break;
501 			}
502 			if (dst)
503 				dst[j] = 0xDC00 | ch;
504 			++j;
505 			continue;
506 		}
507 
508 		/*
509 		 * Construct wchar_t from multibyte sequence.
510 		 */
511 		wch = ch & mask;
512 		for (k = 1; k < want; ++k) {
513 			/*
514 			 * Stop if not enough input (don't do this early
515 			 * so we can detect illegal characters as they occur
516 			 * in the stream).
517 			 *
518 			 * If termination is requested force-escape all chars.
519 			 */
520 			if (i + k >= n)	{
521 				if (flags & WCSBIN_EOF) {
					/* Escape only the bytes that exist. */
522 					want = n - i;
523 					goto forceesc;
524 				}
				/* Leave the partial sequence unconsumed. */
525 				goto breakout;
526 			}
527 
528 			ch = src[i+k];
529 			if ((ch & 0xc0) != 0x80) {
530 				/*
531 				 * Malformed input, bad characters in the
532 				 * middle of a multibyte sequence.  Escape
533 				 * with UTF-8B.
534 				 */
535 				if (flags & WCSBIN_STRICT) {
536 					if (i == 0) {
537 						errno = EILSEQ;
538 						return ((size_t)-1);
539 					}
540 					goto breakout;
541 				}
				/*
				 * Only the lead byte is escaped here; the
				 * byte that broke the sequence is examined
				 * afresh on the next pass of the outer loop.
				 */
542 				if (dst)
543 					dst[j] = 0xDC00 | (unsigned char)src[i];
544 				++j;
545 				goto loopup;
546 			}
547 			wch <<= 6;
548 			wch |= ch & 0x3f;
549 		}
550 
551 		/*
552 		 * Check validity of the wchar.  If invalid we could escape
553 		 * just the first character and loop up, but it ought to be
554 		 * more readable if we escape all the chars in the sequence
555 		 * (since they are all >= 0x80 and might represent a legacy
556 		 * 5-byte or 6-byte code).
557 		 */
558 		if (wch < lbound ||
559 		    ((flags & WCSBIN_LONGCODES) == 0 && wch > 0x10ffff)) {
560 			goto forceesc;
561 		}
562 
563 		/*
564 		 * Check if wch is a surrogate code (which also encloses our
565 		 * UTF-8B escaping range).  This is normally illegal in UTF8.
566 		 * If it is, we need to escape each character in the sequence.
567 		 * Breakout if there isn't enough output buffer space.
568 		 *
569 		 * If (flags & WCSBIN_SURRO) the caller wishes to accept
570 		 * surrogate codes, i.e. the input might potentially already
571 		 * be escaped UTF8-B or unchecked UTF-16 that was converted
572 		 * into UTF-8.
573 		 */
574 		if ((flags & WCSBIN_SURRO) == 0 &&
575 		    wch >= 0xD800 && wch <= 0xDFFF) {
576 forceesc:
			/*
			 * Also entered by goto from the checks above.  The
			 * sequence is escaped as a unit: every byte needs
			 * its own output slot, otherwise nothing is emitted.
			 */
577 			if (j + want > dlen)
578 				break;
579 			if (flags & WCSBIN_STRICT) {
580 				if (i == 0) {
581 					errno = EILSEQ;
582 					return ((size_t)-1);
583 				}
584 				break;
585 			}
586 			for (k = 0; k < want; ++k) {
587 				if (dst) {
588 					dst[j] = 0xDC00 |
589 						 (unsigned char)src[i+k];
590 				}
591 				++j;
592 			}
			/* The loop's ++i steps over the final byte. */
593 			i += k - 1;
594 		} else {
			/* Valid character; the loop's ++i steps over the final byte. */
595 			i += k - 1;
596 			if (dst)
597 				dst[j] = wch;
598 			++j;
599 		}
600 loopup:
601 		;
602 	}
603 breakout:
	/* Report how many input bytes were consumed. */
604 	*slen = i;
605 
606 	return j;
607 }
608 
609 static size_t
610 _UTF8_wcrtombin(char * __restrict dst, const wchar_t * __restrict src,
611 		size_t dlen, size_t *slen, int flags)
612 {
613 	size_t i;
614 	size_t j;
615 	size_t k;
616 	size_t n = *slen;
617 	size_t len;
618 	unsigned char lead;
619 	wchar_t wc;
620 
621 	for (i = j = 0; i < n; ++i) {
622 		if (j == dlen)
623 			break;
624 		wc = src[i];
625 
626 		if ((wc & ~0x7f) == 0) {
627 			/* Fast path for plain ASCII characters. */
628 			if (dst)
629 				dst[j] = (unsigned char)wc;
630 			++j;
631 			continue;
632 		}
633 		if ((wc & ~0x7ff) == 0) {
634 			lead = 0xc0;
635 			len = 2;
636 		} else if (wc >= 0xDC80 && wc <= 0xDCFF &&
637 			   (flags & WCSBIN_SURRO) == 0) {
638 			if (flags & WCSBIN_STRICT) {
639 				/*
640 				 * STRICT without SURRO is an error for
641 				 * surrogates.
642 				 */
643 				if (i == 0) {
644 					errno = EILSEQ;
645 					return ((size_t)-1);
646 				}
647 				break;
648 			}
649 			if (dst)
650 				dst[j] = (unsigned char)wc;
651 			++j;
652 			continue;
653 		} else if ((wc & ~0xffff) == 0) {
654 			if (wc >= 0xD800 && wc <= 0xDFFF &&
655 			    (flags & (WCSBIN_SURRO | WCSBIN_STRICT)) ==
656 			    WCSBIN_STRICT) {
657 				/*
658 				 * Surrogates in general are an error
659 				 * if STRICT is specified and SURRO is not
660 				 * specified.
661 				 */
662 				if (i == 0) {
663 					errno = EILSEQ;
664 					return ((size_t)-1);
665 				}
666 				break;
667 			}
668 			lead = 0xe0;
669 			len = 3;
670 		} else if (wc <= 0x10ffff) {
671 			lead = 0xf0;
672 			len = 4;
673 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x200000) {
674 			/* normally illegal */
675 			lead = 0xf0;
676 			len = 4;
677 		} else if ((flags & WCSBIN_LONGCODES) && wc < 0x4000000) {
678 			/* normally illegal */
679 			lead = 0xf8;
680 			len = 5;
681 		} else if ((flags & WCSBIN_LONGCODES) &&
682 			   (uint32_t)wc < 0x80000000U) {
683 			/* normally illegal */
684 			lead = 0xfc;
685 			len = 6;
686 		} else {
687 			if (i == 0) {
688 				errno = EILSEQ;
689 				return ((size_t)-1);
690 			}
691 			/* stop here, process error on next loop */
692 			break;
693 		}
694 
695 		/*
696 		 * Output the octets representing the character in chunks
697 		 * of 6 bits, least significant last. The first octet is
698 		 * a special case because it contains the sequence length
699 		 * information.
700 		 */
701 		if (j + len > dlen)
702 			break;
703 		k = j;
704 		j += len;
705 		if (dst) {
706 			while (--len > 0) {
707 				dst[k + len] = (wc & 0x3f) | 0x80;
708 				wc >>= 6;
709 			}
710 			dst[k] = (wc & 0xff) | lead;
711 		}
712 	}
713 	*slen = i;
714 
715 	return j;
716 }
717 
/*
 * Exported UTF-8 bytes-to-wide-characters buffer conversion.  All
 * arguments are handed unchanged to _UTF8_mbintowcr() and its result is
 * returned as is.
 */
size_t
utf8towcr(wchar_t * __restrict dst, const char * __restrict src,
		size_t dlen, size_t *slen, int flags)
{
	size_t nwide;

	nwide = _UTF8_mbintowcr(dst, src, dlen, slen, flags);

	return (nwide);
}
724 
/*
 * Exported wide-characters-to-UTF-8 bytes buffer conversion.  All
 * arguments are handed unchanged to _UTF8_wcrtombin() and its result is
 * returned as is.
 */
size_t
wcrtoutf8(char * __restrict dst, const wchar_t * __restrict src,
	  size_t dlen, size_t *slen, int flags)
{
	size_t nbytes;

	nbytes = _UTF8_wcrtombin(dst, src, dlen, slen, flags);

	return (nbytes);
}
731