xref: /freebsd/lib/libc/locale/gb2312.c (revision 1d386b48)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2004 Tim J. Robbins. All rights reserved.
 * Copyright (c) 2003 David Xu <davidxu@freebsd.org>
 * All rights reserved.
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 *
 * Portions of this software were developed by David Chisnall
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
366d7a04b0SDavid Xu 
37ca2dae42STim J. Robbins #include <sys/param.h>
38fc813796STim J. Robbins #include <errno.h>
396d7a04b0SDavid Xu #include <runetype.h>
406d7a04b0SDavid Xu #include <stdlib.h>
41ca2dae42STim J. Robbins #include <string.h>
426d7a04b0SDavid Xu #include <wchar.h>
432051a8f2STim J. Robbins #include "mblocal.h"
446d7a04b0SDavid Xu 
/*
 * Forward declarations of the file-local conversion routines, so that
 * they can be referenced before their definitions appear.
 */
static int	_GB2312_mbsinit(const mbstate_t *);
static size_t	_GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict,
		    size_t, mbstate_t * __restrict);
static size_t	_GB2312_wcrtomb(char * __restrict, wchar_t,
		    mbstate_t * __restrict);
static size_t	_GB2312_mbsnrtowcs(wchar_t * __restrict,
		    const char ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
static size_t	_GB2312_wcsnrtombs(char * __restrict,
		    const wchar_t ** __restrict, size_t, size_t,
		    mbstate_t * __restrict);
567b247341SBaptiste Daroussin 
576d7a04b0SDavid Xu 
/*
 * Per-conversion state overlaid on an mbstate_t: the bytes of a
 * character that has only been partially supplied so far, and how many
 * of them are currently held.
 */
typedef struct {
	int		count;		/* number of bytes held in bytes[] */
	unsigned char	bytes[2];	/* buffered input bytes */
} _GB2312State;
62ca2dae42STim J. Robbins 
636d7a04b0SDavid Xu int
_GB2312_init(struct xlocale_ctype * l,_RuneLocale * rl)643c87aa1dSDavid Chisnall _GB2312_init(struct xlocale_ctype *l, _RuneLocale *rl)
656d7a04b0SDavid Xu {
666d7a04b0SDavid Xu 
673c87aa1dSDavid Chisnall 	l->runes = rl;
683c87aa1dSDavid Chisnall 	l->__mbrtowc = _GB2312_mbrtowc;
693c87aa1dSDavid Chisnall 	l->__wcrtomb = _GB2312_wcrtomb;
703c87aa1dSDavid Chisnall 	l->__mbsinit = _GB2312_mbsinit;
717b247341SBaptiste Daroussin 	l->__mbsnrtowcs = _GB2312_mbsnrtowcs;
727b247341SBaptiste Daroussin 	l->__wcsnrtombs = _GB2312_wcsnrtombs;
733c87aa1dSDavid Chisnall 	l->__mb_cur_max = 2;
743c87aa1dSDavid Chisnall 	l->__mb_sb_limit = 128;
756d7a04b0SDavid Xu 	return (0);
766d7a04b0SDavid Xu }
776d7a04b0SDavid Xu 
78e94c6cb4SAlexey Zelkin static int
_GB2312_mbsinit(const mbstate_t * ps)79ca2dae42STim J. Robbins _GB2312_mbsinit(const mbstate_t *ps)
80ca2dae42STim J. Robbins {
81ca2dae42STim J. Robbins 
82fa02ee78STim J. Robbins 	return (ps == NULL || ((const _GB2312State *)ps)->count == 0);
83ca2dae42STim J. Robbins }
84ca2dae42STim J. Robbins 
/*
 * Classify the character that begins at `str', examining no more than
 * `n' bytes.
 *
 * Returns:
 *	 1	a single byte with the high bit clear
 *	 2	two bytes, each in the range 0xa1..0xfe
 *	-1	invalid multibyte sequence
 *	-2	incomplete multibyte sequence (more input is needed)
 */
static int
_GB2312_check(const char *str, size_t n)
{
	const unsigned char *p;
	unsigned char lead;

	if (n == 0)
		return (-2);		/* nothing to inspect yet */

	p = (const unsigned char *)str;
	lead = p[0];

	if ((lead & 0x80) == 0)
		return (1);		/* single-byte character */
	if (lead < 0xa1 || lead > 0xfe)
		return (-1);		/* high bit set, but not a lead byte */
	if (n < 2)
		return (-2);		/* second byte not available yet */
	if (p[1] < 0xa1 || p[1] > 0xfe)
		return (-1);		/* second byte out of range */
	return (2);
}
1076d7a04b0SDavid Xu 
108e94c6cb4SAlexey Zelkin static size_t
_GB2312_mbrtowc(wchar_t * __restrict pwc,const char * __restrict s,size_t n,mbstate_t * __restrict ps)1096d7a04b0SDavid Xu _GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
110ca2dae42STim J. Robbins     mbstate_t * __restrict ps)
1116d7a04b0SDavid Xu {
112ca2dae42STim J. Robbins 	_GB2312State *gs;
1136d7a04b0SDavid Xu 	wchar_t wc;
114ca2dae42STim J. Robbins 	int i, len, ocount;
115ca2dae42STim J. Robbins 	size_t ncopy;
1166d7a04b0SDavid Xu 
11764646503SDavid Xu 	gs = (_GB2312State *)ps;
118ca2dae42STim J. Robbins 
119fc813796STim J. Robbins 	if (gs->count < 0 || gs->count > sizeof(gs->bytes)) {
120fc813796STim J. Robbins 		errno = EINVAL;
121fc813796STim J. Robbins 		return ((size_t)-1);
122fc813796STim J. Robbins 	}
123fc813796STim J. Robbins 
124ca2dae42STim J. Robbins 	if (s == NULL) {
125ca2dae42STim J. Robbins 		s = "";
126ca2dae42STim J. Robbins 		n = 1;
127ca2dae42STim J. Robbins 		pwc = NULL;
128ca2dae42STim J. Robbins 	}
129ca2dae42STim J. Robbins 
130ca2dae42STim J. Robbins 	ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count);
131ca2dae42STim J. Robbins 	memcpy(gs->bytes + gs->count, s, ncopy);
132ca2dae42STim J. Robbins 	ocount = gs->count;
133ca2dae42STim J. Robbins 	gs->count += ncopy;
134ca2dae42STim J. Robbins 	s = (char *)gs->bytes;
135ca2dae42STim J. Robbins 	n = gs->count;
136ca2dae42STim J. Robbins 
1376d7a04b0SDavid Xu 	if ((len = _GB2312_check(s, n)) < 0)
1386d7a04b0SDavid Xu 		return ((size_t)len);
1396d7a04b0SDavid Xu 	wc = 0;
1406d7a04b0SDavid Xu 	i = len;
1416d7a04b0SDavid Xu 	while (i-- > 0)
1426d7a04b0SDavid Xu 		wc = (wc << 8) | (unsigned char)*s++;
1436d7a04b0SDavid Xu 	if (pwc != NULL)
1446d7a04b0SDavid Xu 		*pwc = wc;
145ca2dae42STim J. Robbins 	gs->count = 0;
146ca2dae42STim J. Robbins 	return (wc == L'\0' ? 0 : len - ocount);
1476d7a04b0SDavid Xu }
1486d7a04b0SDavid Xu 
149e94c6cb4SAlexey Zelkin static size_t
_GB2312_wcrtomb(char * __restrict s,wchar_t wc,mbstate_t * __restrict ps)150fc813796STim J. Robbins _GB2312_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
1516d7a04b0SDavid Xu {
152fc813796STim J. Robbins 	_GB2312State *gs;
153fc813796STim J. Robbins 
154fc813796STim J. Robbins 	gs = (_GB2312State *)ps;
155fc813796STim J. Robbins 
156fc813796STim J. Robbins 	if (gs->count != 0) {
157fc813796STim J. Robbins 		errno = EINVAL;
158fc813796STim J. Robbins 		return ((size_t)-1);
159fc813796STim J. Robbins 	}
1606d7a04b0SDavid Xu 
1616d7a04b0SDavid Xu 	if (s == NULL)
1626d7a04b0SDavid Xu 		/* Reset to initial shift state (no-op) */
1636d7a04b0SDavid Xu 		return (1);
1646d7a04b0SDavid Xu 	if (wc & 0x8000) {
1656d7a04b0SDavid Xu 		*s++ = (wc >> 8) & 0xff;
1666d7a04b0SDavid Xu 		*s = wc & 0xff;
1676d7a04b0SDavid Xu 		return (2);
1686d7a04b0SDavid Xu 	}
1696d7a04b0SDavid Xu 	*s = wc & 0xff;
1706d7a04b0SDavid Xu 	return (1);
1716d7a04b0SDavid Xu }
1727b247341SBaptiste Daroussin 
1737b247341SBaptiste Daroussin static size_t
_GB2312_mbsnrtowcs(wchar_t * __restrict dst,const char ** __restrict src,size_t nms,size_t len,mbstate_t * __restrict ps)1747b247341SBaptiste Daroussin _GB2312_mbsnrtowcs(wchar_t * __restrict dst,
1757b247341SBaptiste Daroussin     const char ** __restrict src, size_t nms, size_t len,
1767b247341SBaptiste Daroussin     mbstate_t * __restrict ps)
1777b247341SBaptiste Daroussin {
1787b247341SBaptiste Daroussin 	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc));
1797b247341SBaptiste Daroussin }
1807b247341SBaptiste Daroussin 
1817b247341SBaptiste Daroussin static size_t
_GB2312_wcsnrtombs(char * __restrict dst,const wchar_t ** __restrict src,size_t nwc,size_t len,mbstate_t * __restrict ps)1827b247341SBaptiste Daroussin _GB2312_wcsnrtombs(char * __restrict dst,
1837b247341SBaptiste Daroussin     const wchar_t ** __restrict src, size_t nwc, size_t len,
1847b247341SBaptiste Daroussin     mbstate_t * __restrict ps)
1857b247341SBaptiste Daroussin {
1867b247341SBaptiste Daroussin 	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb));
1877b247341SBaptiste Daroussin }
188