xref: /minix/lib/libc/citrus/modules/citrus_utf1632.c (revision f14fb602)
1*f14fb602SLionel Sambuc /*	$NetBSD: citrus_utf1632.c,v 1.12 2012/02/12 13:51:29 wiz Exp $	*/
22fe8fb19SBen Gras 
32fe8fb19SBen Gras /*-
42fe8fb19SBen Gras  * Copyright (c)2003 Citrus Project,
52fe8fb19SBen Gras  * All rights reserved.
62fe8fb19SBen Gras  *
72fe8fb19SBen Gras  * Redistribution and use in source and binary forms, with or without
82fe8fb19SBen Gras  * modification, are permitted provided that the following conditions
92fe8fb19SBen Gras  * are met:
102fe8fb19SBen Gras  * 1. Redistributions of source code must retain the above copyright
112fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer.
122fe8fb19SBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
132fe8fb19SBen Gras  *    notice, this list of conditions and the following disclaimer in the
142fe8fb19SBen Gras  *    documentation and/or other materials provided with the distribution.
152fe8fb19SBen Gras  *
162fe8fb19SBen Gras  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
172fe8fb19SBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
182fe8fb19SBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
192fe8fb19SBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
202fe8fb19SBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
212fe8fb19SBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
222fe8fb19SBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
232fe8fb19SBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
242fe8fb19SBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
252fe8fb19SBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
262fe8fb19SBen Gras  * SUCH DAMAGE.
272fe8fb19SBen Gras  */
282fe8fb19SBen Gras 
292fe8fb19SBen Gras #include <sys/cdefs.h>
302fe8fb19SBen Gras #if defined(LIBC_SCCS) && !defined(lint)
31*f14fb602SLionel Sambuc __RCSID("$NetBSD: citrus_utf1632.c,v 1.12 2012/02/12 13:51:29 wiz Exp $");
322fe8fb19SBen Gras #endif /* LIBC_SCCS and not lint */
332fe8fb19SBen Gras 
342fe8fb19SBen Gras #include <assert.h>
352fe8fb19SBen Gras #include <errno.h>
362fe8fb19SBen Gras #include <string.h>
372fe8fb19SBen Gras #include <stdio.h>
382fe8fb19SBen Gras #include <stdlib.h>
392fe8fb19SBen Gras #include <stddef.h>
402fe8fb19SBen Gras #include <limits.h>
412fe8fb19SBen Gras #include <wchar.h>
422fe8fb19SBen Gras #include <sys/types.h>
432fe8fb19SBen Gras #include <machine/endian.h>
442fe8fb19SBen Gras 
452fe8fb19SBen Gras #include "citrus_namespace.h"
462fe8fb19SBen Gras #include "citrus_types.h"
472fe8fb19SBen Gras #include "citrus_module.h"
482fe8fb19SBen Gras #include "citrus_stdenc.h"
492fe8fb19SBen Gras #include "citrus_bcs.h"
502fe8fb19SBen Gras 
512fe8fb19SBen Gras #include "citrus_utf1632.h"
522fe8fb19SBen Gras 
532fe8fb19SBen Gras 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
572fe8fb19SBen Gras 
/*
 * Conversion state carried between calls.
 *
 * The decoder parks the bytes of a not-yet-complete character in ch[]
 * and records how many of them are valid in chlen.  current_endian holds
 * one of the _ENDIAN_* values; it is _ENDIAN_UNKNOWN (0) in a freshly
 * zeroed state and is settled when the first character is converted.
 */
typedef struct {
	u_int8_t		ch[4];		/* buffered input bytes */
	int			chlen;		/* valid bytes in ch[] */
	int			current_endian;	/* _ENDIAN_* of the stream */
} _UTF1632State;
632fe8fb19SBen Gras 
/* Byte orders, used for preffered_endian and for the per-state endian. */
#define _ENDIAN_UNKNOWN	0
#define _ENDIAN_BIG	1
#define _ENDIAN_LITTLE	2

/* Bits of _UTF1632EncodingInfo.mode. */
#define _MODE_UTF32		0x00000001U	/* 32-bit units, else UTF-16 */
#define _MODE_FORCE_ENDIAN	0x00000002U	/* no BOM detection/emission */

/*
 * Per-encoding configuration, filled in by the module init routine from
 * the encoding's variable string.
 */
typedef struct {
	int		preffered_endian;	/* _ENDIAN_* used without a BOM */
	unsigned int	cur_max;		/* max bytes per character */
	u_int32_t	mode;			/* _MODE_* bits */
} _UTF1632EncodingInfo;
742fe8fb19SBen Gras 
/*
 * Names the shared template code is parameterized with (presumably
 * consumed by citrus_stdenc_template.h, included at the end of this
 * file -- confirm there).
 */

/* Prefix every module entry point with _citrus_UTF1632_. */
#define _FUNCNAME(m)				_citrus_UTF1632_##m

/* Concrete types standing in for the generic info/state types. */
#define _ENCODING_INFO				_UTF1632EncodingInfo
#define _ENCODING_STATE				_UTF1632State

/* Longest byte sequence for one character, taken from the info. */
#define _ENCODING_MB_CUR_MAX(_ei_)		((_ei_)->cur_max)

/* Both of these are constant 0 for this module. */
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
812fe8fb19SBen Gras 
822fe8fb19SBen Gras 
832fe8fb19SBen Gras static __inline void
842fe8fb19SBen Gras /*ARGSUSED*/
_citrus_UTF1632_init_state(_UTF1632EncodingInfo * ei,_UTF1632State * s)852fe8fb19SBen Gras _citrus_UTF1632_init_state(_UTF1632EncodingInfo *ei, _UTF1632State *s)
862fe8fb19SBen Gras {
872fe8fb19SBen Gras 	memset(s, 0, sizeof(*s));
882fe8fb19SBen Gras }
892fe8fb19SBen Gras 
902fe8fb19SBen Gras static int
_citrus_UTF1632_mbrtowc_priv(_UTF1632EncodingInfo * ei,wchar_t * pwc,const char ** s,size_t n,_UTF1632State * psenc,size_t * nresult)912fe8fb19SBen Gras _citrus_UTF1632_mbrtowc_priv(_UTF1632EncodingInfo *ei, wchar_t *pwc,
922fe8fb19SBen Gras 			     const char **s, size_t n, _UTF1632State *psenc,
932fe8fb19SBen Gras 			     size_t *nresult)
942fe8fb19SBen Gras {
952fe8fb19SBen Gras 	int chlenbak, endian, needlen;
962fe8fb19SBen Gras 	wchar_t wc;
972fe8fb19SBen Gras 	size_t result;
982fe8fb19SBen Gras 	const char *s0;
992fe8fb19SBen Gras 
1002fe8fb19SBen Gras 	_DIAGASSERT(nresult != 0);
1012fe8fb19SBen Gras 	_DIAGASSERT(ei != NULL);
1022fe8fb19SBen Gras 	_DIAGASSERT(s != NULL);
1032fe8fb19SBen Gras 	_DIAGASSERT(psenc != NULL);
1042fe8fb19SBen Gras 
1052fe8fb19SBen Gras 	s0 = *s;
1062fe8fb19SBen Gras 
1072fe8fb19SBen Gras 	if (s0 == NULL) {
1082fe8fb19SBen Gras 		_citrus_UTF1632_init_state(ei, psenc);
1092fe8fb19SBen Gras 		*nresult = 0; /* state independent */
1102fe8fb19SBen Gras 		return (0);
1112fe8fb19SBen Gras 	}
1122fe8fb19SBen Gras 
1132fe8fb19SBen Gras 	result = 0;
1142fe8fb19SBen Gras 	chlenbak = psenc->chlen;
1152fe8fb19SBen Gras 
1162fe8fb19SBen Gras refetch:
1172fe8fb19SBen Gras 	if ((ei->mode & _MODE_UTF32) != 0 || chlenbak>=2)
1182fe8fb19SBen Gras 		needlen = 4;
1192fe8fb19SBen Gras 	else
1202fe8fb19SBen Gras 		needlen = 2;
1212fe8fb19SBen Gras 
1222fe8fb19SBen Gras 	while (chlenbak < needlen) {
1232fe8fb19SBen Gras 		if (n==0)
1242fe8fb19SBen Gras 			goto restart;
1252fe8fb19SBen Gras 		psenc->ch[chlenbak++] = *s0++;
1262fe8fb19SBen Gras 		n--;
1272fe8fb19SBen Gras 		result++;
1282fe8fb19SBen Gras 	}
1292fe8fb19SBen Gras 
1302fe8fb19SBen Gras 	if (psenc->current_endian == _ENDIAN_UNKNOWN) {
1312fe8fb19SBen Gras 		if ((ei->mode & _MODE_FORCE_ENDIAN) == 0) {
1322fe8fb19SBen Gras 			/* judge endian marker */
1332fe8fb19SBen Gras 			if ((ei->mode & _MODE_UTF32) == 0) {
1342fe8fb19SBen Gras 				/* UTF16 */
1352fe8fb19SBen Gras 				if (psenc->ch[0]==0xFE && psenc->ch[1]==0xFF) {
1362fe8fb19SBen Gras 					psenc->current_endian = _ENDIAN_BIG;
1372fe8fb19SBen Gras 					chlenbak = 0;
1382fe8fb19SBen Gras 					goto refetch;
1392fe8fb19SBen Gras 				} else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE) {
1402fe8fb19SBen Gras 					psenc->current_endian = _ENDIAN_LITTLE;
1412fe8fb19SBen Gras 					chlenbak = 0;
1422fe8fb19SBen Gras 					goto refetch;
1432fe8fb19SBen Gras 				}
1442fe8fb19SBen Gras 			} else {
1452fe8fb19SBen Gras 				/* UTF32 */
1462fe8fb19SBen Gras 				if (psenc->ch[0]==0x00 && psenc->ch[1]==0x00 &&
1472fe8fb19SBen Gras 				    psenc->ch[2]==0xFE && psenc->ch[3]==0xFF) {
1482fe8fb19SBen Gras 					psenc->current_endian = _ENDIAN_BIG;
1492fe8fb19SBen Gras 					chlenbak = 0;
1502fe8fb19SBen Gras 					goto refetch;
1512fe8fb19SBen Gras 				} else if (psenc->ch[0]==0xFF && psenc->ch[1]==0xFE &&
1522fe8fb19SBen Gras 					   psenc->ch[2]==0x00 && psenc->ch[3]==0x00) {
1532fe8fb19SBen Gras 					psenc->current_endian = _ENDIAN_LITTLE;
1542fe8fb19SBen Gras 					chlenbak = 0;
1552fe8fb19SBen Gras 					goto refetch;
1562fe8fb19SBen Gras 				}
1572fe8fb19SBen Gras 			}
1582fe8fb19SBen Gras 		}
1592fe8fb19SBen Gras 		psenc->current_endian = ei->preffered_endian;
1602fe8fb19SBen Gras 	}
1612fe8fb19SBen Gras 	endian = psenc->current_endian;
1622fe8fb19SBen Gras 
1632fe8fb19SBen Gras 	/* get wc */
1642fe8fb19SBen Gras 	if ((ei->mode & _MODE_UTF32) == 0) {
1652fe8fb19SBen Gras 		/* UTF16 */
1662fe8fb19SBen Gras 		if (needlen==2) {
1672fe8fb19SBen Gras 			switch (endian) {
1682fe8fb19SBen Gras 			case _ENDIAN_LITTLE:
1692fe8fb19SBen Gras 				wc = (psenc->ch[0] |
1702fe8fb19SBen Gras 				      ((wchar_t)psenc->ch[1] << 8));
1712fe8fb19SBen Gras 				break;
1722fe8fb19SBen Gras 			case _ENDIAN_BIG:
1732fe8fb19SBen Gras 				wc = (psenc->ch[1] |
1742fe8fb19SBen Gras 				      ((wchar_t)psenc->ch[0] << 8));
1752fe8fb19SBen Gras 				break;
1762fe8fb19SBen Gras 			default:
1772fe8fb19SBen Gras 				goto ilseq;
1782fe8fb19SBen Gras 			}
1792fe8fb19SBen Gras 			if (wc >= 0xD800 && wc <= 0xDBFF) {
1802fe8fb19SBen Gras 				/* surrogate high */
1812fe8fb19SBen Gras 				needlen=4;
1822fe8fb19SBen Gras 				goto refetch;
1832fe8fb19SBen Gras 			}
1842fe8fb19SBen Gras 		} else {
1852fe8fb19SBen Gras 			/* surrogate low */
1862fe8fb19SBen Gras 			wc -= 0xD800; /* wc : surrogate high (see above) */
1872fe8fb19SBen Gras 			wc <<= 10;
1882fe8fb19SBen Gras 			switch (endian) {
1892fe8fb19SBen Gras 			case _ENDIAN_LITTLE:
1902fe8fb19SBen Gras 				if (psenc->ch[3]<0xDC || psenc->ch[3]>0xDF)
1912fe8fb19SBen Gras 					goto ilseq;
1922fe8fb19SBen Gras 				wc |= psenc->ch[2];
1932fe8fb19SBen Gras 				wc |= (wchar_t)(psenc->ch[3] & 3) << 8;
1942fe8fb19SBen Gras 				break;
1952fe8fb19SBen Gras 			case _ENDIAN_BIG:
1962fe8fb19SBen Gras 				if (psenc->ch[2]<0xDC || psenc->ch[2]>0xDF)
1972fe8fb19SBen Gras 					goto ilseq;
1982fe8fb19SBen Gras 				wc |= psenc->ch[3];
1992fe8fb19SBen Gras 				wc |= (wchar_t)(psenc->ch[2] & 3) << 8;
2002fe8fb19SBen Gras 				break;
2012fe8fb19SBen Gras 			default:
2022fe8fb19SBen Gras 				goto ilseq;
2032fe8fb19SBen Gras 			}
2042fe8fb19SBen Gras 			wc += 0x10000;
2052fe8fb19SBen Gras 		}
2062fe8fb19SBen Gras 	} else {
2072fe8fb19SBen Gras 		/* UTF32 */
2082fe8fb19SBen Gras 		switch (endian) {
2092fe8fb19SBen Gras 		case _ENDIAN_LITTLE:
2102fe8fb19SBen Gras 			wc = (psenc->ch[0] |
2112fe8fb19SBen Gras 			      ((wchar_t)psenc->ch[1] << 8) |
2122fe8fb19SBen Gras 			      ((wchar_t)psenc->ch[2] << 16) |
2132fe8fb19SBen Gras 			      ((wchar_t)psenc->ch[3] << 24));
2142fe8fb19SBen Gras 			break;
2152fe8fb19SBen Gras 		case _ENDIAN_BIG:
2162fe8fb19SBen Gras 			wc = (psenc->ch[3] |
2172fe8fb19SBen Gras 			      ((wchar_t)psenc->ch[2] << 8) |
2182fe8fb19SBen Gras 			      ((wchar_t)psenc->ch[1] << 16) |
2192fe8fb19SBen Gras 			      ((wchar_t)psenc->ch[0] << 24));
2202fe8fb19SBen Gras 			break;
2212fe8fb19SBen Gras 		default:
2222fe8fb19SBen Gras 			goto ilseq;
2232fe8fb19SBen Gras 		}
2242fe8fb19SBen Gras 		if (wc >= 0xD800 && wc <= 0xDFFF)
2252fe8fb19SBen Gras 			goto ilseq;
2262fe8fb19SBen Gras 	}
2272fe8fb19SBen Gras 
2282fe8fb19SBen Gras 
2292fe8fb19SBen Gras 	*pwc = wc;
2302fe8fb19SBen Gras 	psenc->chlen = 0;
2312fe8fb19SBen Gras 	*nresult = result;
2322fe8fb19SBen Gras 	*s = s0;
2332fe8fb19SBen Gras 
2342fe8fb19SBen Gras 	return (0);
2352fe8fb19SBen Gras 
2362fe8fb19SBen Gras ilseq:
2372fe8fb19SBen Gras 	*nresult = (size_t)-1;
2382fe8fb19SBen Gras 	psenc->chlen = 0;
2392fe8fb19SBen Gras 	return (EILSEQ);
2402fe8fb19SBen Gras 
2412fe8fb19SBen Gras restart:
2422fe8fb19SBen Gras 	*nresult = (size_t)-2;
2432fe8fb19SBen Gras 	psenc->chlen = chlenbak;
2442fe8fb19SBen Gras 	*s = s0;
2452fe8fb19SBen Gras 	return (0);
2462fe8fb19SBen Gras }
2472fe8fb19SBen Gras 
2482fe8fb19SBen Gras static int
_citrus_UTF1632_wcrtomb_priv(_UTF1632EncodingInfo * ei,char * s,size_t n,wchar_t wc,_UTF1632State * psenc,size_t * nresult)2492fe8fb19SBen Gras _citrus_UTF1632_wcrtomb_priv(_UTF1632EncodingInfo *ei, char *s, size_t n,
2502fe8fb19SBen Gras 			     wchar_t wc, _UTF1632State *psenc,
2512fe8fb19SBen Gras 			     size_t *nresult)
2522fe8fb19SBen Gras {
2532fe8fb19SBen Gras 	wchar_t wc2;
2542fe8fb19SBen Gras 	static const char _bom[4] = {
2552fe8fb19SBen Gras #if BYTE_ORDER == BIG_ENDIAN
2562fe8fb19SBen Gras 	    0x00, 0x00, 0xFE, 0xFF,
2572fe8fb19SBen Gras #else
2582fe8fb19SBen Gras 	    0xFF, 0xFE, 0x00, 0x00,
2592fe8fb19SBen Gras #endif
2602fe8fb19SBen Gras 	};
2612fe8fb19SBen Gras 	const char *bom = &_bom[0];
2622fe8fb19SBen Gras 	size_t cnt;
2632fe8fb19SBen Gras 
2642fe8fb19SBen Gras 	_DIAGASSERT(ei != NULL);
2652fe8fb19SBen Gras 	_DIAGASSERT(nresult != 0);
2662fe8fb19SBen Gras 	_DIAGASSERT(s != NULL);
2672fe8fb19SBen Gras 
2682fe8fb19SBen Gras 	cnt = (size_t)0;
2692fe8fb19SBen Gras 	if (psenc->current_endian == _ENDIAN_UNKNOWN) {
2702fe8fb19SBen Gras 		if ((ei->mode & _MODE_FORCE_ENDIAN) == 0) {
2712fe8fb19SBen Gras 			if (ei->mode & _MODE_UTF32) {
2722fe8fb19SBen Gras 				cnt = 4;
2732fe8fb19SBen Gras 			} else {
2742fe8fb19SBen Gras 				cnt = 2;
2752fe8fb19SBen Gras #if BYTE_ORDER == BIG_ENDIAN
2762fe8fb19SBen Gras 				bom += 2;
2772fe8fb19SBen Gras #endif
2782fe8fb19SBen Gras 			}
2792fe8fb19SBen Gras 			if (n < cnt)
2802fe8fb19SBen Gras 				goto e2big;
2812fe8fb19SBen Gras 			memcpy(s, bom, cnt);
2822fe8fb19SBen Gras 			s += cnt, n -= cnt;
2832fe8fb19SBen Gras 		}
2842fe8fb19SBen Gras 		psenc->current_endian = ei->preffered_endian;
2852fe8fb19SBen Gras 	}
2862fe8fb19SBen Gras 
2872fe8fb19SBen Gras 	wc2 = 0;
2882fe8fb19SBen Gras 	if ((ei->mode & _MODE_UTF32)==0) {
2892fe8fb19SBen Gras 		/* UTF16 */
2902fe8fb19SBen Gras 		if (wc>0xFFFF) {
2912fe8fb19SBen Gras 			/* surrogate */
2922fe8fb19SBen Gras 			if (wc>0x10FFFF)
2932fe8fb19SBen Gras 				goto ilseq;
2942fe8fb19SBen Gras 			if (n < 4)
2952fe8fb19SBen Gras 				goto e2big;
2962fe8fb19SBen Gras 			cnt += 4;
2972fe8fb19SBen Gras 			wc -= 0x10000;
2982fe8fb19SBen Gras 			wc2 = (wc & 0x3FF) | 0xDC00;
2992fe8fb19SBen Gras 			wc = (wc>>10) | 0xD800;
3002fe8fb19SBen Gras 		} else {
3012fe8fb19SBen Gras 			if (n < 2)
3022fe8fb19SBen Gras 				goto e2big;
3032fe8fb19SBen Gras 			cnt += 2;
3042fe8fb19SBen Gras 		}
3052fe8fb19SBen Gras 
3062fe8fb19SBen Gras surrogate:
3072fe8fb19SBen Gras 		switch (psenc->current_endian) {
3082fe8fb19SBen Gras 		case _ENDIAN_BIG:
3092fe8fb19SBen Gras 			s[1] = wc;
3102fe8fb19SBen Gras 			s[0] = (wc >>= 8);
3112fe8fb19SBen Gras 			break;
3122fe8fb19SBen Gras 		case _ENDIAN_LITTLE:
3132fe8fb19SBen Gras 			s[0] = wc;
3142fe8fb19SBen Gras 			s[1] = (wc >>= 8);
3152fe8fb19SBen Gras 			break;
3162fe8fb19SBen Gras 		}
3172fe8fb19SBen Gras 		if (wc2!=0) {
3182fe8fb19SBen Gras 			wc = wc2;
3192fe8fb19SBen Gras 			wc2 = 0;
3202fe8fb19SBen Gras 			s += 2;
3212fe8fb19SBen Gras 			goto surrogate;
3222fe8fb19SBen Gras 		}
3232fe8fb19SBen Gras 	} else {
3242fe8fb19SBen Gras 		/* UTF32 */
3252fe8fb19SBen Gras 		if (wc >= 0xD800 && wc <= 0xDFFF)
3262fe8fb19SBen Gras 			goto ilseq;
3272fe8fb19SBen Gras 		if (n < 4)
3282fe8fb19SBen Gras 			goto e2big;
3292fe8fb19SBen Gras 		cnt += 4;
3302fe8fb19SBen Gras 		switch (psenc->current_endian) {
3312fe8fb19SBen Gras 		case _ENDIAN_BIG:
3322fe8fb19SBen Gras 			s[3] = wc;
3332fe8fb19SBen Gras 			s[2] = (wc >>= 8);
3342fe8fb19SBen Gras 			s[1] = (wc >>= 8);
3352fe8fb19SBen Gras 			s[0] = (wc >>= 8);
3362fe8fb19SBen Gras 			break;
3372fe8fb19SBen Gras 		case _ENDIAN_LITTLE:
3382fe8fb19SBen Gras 			s[0] = wc;
3392fe8fb19SBen Gras 			s[1] = (wc >>= 8);
3402fe8fb19SBen Gras 			s[2] = (wc >>= 8);
3412fe8fb19SBen Gras 			s[3] = (wc >>= 8);
3422fe8fb19SBen Gras 			break;
3432fe8fb19SBen Gras 		}
3442fe8fb19SBen Gras 	}
3452fe8fb19SBen Gras 	*nresult = cnt;
3462fe8fb19SBen Gras 
3472fe8fb19SBen Gras 	return 0;
3482fe8fb19SBen Gras 
3492fe8fb19SBen Gras ilseq:
3502fe8fb19SBen Gras 	*nresult = (size_t)-1;
3512fe8fb19SBen Gras 	return EILSEQ;
3522fe8fb19SBen Gras e2big:
3532fe8fb19SBen Gras 	*nresult = (size_t)-1;
3542fe8fb19SBen Gras 	return E2BIG;
3552fe8fb19SBen Gras }
3562fe8fb19SBen Gras 
3572fe8fb19SBen Gras static void
parse_variable(_UTF1632EncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)3582fe8fb19SBen Gras parse_variable(_UTF1632EncodingInfo * __restrict ei,
3592fe8fb19SBen Gras 	       const void * __restrict var, size_t lenvar)
3602fe8fb19SBen Gras {
3612fe8fb19SBen Gras #define MATCH(x, act)						\
3622fe8fb19SBen Gras do {								\
3632fe8fb19SBen Gras 	if (lenvar >= (sizeof(#x)-1) &&				\
3642fe8fb19SBen Gras 	    _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {	\
3652fe8fb19SBen Gras 		act;						\
3662fe8fb19SBen Gras 		lenvar -= sizeof(#x)-1;				\
3672fe8fb19SBen Gras 		p += sizeof(#x)-1;				\
3682fe8fb19SBen Gras 	}							\
3692fe8fb19SBen Gras } while (/*CONSTCOND*/0)
3702fe8fb19SBen Gras 	const char *p;
3712fe8fb19SBen Gras 	p = var;
3722fe8fb19SBen Gras 	while (lenvar>0) {
3732fe8fb19SBen Gras 		switch (*p) {
3742fe8fb19SBen Gras 		case 'B':
3752fe8fb19SBen Gras 		case 'b':
3762fe8fb19SBen Gras 			MATCH(big, ei->preffered_endian = _ENDIAN_BIG);
3772fe8fb19SBen Gras 			break;
3782fe8fb19SBen Gras 		case 'L':
3792fe8fb19SBen Gras 		case 'l':
3802fe8fb19SBen Gras 			MATCH(little, ei->preffered_endian = _ENDIAN_LITTLE);
3812fe8fb19SBen Gras 			break;
3822fe8fb19SBen Gras 		case 'F':
3832fe8fb19SBen Gras 		case 'f':
3842fe8fb19SBen Gras 			MATCH(force, ei->mode |= _MODE_FORCE_ENDIAN);
3852fe8fb19SBen Gras 			break;
3862fe8fb19SBen Gras 		case 'U':
3872fe8fb19SBen Gras 		case 'u':
3882fe8fb19SBen Gras 			MATCH(utf32, ei->mode |= _MODE_UTF32);
3892fe8fb19SBen Gras 			break;
3902fe8fb19SBen Gras 		}
3912fe8fb19SBen Gras 		p++;
3922fe8fb19SBen Gras 		lenvar--;
3932fe8fb19SBen Gras 	}
3942fe8fb19SBen Gras }
3952fe8fb19SBen Gras 
3962fe8fb19SBen Gras static int
3972fe8fb19SBen Gras /*ARGSUSED*/
_citrus_UTF1632_encoding_module_init(_UTF1632EncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)3982fe8fb19SBen Gras _citrus_UTF1632_encoding_module_init(_UTF1632EncodingInfo * __restrict ei,
3992fe8fb19SBen Gras 				     const void * __restrict var,
4002fe8fb19SBen Gras 				     size_t lenvar)
4012fe8fb19SBen Gras {
4022fe8fb19SBen Gras 	_DIAGASSERT(ei != NULL);
4032fe8fb19SBen Gras 
4042fe8fb19SBen Gras 	memset((void *)ei, 0, sizeof(*ei));
4052fe8fb19SBen Gras 
4062fe8fb19SBen Gras 	parse_variable(ei, var, lenvar);
4072fe8fb19SBen Gras 
4082fe8fb19SBen Gras 	if ((ei->mode&_MODE_UTF32)==0)
4092fe8fb19SBen Gras 		ei->cur_max = 6; /* endian + surrogate */
4102fe8fb19SBen Gras 	else
4112fe8fb19SBen Gras 		ei->cur_max = 8; /* endian + normal */
4122fe8fb19SBen Gras 
4132fe8fb19SBen Gras 	if (ei->preffered_endian == _ENDIAN_UNKNOWN) {
4142fe8fb19SBen Gras #if BYTE_ORDER == BIG_ENDIAN
4152fe8fb19SBen Gras 		ei->preffered_endian = _ENDIAN_BIG;
4162fe8fb19SBen Gras #else
4172fe8fb19SBen Gras 		ei->preffered_endian = _ENDIAN_LITTLE;
4182fe8fb19SBen Gras #endif
4192fe8fb19SBen Gras 	}
4202fe8fb19SBen Gras 
4212fe8fb19SBen Gras 	return (0);
4222fe8fb19SBen Gras }
4232fe8fb19SBen Gras 
4242fe8fb19SBen Gras static void
4252fe8fb19SBen Gras /*ARGSUSED*/
_citrus_UTF1632_encoding_module_uninit(_UTF1632EncodingInfo * ei)4262fe8fb19SBen Gras _citrus_UTF1632_encoding_module_uninit(_UTF1632EncodingInfo *ei)
4272fe8fb19SBen Gras {
4282fe8fb19SBen Gras }
4292fe8fb19SBen Gras 
4302fe8fb19SBen Gras static __inline int
4312fe8fb19SBen Gras /*ARGSUSED*/
_citrus_UTF1632_stdenc_wctocs(_UTF1632EncodingInfo * __restrict ei,_csid_t * __restrict csid,_index_t * __restrict idx,_wc_t wc)4322fe8fb19SBen Gras _citrus_UTF1632_stdenc_wctocs(_UTF1632EncodingInfo * __restrict ei,
4332fe8fb19SBen Gras 			      _csid_t * __restrict csid,
4342fe8fb19SBen Gras 			      _index_t * __restrict idx,
4352fe8fb19SBen Gras 			      _wc_t wc)
4362fe8fb19SBen Gras {
4372fe8fb19SBen Gras 
4382fe8fb19SBen Gras 	_DIAGASSERT(csid != NULL && idx != NULL);
4392fe8fb19SBen Gras 
4402fe8fb19SBen Gras 	*csid = 0;
4412fe8fb19SBen Gras 	*idx = (_index_t)wc;
4422fe8fb19SBen Gras 
4432fe8fb19SBen Gras 	return (0);
4442fe8fb19SBen Gras }
4452fe8fb19SBen Gras 
4462fe8fb19SBen Gras static __inline int
4472fe8fb19SBen Gras /*ARGSUSED*/
_citrus_UTF1632_stdenc_cstowc(_UTF1632EncodingInfo * __restrict ei,_wc_t * __restrict wc,_csid_t csid,_index_t idx)4482fe8fb19SBen Gras _citrus_UTF1632_stdenc_cstowc(_UTF1632EncodingInfo * __restrict ei,
4492fe8fb19SBen Gras 			      _wc_t * __restrict wc,
4502fe8fb19SBen Gras 			      _csid_t csid, _index_t idx)
4512fe8fb19SBen Gras {
4522fe8fb19SBen Gras 
4532fe8fb19SBen Gras 	_DIAGASSERT(wc != NULL);
4542fe8fb19SBen Gras 
4552fe8fb19SBen Gras 	if (csid != 0)
4562fe8fb19SBen Gras 		return (EILSEQ);
4572fe8fb19SBen Gras 
4582fe8fb19SBen Gras 	*wc = (_wc_t)idx;
4592fe8fb19SBen Gras 
4602fe8fb19SBen Gras 	return (0);
4612fe8fb19SBen Gras }
4622fe8fb19SBen Gras 
4632fe8fb19SBen Gras static __inline int
4642fe8fb19SBen Gras /*ARGSUSED*/
_citrus_UTF1632_stdenc_get_state_desc_generic(_UTF1632EncodingInfo * __restrict ei,_UTF1632State * __restrict psenc,int * __restrict rstate)4652fe8fb19SBen Gras _citrus_UTF1632_stdenc_get_state_desc_generic(_UTF1632EncodingInfo * __restrict ei,
4662fe8fb19SBen Gras 					      _UTF1632State * __restrict psenc,
4672fe8fb19SBen Gras 					      int * __restrict rstate)
4682fe8fb19SBen Gras {
4692fe8fb19SBen Gras 
4702fe8fb19SBen Gras 	if (psenc->chlen == 0)
4712fe8fb19SBen Gras 		*rstate = _STDENC_SDGEN_INITIAL;
4722fe8fb19SBen Gras 	else
4732fe8fb19SBen Gras 		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
4742fe8fb19SBen Gras 
4752fe8fb19SBen Gras 	return 0;
4762fe8fb19SBen Gras }
4772fe8fb19SBen Gras 
4782fe8fb19SBen Gras /* ----------------------------------------------------------------------
4792fe8fb19SBen Gras  * public interface for stdenc
4802fe8fb19SBen Gras  */
4812fe8fb19SBen Gras 
/*
 * Instantiate the stdenc glue for this module under the name UTF1632.
 * Both macros are defined outside this file (presumably in
 * citrus_stdenc.h -- confirm); their expansion is not visible here.
 */
4822fe8fb19SBen Gras _CITRUS_STDENC_DECLS(UTF1632);
4832fe8fb19SBen Gras _CITRUS_STDENC_DEF_OPS(UTF1632);
4842fe8fb19SBen Gras 
4852fe8fb19SBen Gras #include "citrus_stdenc_template.h"
486