xref: /minix/lib/libc/locale/rune.c (revision 84d9c625)
1 /*	$NetBSD: rune.c,v 1.46 2013/04/13 10:21:20 joerg Exp $	*/
2 /*-
3  * Copyright (c)2010 Citrus Project,
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/endian.h>
29 #include <sys/mman.h>
30 #include <sys/stat.h>
31 #include <assert.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #define __SETLOCALE_SOURCE__
35 #include <locale.h>
36 #include <stddef.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <wchar.h>
42 
43 #include "setlocale_local.h"
44 
45 #include "citrus_module.h"
46 #include "citrus_ctype.h"
47 
48 #include "runetype_local.h"
49 
50 #include "multibyte.h"
51 
52 #include "_wctype_local.h"
53 #include "_wctrans_local.h"
54 
55 typedef struct {
56 	_RuneLocale rl;
57 	unsigned short	rlp_ctype_tab  [_CTYPE_NUM_CHARS + 1];
58 	short		rlp_tolower_tab[_CTYPE_NUM_CHARS + 1];
59 	short		rlp_toupper_tab[_CTYPE_NUM_CHARS + 1];
60 	char		rlp_codeset[33]; /* XXX */
61 
62 #ifdef __BUILD_LEGACY
63 	unsigned char	rlp_compat_bsdctype[_CTYPE_NUM_CHARS + 1];
64 #endif
65 } _RuneLocalePriv;
66 
67 static __inline void
_rune_wctype_init(_RuneLocale * rl)68 _rune_wctype_init(_RuneLocale *rl)
69 {
70 	memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
71 	    sizeof(rl->rl_wctype));
72 }
73 
74 static __inline void
_rune_wctrans_init(_RuneLocale * rl)75 _rune_wctrans_init(_RuneLocale *rl)
76 {
77 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name   = "tolower";
78 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0];
79 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext;
80 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name   = "toupper";
81 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0];
82 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext;
83 }
84 
85 static __inline void
_rune_init_priv(_RuneLocalePriv * rlp)86 _rune_init_priv(_RuneLocalePriv *rlp)
87 {
88 #if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS
89 	int i;
90 
91 	for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) {
92 		rlp->rlp_ctype_tab  [i + 1] = 0;
93 		rlp->rlp_tolower_tab[i + 1] = i;
94 		rlp->rlp_toupper_tab[i + 1] = i;
95 
96 #ifdef __BUILD_LEGACY
97 		rlp->rlp_compat_bsdctype[i + 1] = 0;
98 #endif
99 	}
100 #endif
101 	rlp->rlp_ctype_tab  [0] = 0;
102 	rlp->rlp_tolower_tab[0] = EOF;
103 	rlp->rlp_toupper_tab[0] = EOF;
104 
105 	rlp->rl.rl_ctype_tab   = (const unsigned short *)&rlp->rlp_ctype_tab[0];
106 	rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0];
107 	rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0];
108 	rlp->rl.rl_codeset     = (const char *)&rlp->rlp_codeset[0];
109 
110 	_rune_wctype_init(&rlp->rl);
111 	_rune_wctrans_init(&rlp->rl);
112 
113 #ifdef __BUILD_LEGACY
114 	rlp->rlp_compat_bsdctype[0] = 0;
115 	rlp->rl.rl_compat_bsdctype = (const unsigned char *)
116 	    &rlp->rlp_compat_bsdctype[0];
117 #endif
118 }
119 
/*
 * Look for the first occurrence of the _RUNE_CODESET key inside the
 * *plenvar bytes at var and copy the text that follows it into s.
 *
 * s        destination buffer, always NUL terminated on return
 * n        size of s in bytes; at most n - 1 characters are copied
 * var      buffer to search; modified when the key is found
 * plenvar  in: number of bytes in var; out: offset of the key when found
 *
 * The copied value ends at the first space or tab, at the end of var, or
 * when s is full.  When the key is found, the byte where it starts is
 * overwritten with NUL and *plenvar is cut down to that offset.  A key
 * with no byte after it does not count as a match; in that case, and when
 * the key is absent, s is set to the empty string and var and *plenvar
 * are left alone.
 */
static __inline void
_rune_find_codeset(char *s, size_t n, char *var, size_t *plenvar)
{
	size_t total, off, room, k;
	const char *value;

#define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1)

	total = *plenvar;
	for (off = 0; total - off > _RUNE_CODESET_LEN; off++) {
		if (memcmp(var + off, _RUNE_CODESET, _RUNE_CODESET_LEN) != 0)
			continue;

		/* Cut the variable area off right where the key starts. */
		var[off] = '\0';
		*plenvar -= total - off;

		value = var + off + _RUNE_CODESET_LEN;
		room = total - off - _RUNE_CODESET_LEN;
		for (k = 0; k + 1 < n && k < room; k++) {
			if (value[k] == ' ' || value[k] == '\t')
				break;
			*s++ = value[k];
		}
		break;
	}
	*s = '\0';
}
145 
#ifdef __BUILD_LEGACY
/*
 * Translate a _RuneType bit set into the corresponding _COMPAT_* bit set.
 *
 * U, L, S, P and C map one to one; D maps to _COMPAT_N.  _COMPAT_X is set
 * only when X is present without D, and _COMPAT_B only when R is present
 * without G.  An empty input gives 0.
 */
static __inline int
_runetype_to_bsdctype(_RuneType bits)
{
	int ret = 0;

	if (bits == (_RuneType)0)
		return 0;

	/* Straight one-bit translations. */
	ret |= (bits & _RUNETYPE_U) ? _COMPAT_U : 0;
	ret |= (bits & _RUNETYPE_L) ? _COMPAT_L : 0;
	ret |= (bits & _RUNETYPE_D) ? _COMPAT_N : 0;
	ret |= (bits & _RUNETYPE_S) ? _COMPAT_S : 0;
	ret |= (bits & _RUNETYPE_P) ? _COMPAT_P : 0;
	ret |= (bits & _RUNETYPE_C) ? _COMPAT_C : 0;

	/* Bits that only count when a second bit is absent. */
	ret |= ((bits & (_RUNETYPE_X | _RUNETYPE_D)) == _RUNETYPE_X) ?
	    _COMPAT_X : 0;
	ret |= ((bits & (_RUNETYPE_R | _RUNETYPE_G)) == _RUNETYPE_R) ?
	    _COMPAT_B : 0;

	return ret;
}
#endif /* __BUILD_LEGACY */
174 
175 static __inline int
_rune_read_file(const char * __restrict var,size_t lenvar,_RuneLocale ** __restrict prl)176 _rune_read_file(const char * __restrict var, size_t lenvar,
177     _RuneLocale ** __restrict prl)
178 {
179 	int ret, i;
180 	const _FileRuneLocale *frl;
181 	const _FileRuneEntry *fre;
182 	const uint32_t *frune;
183 	_RuneLocalePriv *rlp;
184 	_RuneLocale *rl;
185 	_RuneEntry *re;
186 	uint32_t *rune;
187 	uint32_t runetype_len, maplower_len, mapupper_len, variable_len;
188 	size_t len, n;
189 
190 	if (lenvar < sizeof(*frl))
191 		return EFTYPE;
192 	lenvar -= sizeof(*frl);
193 	frl = (const _FileRuneLocale *)(const void *)var;
194 	if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic)))
195 		return EFTYPE;
196 
197 	runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges);
198 	maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges);
199 	mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges);
200 	len = runetype_len + maplower_len + mapupper_len;
201 
202 	fre = (const _FileRuneEntry *)(const void *)(frl + 1);
203 	frune = (const uint32_t *)(const void *)(fre + len);
204 
205 	variable_len = be32toh((uint32_t)frl->frl_variable_len);
206 
207 	n = len * sizeof(*fre);
208 	if (lenvar < n)
209 		return EFTYPE;
210 	lenvar -= n;
211 
212 	n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar;
213 	rlp = (_RuneLocalePriv *)malloc(n);
214 	if (rlp == NULL)
215 		return ENOMEM;
216 	_rune_init_priv(rlp);
217 
218 	rl = &rlp->rl;
219 	re = (_RuneEntry *)(void *)(rlp + 1);
220 	rune = (uint32_t *)(void *)(re + len);
221 
222 	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
223 		rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]);
224 		rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]);
225 		rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]);
226 	}
227 
228 #define READ_RANGE(name)						\
229 do {									\
230 	const _FileRuneEntry *end_fre;					\
231 	const uint32_t *end_frune;					\
232 									\
233 	rl->rl_##name##_ext.rr_nranges = name##_len;			\
234 	rl->rl_##name##_ext.rr_rune_ranges = re;			\
235 									\
236 	end_fre = fre + name##_len;					\
237 	while (fre < end_fre) {						\
238 		re->re_min = be32toh((uint32_t)fre->fre_min);		\
239 		re->re_max = be32toh((uint32_t)fre->fre_max);		\
240 		re->re_map = be32toh((uint32_t)fre->fre_map);		\
241 		if (re->re_map != 0) {					\
242 			re->re_rune_types = NULL;			\
243 		} else {						\
244 			re->re_rune_types = rune;			\
245 			len = re->re_max - re->re_min + 1;		\
246 			n = len * sizeof(*frune);			\
247 			if (lenvar < n) {				\
248 				ret = EFTYPE;				\
249 				goto err;				\
250 			}						\
251 			lenvar -= n;					\
252 			end_frune = frune + len;			\
253 			while (frune < end_frune)			\
254 				*rune++ = be32toh(*frune++);		\
255 		}							\
256 		++fre, ++re;						\
257 	}								\
258 } while (/*CONSTCOND*/0)
259 
260 	READ_RANGE(runetype);
261 	READ_RANGE(maplower);
262 	READ_RANGE(mapupper);
263 
264 	if (lenvar < variable_len) {
265 		ret = EFTYPE;
266 		goto err;
267 	}
268 
269 	memcpy((void *)rune, (void const *)frune, variable_len);
270 	rl->rl_variable_len = variable_len;
271 	rl->rl_variable = (void *)rune;
272 
273 	_rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset),
274 	    (char *)rl->rl_variable, &rl->rl_variable_len);
275 
276 	ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding,
277 	    rl->rl_variable, rl->rl_variable_len, _PRIVSIZE);
278 	if (ret)
279 		goto err;
280 	if (__mb_len_max_runtime <
281 	    _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) {
282 		ret = EINVAL;
283 		goto err;
284 	}
285 
286 	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
287 		wint_t wc;
288 		_RuneType rc;
289 
290 		ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc);
291 		if (ret)
292 			goto err;
293 		if (wc == WEOF) {
294 			rlp->rlp_ctype_tab[i + 1] = 0;
295 			rlp->rlp_tolower_tab[i + 1] = i;
296 			rlp->rlp_toupper_tab[i + 1] = i;
297 		} else {
298 			rc = _runetype_priv(rl, wc);
299 			rlp->rlp_ctype_tab[i + 1] = (unsigned short)
300 			    ((rc & ~_RUNETYPE_SWM) >> 8);
301 
302 #ifdef __BUILD_LEGACY
303 			rlp->rlp_compat_bsdctype[i + 1]
304 			  = _runetype_to_bsdctype(rc);
305 #endif
306 
307 #define CONVERT_MAP(name)						\
308 do {									\
309 	wint_t map;							\
310 	int c;								\
311 									\
312 	map = _towctrans_priv(wc, _wctrans_##name(rl));			\
313 	if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype,	\
314 	    map, &c)  || c == EOF))					\
315 		c = i;							\
316 	rlp->rlp_to##name##_tab[i + 1] = (short)c;			\
317 } while (/*CONSTCOND*/0)
318 
319 			CONVERT_MAP(lower);
320 			CONVERT_MAP(upper);
321 		}
322 	}
323 	*prl = rl;
324 	return 0;
325 
326 err:
327 	free(rlp);
328 	return ret;
329 }
330 
331 int
_rune_load(const char * __restrict var,size_t lenvar,_RuneLocale ** __restrict prl)332 _rune_load(const char * __restrict var, size_t lenvar,
333     _RuneLocale ** __restrict prl)
334 {
335 	int ret;
336 
337 	_DIAGASSERT(var != NULL || lenvar < 1);
338 	_DIAGASSERT(prl != NULL);
339 
340 	if (lenvar < 1)
341 		return EFTYPE;
342 	switch (*var) {
343 	case 'R':
344 		ret = _rune_read_file(var, lenvar, prl);
345 		break;
346 	default:
347 		ret = EFTYPE;
348 	}
349 	return ret;
350 }
351