xref: /netbsd/lib/libc/locale/rune.c (revision c4a72b64)
1 /*	$NetBSD: rune.c,v 1.17 2002/11/17 20:40:59 itojun Exp $	*/
2 
3 /*-
4  * Copyright (c)1999 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*-
30  * Copyright (c) 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Paul Borman at Krystal Technologies.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 #include <sys/cdefs.h>
66 #if defined(LIBC_SCCS) && !defined(lint)
67 #if 0
68 static char sccsid[] = "@(#)rune.c	8.1 (Berkeley) 6/4/93";
69 #else
70 __RCSID("$NetBSD: rune.c,v 1.17 2002/11/17 20:40:59 itojun Exp $");
71 #endif
72 #endif /* LIBC_SCCS and not lint */
73 
74 #include <assert.h>
75 #include <stdio.h>
76 #include <string.h>
77 #include <stdlib.h>
78 #include <errno.h>
79 #include <sys/types.h>
80 #include <sys/stat.h>
81 #include <citrus/citrus_module.h>
82 #include <citrus/citrus_ctype.h>
83 #include "rune.h"
84 #include "rune_local.h"
85 
86 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *));
87 static void _freeentry __P((_RuneRange *));
88 
89 static int
90 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp,
91 	FILE *fp)
92 {
93 	uint32_t i;
94 	_RuneEntry *re;
95 	_FileRuneEntry fre;
96 
97 	_DIAGASSERT(rl != NULL);
98 	_DIAGASSERT(rr != NULL);
99 	_DIAGASSERT(frr != NULL);
100 	_DIAGASSERT(lastp != NULL);
101 	_DIAGASSERT(fp != NULL);
102 
103 	re = (_RuneEntry *)rl->rl_variable;
104 
105 	rr->rr_nranges = ntohl(frr->frr_nranges);
106 	if (rr->rr_nranges == 0) {
107 		rr->rr_rune_ranges = NULL;
108 		return 0;
109 	}
110 
111 	rr->rr_rune_ranges = re;
112 	for (i = 0; i < rr->rr_nranges; i++) {
113 		if (fread(&fre, sizeof(fre), 1, fp) != 1)
114 			return -1;
115 
116 		re->re_min = ntohl((u_int32_t)fre.fre_min);
117 		re->re_max = ntohl((u_int32_t)fre.fre_max);
118 		re->re_map = ntohl((u_int32_t)fre.fre_map);
119 		re++;
120 
121 		if ((void *)re > lastp)
122 			return -1;
123 	}
124 	rl->rl_variable = re;
125 	return 0;
126 }
127 
128 static int
129 readentry(_RuneRange *rr, FILE *fp)
130 {
131 	_RuneEntry *re;
132 	size_t l, i, j;
133 	int error;
134 
135 	_DIAGASSERT(rr != NULL);
136 	_DIAGASSERT(fp != NULL);
137 
138 	re = rr->rr_rune_ranges;
139 	for (i = 0; i < rr->rr_nranges; i++) {
140 		if (re[i].re_map != 0) {
141 			re[i].re_rune_types = NULL;
142 			continue;
143 		}
144 
145 		l = re[i].re_max - re[i].re_min + 1;
146 		re[i].re_rune_types = malloc(l * sizeof(_RuneType));
147 		if (!re[i].re_rune_types) {
148 			error = ENOMEM;
149 			goto fail;
150 		}
151 		memset(re[i].re_rune_types, 0, l * sizeof(_RuneType));
152 
153 		if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l)
154 			goto fail2;
155 
156 		for (j = 0; j < l; j++)
157 			re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]);
158 	}
159 	return 0;
160 
161 fail:
162 	for (j = 0; j < i; j++) {
163 		free(re[j].re_rune_types);
164 		re[j].re_rune_types = NULL;
165 	}
166 	return error;
167 fail2:
168 	for (j = 0; j <= i; j++) {
169 		free(re[j].re_rune_types);
170 		re[j].re_rune_types = NULL;
171 	}
172 	return errno;
173 }
174 
175 /* XXX: temporary implementation */
176 static void
177 find_codeset(_RuneLocale *rl)
178 {
179 	char *top, *codeset, *tail;
180 
181 	rl->rl_codeset = NULL;
182 	if (!(top = strstr(rl->rl_variable, _RUNE_CODESET)))
183 		return;
184 	tail = strpbrk(top, " \t");
185 	codeset = top + sizeof(_RUNE_CODESET)-1;
186 	if (tail) {
187 		*top = *tail;
188 		*tail = '\0';
189 		rl->rl_codeset = strdup(codeset);
190 		strcpy(top + 1, tail + 1);
191 	} else {
192 		*top = '\0';
193 		rl->rl_codeset = strdup(codeset);
194 	}
195 }
196 
197 void
198 _freeentry(_RuneRange *rr)
199 {
200 	_RuneEntry *re;
201 	uint32_t i;
202 
203 	_DIAGASSERT(rr != NULL);
204 
205 	re = rr->rr_rune_ranges;
206 	for (i = 0; i < rr->rr_nranges; i++) {
207 		if (re[i].re_rune_types)
208 			free(re[i].re_rune_types);
209 		re[i].re_rune_types = NULL;
210 	}
211 }
212 
213 _RuneLocale *
214 _Read_RuneMagi(fp)
215 	FILE *fp;
216 {
217 	/* file */
218 	_FileRuneLocale frl;
219 	/* host data */
220 	char *hostdata;
221 	size_t hostdatalen;
222 	void *lastp;
223 	_RuneLocale *rl;
224 	struct stat sb;
225 	int x;
226 
227 	_DIAGASSERT(fp != NULL);
228 
229 	if (fstat(fileno(fp), &sb) < 0)
230 		return NULL;
231 
232 	if (sb.st_size < sizeof(_RuneLocale))
233 		return NULL;
234 	/* XXX more validation? */
235 
236 	/* Someone might have read the magic number once already */
237 	rewind(fp);
238 
239 	if (fread(&frl, sizeof(frl), 1, fp) != 1)
240 		return NULL;
241 	if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic)))
242 		return NULL;
243 
244 	hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) +
245 	    ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) +
246 	    ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) +
247 	    ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry);
248 
249 	if ((hostdata = malloc(hostdatalen)) == NULL)
250 		return NULL;
251 	memset(hostdata, 0, hostdatalen);
252 	lastp = hostdata + hostdatalen;
253 
254 	rl = (_RuneLocale *)(void *)hostdata;
255 	rl->rl_variable = rl + 1;
256 
257 	memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic));
258 	memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding));
259 
260 	rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune);
261 	rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len);
262 
263 	for (x = 0; x < _CACHED_RUNES; ++x) {
264 		rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]);
265 
266 		/* XXX assumes rune_t = u_int32_t */
267 		rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]);
268 		rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]);
269 	}
270 
271 	if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp))
272 	{
273 		free(hostdata);
274 		return NULL;
275 	}
276 	if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp))
277 	{
278 		free(hostdata);
279 		return NULL;
280 	}
281 	if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp))
282 	{
283 		free(hostdata);
284 		return NULL;
285 	}
286 
287 	if (readentry(&rl->rl_runetype_ext, fp) != 0) {
288 		free(hostdata);
289 		return NULL;
290 	}
291 
292 	if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len >
293 	    (u_int8_t *)lastp) {
294 		_freeentry(&rl->rl_runetype_ext);
295 		free(hostdata);
296 		return NULL;
297 	}
298 	if (rl->rl_variable_len == 0)
299 		rl->rl_variable = NULL;
300 	else if (fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) {
301 		_freeentry(&rl->rl_runetype_ext);
302 		free(hostdata);
303 		return NULL;
304 	}
305 	find_codeset(rl);
306 
307 	/* error if we have junk at the tail */
308 	if (ftell(fp) != sb.st_size) {
309 		_freeentry(&rl->rl_runetype_ext);
310 		free(hostdata);
311 		return NULL;
312 	}
313 
314 	return(rl);
315 }
316 
317 void
318 _NukeRune(rl)
319 	_RuneLocale *rl;
320 {
321 
322 	_DIAGASSERT(rl != NULL);
323 
324 	if (rl != &_DefaultRuneLocale) {
325 		_freeentry(&rl->rl_runetype_ext);
326 		if (rl->rl_codeset)
327 			free(rl->rl_codeset);
328 		if (rl->rl_citrus_ctype)
329 			_citrus_ctype_close(rl->rl_citrus_ctype);
330 		free(rl);
331 	}
332 }
333 
334 /*
335  * read in old LC_CTYPE declaration file, convert into runelocale info
336  */
337 #define _CTYPE_PRIVATE
338 #include <limits.h>
339 #include <ctype.h>
340 
341 _RuneLocale *
342 _Read_CTypeAsRune(fp)
343 	FILE *fp;
344 {
345 	char id[sizeof(_CTYPE_ID) - 1];
346 	u_int32_t i, len;
347 	u_int8_t *new_ctype = NULL;
348 	int16_t *new_toupper = NULL, *new_tolower = NULL;
349 	/* host data */
350 	char *hostdata = NULL;
351 	size_t hostdatalen;
352 	_RuneLocale *rl;
353 	struct stat sb;
354 	int x;
355 
356 	_DIAGASSERT(fp != NULL);
357 
358 	if (fstat(fileno(fp), &sb) < 0)
359 		return NULL;
360 
361 	if (sb.st_size < sizeof(id))
362 		return NULL;
363 	/* XXX more validation? */
364 
365 	/* Someone might have read the magic number once already */
366 	rewind(fp);
367 
368 	if (fread(id, sizeof(id), 1, fp) != 1)
369 		goto bad;
370 	if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0)
371 		goto bad;
372 
373 	if (fread(&i, sizeof(u_int32_t), 1, fp) != 1)
374 		goto bad;
375 	if ((i = ntohl(i)) != _CTYPE_REV)
376 		goto bad;
377 
378 	if (fread(&len, sizeof(u_int32_t), 1, fp) != 1)
379 		goto bad;
380 	if ((len = ntohl(len)) != _CTYPE_NUM_CHARS)
381 		goto bad;
382 
383 	if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL ||
384 	    (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL ||
385 	    (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL)
386 		goto bad;
387 	new_ctype[0] = 0;
388 	if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len)
389 		goto bad;
390 	new_toupper[0] = EOF;
391 	if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len)
392 		goto bad;
393 	new_tolower[0] = EOF;
394 	if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len)
395 		goto bad;
396 
397 	hostdatalen = sizeof(*rl);
398 
399 	if ((hostdata = malloc(hostdatalen)) == NULL)
400 		goto bad;
401 	memset(hostdata, 0, hostdatalen);
402 	rl = (_RuneLocale *)(void *)hostdata;
403 	rl->rl_variable = NULL;
404 
405 	memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic));
406 	memcpy(rl->rl_encoding, "NONE", 4);
407 
408 	rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune;	/*XXX*/
409 	rl->rl_variable_len = 0;
410 
411 	for (x = 0; x < _CACHED_RUNES; ++x) {
412 		if ((uint32_t) x > len)
413 			continue;
414 
415 		/*
416 		 * TWEAKS!
417 		 * - old locale file declarations do not have proper _B
418 		 *   in many cases.
419 		 * - isprint() declaration in ctype.h incorrectly uses _B.
420 		 *   _B means "isprint but !isgraph", not "isblank" with the
421 		 *   declaration.
422 		 * - _X and _CTYPE_X have negligible difference in meaning.
423 		 * - we don't set digit value, fearing that it would be
424 		 *   too much of hardcoding.  we may need to revisit it.
425 		 */
426 
427 		if (new_ctype[1 + x] & _U)
428 			rl->rl_runetype[x] |= _CTYPE_U;
429 		if (new_ctype[1 + x] & _L)
430 			rl->rl_runetype[x] |= _CTYPE_L;
431 		if (new_ctype[1 + x] & _N)
432 			rl->rl_runetype[x] |= _CTYPE_D;
433 		if (new_ctype[1 + x] & _S)
434 			rl->rl_runetype[x] |= _CTYPE_S;
435 		if (new_ctype[1 + x] & _P)
436 			rl->rl_runetype[x] |= _CTYPE_P;
437 		if (new_ctype[1 + x] & _C)
438 			rl->rl_runetype[x] |= _CTYPE_C;
439 		/* derived flag bits, duplicate of ctype.h */
440 		if (new_ctype[1 + x] & (_U | _L))
441 			rl->rl_runetype[x] |= _CTYPE_A;
442 		if (new_ctype[1 + x] & (_N | _X))
443 			rl->rl_runetype[x] |= _CTYPE_X;
444 		if (new_ctype[1 + x] & (_P|_U|_L|_N))
445 			rl->rl_runetype[x] |= _CTYPE_G;
446 		/* we don't really trust _B in the file.  see above. */
447 		if (new_ctype[1 + x] & _B)
448 			rl->rl_runetype[x] |= _CTYPE_B;
449 		if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ')
450 			rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1);
451 		if (x == ' ' || x == '\t')
452 			rl->rl_runetype[x] |= _CTYPE_B;
453 
454 		/* XXX may fail on non-8bit encoding only */
455 		rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]);
456 		rl->rl_maplower[x] = ntohs(new_tolower[1 + x]);
457 	}
458 
459 	/*
460 	 * __runetable_to_netbsd_ctype() will be called from
461 	 * setlocale.c:loadlocale(), and fill old ctype table.
462 	 */
463 
464 	free(new_ctype);
465 	free(new_toupper);
466 	free(new_tolower);
467 	return(rl);
468 
469 bad:
470 	if (new_ctype)
471 		free(new_ctype);
472 	if (new_toupper)
473 		free(new_toupper);
474 	if (new_tolower)
475 		free(new_tolower);
476 	if (hostdata)
477 		free(hostdata);
478 	return NULL;
479 }
480