1 /* $NetBSD: rune.c,v 1.17 2002/11/17 20:40:59 itojun Exp $ */ 2 3 /*- 4 * Copyright (c)1999 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 */ 64 65 #include <sys/cdefs.h> 66 #if defined(LIBC_SCCS) && !defined(lint) 67 #if 0 68 static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93"; 69 #else 70 __RCSID("$NetBSD: rune.c,v 1.17 2002/11/17 20:40:59 itojun Exp $"); 71 #endif 72 #endif /* LIBC_SCCS and not lint */ 73 74 #include <assert.h> 75 #include <stdio.h> 76 #include <string.h> 77 #include <stdlib.h> 78 #include <errno.h> 79 #include <sys/types.h> 80 #include <sys/stat.h> 81 #include <citrus/citrus_module.h> 82 #include <citrus/citrus_ctype.h> 83 #include "rune.h" 84 #include "rune_local.h" 85 86 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *)); 87 static void _freeentry __P((_RuneRange *)); 88 89 static int 90 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp, 91 FILE *fp) 92 { 93 uint32_t i; 94 _RuneEntry *re; 95 _FileRuneEntry fre; 96 97 _DIAGASSERT(rl != NULL); 98 _DIAGASSERT(rr != NULL); 99 _DIAGASSERT(frr != NULL); 100 _DIAGASSERT(lastp != NULL); 101 _DIAGASSERT(fp != NULL); 102 103 re = (_RuneEntry *)rl->rl_variable; 104 105 rr->rr_nranges = ntohl(frr->frr_nranges); 106 if (rr->rr_nranges == 0) { 107 rr->rr_rune_ranges = NULL; 108 return 0; 109 } 110 111 rr->rr_rune_ranges = re; 112 for (i = 0; i < rr->rr_nranges; i++) { 113 if (fread(&fre, sizeof(fre), 1, fp) != 1) 114 return -1; 115 116 re->re_min = ntohl((u_int32_t)fre.fre_min); 117 re->re_max = ntohl((u_int32_t)fre.fre_max); 118 re->re_map = ntohl((u_int32_t)fre.fre_map); 119 re++; 120 121 if ((void *)re > lastp) 122 return -1; 123 } 124 rl->rl_variable = re; 125 return 0; 126 } 127 128 static int 129 readentry(_RuneRange *rr, FILE *fp) 130 { 131 _RuneEntry *re; 132 size_t l, i, j; 133 int error; 134 135 _DIAGASSERT(rr != NULL); 136 _DIAGASSERT(fp != NULL); 137 138 re = rr->rr_rune_ranges; 139 for (i = 0; i < rr->rr_nranges; i++) { 140 if (re[i].re_map != 0) { 141 re[i].re_rune_types = NULL; 142 continue; 143 } 144 145 l = re[i].re_max - re[i].re_min + 1; 146 re[i].re_rune_types = malloc(l * sizeof(_RuneType)); 147 if (!re[i].re_rune_types) { 148 error = ENOMEM; 149 goto fail; 150 } 151 memset(re[i].re_rune_types, 0, l * sizeof(_RuneType)); 152 153 if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l) 154 goto fail2; 155 156 for (j = 0; j < l; j++) 157 re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]); 158 } 159 return 0; 160 161 fail: 162 for (j = 0; j < i; j++) { 163 free(re[j].re_rune_types); 164 re[j].re_rune_types = NULL; 165 } 166 return error; 167 fail2: 168 for (j = 0; j <= i; j++) { 169 free(re[j].re_rune_types); 170 re[j].re_rune_types = NULL; 171 } 172 return errno; 173 } 174 175 /* XXX: temporary implementation */ 176 static void 177 find_codeset(_RuneLocale *rl) 178 { 179 char *top, *codeset, *tail; 180 181 rl->rl_codeset = NULL; 182 if (!(top = strstr(rl->rl_variable, _RUNE_CODESET))) 183 return; 184 tail = strpbrk(top, " \t"); 185 codeset = top + sizeof(_RUNE_CODESET)-1; 186 if (tail) { 187 *top = *tail; 188 *tail = '\0'; 189 rl->rl_codeset = strdup(codeset); 190 strcpy(top + 1, tail + 1); 191 } else { 192 *top = '\0'; 193 rl->rl_codeset = strdup(codeset); 194 } 195 } 196 197 void 198 _freeentry(_RuneRange *rr) 199 { 200 _RuneEntry *re; 201 uint32_t i; 202 203 _DIAGASSERT(rr != NULL); 204 205 re = rr->rr_rune_ranges; 206 for (i = 0; i < rr->rr_nranges; i++) { 207 if (re[i].re_rune_types) 208 free(re[i].re_rune_types); 209 re[i].re_rune_types = NULL; 210 } 211 } 212 213 _RuneLocale * 214 _Read_RuneMagi(fp) 215 FILE *fp; 216 { 217 /* file */ 218 _FileRuneLocale frl; 219 /* host data */ 220 char *hostdata; 221 size_t hostdatalen; 222 void *lastp; 223 _RuneLocale *rl; 224 struct stat sb; 225 int x; 226 227 _DIAGASSERT(fp != NULL); 228 229 if (fstat(fileno(fp), &sb) < 0) 230 return NULL; 231 232 if (sb.st_size < sizeof(_RuneLocale)) 233 return NULL; 234 /* XXX more validation? */ 235 236 /* Someone might have read the magic number once already */ 237 rewind(fp); 238 239 if (fread(&frl, sizeof(frl), 1, fp) != 1) 240 return NULL; 241 if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic))) 242 return NULL; 243 244 hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) + 245 ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) + 246 ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) + 247 ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry); 248 249 if ((hostdata = malloc(hostdatalen)) == NULL) 250 return NULL; 251 memset(hostdata, 0, hostdatalen); 252 lastp = hostdata + hostdatalen; 253 254 rl = (_RuneLocale *)(void *)hostdata; 255 rl->rl_variable = rl + 1; 256 257 memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic)); 258 memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding)); 259 260 rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune); 261 rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len); 262 263 for (x = 0; x < _CACHED_RUNES; ++x) { 264 rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]); 265 266 /* XXX assumes rune_t = u_int32_t */ 267 rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]); 268 rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]); 269 } 270 271 if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp)) 272 { 273 free(hostdata); 274 return NULL; 275 } 276 if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp)) 277 { 278 free(hostdata); 279 return NULL; 280 } 281 if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp)) 282 { 283 free(hostdata); 284 return NULL; 285 } 286 287 if (readentry(&rl->rl_runetype_ext, fp) != 0) { 288 free(hostdata); 289 return NULL; 290 } 291 292 if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len > 293 (u_int8_t *)lastp) { 294 _freeentry(&rl->rl_runetype_ext); 295 free(hostdata); 296 return NULL; 297 } 298 if (rl->rl_variable_len == 0) 299 rl->rl_variable = NULL; 300 else if (fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) { 301 _freeentry(&rl->rl_runetype_ext); 302 free(hostdata); 303 return NULL; 304 } 305 find_codeset(rl); 306 307 /* error if we have junk at the tail */ 308 if (ftell(fp) != sb.st_size) { 309 _freeentry(&rl->rl_runetype_ext); 310 free(hostdata); 311 return NULL; 312 } 313 314 return(rl); 315 } 316 317 void 318 _NukeRune(rl) 319 _RuneLocale *rl; 320 { 321 322 _DIAGASSERT(rl != NULL); 323 324 if (rl != &_DefaultRuneLocale) { 325 _freeentry(&rl->rl_runetype_ext); 326 if (rl->rl_codeset) 327 free(rl->rl_codeset); 328 if (rl->rl_citrus_ctype) 329 _citrus_ctype_close(rl->rl_citrus_ctype); 330 free(rl); 331 } 332 } 333 334 /* 335 * read in old LC_CTYPE declaration file, convert into runelocale info 336 */ 337 #define _CTYPE_PRIVATE 338 #include <limits.h> 339 #include <ctype.h> 340 341 _RuneLocale * 342 _Read_CTypeAsRune(fp) 343 FILE *fp; 344 { 345 char id[sizeof(_CTYPE_ID) - 1]; 346 u_int32_t i, len; 347 u_int8_t *new_ctype = NULL; 348 int16_t *new_toupper = NULL, *new_tolower = NULL; 349 /* host data */ 350 char *hostdata = NULL; 351 size_t hostdatalen; 352 _RuneLocale *rl; 353 struct stat sb; 354 int x; 355 356 _DIAGASSERT(fp != NULL); 357 358 if (fstat(fileno(fp), &sb) < 0) 359 return NULL; 360 361 if (sb.st_size < sizeof(id)) 362 return NULL; 363 /* XXX more validation? */ 364 365 /* Someone might have read the magic number once already */ 366 rewind(fp); 367 368 if (fread(id, sizeof(id), 1, fp) != 1) 369 goto bad; 370 if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0) 371 goto bad; 372 373 if (fread(&i, sizeof(u_int32_t), 1, fp) != 1) 374 goto bad; 375 if ((i = ntohl(i)) != _CTYPE_REV) 376 goto bad; 377 378 if (fread(&len, sizeof(u_int32_t), 1, fp) != 1) 379 goto bad; 380 if ((len = ntohl(len)) != _CTYPE_NUM_CHARS) 381 goto bad; 382 383 if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL || 384 (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL || 385 (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL) 386 goto bad; 387 new_ctype[0] = 0; 388 if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len) 389 goto bad; 390 new_toupper[0] = EOF; 391 if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len) 392 goto bad; 393 new_tolower[0] = EOF; 394 if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len) 395 goto bad; 396 397 hostdatalen = sizeof(*rl); 398 399 if ((hostdata = malloc(hostdatalen)) == NULL) 400 goto bad; 401 memset(hostdata, 0, hostdatalen); 402 rl = (_RuneLocale *)(void *)hostdata; 403 rl->rl_variable = NULL; 404 405 memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic)); 406 memcpy(rl->rl_encoding, "NONE", 4); 407 408 rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune; /*XXX*/ 409 rl->rl_variable_len = 0; 410 411 for (x = 0; x < _CACHED_RUNES; ++x) { 412 if ((uint32_t) x > len) 413 continue; 414 415 /* 416 * TWEAKS! 417 * - old locale file declarations do not have proper _B 418 * in many cases. 419 * - isprint() declaration in ctype.h incorrectly uses _B. 420 * _B means "isprint but !isgraph", not "isblank" with the 421 * declaration. 422 * - _X and _CTYPE_X have negligible difference in meaning. 423 * - we don't set digit value, fearing that it would be 424 * too much of hardcoding. we may need to revisit it. 425 */ 426 427 if (new_ctype[1 + x] & _U) 428 rl->rl_runetype[x] |= _CTYPE_U; 429 if (new_ctype[1 + x] & _L) 430 rl->rl_runetype[x] |= _CTYPE_L; 431 if (new_ctype[1 + x] & _N) 432 rl->rl_runetype[x] |= _CTYPE_D; 433 if (new_ctype[1 + x] & _S) 434 rl->rl_runetype[x] |= _CTYPE_S; 435 if (new_ctype[1 + x] & _P) 436 rl->rl_runetype[x] |= _CTYPE_P; 437 if (new_ctype[1 + x] & _C) 438 rl->rl_runetype[x] |= _CTYPE_C; 439 /* derived flag bits, duplicate of ctype.h */ 440 if (new_ctype[1 + x] & (_U | _L)) 441 rl->rl_runetype[x] |= _CTYPE_A; 442 if (new_ctype[1 + x] & (_N | _X)) 443 rl->rl_runetype[x] |= _CTYPE_X; 444 if (new_ctype[1 + x] & (_P|_U|_L|_N)) 445 rl->rl_runetype[x] |= _CTYPE_G; 446 /* we don't really trust _B in the file. see above. */ 447 if (new_ctype[1 + x] & _B) 448 rl->rl_runetype[x] |= _CTYPE_B; 449 if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ') 450 rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1); 451 if (x == ' ' || x == '\t') 452 rl->rl_runetype[x] |= _CTYPE_B; 453 454 /* XXX may fail on non-8bit encoding only */ 455 rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]); 456 rl->rl_maplower[x] = ntohs(new_tolower[1 + x]); 457 } 458 459 /* 460 * __runetable_to_netbsd_ctype() will be called from 461 * setlocale.c:loadlocale(), and fill old ctype table. 462 */ 463 464 free(new_ctype); 465 free(new_toupper); 466 free(new_tolower); 467 return(rl); 468 469 bad: 470 if (new_ctype) 471 free(new_ctype); 472 if (new_toupper) 473 free(new_toupper); 474 if (new_tolower) 475 free(new_tolower); 476 if (hostdata) 477 free(hostdata); 478 return NULL; 479 } 480