1 /* @(#)sic_nls.c 1.18 14/01/15 Copyright 2007-2014 J. Schilling */ 2 #include <schily/mconfig.h> 3 #ifndef lint 4 static UConst char sccsid[] = 5 "@(#)sic_nls.c 1.18 14/01/15 Copyright 2007-2014 J. Schilling"; 6 #endif 7 /* 8 * This code reads translation files in the format used by 9 * the Unicode Organization (www.unicode.org). 10 * 11 * The current implementation is only useful to create translations 12 * from single byte character sets to unicode. 13 * We use this code on systems that do not provide the iconv() function. 14 * 15 * Copyright 2007-2014 J. Schilling 16 */ 17 /* 18 * The contents of this file are subject to the terms of the 19 * Common Development and Distribution License, Version 1.0 only 20 * (the "License"). You may not use this file except in compliance 21 * with the License. 22 * 23 * See the file CDDL.Schily.txt in this distribution for details. 24 * A copy of the CDDL is also available via the Internet at 25 * http://www.opensource.org/licenses/cddl1.txt 26 * 27 * When distributing Covered Code, include this CDDL HEADER in each 28 * file and include the License file CDDL.Schily.txt from this distribution. 29 */ 30 31 #include <schily/stdio.h> 32 #include <schily/stdlib.h> 33 #include <schily/string.h> 34 #include <schily/libport.h> /* For strdup() */ 35 #include <schily/unistd.h> /* For R_OK */ 36 #include <schily/schily.h> 37 #include <schily/dirent.h> 38 #include <schily/siconv.h> 39 40 #define TAB_SIZE (UINT8_MAX+1) 41 #define __CAN_TAB_SIZE__ 42 43 #ifndef PROTOTYPES 44 #undef __CAN_TAB_SIZE__ 45 #endif 46 #if (!defined(__STDC__) || __STDC__ < 1) && \ 47 !defined(__SUNPRO_C) /* Sun Compilers are OK even with __STDC__ 0 */ 48 /* 49 * C-preprocessors from K&R compilers cannot do the computation for TAB_SIZE 50 * in the next line We need to disable this test in case of a K&R compiler. 51 */ 52 #undef __CAN_TAB_SIZE__ 53 #endif 54 #ifdef __GNUC__ 55 #if __GNUC__ < 2 56 #undef __CAN_TAB_SIZE__ 57 #endif 58 #if __GNUC__ < 3 && __GNUC_MINOR__ < 95 59 #undef __CAN_TAB_SIZE__ 60 #endif 61 #endif 62 #if defined(VMS) && !defined(__GNUC__) 63 #undef __CAN_TAB_SIZE__ 64 #endif 65 66 #ifdef __CAN_TAB_SIZE__ 67 #if TAB_SIZE < 256 68 Error Table size too small 69 #endif 70 #endif 71 72 LOCAL UInt8_t nullpage[TAB_SIZE] = { 0 }; 73 LOCAL char *ins_base; 74 75 LOCAL siconvt_t *insert_sic __PR((siconvt_t *sip)); 76 LOCAL int remove_sic __PR((siconvt_t *sip)); 77 EXPORT siconvt_t *sic_open __PR((char *name)); 78 EXPORT const char *sic_base __PR((void)); 79 EXPORT int sic_close __PR((siconvt_t *sip)); 80 EXPORT int sic_list __PR((FILE *f)); 81 LOCAL void freetbl __PR((UInt8_t **uni2cs)); 82 LOCAL FILE *pfopen __PR((char *name)); 83 LOCAL siconvt_t *create_sic __PR((char *name)); 84 #ifdef USE_ICONV 85 LOCAL siconvt_t *create_iconv_sic __PR((char *name)); 86 LOCAL siconvt_t *dup_iconv_sic __PR((siconvt_t *sip)); 87 #endif 88 89 /* 90 * Global list for translation tables 91 */ 92 LOCAL siconvt_t *glist = (siconvt_t *) NULL; 93 94 /* 95 * Insert a table into the global list and allow to reuse it 96 */ 97 LOCAL siconvt_t * 98 insert_sic(sip) 99 siconvt_t *sip; 100 { 101 siconvt_t **sp = &glist; 102 103 if (sip == (siconvt_t *)NULL) /* No table arg */ 104 return ((siconvt_t *)NULL); 105 if (sip->sic_next) /* Already in list */ 106 return (sip); 107 108 while (*sp) { 109 if (sip == *sp) { /* Already in list */ 110 return (sip); 111 } 112 sp = &(*sp)->sic_next; 113 } 114 sip->sic_next = glist; 115 glist = sip; 116 return (sip); 117 } 118 119 /* 120 * Remove a table from the global list 121 */ 122 LOCAL int 123 remove_sic(sip) 124 siconvt_t *sip; 125 { 126 siconvt_t **sp = &glist; 127 128 while (*sp) { 129 #ifdef USE_ICONV 130 if (strcmp(sip->sic_name, (*sp)->sic_name) == 0) { 131 siconvt_t *sap = *sp; 132 133 if (sip == *sp) { 134 *sp = sip->sic_next; 135 return (0); 136 } 137 while (sap->sic_alt != NULL) { 138 if (sap->sic_alt == sip) { 139 sap->sic_alt = sip->sic_alt; 140 sip->sic_name = NULL; /* No free() */ 141 return (0); 142 } 143 sap = sap->sic_alt; 144 } 145 } 146 #endif 147 if (sip == *sp) { 148 *sp = sip->sic_next; 149 return (0); 150 } 151 sp = &(*sp)->sic_next; 152 } 153 return (-1); 154 } 155 156 /* 157 * Open a new translation 158 */ 159 EXPORT siconvt_t * 160 sic_open(charset) 161 char *charset; 162 { 163 siconvt_t *sip = glist; 164 165 if (charset == NULL || *charset == '\0') 166 return ((siconvt_t *)NULL); 167 168 while (sip) { 169 if (strcmp(sip->sic_name, charset) == 0) { 170 #ifdef USE_ICONV 171 if (sip->sic_cd2uni != 0) 172 return (dup_iconv_sic(sip)); 173 #endif 174 sip->sic_refcnt++; 175 return (sip); 176 } 177 sip = sip->sic_next; 178 } 179 return (create_sic(charset)); 180 } 181 182 /* 183 * Open a new translation 184 */ 185 EXPORT const char * 186 sic_base() 187 { 188 if (ins_base == NULL) { 189 ins_base = searchfileinpath("lib/siconv/iso8859-1", R_OK, 190 SIP_PLAIN_FILE, NULL); 191 if (ins_base != NULL) { 192 int len = strlen(ins_base); 193 194 ins_base[len - 9] = '\0'; 195 } 196 } 197 return (ins_base); 198 } 199 200 /* 201 * Close a translation 202 */ 203 EXPORT int 204 sic_close(sip) 205 siconvt_t *sip; 206 { 207 if (remove_sic(sip) < 0) 208 return (-1); 209 210 if (--sip->sic_refcnt > 0) 211 return (0); 212 213 if (sip->sic_name) 214 free(sip->sic_name); 215 if (sip->sic_uni2cs) 216 freetbl(sip->sic_uni2cs); 217 if (sip->sic_cs2uni) 218 free(sip->sic_cs2uni); 219 #ifdef USE_ICONV 220 if (sip->sic_cd2uni) 221 iconv_close(sip->sic_cd2uni); 222 if (sip->sic_uni2cd) 223 iconv_close(sip->sic_uni2cd); 224 #endif 225 226 return (0); 227 } 228 229 /* 230 * List all possible translation files in the install directory. 231 */ 232 EXPORT int 233 sic_list(f) 234 FILE *f; 235 { 236 char path[1024]; 237 DIR *d; 238 struct dirent *dp; 239 int i = 0; 240 241 if (ins_base == NULL) 242 (void) sic_base(); 243 244 if (ins_base != NULL) 245 snprintf(path, sizeof (path), "%s", ins_base); 246 else 247 snprintf(path, sizeof (path), "%s/lib/siconv/", INS_BASE); 248 if ((d = opendir(path)) == NULL) 249 return (-1); 250 251 while ((dp = readdir(d)) != NULL) { 252 if (dp->d_name[0] == '.') { 253 if (dp->d_name[1] == '\0') 254 continue; 255 if (dp->d_name[1] == '.' && dp->d_name[2] == '\0') 256 continue; 257 } 258 fprintf(f, "%s\n", dp->d_name); 259 i++; 260 } 261 return (i); 262 } 263 264 /* 265 * Free a reverse (uncode -> char) translation table 266 */ 267 LOCAL void 268 freetbl(uni2cs) 269 UInt8_t **uni2cs; 270 { 271 int i; 272 273 for (i = 0; i < TAB_SIZE; i++) { 274 if (uni2cs[i] != nullpage) { 275 free(uni2cs[i]); 276 } 277 } 278 free(uni2cs); 279 } 280 281 /* 282 * Search a tranlation table, first in the current directory and then 283 * in the install directory. 284 */ 285 LOCAL FILE * 286 pfopen(name) 287 char *name; 288 { 289 char path[1024]; 290 char *p; 291 292 if (strchr(name, '/')) 293 return (fopen(name, "r")); 294 295 if (ins_base == NULL) 296 (void) sic_base(); 297 298 p = ins_base; 299 if (p != NULL) { 300 snprintf(path, sizeof (path), "%s%s", p, name); 301 return (fopen(path, "r")); 302 } 303 snprintf(path, sizeof (path), "%s/lib/siconv/%s", INS_BASE, name); 304 return (fopen(path, "r")); 305 } 306 307 308 /* 309 * Create a new translation either from a file or from iconv_open() 310 */ 311 LOCAL siconvt_t * 312 create_sic(name) 313 char *name; 314 { 315 UInt16_t *cs2uni = NULL; 316 UInt8_t **uni2cs = NULL; 317 siconvt_t *sip; 318 char line[1024]; 319 FILE *f; 320 unsigned ch; 321 unsigned uni; 322 int i; 323 int numtrans = 0; 324 325 if (name == NULL || *name == '\0') 326 return ((siconvt_t *)NULL); 327 328 #ifdef USE_ICONV 329 /* 330 * Explicitly search for an iconv based translation 331 */ 332 if (strncmp("iconv:", name, 6) == 0) { 333 return (create_iconv_sic(name)); 334 } 335 #else 336 if (strncmp("iconv:", name, 6) == 0) { 337 return ((siconvt_t *)NULL); 338 } 339 #endif 340 341 if ((f = pfopen(name)) == (FILE *)NULL) { 342 if (strcmp(name, "default") == 0) { 343 if ((cs2uni = (UInt16_t *) 344 malloc(sizeof (UInt16_t) * TAB_SIZE)) == NULL) { 345 return ((siconvt_t *)NULL); 346 } 347 /* 348 * Set up a 1:1 translation table like ISO-8859-1 349 */ 350 for (i = 0; i < TAB_SIZE; i++) 351 cs2uni[i] = i; 352 goto do_reverse; 353 } 354 #ifdef USE_ICONV 355 return (create_iconv_sic(name)); 356 #else 357 return ((siconvt_t *)NULL); 358 #endif 359 } 360 361 if ((cs2uni = (UInt16_t *) 362 malloc(sizeof (UInt16_t) * TAB_SIZE)) == NULL) { 363 fclose(f); 364 return ((siconvt_t *)NULL); 365 } 366 367 /* 368 * Set up mapping base. 369 * Always map the control characters 0x00 .. 0x1F 370 */ 371 for (i = 0; i < 32; i++) 372 cs2uni[i] = i; 373 374 for (i = 32; i < TAB_SIZE; i++) 375 cs2uni[i] = '\0'; /* nul marks an illegal character */ 376 377 cs2uni[0x7f] = 0x7F; /* Always map DELETE character 0x7F */ 378 379 while (fgets(line, sizeof (line), f) != NULL) { 380 char *p; 381 382 if ((p = strchr(line, '#')) != NULL) 383 *p = '\0'; 384 385 if (sscanf(line, "%x%x", &ch, &uni) == 2) { 386 /* 387 * Only accept exactly two values in the right range. 388 */ 389 if (ch > 0xFF || uni > 0xFFFF) 390 continue; 391 392 cs2uni[ch] = uni; /* Set up unicode translation */ 393 numtrans++; 394 } 395 } 396 fclose(f); 397 398 if (numtrans == 0) { /* No valid translations found */ 399 free(cs2uni); 400 return ((siconvt_t *)NULL); 401 } 402 403 do_reverse: 404 if ((uni2cs = (UInt8_t **) 405 malloc(sizeof (unsigned char *) * TAB_SIZE)) == NULL) { 406 free(cs2uni); 407 return ((siconvt_t *)NULL); 408 } 409 for (i = 0; i < TAB_SIZE; i++) /* Map all pages to the nullpage */ 410 uni2cs[i] = nullpage; 411 412 /* 413 * Create a reversed table from the forward table read from the file. 414 */ 415 for (i = 0; i < TAB_SIZE; i++) { 416 UInt8_t high; 417 UInt8_t low; 418 UInt8_t *page; 419 420 uni = cs2uni[i]; 421 high = (uni >> 8) & 0xFF; 422 low = uni & 0xFF; 423 page = uni2cs[high]; 424 425 if (page == nullpage) { 426 int j; 427 428 /* 429 * Do not write to the nullpage but replace it by 430 * new and specific memory. 431 */ 432 if ((page = (UInt8_t *) malloc(TAB_SIZE)) == NULL) { 433 free(cs2uni); 434 freetbl(uni2cs); 435 return ((siconvt_t *)NULL); 436 } 437 for (j = 0; j < TAB_SIZE; j++) 438 page[j] = '\0'; 439 uni2cs[high] = page; 440 } 441 page[low] = i; /* Set up the reverse translation */ 442 } 443 444 if ((sip = (siconvt_t *)malloc(sizeof (siconvt_t))) == NULL) { 445 free(cs2uni); 446 freetbl(uni2cs); 447 return ((siconvt_t *)NULL); 448 } 449 450 sip->sic_name = strdup(name); 451 sip->sic_uni2cs = uni2cs; 452 sip->sic_cs2uni = cs2uni; 453 sip->sic_cd2uni = NULL; 454 sip->sic_uni2cd = NULL; 455 sip->sic_alt = NULL; 456 sip->sic_next = NULL; 457 sip->sic_refcnt = 1; 458 459 return (insert_sic(sip)); 460 } 461 462 463 #ifdef USE_ICONV 464 465 /* 466 * Create a new translation from iconv_open() 467 */ 468 LOCAL siconvt_t * 469 create_iconv_sic(name) 470 char *name; 471 { 472 siconvt_t *sip; 473 iconv_t to; 474 iconv_t from; 475 char *nm; 476 477 /*cerror("init_unls_iconv(%s)\n", name);*/ 478 if (name == NULL || *name == '\0') 479 return ((siconvt_t *)NULL); 480 481 nm = name; 482 if (strncmp("iconv:", name, 6) == 0) 483 nm = &name[6]; 484 485 if ((sip = (siconvt_t *)malloc(sizeof (siconvt_t))) 486 == NULL) { 487 return ((siconvt_t *)NULL); 488 } 489 if ((from = iconv_open("UCS-2BE", nm)) == (iconv_t)-1) { 490 free(sip); 491 return ((siconvt_t *)NULL); 492 } 493 if ((to = iconv_open(nm, "UCS-2BE")) == (iconv_t)-1) { 494 free(sip); 495 iconv_close(from); 496 return ((siconvt_t *)NULL); 497 } 498 499 sip->sic_name = strdup(name); 500 sip->sic_uni2cs = NULL; 501 sip->sic_cs2uni = NULL; 502 sip->sic_cd2uni = from; 503 sip->sic_uni2cd = to; 504 sip->sic_alt = NULL; 505 sip->sic_next = NULL; 506 sip->sic_refcnt = 1; 507 return (insert_sic(sip)); 508 } 509 510 /* 511 * As the iconv conversion is stateful, we need to create a new translation 512 * if we like to get the same translation again. 513 */ 514 LOCAL siconvt_t * 515 dup_iconv_sic(sip) 516 siconvt_t *sip; 517 { 518 siconvt_t *sp; 519 iconv_t to; 520 iconv_t from; 521 char *nm; 522 523 if ((sp = (siconvt_t *)malloc(sizeof (siconvt_t))) 524 == NULL) { 525 return ((siconvt_t *)NULL); 526 } 527 nm = sip->sic_name; 528 if (strncmp("iconv:", nm, 6) == 0) 529 nm = &nm[6]; 530 if ((from = iconv_open("UCS-2BE", nm)) == (iconv_t)-1) { 531 free(sp); 532 return ((siconvt_t *)NULL); 533 } 534 if ((to = iconv_open(nm, "UCS-2BE")) == (iconv_t)-1) { 535 free(sp); 536 iconv_close(from); 537 return ((siconvt_t *)NULL); 538 } 539 sp->sic_name = sip->sic_name; /* Allow to compare name pointers */ 540 sp->sic_uni2cs = NULL; 541 sp->sic_cs2uni = NULL; 542 sp->sic_cd2uni = from; 543 sp->sic_uni2cd = to; 544 sp->sic_alt = NULL; 545 sp->sic_next = NULL; 546 sp->sic_refcnt = 1; 547 sip->sic_alt = sp; 548 return (sp); 549 } 550 551 #endif /* USE_UNLS */ 552