1 /* @(#)sic_nls.c 1.18 14/01/15 Copyright 2007-2014 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)sic_nls.c 1.18 14/01/15 Copyright 2007-2014 J. Schilling";
6 #endif
7 /*
8 * This code reads translation files in the format used by
9 * the Unicode Organization (www.unicode.org).
10 *
11 * The current implementation is only useful to create translations
12 * from single byte character sets to unicode.
13 * We use this code on systems that do not provide the iconv() function.
14 *
15 * Copyright 2007-2014 J. Schilling
16 */
17 /*
18 * The contents of this file are subject to the terms of the
19 * Common Development and Distribution License, Version 1.0 only
20 * (the "License"). You may not use this file except in compliance
21 * with the License.
22 *
23 * See the file CDDL.Schily.txt in this distribution for details.
24 * A copy of the CDDL is also available via the Internet at
25 * http://www.opensource.org/licenses/cddl1.txt
26 *
27 * When distributing Covered Code, include this CDDL HEADER in each
28 * file and include the License file CDDL.Schily.txt from this distribution.
29 */
30
31 #include <schily/stdio.h>
32 #include <schily/stdlib.h>
33 #include <schily/string.h>
34 #include <schily/libport.h> /* For strdup() */
35 #include <schily/unistd.h> /* For R_OK */
36 #include <schily/schily.h>
37 #include <schily/dirent.h>
38 #include <schily/siconv.h>
39
/*
 * TAB_SIZE is the number of entries in every table used in this file:
 * the forward table, the array of reverse pages and each reverse page.
 * It must cover all values of an 8 bit character.
 *
 * __CAN_TAB_SIZE__ stays defined only if the preprocessor is expected to be
 * able to evaluate the TAB_SIZE sanity check at the end of this section.
 */
#define TAB_SIZE (UINT8_MAX+1)
#define __CAN_TAB_SIZE__

/*
 * No prototype support: do not try the check.
 */
#ifndef PROTOTYPES
#undef __CAN_TAB_SIZE__
#endif
#if (!defined(__STDC__) || __STDC__ < 1) && \
!defined(__SUNPRO_C) /* Sun Compilers are OK even with __STDC__ 0 */
/*
 * C-preprocessors from K&R compilers cannot do the computation for TAB_SIZE
 * in the check below. We need to disable this test in case of a K&R compiler.
 */
#undef __CAN_TAB_SIZE__
#endif
/*
 * Old GNU compilers: the check is disabled for a major version below 2 and
 * for a major version below 3 combined with a minor version below 95.
 */
#ifdef __GNUC__
#if __GNUC__ < 2
#undef __CAN_TAB_SIZE__
#endif
#if __GNUC__ < 3 && __GNUC_MINOR__ < 95
#undef __CAN_TAB_SIZE__
#endif
#endif
/*
 * VMS with a non GNU compiler: the check is disabled as well.
 */
#if defined(VMS) && !defined(__GNUC__)
#undef __CAN_TAB_SIZE__
#endif

/*
 * The check itself: the line inside the #if is not valid C and is
 * presumably meant to abort the compilation if TAB_SIZE is below 256.
 */
#ifdef __CAN_TAB_SIZE__
#if TAB_SIZE < 256
Error Table size too small
#endif
#endif
71
/*
 * nullpage is the shared all zero page that every slot of a new reverse
 * table points to until a private page is allocated for that slot.
 * It is never passed to free(), see freetbl().
 *
 * ins_base caches the directory name computed by sic_base().
 */
LOCAL UInt8_t nullpage[TAB_SIZE] = { 0 };
LOCAL char *ins_base;

/*
 * Prototypes for all functions defined in this file.
 * The iconv based functions only exist if USE_ICONV is defined.
 */
LOCAL siconvt_t *insert_sic __PR((siconvt_t *sip));
LOCAL int remove_sic __PR((siconvt_t *sip));
EXPORT siconvt_t *sic_open __PR((char *name));
EXPORT const char *sic_base __PR((void));
EXPORT int sic_close __PR((siconvt_t *sip));
EXPORT int sic_list __PR((FILE *f));
LOCAL void freetbl __PR((UInt8_t **uni2cs));
LOCAL FILE *pfopen __PR((char *name));
LOCAL siconvt_t *create_sic __PR((char *name));
#ifdef USE_ICONV
LOCAL siconvt_t *create_iconv_sic __PR((char *name));
LOCAL siconvt_t *dup_iconv_sic __PR((siconvt_t *sip));
#endif

/*
 * Global list of translation tables, linked via sic_next.
 * insert_sic() adds new entries at the head, sic_open() searches the
 * list by name and remove_sic() unlinks entries.
 */
LOCAL siconvt_t *glist = (siconvt_t *) NULL;
93
94 /*
95 * Insert a table into the global list and allow to reuse it
96 */
97 LOCAL siconvt_t *
insert_sic(sip)98 insert_sic(sip)
99 siconvt_t *sip;
100 {
101 siconvt_t **sp = &glist;
102
103 if (sip == (siconvt_t *)NULL) /* No table arg */
104 return ((siconvt_t *)NULL);
105 if (sip->sic_next) /* Already in list */
106 return (sip);
107
108 while (*sp) {
109 if (sip == *sp) { /* Already in list */
110 return (sip);
111 }
112 sp = &(*sp)->sic_next;
113 }
114 sip->sic_next = glist;
115 glist = sip;
116 return (sip);
117 }
118
119 /*
120 * Remove a table from the global list
121 */
122 LOCAL int
remove_sic(sip)123 remove_sic(sip)
124 siconvt_t *sip;
125 {
126 siconvt_t **sp = &glist;
127
128 while (*sp) {
129 #ifdef USE_ICONV
130 if (strcmp(sip->sic_name, (*sp)->sic_name) == 0) {
131 siconvt_t *sap = *sp;
132
133 if (sip == *sp) {
134 *sp = sip->sic_next;
135 return (0);
136 }
137 while (sap->sic_alt != NULL) {
138 if (sap->sic_alt == sip) {
139 sap->sic_alt = sip->sic_alt;
140 sip->sic_name = NULL; /* No free() */
141 return (0);
142 }
143 sap = sap->sic_alt;
144 }
145 }
146 #endif
147 if (sip == *sp) {
148 *sp = sip->sic_next;
149 return (0);
150 }
151 sp = &(*sp)->sic_next;
152 }
153 return (-1);
154 }
155
156 /*
157 * Open a new translation
158 */
159 EXPORT siconvt_t *
sic_open(charset)160 sic_open(charset)
161 char *charset;
162 {
163 siconvt_t *sip = glist;
164
165 if (charset == NULL || *charset == '\0')
166 return ((siconvt_t *)NULL);
167
168 while (sip) {
169 if (strcmp(sip->sic_name, charset) == 0) {
170 #ifdef USE_ICONV
171 if (sip->sic_cd2uni != 0)
172 return (dup_iconv_sic(sip));
173 #endif
174 sip->sic_refcnt++;
175 return (sip);
176 }
177 sip = sip->sic_next;
178 }
179 return (create_sic(charset));
180 }
181
182 /*
183 * Open a new translation
184 */
185 EXPORT const char *
sic_base()186 sic_base()
187 {
188 if (ins_base == NULL) {
189 ins_base = searchfileinpath("lib/siconv/iso8859-1", R_OK,
190 SIP_PLAIN_FILE, NULL);
191 if (ins_base != NULL) {
192 int len = strlen(ins_base);
193
194 ins_base[len - 9] = '\0';
195 }
196 }
197 return (ins_base);
198 }
199
200 /*
201 * Close a translation
202 */
203 EXPORT int
sic_close(sip)204 sic_close(sip)
205 siconvt_t *sip;
206 {
207 if (remove_sic(sip) < 0)
208 return (-1);
209
210 if (--sip->sic_refcnt > 0)
211 return (0);
212
213 if (sip->sic_name)
214 free(sip->sic_name);
215 if (sip->sic_uni2cs)
216 freetbl(sip->sic_uni2cs);
217 if (sip->sic_cs2uni)
218 free(sip->sic_cs2uni);
219 #ifdef USE_ICONV
220 if (sip->sic_cd2uni)
221 iconv_close(sip->sic_cd2uni);
222 if (sip->sic_uni2cd)
223 iconv_close(sip->sic_uni2cd);
224 #endif
225
226 return (0);
227 }
228
229 /*
230 * List all possible translation files in the install directory.
231 */
232 EXPORT int
sic_list(f)233 sic_list(f)
234 FILE *f;
235 {
236 char path[1024];
237 DIR *d;
238 struct dirent *dp;
239 int i = 0;
240
241 if (ins_base == NULL)
242 (void) sic_base();
243
244 if (ins_base != NULL)
245 snprintf(path, sizeof (path), "%s", ins_base);
246 else
247 snprintf(path, sizeof (path), "%s/lib/siconv/", INS_BASE);
248 if ((d = opendir(path)) == NULL)
249 return (-1);
250
251 while ((dp = readdir(d)) != NULL) {
252 if (dp->d_name[0] == '.') {
253 if (dp->d_name[1] == '\0')
254 continue;
255 if (dp->d_name[1] == '.' && dp->d_name[2] == '\0')
256 continue;
257 }
258 fprintf(f, "%s\n", dp->d_name);
259 i++;
260 }
261 return (i);
262 }
263
264 /*
265 * Free a reverse (uncode -> char) translation table
266 */
267 LOCAL void
freetbl(uni2cs)268 freetbl(uni2cs)
269 UInt8_t **uni2cs;
270 {
271 int i;
272
273 for (i = 0; i < TAB_SIZE; i++) {
274 if (uni2cs[i] != nullpage) {
275 free(uni2cs[i]);
276 }
277 }
278 free(uni2cs);
279 }
280
281 /*
282 * Search a tranlation table, first in the current directory and then
283 * in the install directory.
284 */
285 LOCAL FILE *
pfopen(name)286 pfopen(name)
287 char *name;
288 {
289 char path[1024];
290 char *p;
291
292 if (strchr(name, '/'))
293 return (fopen(name, "r"));
294
295 if (ins_base == NULL)
296 (void) sic_base();
297
298 p = ins_base;
299 if (p != NULL) {
300 snprintf(path, sizeof (path), "%s%s", p, name);
301 return (fopen(path, "r"));
302 }
303 snprintf(path, sizeof (path), "%s/lib/siconv/%s", INS_BASE, name);
304 return (fopen(path, "r"));
305 }
306
307
308 /*
309 * Create a new translation either from a file or from iconv_open()
310 */
311 LOCAL siconvt_t *
create_sic(name)312 create_sic(name)
313 char *name;
314 {
315 UInt16_t *cs2uni = NULL;
316 UInt8_t **uni2cs = NULL;
317 siconvt_t *sip;
318 char line[1024];
319 FILE *f;
320 unsigned ch;
321 unsigned uni;
322 int i;
323 int numtrans = 0;
324
325 if (name == NULL || *name == '\0')
326 return ((siconvt_t *)NULL);
327
328 #ifdef USE_ICONV
329 /*
330 * Explicitly search for an iconv based translation
331 */
332 if (strncmp("iconv:", name, 6) == 0) {
333 return (create_iconv_sic(name));
334 }
335 #else
336 if (strncmp("iconv:", name, 6) == 0) {
337 return ((siconvt_t *)NULL);
338 }
339 #endif
340
341 if ((f = pfopen(name)) == (FILE *)NULL) {
342 if (strcmp(name, "default") == 0) {
343 if ((cs2uni = (UInt16_t *)
344 malloc(sizeof (UInt16_t) * TAB_SIZE)) == NULL) {
345 return ((siconvt_t *)NULL);
346 }
347 /*
348 * Set up a 1:1 translation table like ISO-8859-1
349 */
350 for (i = 0; i < TAB_SIZE; i++)
351 cs2uni[i] = i;
352 goto do_reverse;
353 }
354 #ifdef USE_ICONV
355 return (create_iconv_sic(name));
356 #else
357 return ((siconvt_t *)NULL);
358 #endif
359 }
360
361 if ((cs2uni = (UInt16_t *)
362 malloc(sizeof (UInt16_t) * TAB_SIZE)) == NULL) {
363 fclose(f);
364 return ((siconvt_t *)NULL);
365 }
366
367 /*
368 * Set up mapping base.
369 * Always map the control characters 0x00 .. 0x1F
370 */
371 for (i = 0; i < 32; i++)
372 cs2uni[i] = i;
373
374 for (i = 32; i < TAB_SIZE; i++)
375 cs2uni[i] = '\0'; /* nul marks an illegal character */
376
377 cs2uni[0x7f] = 0x7F; /* Always map DELETE character 0x7F */
378
379 while (fgets(line, sizeof (line), f) != NULL) {
380 char *p;
381
382 if ((p = strchr(line, '#')) != NULL)
383 *p = '\0';
384
385 if (sscanf(line, "%x%x", &ch, &uni) == 2) {
386 /*
387 * Only accept exactly two values in the right range.
388 */
389 if (ch > 0xFF || uni > 0xFFFF)
390 continue;
391
392 cs2uni[ch] = uni; /* Set up unicode translation */
393 numtrans++;
394 }
395 }
396 fclose(f);
397
398 if (numtrans == 0) { /* No valid translations found */
399 free(cs2uni);
400 return ((siconvt_t *)NULL);
401 }
402
403 do_reverse:
404 if ((uni2cs = (UInt8_t **)
405 malloc(sizeof (unsigned char *) * TAB_SIZE)) == NULL) {
406 free(cs2uni);
407 return ((siconvt_t *)NULL);
408 }
409 for (i = 0; i < TAB_SIZE; i++) /* Map all pages to the nullpage */
410 uni2cs[i] = nullpage;
411
412 /*
413 * Create a reversed table from the forward table read from the file.
414 */
415 for (i = 0; i < TAB_SIZE; i++) {
416 UInt8_t high;
417 UInt8_t low;
418 UInt8_t *page;
419
420 uni = cs2uni[i];
421 high = (uni >> 8) & 0xFF;
422 low = uni & 0xFF;
423 page = uni2cs[high];
424
425 if (page == nullpage) {
426 int j;
427
428 /*
429 * Do not write to the nullpage but replace it by
430 * new and specific memory.
431 */
432 if ((page = (UInt8_t *) malloc(TAB_SIZE)) == NULL) {
433 free(cs2uni);
434 freetbl(uni2cs);
435 return ((siconvt_t *)NULL);
436 }
437 for (j = 0; j < TAB_SIZE; j++)
438 page[j] = '\0';
439 uni2cs[high] = page;
440 }
441 page[low] = i; /* Set up the reverse translation */
442 }
443
444 if ((sip = (siconvt_t *)malloc(sizeof (siconvt_t))) == NULL) {
445 free(cs2uni);
446 freetbl(uni2cs);
447 return ((siconvt_t *)NULL);
448 }
449
450 sip->sic_name = strdup(name);
451 sip->sic_uni2cs = uni2cs;
452 sip->sic_cs2uni = cs2uni;
453 sip->sic_cd2uni = NULL;
454 sip->sic_uni2cd = NULL;
455 sip->sic_alt = NULL;
456 sip->sic_next = NULL;
457 sip->sic_refcnt = 1;
458
459 return (insert_sic(sip));
460 }
461
462
463 #ifdef USE_ICONV
464
465 /*
466 * Create a new translation from iconv_open()
467 */
468 LOCAL siconvt_t *
create_iconv_sic(name)469 create_iconv_sic(name)
470 char *name;
471 {
472 siconvt_t *sip;
473 iconv_t to;
474 iconv_t from;
475 char *nm;
476
477 /*cerror("init_unls_iconv(%s)\n", name);*/
478 if (name == NULL || *name == '\0')
479 return ((siconvt_t *)NULL);
480
481 nm = name;
482 if (strncmp("iconv:", name, 6) == 0)
483 nm = &name[6];
484
485 if ((sip = (siconvt_t *)malloc(sizeof (siconvt_t)))
486 == NULL) {
487 return ((siconvt_t *)NULL);
488 }
489 if ((from = iconv_open("UCS-2BE", nm)) == (iconv_t)-1) {
490 free(sip);
491 return ((siconvt_t *)NULL);
492 }
493 if ((to = iconv_open(nm, "UCS-2BE")) == (iconv_t)-1) {
494 free(sip);
495 iconv_close(from);
496 return ((siconvt_t *)NULL);
497 }
498
499 sip->sic_name = strdup(name);
500 sip->sic_uni2cs = NULL;
501 sip->sic_cs2uni = NULL;
502 sip->sic_cd2uni = from;
503 sip->sic_uni2cd = to;
504 sip->sic_alt = NULL;
505 sip->sic_next = NULL;
506 sip->sic_refcnt = 1;
507 return (insert_sic(sip));
508 }
509
510 /*
511 * As the iconv conversion is stateful, we need to create a new translation
512 * if we like to get the same translation again.
513 */
514 LOCAL siconvt_t *
dup_iconv_sic(sip)515 dup_iconv_sic(sip)
516 siconvt_t *sip;
517 {
518 siconvt_t *sp;
519 iconv_t to;
520 iconv_t from;
521 char *nm;
522
523 if ((sp = (siconvt_t *)malloc(sizeof (siconvt_t)))
524 == NULL) {
525 return ((siconvt_t *)NULL);
526 }
527 nm = sip->sic_name;
528 if (strncmp("iconv:", nm, 6) == 0)
529 nm = &nm[6];
530 if ((from = iconv_open("UCS-2BE", nm)) == (iconv_t)-1) {
531 free(sp);
532 return ((siconvt_t *)NULL);
533 }
534 if ((to = iconv_open(nm, "UCS-2BE")) == (iconv_t)-1) {
535 free(sp);
536 iconv_close(from);
537 return ((siconvt_t *)NULL);
538 }
539 sp->sic_name = sip->sic_name; /* Allow to compare name pointers */
540 sp->sic_uni2cs = NULL;
541 sp->sic_cs2uni = NULL;
542 sp->sic_cd2uni = from;
543 sp->sic_uni2cd = to;
544 sp->sic_alt = NULL;
545 sp->sic_next = NULL;
546 sp->sic_refcnt = 1;
547 sip->sic_alt = sp;
548 return (sp);
549 }
550
#endif	/* USE_ICONV */
552