1 /* @(#)sic_nls.c	1.18 14/01/15 Copyright 2007-2014 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)sic_nls.c	1.18 14/01/15 Copyright 2007-2014 J. Schilling";
6 #endif
7 /*
8  * This code reads translation files in the format used by
9  * the Unicode Organization (www.unicode.org).
10  *
11  * The current implementation is only useful to create translations
12  * from single byte character sets to unicode.
13  * We use this code on systems that do not provide the iconv() function.
14  *
15  * Copyright 2007-2014 J. Schilling
16  */
17 /*
18  * The contents of this file are subject to the terms of the
19  * Common Development and Distribution License, Version 1.0 only
20  * (the "License").  You may not use this file except in compliance
21  * with the License.
22  *
23  * See the file CDDL.Schily.txt in this distribution for details.
24  * A copy of the CDDL is also available via the Internet at
25  * http://www.opensource.org/licenses/cddl1.txt
26  *
27  * When distributing Covered Code, include this CDDL HEADER in each
28  * file and include the License file CDDL.Schily.txt from this distribution.
29  */
30 
31 #include <schily/stdio.h>
32 #include <schily/stdlib.h>
33 #include <schily/string.h>
34 #include <schily/libport.h>	/* For strdup() */
35 #include <schily/unistd.h>	/* For R_OK	*/
36 #include <schily/schily.h>
37 #include <schily/dirent.h>
38 #include <schily/siconv.h>
39 
/*
 * Number of entries in every translation table and table page:
 * one entry for each possible value of an 8 bit character.
 */
#define	TAB_SIZE	(UINT8_MAX+1)

/*
 * __CAN_TAB_SIZE__ stays defined only if the C preprocessor is expected to
 * be able to evaluate TAB_SIZE inside a #if directive. It is removed again
 * below for every environment that is known to fail with that computation.
 */
#define	__CAN_TAB_SIZE__

#ifndef	PROTOTYPES
#undef	__CAN_TAB_SIZE__
#endif
#if (!defined(__STDC__) || __STDC__ < 1) && \
	!defined(__SUNPRO_C) /* Sun Compilers are OK even with __STDC__ 0 */
/*
 * C-preprocessors from K&R compilers cannot do the computation for TAB_SIZE
 * that is needed by the test below.
 * We need to disable this test in case of a K&R compiler.
 */
#undef	__CAN_TAB_SIZE__
#endif
#ifdef	__GNUC__
#if	__GNUC__ < 2
#undef	__CAN_TAB_SIZE__
#endif
#if	__GNUC__ < 3 && __GNUC_MINOR__ < 95
#undef	__CAN_TAB_SIZE__
#endif
#endif
#if defined(VMS) && !defined(__GNUC__)
#undef	__CAN_TAB_SIZE__
#endif

/*
 * Compile time sanity check: the tables are indexed by 8 bit values, so
 * they need at least 256 entries. The non-preprocessor text below is a
 * deliberate syntax error that stops the compilation if the check fails.
 */
#ifdef	__CAN_TAB_SIZE__
#if	TAB_SIZE < 256
Error Table size too small
#endif
#endif
71 
/*
 * Shared all-zero page. create_sic() lets every slot of a reverse table
 * point to this page until a private page is needed for that slot and
 * freetbl() never passes it to free().
 */
LOCAL UInt8_t	nullpage[TAB_SIZE] = { 0 };

/*
 * Cached directory name of the translation files as computed by sic_base().
 * NULL as long as sic_base() was not called or did not find the directory.
 */
LOCAL char	*ins_base;

LOCAL	siconvt_t	*insert_sic		__PR((siconvt_t *sip));
LOCAL	int		remove_sic		__PR((siconvt_t *sip));
EXPORT	siconvt_t	*sic_open		__PR((char *name));
EXPORT	const char	*sic_base		__PR((void));
EXPORT	int		sic_close		__PR((siconvt_t *sip));
EXPORT	int		sic_list		__PR((FILE *f));
LOCAL	void		freetbl			__PR((UInt8_t **uni2cs));
LOCAL	FILE		*pfopen			__PR((char *name));
LOCAL	siconvt_t	*create_sic		__PR((char *name));
#ifdef	USE_ICONV
LOCAL	siconvt_t	*create_iconv_sic	__PR((char *name));
LOCAL	siconvt_t	*dup_iconv_sic		__PR((siconvt_t *sip));
#endif

/*
 * Global list for translation tables.
 * The entries are linked via sic_next, new entries are put in front
 * by insert_sic() and sic_open() searches this list by name.
 */
LOCAL siconvt_t	*glist = (siconvt_t *) NULL;
93 
94 /*
95  * Insert a table into the global list and allow to reuse it
96  */
97 LOCAL siconvt_t *
insert_sic(sip)98 insert_sic(sip)
99 	siconvt_t	*sip;
100 {
101 	siconvt_t	**sp = &glist;
102 
103 	if (sip == (siconvt_t *)NULL)		/* No table arg */
104 		return ((siconvt_t *)NULL);
105 	if (sip->sic_next)			/* Already in list */
106 		return (sip);
107 
108 	while (*sp) {
109 		if (sip == *sp) {		/* Already in list */
110 			return (sip);
111 		}
112 		sp = &(*sp)->sic_next;
113 	}
114 	sip->sic_next = glist;
115 	glist = sip;
116 	return (sip);
117 }
118 
119 /*
120  * Remove a table from the global list
121  */
122 LOCAL int
remove_sic(sip)123 remove_sic(sip)
124 	siconvt_t	*sip;
125 {
126 	siconvt_t	**sp = &glist;
127 
128 	while (*sp) {
129 #ifdef	USE_ICONV
130 		if (strcmp(sip->sic_name, (*sp)->sic_name) == 0) {
131 			siconvt_t	*sap = *sp;
132 
133 			if (sip == *sp) {
134 				*sp = sip->sic_next;
135 				return (0);
136 			}
137 			while (sap->sic_alt != NULL) {
138 				if (sap->sic_alt == sip) {
139 					sap->sic_alt = sip->sic_alt;
140 					sip->sic_name = NULL;	/* No free() */
141 					return (0);
142 				}
143 				sap = sap->sic_alt;
144 			}
145 		}
146 #endif
147 		if (sip == *sp) {
148 			*sp = sip->sic_next;
149 			return (0);
150 		}
151 		sp = &(*sp)->sic_next;
152 	}
153 	return (-1);
154 }
155 
156 /*
157  * Open a new translation
158  */
159 EXPORT siconvt_t *
sic_open(charset)160 sic_open(charset)
161 	char	*charset;
162 {
163 	siconvt_t	*sip = glist;
164 
165 	if (charset == NULL || *charset == '\0')
166 		return ((siconvt_t *)NULL);
167 
168 	while (sip) {
169 		if (strcmp(sip->sic_name, charset) == 0) {
170 #ifdef	USE_ICONV
171 			if (sip->sic_cd2uni != 0)
172 				return (dup_iconv_sic(sip));
173 #endif
174 			sip->sic_refcnt++;
175 			return (sip);
176 		}
177 		sip = sip->sic_next;
178 	}
179 	return (create_sic(charset));
180 }
181 
182 /*
183  * Open a new translation
184  */
185 EXPORT const char *
sic_base()186 sic_base()
187 {
188 	if (ins_base == NULL) {
189 		ins_base = searchfileinpath("lib/siconv/iso8859-1", R_OK,
190 					SIP_PLAIN_FILE, NULL);
191 		if (ins_base != NULL) {
192 			int	len = strlen(ins_base);
193 
194 			ins_base[len - 9] = '\0';
195 		}
196 	}
197 	return (ins_base);
198 }
199 
200 /*
201  * Close a translation
202  */
203 EXPORT int
sic_close(sip)204 sic_close(sip)
205 	siconvt_t	*sip;
206 {
207 	if (remove_sic(sip) < 0)
208 		return (-1);
209 
210 	if (--sip->sic_refcnt > 0)
211 		return (0);
212 
213 	if (sip->sic_name)
214 		free(sip->sic_name);
215 	if (sip->sic_uni2cs)
216 		freetbl(sip->sic_uni2cs);
217 	if (sip->sic_cs2uni)
218 		free(sip->sic_cs2uni);
219 #ifdef	USE_ICONV
220 	if (sip->sic_cd2uni)
221 		iconv_close(sip->sic_cd2uni);
222 	if (sip->sic_uni2cd)
223 		iconv_close(sip->sic_uni2cd);
224 #endif
225 
226 	return (0);
227 }
228 
229 /*
230  * List all possible translation files in the install directory.
231  */
232 EXPORT int
sic_list(f)233 sic_list(f)
234 	FILE	*f;
235 {
236 	char		path[1024];
237 	DIR		*d;
238 	struct dirent	*dp;
239 	int		i = 0;
240 
241 	if (ins_base == NULL)
242 		(void) sic_base();
243 
244 	if (ins_base != NULL)
245 		snprintf(path, sizeof (path), "%s", ins_base);
246 	else
247 		snprintf(path, sizeof (path), "%s/lib/siconv/", INS_BASE);
248 	if ((d = opendir(path)) == NULL)
249 		return (-1);
250 
251 	while ((dp = readdir(d)) != NULL) {
252 		if (dp->d_name[0] == '.') {
253 			if (dp->d_name[1] == '\0')
254 				continue;
255 			if (dp->d_name[1] == '.' && dp->d_name[2] == '\0')
256 				continue;
257 		}
258 		fprintf(f, "%s\n", dp->d_name);
259 		i++;
260 	}
261 	return (i);
262 }
263 
264 /*
265  * Free a reverse (uncode -> char) translation table
266  */
267 LOCAL void
freetbl(uni2cs)268 freetbl(uni2cs)
269 	UInt8_t	**uni2cs;
270 {
271 	int	i;
272 
273 	for (i = 0; i < TAB_SIZE; i++) {
274 		if (uni2cs[i] != nullpage) {
275 			free(uni2cs[i]);
276 		}
277 	}
278 	free(uni2cs);
279 }
280 
281 /*
282  * Search a tranlation table, first in the current directory and then
283  * in the install directory.
284  */
285 LOCAL FILE *
pfopen(name)286 pfopen(name)
287 	char	*name;
288 {
289 	char	path[1024];
290 	char	*p;
291 
292 	if (strchr(name, '/'))
293 		return (fopen(name, "r"));
294 
295 	if (ins_base == NULL)
296 		(void) sic_base();
297 
298 	p = ins_base;
299 	if (p != NULL) {
300 		snprintf(path, sizeof (path), "%s%s", p, name);
301 		return (fopen(path, "r"));
302 	}
303 	snprintf(path, sizeof (path), "%s/lib/siconv/%s", INS_BASE, name);
304 	return (fopen(path, "r"));
305 }
306 
307 
308 /*
309  * Create a new translation either from a file or from iconv_open()
310  */
311 LOCAL siconvt_t *
create_sic(name)312 create_sic(name)
313 	char	*name;
314 {
315 	UInt16_t	*cs2uni  = NULL;
316 	UInt8_t		**uni2cs = NULL;
317 	siconvt_t	*sip;
318 	char		line[1024];
319 	FILE		*f;
320 	unsigned	ch;
321 	unsigned	uni;
322 	int		i;
323 	int		numtrans = 0;
324 
325 	if (name == NULL || *name == '\0')
326 		return ((siconvt_t *)NULL);
327 
328 #ifdef	USE_ICONV
329 	/*
330 	 * Explicitly search for an iconv based translation
331 	 */
332 	if (strncmp("iconv:", name, 6) == 0) {
333 		return (create_iconv_sic(name));
334 	}
335 #else
336 	if (strncmp("iconv:", name, 6) == 0) {
337 		return ((siconvt_t *)NULL);
338 	}
339 #endif
340 
341 	if ((f = pfopen(name)) == (FILE *)NULL) {
342 		if (strcmp(name, "default") == 0) {
343 			if ((cs2uni = (UInt16_t *)
344 			    malloc(sizeof (UInt16_t) * TAB_SIZE)) == NULL) {
345 				return ((siconvt_t *)NULL);
346 			}
347 			/*
348 			 * Set up a 1:1 translation table like ISO-8859-1
349 			 */
350 			for (i = 0; i < TAB_SIZE; i++)
351 				cs2uni[i] = i;
352 			goto do_reverse;
353 		}
354 #ifdef	USE_ICONV
355 		return (create_iconv_sic(name));
356 #else
357 		return ((siconvt_t *)NULL);
358 #endif
359 	}
360 
361 	if ((cs2uni = (UInt16_t *)
362 			malloc(sizeof (UInt16_t) * TAB_SIZE)) == NULL) {
363 		fclose(f);
364 		return ((siconvt_t *)NULL);
365 	}
366 
367 	/*
368 	 * Set up mapping base.
369 	 * Always map the control characters 0x00 .. 0x1F
370 	 */
371 	for (i = 0; i < 32; i++)
372 		cs2uni[i] = i;
373 
374 	for (i = 32; i < TAB_SIZE; i++)
375 		cs2uni[i] = '\0'; /* nul marks an illegal character */
376 
377 	cs2uni[0x7f] = 0x7F;	/* Always map DELETE character 0x7F */
378 
379 	while (fgets(line, sizeof (line), f) != NULL) {
380 		char	*p;
381 
382 		if ((p = strchr(line, '#')) != NULL)
383 			*p = '\0';
384 
385 		if (sscanf(line, "%x%x", &ch, &uni) == 2) {
386 			/*
387 			 * Only accept exactly two values in the right range.
388 			 */
389 			if (ch > 0xFF || uni > 0xFFFF)
390 				continue;
391 
392 			cs2uni[ch] = uni; /* Set up unicode translation */
393 			numtrans++;
394 		}
395 	}
396 	fclose(f);
397 
398 	if (numtrans == 0) {		/* No valid translations found */
399 		free(cs2uni);
400 		return ((siconvt_t *)NULL);
401 	}
402 
403 do_reverse:
404 	if ((uni2cs = (UInt8_t **)
405 			malloc(sizeof (unsigned char *) * TAB_SIZE)) == NULL) {
406 		free(cs2uni);
407 		return ((siconvt_t *)NULL);
408 	}
409 	for (i = 0; i < TAB_SIZE; i++)	/* Map all pages to the nullpage */
410 		uni2cs[i] = nullpage;
411 
412 	/*
413 	 * Create a reversed table from the forward table read from the file.
414 	 */
415 	for (i = 0; i < TAB_SIZE; i++) {
416 		UInt8_t	high;
417 		UInt8_t	low;
418 		UInt8_t	*page;
419 
420 		uni = cs2uni[i];
421 		high = (uni >> 8) & 0xFF;
422 		low = uni & 0xFF;
423 		page = uni2cs[high];
424 
425 		if (page == nullpage) {
426 			int	j;
427 
428 			/*
429 			 * Do not write to the nullpage but replace it by
430 			 * new and specific memory.
431 			 */
432 			if ((page = (UInt8_t *) malloc(TAB_SIZE)) == NULL) {
433 				free(cs2uni);
434 				freetbl(uni2cs);
435 				return ((siconvt_t *)NULL);
436 			}
437 			for (j = 0; j < TAB_SIZE; j++)
438 				page[j] = '\0';
439 			uni2cs[high] = page;
440 		}
441 		page[low] = i;		/* Set up the reverse translation */
442 	}
443 
444 	if ((sip = (siconvt_t *)malloc(sizeof (siconvt_t))) == NULL) {
445 		free(cs2uni);
446 		freetbl(uni2cs);
447 		return ((siconvt_t *)NULL);
448 	}
449 
450 	sip->sic_name = strdup(name);
451 	sip->sic_uni2cs = uni2cs;
452 	sip->sic_cs2uni = cs2uni;
453 	sip->sic_cd2uni = NULL;
454 	sip->sic_uni2cd = NULL;
455 	sip->sic_alt    = NULL;
456 	sip->sic_next   = NULL;
457 	sip->sic_refcnt = 1;
458 
459 	return (insert_sic(sip));
460 }
461 
462 
463 #ifdef	USE_ICONV
464 
465 /*
466  * Create a new translation from iconv_open()
467  */
468 LOCAL siconvt_t *
create_iconv_sic(name)469 create_iconv_sic(name)
470 	char	*name;
471 {
472 	siconvt_t	*sip;
473 	iconv_t		to;
474 	iconv_t		from;
475 	char		*nm;
476 
477 /*cerror("init_unls_iconv(%s)\n", name);*/
478 	if (name == NULL || *name == '\0')
479 		return ((siconvt_t *)NULL);
480 
481 	nm = name;
482 	if (strncmp("iconv:", name, 6) == 0)
483 		nm = &name[6];
484 
485 	if ((sip = (siconvt_t *)malloc(sizeof (siconvt_t)))
486 							== NULL) {
487 		return ((siconvt_t *)NULL);
488 	}
489 	if ((from = iconv_open("UCS-2BE", nm)) == (iconv_t)-1) {
490 		free(sip);
491 		return ((siconvt_t *)NULL);
492 	}
493 	if ((to = iconv_open(nm, "UCS-2BE")) == (iconv_t)-1) {
494 		free(sip);
495 		iconv_close(from);
496 		return ((siconvt_t *)NULL);
497 	}
498 
499 	sip->sic_name = strdup(name);
500 	sip->sic_uni2cs = NULL;
501 	sip->sic_cs2uni = NULL;
502 	sip->sic_cd2uni = from;
503 	sip->sic_uni2cd = to;
504 	sip->sic_alt    = NULL;
505 	sip->sic_next   = NULL;
506 	sip->sic_refcnt = 1;
507 	return (insert_sic(sip));
508 }
509 
510 /*
511  * As the iconv conversion is stateful, we need to create a new translation
512  * if we like to get the same translation again.
513  */
514 LOCAL siconvt_t *
dup_iconv_sic(sip)515 dup_iconv_sic(sip)
516 	siconvt_t	*sip;
517 {
518 	siconvt_t	*sp;
519 	iconv_t		to;
520 	iconv_t		from;
521 	char		*nm;
522 
523 	if ((sp = (siconvt_t *)malloc(sizeof (siconvt_t)))
524 							== NULL) {
525 		return ((siconvt_t *)NULL);
526 	}
527 	nm = sip->sic_name;
528 	if (strncmp("iconv:", nm, 6) == 0)
529 		nm = &nm[6];
530 	if ((from = iconv_open("UCS-2BE", nm)) == (iconv_t)-1) {
531 		free(sp);
532 		return ((siconvt_t *)NULL);
533 	}
534 	if ((to = iconv_open(nm, "UCS-2BE")) == (iconv_t)-1) {
535 		free(sp);
536 		iconv_close(from);
537 		return ((siconvt_t *)NULL);
538 	}
539 	sp->sic_name = sip->sic_name;	/* Allow to compare name pointers */
540 	sp->sic_uni2cs = NULL;
541 	sp->sic_cs2uni = NULL;
542 	sp->sic_cd2uni = from;
543 	sp->sic_uni2cd = to;
544 	sp->sic_alt    = NULL;
545 	sp->sic_next   = NULL;
546 	sp->sic_refcnt = 1;
547 	sip->sic_alt = sp;
548 	return (sp);
549 }
550 
#endif	/* USE_ICONV */
552