/*
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 * Copyright 2015 John Marino <draco@marino.st>
 *
 * This source code is derived from the illumos localedef command, and
 * provided under BSD-style license terms by Nexenta Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
30cd1c6085SJohn Marino
/*
 * CHARMAP file handling for localedef.
 */
34cd1c6085SJohn Marino
35*cacd7ea7SJohn Marino #include <sys/types.h>
36*cacd7ea7SJohn Marino #include <sys/tree.h>
37*cacd7ea7SJohn Marino
38cd1c6085SJohn Marino #include <stdio.h>
39cd1c6085SJohn Marino #include <stdlib.h>
40cd1c6085SJohn Marino #include <string.h>
41cd1c6085SJohn Marino #include <limits.h>
42cd1c6085SJohn Marino #include <unistd.h>
43cd1c6085SJohn Marino #include <stddef.h>
44cd1c6085SJohn Marino #include "localedef.h"
45cd1c6085SJohn Marino #include "parser.h"
46cd1c6085SJohn Marino
47cd1c6085SJohn Marino
48cd1c6085SJohn Marino typedef struct charmap {
49cd1c6085SJohn Marino const char *name;
50cd1c6085SJohn Marino wchar_t wc;
51*cacd7ea7SJohn Marino RB_ENTRY(charmap) rb_sym;
52*cacd7ea7SJohn Marino RB_ENTRY(charmap) rb_wc;
53cd1c6085SJohn Marino } charmap_t;
54cd1c6085SJohn Marino
55*cacd7ea7SJohn Marino static int cmap_compare_sym(const void *n1, const void *n2);
56*cacd7ea7SJohn Marino static int cmap_compare_wc(const void *n1, const void *n2);
57*cacd7ea7SJohn Marino
58*cacd7ea7SJohn Marino static RB_HEAD(cmap_sym, charmap) cmap_sym;
59*cacd7ea7SJohn Marino static RB_HEAD(cmap_wc, charmap) cmap_wc;
60*cacd7ea7SJohn Marino
61*cacd7ea7SJohn Marino RB_PROTOTYPE_STATIC(cmap_sym, charmap, rb_sym, cmap_compare_sym);
62*cacd7ea7SJohn Marino RB_PROTOTYPE_STATIC(cmap_wc, charmap, rb_wc, cmap_compare_wc);
63*cacd7ea7SJohn Marino
64*cacd7ea7SJohn Marino RB_GENERATE(cmap_sym, charmap, rb_sym, cmap_compare_sym);
65*cacd7ea7SJohn Marino RB_GENERATE(cmap_wc, charmap, rb_wc, cmap_compare_wc);
66cd1c6085SJohn Marino
/*
 * Array of POSIX specific portable characters.
 *
 * Each entry pairs a symbolic character name with the character it
 * denotes; several characters are listed under more than one name.
 * The table is terminated by an entry whose name is NULL.
 */

static const struct {
	const char *name;
	int ch;
} portable_chars[] = {
	{ "NUL", '\0' },
	{ "alert", '\a' },
	{ "backspace", '\b' },
	{ "tab", '\t' },
	{ "carriage-return", '\r' },
	{ "newline", '\n' },
	{ "vertical-tab", '\v' },
	{ "form-feed", '\f' },
	{ "space", ' ' },
	{ "exclamation-mark", '!' },
	{ "quotation-mark", '"' },
	{ "number-sign", '#' },
	{ "dollar-sign", '$' },
	{ "percent-sign", '%' },
	{ "ampersand", '&' },
	{ "apostrophe", '\'' },
	{ "left-parenthesis", '(' },
	{ "right-parenthesis", ')' },
	{ "asterisk", '*' },
	{ "plus-sign", '+' },
	{ "comma", ','},
	{ "hyphen-minus", '-' },
	{ "hyphen", '-' },
	{ "full-stop", '.' },
	{ "period", '.' },
	{ "slash", '/' },
	{ "solidus", '/' },
	{ "zero", '0' },
	{ "one", '1' },
	{ "two", '2' },
	{ "three", '3' },
	{ "four", '4' },
	{ "five", '5' },
	{ "six", '6' },
	{ "seven", '7' },
	{ "eight", '8' },
	{ "nine", '9' },
	{ "colon", ':' },
	{ "semicolon", ';' },
	{ "less-than-sign", '<' },
	{ "equals-sign", '=' },
	{ "greater-than-sign", '>' },
	{ "question-mark", '?' },
	{ "commercial-at", '@' },
	{ "left-square-bracket", '[' },
	{ "backslash", '\\' },
	{ "reverse-solidus", '\\' },
	{ "right-square-bracket", ']' },
	{ "circumflex", '^' },
	{ "circumflex-accent", '^' },
	{ "low-line", '_' },
	{ "underscore", '_' },
	{ "grave-accent", '`' },
	{ "left-brace", '{' },
	{ "left-curly-bracket", '{' },
	{ "vertical-line", '|' },
	{ "right-brace", '}' },
	{ "right-curly-bracket", '}' },
	{ "tilde", '~' },
	{ "A", 'A' },
	{ "B", 'B' },
	{ "C", 'C' },
	{ "D", 'D' },
	{ "E", 'E' },
	{ "F", 'F' },
	{ "G", 'G' },
	{ "H", 'H' },
	{ "I", 'I' },
	{ "J", 'J' },
	{ "K", 'K' },
	{ "L", 'L' },
	{ "M", 'M' },
	{ "N", 'N' },
	{ "O", 'O' },
	{ "P", 'P' },
	{ "Q", 'Q' },
	{ "R", 'R' },
	{ "S", 'S' },
	{ "T", 'T' },
	{ "U", 'U' },
	{ "V", 'V' },
	{ "W", 'W' },
	{ "X", 'X' },
	{ "Y", 'Y' },
	{ "Z", 'Z' },
	{ "a", 'a' },
	{ "b", 'b' },
	{ "c", 'c' },
	{ "d", 'd' },
	{ "e", 'e' },
	{ "f", 'f' },
	{ "g", 'g' },
	{ "h", 'h' },
	{ "i", 'i' },
	{ "j", 'j' },
	{ "k", 'k' },
	{ "l", 'l' },
	{ "m", 'm' },
	{ "n", 'n' },
	{ "o", 'o' },
	{ "p", 'p' },
	{ "q", 'q' },
	{ "r", 'r' },
	{ "s", 's' },
	{ "t", 't' },
	{ "u", 'u' },
	{ "v", 'v' },
	{ "w", 'w' },
	{ "x", 'x' },
	{ "y", 'y' },
	{ "z", 'z' },
	{ NULL, 0 }
};
188cd1c6085SJohn Marino
189cd1c6085SJohn Marino static int
cmap_compare_sym(const void * n1,const void * n2)190cd1c6085SJohn Marino cmap_compare_sym(const void *n1, const void *n2)
191cd1c6085SJohn Marino {
192cd1c6085SJohn Marino const charmap_t *c1 = n1;
193cd1c6085SJohn Marino const charmap_t *c2 = n2;
194cd1c6085SJohn Marino int rv;
195cd1c6085SJohn Marino
196cd1c6085SJohn Marino rv = strcmp(c1->name, c2->name);
197cd1c6085SJohn Marino return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0);
198cd1c6085SJohn Marino }
199cd1c6085SJohn Marino
200cd1c6085SJohn Marino static int
cmap_compare_wc(const void * n1,const void * n2)201cd1c6085SJohn Marino cmap_compare_wc(const void *n1, const void *n2)
202cd1c6085SJohn Marino {
203cd1c6085SJohn Marino const charmap_t *c1 = n1;
204cd1c6085SJohn Marino const charmap_t *c2 = n2;
205cd1c6085SJohn Marino
206cd1c6085SJohn Marino return ((c1->wc < c2->wc) ? -1 : (c1->wc > c2->wc) ? 1 : 0);
207cd1c6085SJohn Marino }
208cd1c6085SJohn Marino
209cd1c6085SJohn Marino void
init_charmap(void)210cd1c6085SJohn Marino init_charmap(void)
211cd1c6085SJohn Marino {
212*cacd7ea7SJohn Marino RB_INIT(&cmap_sym);
213cd1c6085SJohn Marino
214*cacd7ea7SJohn Marino RB_INIT(&cmap_wc);
215cd1c6085SJohn Marino }
216cd1c6085SJohn Marino
217cd1c6085SJohn Marino static void
add_charmap_impl(const char * sym,wchar_t wc,int nodups)218*cacd7ea7SJohn Marino add_charmap_impl(const char *sym, wchar_t wc, int nodups)
219cd1c6085SJohn Marino {
220cd1c6085SJohn Marino charmap_t srch;
221cd1c6085SJohn Marino charmap_t *n = NULL;
222cd1c6085SJohn Marino
223cd1c6085SJohn Marino srch.wc = wc;
224cd1c6085SJohn Marino srch.name = sym;
225cd1c6085SJohn Marino
226cd1c6085SJohn Marino /*
227cd1c6085SJohn Marino * also possibly insert the wide mapping, although note that there
228cd1c6085SJohn Marino * can only be one of these per wide character code.
229cd1c6085SJohn Marino */
230*cacd7ea7SJohn Marino if ((wc != (wchar_t)-1) && ((RB_FIND(cmap_wc, &cmap_wc, &srch)) == NULL)) {
231cd1c6085SJohn Marino if ((n = calloc(1, sizeof (*n))) == NULL) {
232cd1c6085SJohn Marino errf("out of memory");
233cd1c6085SJohn Marino return;
234cd1c6085SJohn Marino }
235cd1c6085SJohn Marino n->wc = wc;
236*cacd7ea7SJohn Marino RB_INSERT(cmap_wc, &cmap_wc, n);
237cd1c6085SJohn Marino }
238cd1c6085SJohn Marino
239cd1c6085SJohn Marino if (sym) {
240*cacd7ea7SJohn Marino if (RB_FIND(cmap_sym, &cmap_sym, &srch) != NULL) {
241cd1c6085SJohn Marino if (nodups) {
242cd1c6085SJohn Marino errf("duplicate character definition");
243cd1c6085SJohn Marino }
244cd1c6085SJohn Marino return;
245cd1c6085SJohn Marino }
246cd1c6085SJohn Marino if ((n == NULL) && ((n = calloc(1, sizeof (*n))) == NULL)) {
247cd1c6085SJohn Marino errf("out of memory");
248cd1c6085SJohn Marino return;
249cd1c6085SJohn Marino }
250cd1c6085SJohn Marino n->wc = wc;
251cd1c6085SJohn Marino n->name = sym;
252cd1c6085SJohn Marino
253*cacd7ea7SJohn Marino RB_INSERT(cmap_sym, &cmap_sym, n);
254cd1c6085SJohn Marino }
255cd1c6085SJohn Marino }
256cd1c6085SJohn Marino
257cd1c6085SJohn Marino void
add_charmap(const char * sym,int c)258*cacd7ea7SJohn Marino add_charmap(const char *sym, int c)
259cd1c6085SJohn Marino {
260cd1c6085SJohn Marino add_charmap_impl(sym, c, 1);
261cd1c6085SJohn Marino }
262cd1c6085SJohn Marino
263cd1c6085SJohn Marino void
add_charmap_undefined(char * sym)264cd1c6085SJohn Marino add_charmap_undefined(char *sym)
265cd1c6085SJohn Marino {
266cd1c6085SJohn Marino charmap_t srch;
267cd1c6085SJohn Marino charmap_t *cm = NULL;
268cd1c6085SJohn Marino
269cd1c6085SJohn Marino srch.name = sym;
270*cacd7ea7SJohn Marino cm = RB_FIND(cmap_sym, &cmap_sym, &srch);
271cd1c6085SJohn Marino
272*cacd7ea7SJohn Marino if ((undefok == 0) && ((cm == NULL) || (cm->wc == (wchar_t)-1))) {
273cd1c6085SJohn Marino warn("undefined symbol <%s>", sym);
274cd1c6085SJohn Marino add_charmap_impl(sym, -1, 0);
275cd1c6085SJohn Marino } else {
276cd1c6085SJohn Marino free(sym);
277cd1c6085SJohn Marino }
278cd1c6085SJohn Marino }
279cd1c6085SJohn Marino
/*
 * Add a range of symbols such as <U0041> ... <U005A>.
 *
 *   s  - first symbol name: a non-empty, non-numeric prefix followed
 *        by decimal digits
 *   e  - last symbol name: the same prefix followed by decimal digits
 *        whose value is not smaller than the one in s
 *   wc - code assigned to the first symbol; each following symbol gets
 *        the next code
 *
 * Both s and e are heap-allocated strings owned by this function and
 * are freed on every path, including the error paths.  Generated names
 * keep the digit width of s (zero padded).
 */
void
add_charmap_range(char *s, char *e, int wc)
{
	int ls, le;
	int si;
	int sn, en;
	int i;

	static const char *digits = "0123456789";

	ls = strlen(s);
	le = strlen(e);

	if (((si = strcspn(s, digits)) == 0) || (si == ls) ||
	    (strncmp(s, e, si) != 0) ||
	    ((int)strspn(s + si, digits) != (ls - si)) ||
	    ((int)strspn(e + si, digits) != (le - si)) ||
	    ((sn = atoi(s + si)) > ((en = atoi(e + si))))) {
		errf("malformed charmap range");
		goto out;
	}

	/* cut s down to the shared prefix; the digits are regenerated below */
	s[si] = 0;

	for (i = sn; i <= en; i++) {
		char *nn = NULL;

		/*
		 * Rely on the return value: the contents of nn after a
		 * failed asprintf() are not defined on every libc.
		 */
		if (asprintf(&nn, "%s%0*u", s, ls - si, (unsigned int)i) < 0 ||
		    nn == NULL) {
			errf("out of memory");
			goto out;
		}

		add_charmap_impl(nn, wc, 1);
		wc++;
	}

out:
	free(s);
	free(e);
}
318cd1c6085SJohn Marino
/*
 * Add the mapping name -> val.  Unlike add_charmap(), a name that is
 * already defined is silently left as it is.
 */
void
add_charmap_char(const char *name, int val)
{
	wchar_t code = val;

	add_charmap_impl(name, code, 0);
}
324cd1c6085SJohn Marino
325cd1c6085SJohn Marino /*
326cd1c6085SJohn Marino * POSIX insists that certain entries be present, even when not in the
327cd1c6085SJohn Marino * orginal charmap file.
328cd1c6085SJohn Marino */
329cd1c6085SJohn Marino void
add_charmap_posix(void)330cd1c6085SJohn Marino add_charmap_posix(void)
331cd1c6085SJohn Marino {
332cd1c6085SJohn Marino int i;
333cd1c6085SJohn Marino
334cd1c6085SJohn Marino for (i = 0; portable_chars[i].name; i++) {
335cd1c6085SJohn Marino add_charmap_char(portable_chars[i].name, portable_chars[i].ch);
336cd1c6085SJohn Marino }
337cd1c6085SJohn Marino }
338cd1c6085SJohn Marino
339cd1c6085SJohn Marino int
lookup_charmap(const char * sym,wchar_t * wc)340cd1c6085SJohn Marino lookup_charmap(const char *sym, wchar_t *wc)
341cd1c6085SJohn Marino {
342cd1c6085SJohn Marino charmap_t srch;
343cd1c6085SJohn Marino charmap_t *n;
344cd1c6085SJohn Marino
345cd1c6085SJohn Marino srch.name = sym;
346*cacd7ea7SJohn Marino n = RB_FIND(cmap_sym, &cmap_sym, &srch);
347*cacd7ea7SJohn Marino if (n && n->wc != (wchar_t)-1) {
348cd1c6085SJohn Marino if (wc)
349cd1c6085SJohn Marino *wc = n->wc;
350cd1c6085SJohn Marino return (0);
351cd1c6085SJohn Marino }
352cd1c6085SJohn Marino return (-1);
353cd1c6085SJohn Marino }
354cd1c6085SJohn Marino
355cd1c6085SJohn Marino int
check_charmap(wchar_t wc)356cd1c6085SJohn Marino check_charmap(wchar_t wc)
357cd1c6085SJohn Marino {
358cd1c6085SJohn Marino charmap_t srch;
359cd1c6085SJohn Marino
360cd1c6085SJohn Marino srch.wc = wc;
361*cacd7ea7SJohn Marino return (RB_FIND(cmap_wc, &cmap_wc, &srch) ? 0 : -1);
362cd1c6085SJohn Marino }
363