1 /* 2 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 3 * Copyright 2015 John Marino <draco@marino.st> 4 * 5 * This source code is derived from the illumos localedef command, and 6 * provided under BSD-style license terms by Nexenta Systems, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * CHARMAP file handling for localedef. 33 */ 34 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 #include <limits.h> 39 #include <unistd.h> 40 #include <stddef.h> 41 #include <unistd.h> 42 #include "localedef.h" 43 #include "parser.h" 44 #include "avl.h" 45 46 static avl_tree_t cmap_sym; 47 static avl_tree_t cmap_wc; 48 49 typedef struct charmap { 50 const char *name; 51 wchar_t wc; 52 avl_node_t avl_sym; 53 avl_node_t avl_wc; 54 } charmap_t; 55 56 57 /* 58 * Array of POSIX specific portable characters. 59 */ 60 61 #pragma GCC diagnostic push 62 #pragma GCC diagnostic ignored "-Wdiscarded-qualifiers" 63 64 static const struct { 65 char *name; 66 int ch; 67 } portable_chars[] = { 68 { "NUL", '\0' }, 69 { "alert", '\a' }, 70 { "backspace", '\b' }, 71 { "tab", '\t' }, 72 { "carriage-return", '\r' }, 73 { "newline", '\n' }, 74 { "vertical-tab", '\v' }, 75 { "form-feed", '\f' }, 76 { "space", ' ' }, 77 { "exclamation-mark", '!' }, 78 { "quotation-mark", '"' }, 79 { "number-sign", '#' }, 80 { "dollar-sign", '$' }, 81 { "percent-sign", '%' }, 82 { "ampersand", '&' }, 83 { "apostrophe", '\'' }, 84 { "left-parenthesis", '(' }, 85 { "right-parenthesis", '(' }, 86 { "asterisk", '*' }, 87 { "plus-sign", '+' }, 88 { "comma", ','}, 89 { "hyphen-minus", '-' }, 90 { "hyphen", '-' }, 91 { "full-stop", '.' }, 92 { "period", '.' }, 93 { "slash", '/' }, 94 { "solidus", '/' }, 95 { "zero", '0' }, 96 { "one", '1' }, 97 { "two", '2' }, 98 { "three", '3' }, 99 { "four", '4' }, 100 { "five", '5' }, 101 { "six", '6' }, 102 { "seven", '7' }, 103 { "eight", '8' }, 104 { "nine", '9' }, 105 { "colon", ':' }, 106 { "semicolon", ';' }, 107 { "less-than-sign", '<' }, 108 { "equals-sign", '=' }, 109 { "greater-than-sign", '>' }, 110 { "question-mark", '?' }, 111 { "commercial-at", '@' }, 112 { "left-square-bracket", '[' }, 113 { "backslash", '\\' }, 114 { "reverse-solidus", '\\' }, 115 { "right-square-bracket", ']' }, 116 { "circumflex", '^' }, 117 { "circumflex-accent", '^' }, 118 { "low-line", '_' }, 119 { "underscore", '_' }, 120 { "grave-accent", '`' }, 121 { "left-brace", '{' }, 122 { "left-curly-bracket", '{' }, 123 { "vertical-line", '|' }, 124 { "right-brace", '}' }, 125 { "right-curly-bracket", '}' }, 126 { "tilde", '~' }, 127 { "A", 'A' }, 128 { "B", 'B' }, 129 { "C", 'C' }, 130 { "D", 'D' }, 131 { "E", 'E' }, 132 { "F", 'F' }, 133 { "G", 'G' }, 134 { "H", 'H' }, 135 { "I", 'I' }, 136 { "J", 'J' }, 137 { "K", 'K' }, 138 { "L", 'L' }, 139 { "M", 'M' }, 140 { "N", 'N' }, 141 { "O", 'O' }, 142 { "P", 'P' }, 143 { "Q", 'Q' }, 144 { "R", 'R' }, 145 { "S", 'S' }, 146 { "T", 'T' }, 147 { "U", 'U' }, 148 { "V", 'V' }, 149 { "W", 'W' }, 150 { "X", 'X' }, 151 { "Y", 'Y' }, 152 { "Z", 'Z' }, 153 { "a", 'a' }, 154 { "b", 'b' }, 155 { "c", 'c' }, 156 { "d", 'd' }, 157 { "e", 'e' }, 158 { "f", 'f' }, 159 { "g", 'g' }, 160 { "h", 'h' }, 161 { "i", 'i' }, 162 { "j", 'j' }, 163 { "k", 'k' }, 164 { "l", 'l' }, 165 { "m", 'm' }, 166 { "n", 'n' }, 167 { "o", 'o' }, 168 { "p", 'p' }, 169 { "q", 'q' }, 170 { "r", 'r' }, 171 { "s", 's' }, 172 { "t", 't' }, 173 { "u", 'u' }, 174 { "v", 'v' }, 175 { "w", 'w' }, 176 { "x", 'x' }, 177 { "y", 'y' }, 178 { "z", 'z' }, 179 { NULL, 0 } 180 }; 181 182 #pragma GCC diagnostic pop 183 184 static int 185 cmap_compare_sym(const void *n1, const void *n2) 186 { 187 const charmap_t *c1 = n1; 188 const charmap_t *c2 = n2; 189 int rv; 190 191 rv = strcmp(c1->name, c2->name); 192 return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 193 } 194 195 static int 196 cmap_compare_wc(const void *n1, const void *n2) 197 { 198 const charmap_t *c1 = n1; 199 const charmap_t *c2 = n2; 200 201 return ((c1->wc < c2->wc) ? -1 : (c1->wc > c2->wc) ? 1 : 0); 202 } 203 204 void 205 init_charmap(void) 206 { 207 avl_create(&cmap_sym, cmap_compare_sym, sizeof (charmap_t), 208 offsetof(charmap_t, avl_sym)); 209 210 avl_create(&cmap_wc, cmap_compare_wc, sizeof (charmap_t), 211 offsetof(charmap_t, avl_wc)); 212 } 213 214 static void 215 add_charmap_impl(char *sym, wchar_t wc, int nodups) 216 { 217 charmap_t srch; 218 charmap_t *n = NULL; 219 avl_index_t where; 220 221 srch.wc = wc; 222 srch.name = sym; 223 224 /* 225 * also possibly insert the wide mapping, although note that there 226 * can only be one of these per wide character code. 227 */ 228 if ((wc != -1) && ((avl_find(&cmap_wc, &srch, &where)) == NULL)) { 229 if ((n = calloc(1, sizeof (*n))) == NULL) { 230 errf("out of memory"); 231 return; 232 } 233 n->wc = wc; 234 avl_insert(&cmap_wc, n, where); 235 } 236 237 if (sym) { 238 if (avl_find(&cmap_sym, &srch, &where) != NULL) { 239 if (nodups) { 240 errf("duplicate character definition"); 241 } 242 return; 243 } 244 if ((n == NULL) && ((n = calloc(1, sizeof (*n))) == NULL)) { 245 errf("out of memory"); 246 return; 247 } 248 n->wc = wc; 249 n->name = sym; 250 251 avl_insert(&cmap_sym, n, where); 252 } 253 } 254 255 void 256 add_charmap(char *sym, int c) 257 { 258 add_charmap_impl(sym, c, 1); 259 } 260 261 void 262 add_charmap_undefined(char *sym) 263 { 264 charmap_t srch; 265 charmap_t *cm = NULL; 266 267 srch.name = sym; 268 cm = avl_find(&cmap_sym, &srch, NULL); 269 270 if ((undefok == 0) && ((cm == NULL) || (cm->wc == -1))) { 271 warn("undefined symbol <%s>", sym); 272 add_charmap_impl(sym, -1, 0); 273 } else { 274 free(sym); 275 } 276 } 277 278 void 279 add_charmap_range(char *s, char *e, int wc) 280 { 281 int ls, le; 282 int si; 283 int sn, en; 284 int i; 285 286 static const char *digits = "0123456789"; 287 288 ls = strlen(s); 289 le = strlen(e); 290 291 if (((si = strcspn(s, digits)) == 0) || (si == ls) || 292 (strncmp(s, e, si) != 0) || 293 ((int)strspn(s + si, digits) != (ls - si)) || 294 ((int)strspn(e + si, digits) != (le - si)) || 295 ((sn = atoi(s + si)) > ((en = atoi(e + si))))) { 296 errf("malformed charmap range"); 297 return; 298 } 299 300 s[si] = 0; 301 302 for (i = sn; i <= en; i++) { 303 char *nn; 304 (void) asprintf(&nn, "%s%0*u", s, ls - si, i); 305 if (nn == NULL) { 306 errf("out of memory"); 307 return; 308 } 309 310 add_charmap_impl(nn, wc, 1); 311 wc++; 312 } 313 free(s); 314 free(e); 315 } 316 317 void 318 add_charmap_char(char *name, int val) 319 { 320 add_charmap_impl(name, val, 0); 321 } 322 323 /* 324 * POSIX insists that certain entries be present, even when not in the 325 * orginal charmap file. 326 */ 327 void 328 add_charmap_posix(void) 329 { 330 int i; 331 332 for (i = 0; portable_chars[i].name; i++) { 333 add_charmap_char(portable_chars[i].name, portable_chars[i].ch); 334 } 335 } 336 337 int 338 lookup_charmap(const char *sym, wchar_t *wc) 339 { 340 charmap_t srch; 341 charmap_t *n; 342 343 srch.name = sym; 344 n = avl_find(&cmap_sym, &srch, NULL); 345 if (n && n->wc != -1) { 346 if (wc) 347 *wc = n->wc; 348 return (0); 349 } 350 return (-1); 351 } 352 353 int 354 check_charmap(wchar_t wc) 355 { 356 charmap_t srch; 357 358 srch.wc = wc; 359 return (avl_find(&cmap_wc, &srch, NULL) ? 0 : -1); 360 } 361