1cd1c6085SJohn Marino /* 28aa2b98bSJohn Marino * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 38aa2b98bSJohn Marino * Copyright 2012 Garrett D'Amore <garrett@damore.org> All rights reserved. 4cd1c6085SJohn Marino * Copyright 2015 John Marino <draco@marino.st> 58aa2b98bSJohn Marino * 68aa2b98bSJohn Marino * This source code is derived from the illumos localedef command, and 78aa2b98bSJohn Marino * provided under BSD-style license terms by Nexenta Systems, Inc. 88aa2b98bSJohn Marino * 98aa2b98bSJohn Marino * Redistribution and use in source and binary forms, with or without 108aa2b98bSJohn Marino * modification, are permitted provided that the following conditions 118aa2b98bSJohn Marino * are met: 128aa2b98bSJohn Marino * 138aa2b98bSJohn Marino * 1. Redistributions of source code must retain the above copyright 148aa2b98bSJohn Marino * notice, this list of conditions and the following disclaimer. 158aa2b98bSJohn Marino * 2. Redistributions in binary form must reproduce the above copyright 168aa2b98bSJohn Marino * notice, this list of conditions and the following disclaimer in the 178aa2b98bSJohn Marino * documentation and/or other materials provided with the distribution. 188aa2b98bSJohn Marino * 198aa2b98bSJohn Marino * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 208aa2b98bSJohn Marino * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 218aa2b98bSJohn Marino * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 228aa2b98bSJohn Marino * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 238aa2b98bSJohn Marino * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 248aa2b98bSJohn Marino * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 258aa2b98bSJohn Marino * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 268aa2b98bSJohn Marino * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 278aa2b98bSJohn Marino * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 288aa2b98bSJohn Marino * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 298aa2b98bSJohn Marino * POSSIBILITY OF SUCH DAMAGE. 30cd1c6085SJohn Marino */ 31cd1c6085SJohn Marino 32cd1c6085SJohn Marino /* 33cd1c6085SJohn Marino * LC_CTYPE database generation routines for localedef. 34cd1c6085SJohn Marino */ 35cd1c6085SJohn Marino 36cd1c6085SJohn Marino #include <stdio.h> 37cd1c6085SJohn Marino #include <stdlib.h> 38cd1c6085SJohn Marino #include <stddef.h> 39cd1c6085SJohn Marino #include <string.h> 40cd1c6085SJohn Marino #include <sys/types.h> 41cd1c6085SJohn Marino #include <wchar.h> 42cd1c6085SJohn Marino #include <ctype.h> 43cd1c6085SJohn Marino #include <wctype.h> 44cd1c6085SJohn Marino #include <unistd.h> 45cd1c6085SJohn Marino #include "localedef.h" 46cd1c6085SJohn Marino #include "parser.h" 47cd1c6085SJohn Marino #include "runefile.h" 48cd1c6085SJohn Marino #include "avl.h" 49cd1c6085SJohn Marino 501ed06f48SJohn Marino /* Needed for bootstrapping, _CTYPE_N not available before 1 Sep 2015 */ 511ed06f48SJohn Marino #ifndef _CTYPE_N 521ed06f48SJohn Marino #define _CTYPE_N 0x00400000L 531ed06f48SJohn Marino #endif 54cd1c6085SJohn Marino 55cd1c6085SJohn Marino #define _ISUPPER _CTYPE_U 56cd1c6085SJohn Marino #define _ISLOWER _CTYPE_L 57cd1c6085SJohn Marino #define _ISDIGIT _CTYPE_D 58cd1c6085SJohn Marino #define _ISXDIGIT _CTYPE_X 59cd1c6085SJohn Marino #define _ISSPACE _CTYPE_S 60cd1c6085SJohn Marino #define _ISBLANK _CTYPE_B 61cd1c6085SJohn Marino #define _ISALPHA _CTYPE_A 62cd1c6085SJohn Marino #define _ISPUNCT _CTYPE_P 63cd1c6085SJohn Marino #define _ISGRAPH _CTYPE_G 64cd1c6085SJohn Marino #define _ISPRINT _CTYPE_R 65cd1c6085SJohn Marino #define _ISCNTRL _CTYPE_C 66cd1c6085SJohn Marino #define _E1 _CTYPE_Q 67cd1c6085SJohn Marino #define _E2 _CTYPE_I 68cd1c6085SJohn Marino #define _E3 0 6931c9f6f2SJohn Marino #define _E4 _CTYPE_N 70cd1c6085SJohn Marino #define _E5 _CTYPE_T 71cd1c6085SJohn Marino 72cd1c6085SJohn Marino static avl_tree_t ctypes; 73cd1c6085SJohn Marino 74cd1c6085SJohn Marino static wchar_t last_ctype; 75cd1c6085SJohn Marino 76cd1c6085SJohn Marino typedef struct ctype_node { 77cd1c6085SJohn Marino wchar_t wc; 78cd1c6085SJohn Marino int32_t ctype; 79cd1c6085SJohn Marino int32_t toupper; 80cd1c6085SJohn Marino int32_t tolower; 81cd1c6085SJohn Marino avl_node_t avl; 82cd1c6085SJohn Marino } ctype_node_t; 83cd1c6085SJohn Marino 84cd1c6085SJohn Marino typedef struct width_node { 85cd1c6085SJohn Marino wchar_t start; 86cd1c6085SJohn Marino wchar_t end; 87cd1c6085SJohn Marino int8_t width; 88cd1c6085SJohn Marino avl_node_t avl; 89cd1c6085SJohn Marino } width_node_t; 90cd1c6085SJohn Marino 91cd1c6085SJohn Marino static int 92cd1c6085SJohn Marino ctype_compare(const void *n1, const void *n2) 93cd1c6085SJohn Marino { 94cd1c6085SJohn Marino const ctype_node_t *c1 = n1; 95cd1c6085SJohn Marino const ctype_node_t *c2 = n2; 96cd1c6085SJohn Marino 97cd1c6085SJohn Marino return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0); 98cd1c6085SJohn Marino } 99cd1c6085SJohn Marino 100cd1c6085SJohn Marino void 101cd1c6085SJohn Marino init_ctype(void) 102cd1c6085SJohn Marino { 103cd1c6085SJohn Marino avl_create(&ctypes, ctype_compare, sizeof (ctype_node_t), 104cd1c6085SJohn Marino offsetof(ctype_node_t, avl)); 105cd1c6085SJohn Marino } 106cd1c6085SJohn Marino 107cd1c6085SJohn Marino 108cd1c6085SJohn Marino static void 109cd1c6085SJohn Marino add_ctype_impl(ctype_node_t *ctn) 110cd1c6085SJohn Marino { 111cd1c6085SJohn Marino switch (last_kw) { 112cd1c6085SJohn Marino case T_ISUPPER: 113cd1c6085SJohn Marino ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT); 114cd1c6085SJohn Marino break; 115cd1c6085SJohn Marino case T_ISLOWER: 116cd1c6085SJohn Marino ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT); 117cd1c6085SJohn Marino break; 118cd1c6085SJohn Marino case T_ISALPHA: 119cd1c6085SJohn Marino ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT); 120cd1c6085SJohn Marino break; 121cd1c6085SJohn Marino case T_ISDIGIT: 122348a405dSJohn Marino ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4); 123cd1c6085SJohn Marino break; 124cd1c6085SJohn Marino case T_ISSPACE: 125cd1c6085SJohn Marino ctn->ctype |= _ISSPACE; 126cd1c6085SJohn Marino break; 127cd1c6085SJohn Marino case T_ISCNTRL: 128cd1c6085SJohn Marino ctn->ctype |= _ISCNTRL; 129cd1c6085SJohn Marino break; 130cd1c6085SJohn Marino case T_ISGRAPH: 131cd1c6085SJohn Marino ctn->ctype |= (_ISGRAPH | _ISPRINT); 132cd1c6085SJohn Marino break; 133cd1c6085SJohn Marino case T_ISPRINT: 134cd1c6085SJohn Marino ctn->ctype |= _ISPRINT; 135cd1c6085SJohn Marino break; 136cd1c6085SJohn Marino case T_ISPUNCT: 137cd1c6085SJohn Marino ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT); 138cd1c6085SJohn Marino break; 139cd1c6085SJohn Marino case T_ISXDIGIT: 140*dd5ff2d3SJohn Marino ctn->ctype |= (_ISXDIGIT | _ISPRINT); 141cd1c6085SJohn Marino break; 142cd1c6085SJohn Marino case T_ISBLANK: 143cd1c6085SJohn Marino ctn->ctype |= (_ISBLANK | _ISSPACE); 144cd1c6085SJohn Marino break; 145cd1c6085SJohn Marino case T_ISPHONOGRAM: 146cd1c6085SJohn Marino ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH); 147cd1c6085SJohn Marino break; 148cd1c6085SJohn Marino case T_ISIDEOGRAM: 149cd1c6085SJohn Marino ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH); 150cd1c6085SJohn Marino break; 151cd1c6085SJohn Marino case T_ISENGLISH: 152cd1c6085SJohn Marino ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH); 153cd1c6085SJohn Marino break; 154cd1c6085SJohn Marino case T_ISNUMBER: 155cd1c6085SJohn Marino ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH); 156cd1c6085SJohn Marino break; 157cd1c6085SJohn Marino case T_ISSPECIAL: 158cd1c6085SJohn Marino ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH); 159cd1c6085SJohn Marino break; 160cd1c6085SJohn Marino case T_ISALNUM: 161cd1c6085SJohn Marino /* 162cd1c6085SJohn Marino * We can't do anything with this. The character 163cd1c6085SJohn Marino * should already be specified as a digit or alpha. 164cd1c6085SJohn Marino */ 165cd1c6085SJohn Marino break; 166cd1c6085SJohn Marino default: 167cd1c6085SJohn Marino errf("not a valid character class"); 168cd1c6085SJohn Marino } 169cd1c6085SJohn Marino } 170cd1c6085SJohn Marino 171cd1c6085SJohn Marino static ctype_node_t * 172cd1c6085SJohn Marino get_ctype(wchar_t wc) 173cd1c6085SJohn Marino { 174cd1c6085SJohn Marino ctype_node_t srch; 175cd1c6085SJohn Marino ctype_node_t *ctn; 176cd1c6085SJohn Marino avl_index_t where; 177cd1c6085SJohn Marino 178cd1c6085SJohn Marino srch.wc = wc; 179cd1c6085SJohn Marino if ((ctn = avl_find(&ctypes, &srch, &where)) == NULL) { 180cd1c6085SJohn Marino if ((ctn = calloc(1, sizeof (*ctn))) == NULL) { 181cd1c6085SJohn Marino errf("out of memory"); 182cd1c6085SJohn Marino return (NULL); 183cd1c6085SJohn Marino } 184cd1c6085SJohn Marino ctn->wc = wc; 185cd1c6085SJohn Marino 186cd1c6085SJohn Marino avl_insert(&ctypes, ctn, where); 187cd1c6085SJohn Marino } 188cd1c6085SJohn Marino return (ctn); 189cd1c6085SJohn Marino } 190cd1c6085SJohn Marino 191cd1c6085SJohn Marino void 192cd1c6085SJohn Marino add_ctype(int val) 193cd1c6085SJohn Marino { 194cd1c6085SJohn Marino ctype_node_t *ctn; 195cd1c6085SJohn Marino 196cd1c6085SJohn Marino if ((ctn = get_ctype(val)) == NULL) { 197cd1c6085SJohn Marino INTERR; 198cd1c6085SJohn Marino return; 199cd1c6085SJohn Marino } 200cd1c6085SJohn Marino add_ctype_impl(ctn); 201cd1c6085SJohn Marino last_ctype = ctn->wc; 202cd1c6085SJohn Marino } 203cd1c6085SJohn Marino 204cd1c6085SJohn Marino void 205cd1c6085SJohn Marino add_ctype_range(int end) 206cd1c6085SJohn Marino { 207cd1c6085SJohn Marino ctype_node_t *ctn; 208cd1c6085SJohn Marino wchar_t cur; 209cd1c6085SJohn Marino 210cd1c6085SJohn Marino if (end < last_ctype) { 211cd1c6085SJohn Marino errf("malformed character range (%u ... %u))", 212cd1c6085SJohn Marino last_ctype, end); 213cd1c6085SJohn Marino return; 214cd1c6085SJohn Marino } 215cd1c6085SJohn Marino for (cur = last_ctype + 1; cur <= end; cur++) { 216cd1c6085SJohn Marino if ((ctn = get_ctype(cur)) == NULL) { 217cd1c6085SJohn Marino INTERR; 218cd1c6085SJohn Marino return; 219cd1c6085SJohn Marino } 220cd1c6085SJohn Marino add_ctype_impl(ctn); 221cd1c6085SJohn Marino } 222cd1c6085SJohn Marino last_ctype = end; 223cd1c6085SJohn Marino 224cd1c6085SJohn Marino } 225cd1c6085SJohn Marino 226cd1c6085SJohn Marino /* 227cd1c6085SJohn Marino * A word about widths: if the width mask is specified, then libc 228cd1c6085SJohn Marino * unconditionally honors it. Otherwise, it assumes printable 229cd1c6085SJohn Marino * characters have width 1, and non-printable characters have width 230cd1c6085SJohn Marino * -1 (except for NULL which is special with with 0). Hence, we have 231cd1c6085SJohn Marino * no need to inject defaults here -- the "default" unset value of 0 232cd1c6085SJohn Marino * indicates that libc should use its own logic in wcwidth as described. 233cd1c6085SJohn Marino */ 234cd1c6085SJohn Marino void 235cd1c6085SJohn Marino add_width(int wc, int width) 236cd1c6085SJohn Marino { 237cd1c6085SJohn Marino ctype_node_t *ctn; 238cd1c6085SJohn Marino 239cd1c6085SJohn Marino if ((ctn = get_ctype(wc)) == NULL) { 240cd1c6085SJohn Marino INTERR; 241cd1c6085SJohn Marino return; 242cd1c6085SJohn Marino } 243cd1c6085SJohn Marino ctn->ctype &= ~(_CTYPE_SWM); 244cd1c6085SJohn Marino switch (width) { 245cd1c6085SJohn Marino case 0: 246cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW0; 247cd1c6085SJohn Marino break; 248cd1c6085SJohn Marino case 1: 249cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW1; 250cd1c6085SJohn Marino break; 251cd1c6085SJohn Marino case 2: 252cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW2; 253cd1c6085SJohn Marino break; 254cd1c6085SJohn Marino case 3: 255cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW3; 256cd1c6085SJohn Marino break; 257cd1c6085SJohn Marino } 258cd1c6085SJohn Marino } 259cd1c6085SJohn Marino 260cd1c6085SJohn Marino void 261cd1c6085SJohn Marino add_width_range(int start, int end, int width) 262cd1c6085SJohn Marino { 263cd1c6085SJohn Marino for (; start <= end; start++) { 264cd1c6085SJohn Marino add_width(start, width); 265cd1c6085SJohn Marino } 266cd1c6085SJohn Marino } 267cd1c6085SJohn Marino 268cd1c6085SJohn Marino void 269cd1c6085SJohn Marino add_caseconv(int val, int wc) 270cd1c6085SJohn Marino { 271cd1c6085SJohn Marino ctype_node_t *ctn; 272cd1c6085SJohn Marino 273cd1c6085SJohn Marino ctn = get_ctype(val); 274cd1c6085SJohn Marino if (ctn == NULL) { 275cd1c6085SJohn Marino INTERR; 276cd1c6085SJohn Marino return; 277cd1c6085SJohn Marino } 278cd1c6085SJohn Marino 279cd1c6085SJohn Marino switch (last_kw) { 280cd1c6085SJohn Marino case T_TOUPPER: 281cd1c6085SJohn Marino ctn->toupper = wc; 282cd1c6085SJohn Marino break; 283cd1c6085SJohn Marino case T_TOLOWER: 284cd1c6085SJohn Marino ctn->tolower = wc; 285cd1c6085SJohn Marino break; 286cd1c6085SJohn Marino default: 287cd1c6085SJohn Marino INTERR; 288cd1c6085SJohn Marino break; 289cd1c6085SJohn Marino } 290cd1c6085SJohn Marino } 291cd1c6085SJohn Marino 292cd1c6085SJohn Marino void 293cd1c6085SJohn Marino dump_ctype(void) 294cd1c6085SJohn Marino { 295cd1c6085SJohn Marino FILE *f; 296cd1c6085SJohn Marino _FileRuneLocale rl; 297cd1c6085SJohn Marino ctype_node_t *ctn, *last_ct, *last_lo, *last_up; 298cd1c6085SJohn Marino _FileRuneEntry *ct = NULL; 299cd1c6085SJohn Marino _FileRuneEntry *lo = NULL; 300cd1c6085SJohn Marino _FileRuneEntry *up = NULL; 301cd1c6085SJohn Marino wchar_t wc; 302cd1c6085SJohn Marino 303cd1c6085SJohn Marino (void) memset(&rl, 0, sizeof (rl)); 304cd1c6085SJohn Marino last_ct = NULL; 305cd1c6085SJohn Marino last_lo = NULL; 306cd1c6085SJohn Marino last_up = NULL; 307cd1c6085SJohn Marino 308cd1c6085SJohn Marino if ((f = open_category()) == NULL) 309cd1c6085SJohn Marino return; 310cd1c6085SJohn Marino 311cd1c6085SJohn Marino (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8); 312cd1c6085SJohn Marino (void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding)); 313cd1c6085SJohn Marino 314cd1c6085SJohn Marino /* 315cd1c6085SJohn Marino * Initialize the identity map. 316cd1c6085SJohn Marino */ 317cd1c6085SJohn Marino for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) { 318cd1c6085SJohn Marino rl.maplower[wc] = wc; 319cd1c6085SJohn Marino rl.mapupper[wc] = wc; 320cd1c6085SJohn Marino } 321cd1c6085SJohn Marino 322cd1c6085SJohn Marino for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) { 323cd1c6085SJohn Marino int conflict = 0; 324cd1c6085SJohn Marino 325cd1c6085SJohn Marino 326cd1c6085SJohn Marino wc = ctn->wc; 327cd1c6085SJohn Marino 328cd1c6085SJohn Marino /* 329cd1c6085SJohn Marino * POSIX requires certain portable characters have 330cd1c6085SJohn Marino * certain types. Add them if they are missing. 331cd1c6085SJohn Marino */ 332cd1c6085SJohn Marino if ((wc >= 1) && (wc <= 127)) { 333cd1c6085SJohn Marino if ((wc >= 'A') && (wc <= 'Z')) 334cd1c6085SJohn Marino ctn->ctype |= _ISUPPER; 335cd1c6085SJohn Marino if ((wc >= 'a') && (wc <= 'z')) 336cd1c6085SJohn Marino ctn->ctype |= _ISLOWER; 337cd1c6085SJohn Marino if ((wc >= '0') && (wc <= '9')) 338cd1c6085SJohn Marino ctn->ctype |= _ISDIGIT; 339cd1c6085SJohn Marino if (strchr(" \f\n\r\t\v", (char)wc) != NULL) 340cd1c6085SJohn Marino ctn->ctype |= _ISSPACE; 341cd1c6085SJohn Marino if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL) 342cd1c6085SJohn Marino ctn->ctype |= _ISXDIGIT; 343cd1c6085SJohn Marino if (strchr(" \t", (char)wc)) 344cd1c6085SJohn Marino ctn->ctype |= _ISBLANK; 34597055fc2SJohn Marino if (wc == ' ') 34697055fc2SJohn Marino ctn->ctype |= _ISPRINT; 347cd1c6085SJohn Marino 348cd1c6085SJohn Marino /* 349cd1c6085SJohn Marino * Technically these settings are only 350cd1c6085SJohn Marino * required for the C locale. However, it 351cd1c6085SJohn Marino * turns out that because of the historical 352cd1c6085SJohn Marino * version of isprint(), we need them for all 353cd1c6085SJohn Marino * locales as well. Note that these are not 354cd1c6085SJohn Marino * necessarily valid punctation characters in 355cd1c6085SJohn Marino * the current language, but ispunct() needs 356cd1c6085SJohn Marino * to return TRUE for them. 357cd1c6085SJohn Marino */ 358cd1c6085SJohn Marino if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~", 359cd1c6085SJohn Marino (char)wc)) 360cd1c6085SJohn Marino ctn->ctype |= _ISPUNCT; 361cd1c6085SJohn Marino } 362cd1c6085SJohn Marino 363cd1c6085SJohn Marino /* 364cd1c6085SJohn Marino * POSIX also requires that certain types imply 365cd1c6085SJohn Marino * others. Add any inferred types here. 366cd1c6085SJohn Marino */ 367cd1c6085SJohn Marino if (ctn->ctype & (_ISUPPER |_ISLOWER)) 368cd1c6085SJohn Marino ctn->ctype |= _ISALPHA; 369cd1c6085SJohn Marino if (ctn->ctype & _ISDIGIT) 370cd1c6085SJohn Marino ctn->ctype |= _ISXDIGIT; 371cd1c6085SJohn Marino if (ctn->ctype & _ISBLANK) 372cd1c6085SJohn Marino ctn->ctype |= _ISSPACE; 373cd1c6085SJohn Marino if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT)) 374cd1c6085SJohn Marino ctn->ctype |= _ISGRAPH; 375cd1c6085SJohn Marino if (ctn->ctype & _ISGRAPH) 376cd1c6085SJohn Marino ctn->ctype |= _ISPRINT; 377cd1c6085SJohn Marino 378cd1c6085SJohn Marino /* 379cd1c6085SJohn Marino * Finally, POSIX requires that certain combinations 380cd1c6085SJohn Marino * are invalid. We don't flag this as a fatal error, 381cd1c6085SJohn Marino * but we will warn about. 382cd1c6085SJohn Marino */ 383cd1c6085SJohn Marino if ((ctn->ctype & _ISALPHA) && 384cd1c6085SJohn Marino (ctn->ctype & (_ISPUNCT|_ISDIGIT))) 385cd1c6085SJohn Marino conflict++; 386cd1c6085SJohn Marino if ((ctn->ctype & _ISPUNCT) & 387cd1c6085SJohn Marino (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT))) 388cd1c6085SJohn Marino conflict++; 389cd1c6085SJohn Marino if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH)) 390cd1c6085SJohn Marino conflict++; 391cd1c6085SJohn Marino if ((ctn->ctype & _ISCNTRL) & _ISPRINT) 392cd1c6085SJohn Marino conflict++; 393cd1c6085SJohn Marino if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH))) 394cd1c6085SJohn Marino conflict++; 395cd1c6085SJohn Marino 396cd1c6085SJohn Marino if (conflict) { 397cd1c6085SJohn Marino warn("conflicting classes for character 0x%x (%x)", 398cd1c6085SJohn Marino wc, ctn->ctype); 399cd1c6085SJohn Marino } 400cd1c6085SJohn Marino /* 401cd1c6085SJohn Marino * Handle the lower 256 characters using the simple 402cd1c6085SJohn Marino * optimization. Note that if we have not defined the 403cd1c6085SJohn Marino * upper/lower case, then we identity map it. 404cd1c6085SJohn Marino */ 405cd1c6085SJohn Marino if ((unsigned)wc < _CACHED_RUNES) { 406cd1c6085SJohn Marino rl.runetype[wc] = ctn->ctype; 407cd1c6085SJohn Marino if (ctn->tolower) 408cd1c6085SJohn Marino rl.maplower[wc] = ctn->tolower; 409cd1c6085SJohn Marino if (ctn->toupper) 410cd1c6085SJohn Marino rl.mapupper[wc] = ctn->toupper; 411cd1c6085SJohn Marino continue; 412cd1c6085SJohn Marino } 413cd1c6085SJohn Marino 414cd1c6085SJohn Marino if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) { 415cd1c6085SJohn Marino ct[rl.runetype_ext_nranges-1].max = wc; 416cd1c6085SJohn Marino last_ct = ctn; 417cd1c6085SJohn Marino } else { 418cd1c6085SJohn Marino rl.runetype_ext_nranges++; 419cd1c6085SJohn Marino ct = realloc(ct, 420cd1c6085SJohn Marino sizeof (*ct) * rl.runetype_ext_nranges); 421cd1c6085SJohn Marino ct[rl.runetype_ext_nranges - 1].min = wc; 422cd1c6085SJohn Marino ct[rl.runetype_ext_nranges - 1].max = wc; 423cd1c6085SJohn Marino ct[rl.runetype_ext_nranges - 1].map = ctn->ctype; 424cd1c6085SJohn Marino last_ct = ctn; 425cd1c6085SJohn Marino } 426cd1c6085SJohn Marino if (ctn->tolower == 0) { 427cd1c6085SJohn Marino last_lo = NULL; 428cd1c6085SJohn Marino } else if ((last_lo != NULL) && 429cd1c6085SJohn Marino (last_lo->tolower + 1 == ctn->tolower)) { 430cd1c6085SJohn Marino lo[rl.maplower_ext_nranges-1].max = wc; 431cd1c6085SJohn Marino last_lo = ctn; 432cd1c6085SJohn Marino } else { 433cd1c6085SJohn Marino rl.maplower_ext_nranges++; 434cd1c6085SJohn Marino lo = realloc(lo, 435cd1c6085SJohn Marino sizeof (*lo) * rl.maplower_ext_nranges); 436cd1c6085SJohn Marino lo[rl.maplower_ext_nranges - 1].min = wc; 437cd1c6085SJohn Marino lo[rl.maplower_ext_nranges - 1].max = wc; 438cd1c6085SJohn Marino lo[rl.maplower_ext_nranges - 1].map = ctn->tolower; 439cd1c6085SJohn Marino last_lo = ctn; 440cd1c6085SJohn Marino } 441cd1c6085SJohn Marino 442cd1c6085SJohn Marino if (ctn->toupper == 0) { 443cd1c6085SJohn Marino last_up = NULL; 444cd1c6085SJohn Marino } else if ((last_up != NULL) && 445cd1c6085SJohn Marino (last_up->toupper + 1 == ctn->toupper)) { 446cd1c6085SJohn Marino up[rl.mapupper_ext_nranges-1].max = wc; 447cd1c6085SJohn Marino last_up = ctn; 448cd1c6085SJohn Marino } else { 449cd1c6085SJohn Marino rl.mapupper_ext_nranges++; 450cd1c6085SJohn Marino up = realloc(up, 451cd1c6085SJohn Marino sizeof (*up) * rl.mapupper_ext_nranges); 452cd1c6085SJohn Marino up[rl.mapupper_ext_nranges - 1].min = wc; 453cd1c6085SJohn Marino up[rl.mapupper_ext_nranges - 1].max = wc; 454cd1c6085SJohn Marino up[rl.mapupper_ext_nranges - 1].map = ctn->toupper; 455cd1c6085SJohn Marino last_up = ctn; 456cd1c6085SJohn Marino } 457cd1c6085SJohn Marino } 458cd1c6085SJohn Marino 459cd1c6085SJohn Marino if ((wr_category(&rl, sizeof (rl), f) < 0) || 460cd1c6085SJohn Marino (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) || 461cd1c6085SJohn Marino (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) || 462cd1c6085SJohn Marino (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) { 463cd1c6085SJohn Marino return; 464cd1c6085SJohn Marino } 465cd1c6085SJohn Marino 466cd1c6085SJohn Marino close_category(f); 467cd1c6085SJohn Marino } 468