/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org> All rights reserved.
 * Copyright 2015 John Marino <draco@marino.st>
 *
 * This source code is derived from the illumos localedef command, and
 * provided under BSD-style license terms by Nexenta Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
31cd1c6085SJohn Marino
/*
 * LC_CTYPE database generation routines for localedef.
 */
35cd1c6085SJohn Marino
36cacd7ea7SJohn Marino #include <sys/tree.h>
37cacd7ea7SJohn Marino
38cd1c6085SJohn Marino #include <stdio.h>
39cd1c6085SJohn Marino #include <stdlib.h>
40cd1c6085SJohn Marino #include <stddef.h>
41cd1c6085SJohn Marino #include <string.h>
42cd1c6085SJohn Marino #include <sys/types.h>
43cd1c6085SJohn Marino #include <wchar.h>
44cd1c6085SJohn Marino #include <ctype.h>
45cd1c6085SJohn Marino #include <wctype.h>
46cd1c6085SJohn Marino #include <unistd.h>
47cd1c6085SJohn Marino #include "localedef.h"
48cd1c6085SJohn Marino #include "parser.h"
49cd1c6085SJohn Marino #include "runefile.h"
50cacd7ea7SJohn Marino
51cd1c6085SJohn Marino
/* Needed for bootstrapping, _CTYPE_N not available before 1 Sep 2015 */
#ifndef _CTYPE_N
#define	_CTYPE_N	0x00400000L
#endif

/*
 * Local names for the _CTYPE_* class bits.  The _E1.._E5 aliases back the
 * extended classes handled in add_ctype_impl() (phonogram, ideogram,
 * english, number, special); _E3 is 0, so the "english" class contributes
 * no bit of its own.
 */
#define	_ISUPPER	_CTYPE_U
#define	_ISLOWER	_CTYPE_L
#define	_ISDIGIT	_CTYPE_D
#define	_ISXDIGIT	_CTYPE_X
#define	_ISSPACE	_CTYPE_S
#define	_ISBLANK	_CTYPE_B
#define	_ISALPHA	_CTYPE_A
#define	_ISPUNCT	_CTYPE_P
#define	_ISGRAPH	_CTYPE_G
#define	_ISPRINT	_CTYPE_R
#define	_ISCNTRL	_CTYPE_C
#define	_E1		_CTYPE_Q
#define	_E2		_CTYPE_I
#define	_E3		0
#define	_E4		_CTYPE_N
#define	_E5		_CTYPE_T
73cd1c6085SJohn Marino
/*
 * Code of the character most recently classified; add_ctype_range()
 * continues from the character after it.
 */
static wchar_t last_ctype;

/* Declared early because the tree macros reference the comparator. */
static int ctype_compare(const void *n1, const void *n2);
76cd1c6085SJohn Marino
77cd1c6085SJohn Marino typedef struct ctype_node {
78cd1c6085SJohn Marino wchar_t wc;
79cd1c6085SJohn Marino int32_t ctype;
80cd1c6085SJohn Marino int32_t toupper;
81cd1c6085SJohn Marino int32_t tolower;
82cacd7ea7SJohn Marino RB_ENTRY(ctype_node) entry;
83cd1c6085SJohn Marino } ctype_node_t;
84cd1c6085SJohn Marino
85cacd7ea7SJohn Marino static RB_HEAD(ctypes, ctype_node) ctypes;
86cacd7ea7SJohn Marino RB_PROTOTYPE_STATIC(ctypes, ctype_node, entry, ctype_compare);
87cacd7ea7SJohn Marino RB_GENERATE(ctypes, ctype_node, entry, ctype_compare);
88cd1c6085SJohn Marino
89cd1c6085SJohn Marino static int
ctype_compare(const void * n1,const void * n2)90cd1c6085SJohn Marino ctype_compare(const void *n1, const void *n2)
91cd1c6085SJohn Marino {
92cd1c6085SJohn Marino const ctype_node_t *c1 = n1;
93cd1c6085SJohn Marino const ctype_node_t *c2 = n2;
94cd1c6085SJohn Marino
95cd1c6085SJohn Marino return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0);
96cd1c6085SJohn Marino }
97cd1c6085SJohn Marino
98cd1c6085SJohn Marino void
init_ctype(void)99cd1c6085SJohn Marino init_ctype(void)
100cd1c6085SJohn Marino {
101cacd7ea7SJohn Marino RB_INIT(&ctypes);
102cd1c6085SJohn Marino }
103cd1c6085SJohn Marino
104cd1c6085SJohn Marino
105cd1c6085SJohn Marino static void
add_ctype_impl(ctype_node_t * ctn)106cd1c6085SJohn Marino add_ctype_impl(ctype_node_t *ctn)
107cd1c6085SJohn Marino {
108cd1c6085SJohn Marino switch (last_kw) {
109cd1c6085SJohn Marino case T_ISUPPER:
110cd1c6085SJohn Marino ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT);
111cd1c6085SJohn Marino break;
112cd1c6085SJohn Marino case T_ISLOWER:
113cd1c6085SJohn Marino ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT);
114cd1c6085SJohn Marino break;
115cd1c6085SJohn Marino case T_ISALPHA:
116cd1c6085SJohn Marino ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT);
117cd1c6085SJohn Marino break;
118cd1c6085SJohn Marino case T_ISDIGIT:
119348a405dSJohn Marino ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4);
120cd1c6085SJohn Marino break;
121cd1c6085SJohn Marino case T_ISSPACE:
122cd1c6085SJohn Marino ctn->ctype |= _ISSPACE;
123cd1c6085SJohn Marino break;
124cd1c6085SJohn Marino case T_ISCNTRL:
125cd1c6085SJohn Marino ctn->ctype |= _ISCNTRL;
126cd1c6085SJohn Marino break;
127cd1c6085SJohn Marino case T_ISGRAPH:
128cd1c6085SJohn Marino ctn->ctype |= (_ISGRAPH | _ISPRINT);
129cd1c6085SJohn Marino break;
130cd1c6085SJohn Marino case T_ISPRINT:
131cd1c6085SJohn Marino ctn->ctype |= _ISPRINT;
132cd1c6085SJohn Marino break;
133cd1c6085SJohn Marino case T_ISPUNCT:
134cd1c6085SJohn Marino ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT);
135cd1c6085SJohn Marino break;
136cd1c6085SJohn Marino case T_ISXDIGIT:
137dd5ff2d3SJohn Marino ctn->ctype |= (_ISXDIGIT | _ISPRINT);
138cd1c6085SJohn Marino break;
139cd1c6085SJohn Marino case T_ISBLANK:
140cd1c6085SJohn Marino ctn->ctype |= (_ISBLANK | _ISSPACE);
141cd1c6085SJohn Marino break;
142cd1c6085SJohn Marino case T_ISPHONOGRAM:
143cd1c6085SJohn Marino ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH);
144cd1c6085SJohn Marino break;
145cd1c6085SJohn Marino case T_ISIDEOGRAM:
146cd1c6085SJohn Marino ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH);
147cd1c6085SJohn Marino break;
148cd1c6085SJohn Marino case T_ISENGLISH:
149cd1c6085SJohn Marino ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH);
150cd1c6085SJohn Marino break;
151cd1c6085SJohn Marino case T_ISNUMBER:
152cd1c6085SJohn Marino ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH);
153cd1c6085SJohn Marino break;
154cd1c6085SJohn Marino case T_ISSPECIAL:
155cd1c6085SJohn Marino ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH);
156cd1c6085SJohn Marino break;
157cd1c6085SJohn Marino case T_ISALNUM:
158cd1c6085SJohn Marino /*
159cd1c6085SJohn Marino * We can't do anything with this. The character
160cd1c6085SJohn Marino * should already be specified as a digit or alpha.
161cd1c6085SJohn Marino */
162cd1c6085SJohn Marino break;
163cd1c6085SJohn Marino default:
164cd1c6085SJohn Marino errf("not a valid character class");
165cd1c6085SJohn Marino }
166cd1c6085SJohn Marino }
167cd1c6085SJohn Marino
168cd1c6085SJohn Marino static ctype_node_t *
get_ctype(wchar_t wc)169cd1c6085SJohn Marino get_ctype(wchar_t wc)
170cd1c6085SJohn Marino {
171cd1c6085SJohn Marino ctype_node_t srch;
172cd1c6085SJohn Marino ctype_node_t *ctn;
173cd1c6085SJohn Marino
174cd1c6085SJohn Marino srch.wc = wc;
175cacd7ea7SJohn Marino if ((ctn = RB_FIND(ctypes, &ctypes, &srch)) == NULL) {
176cd1c6085SJohn Marino if ((ctn = calloc(1, sizeof (*ctn))) == NULL) {
177cd1c6085SJohn Marino errf("out of memory");
178cd1c6085SJohn Marino return (NULL);
179cd1c6085SJohn Marino }
180cd1c6085SJohn Marino ctn->wc = wc;
181cd1c6085SJohn Marino
182cacd7ea7SJohn Marino RB_INSERT(ctypes, &ctypes, ctn);
183cd1c6085SJohn Marino }
184cd1c6085SJohn Marino return (ctn);
185cd1c6085SJohn Marino }
186cd1c6085SJohn Marino
187cd1c6085SJohn Marino void
add_ctype(int val)188cd1c6085SJohn Marino add_ctype(int val)
189cd1c6085SJohn Marino {
190cd1c6085SJohn Marino ctype_node_t *ctn;
191cd1c6085SJohn Marino
192cd1c6085SJohn Marino if ((ctn = get_ctype(val)) == NULL) {
193cd1c6085SJohn Marino INTERR;
194cd1c6085SJohn Marino return;
195cd1c6085SJohn Marino }
196cd1c6085SJohn Marino add_ctype_impl(ctn);
197cd1c6085SJohn Marino last_ctype = ctn->wc;
198cd1c6085SJohn Marino }
199cd1c6085SJohn Marino
200cd1c6085SJohn Marino void
add_ctype_range(wchar_t end)201cacd7ea7SJohn Marino add_ctype_range(wchar_t end)
202cd1c6085SJohn Marino {
203cd1c6085SJohn Marino ctype_node_t *ctn;
204cd1c6085SJohn Marino wchar_t cur;
205cd1c6085SJohn Marino
206cd1c6085SJohn Marino if (end < last_ctype) {
207cd1c6085SJohn Marino errf("malformed character range (%u ... %u))",
208cd1c6085SJohn Marino last_ctype, end);
209cd1c6085SJohn Marino return;
210cd1c6085SJohn Marino }
211cd1c6085SJohn Marino for (cur = last_ctype + 1; cur <= end; cur++) {
212cd1c6085SJohn Marino if ((ctn = get_ctype(cur)) == NULL) {
213cd1c6085SJohn Marino INTERR;
214cd1c6085SJohn Marino return;
215cd1c6085SJohn Marino }
216cd1c6085SJohn Marino add_ctype_impl(ctn);
217cd1c6085SJohn Marino }
218cd1c6085SJohn Marino last_ctype = end;
219cd1c6085SJohn Marino
220cd1c6085SJohn Marino }
221cd1c6085SJohn Marino
222cd1c6085SJohn Marino /*
223cd1c6085SJohn Marino * A word about widths: if the width mask is specified, then libc
224cd1c6085SJohn Marino * unconditionally honors it. Otherwise, it assumes printable
225cd1c6085SJohn Marino * characters have width 1, and non-printable characters have width
226cd1c6085SJohn Marino * -1 (except for NULL which is special with with 0). Hence, we have
227cd1c6085SJohn Marino * no need to inject defaults here -- the "default" unset value of 0
228cd1c6085SJohn Marino * indicates that libc should use its own logic in wcwidth as described.
229cd1c6085SJohn Marino */
230cd1c6085SJohn Marino void
add_width(int wc,int width)231cd1c6085SJohn Marino add_width(int wc, int width)
232cd1c6085SJohn Marino {
233cd1c6085SJohn Marino ctype_node_t *ctn;
234cd1c6085SJohn Marino
235cd1c6085SJohn Marino if ((ctn = get_ctype(wc)) == NULL) {
236cd1c6085SJohn Marino INTERR;
237cd1c6085SJohn Marino return;
238cd1c6085SJohn Marino }
239cd1c6085SJohn Marino ctn->ctype &= ~(_CTYPE_SWM);
240cd1c6085SJohn Marino switch (width) {
241cd1c6085SJohn Marino case 0:
242cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW0;
243cd1c6085SJohn Marino break;
244cd1c6085SJohn Marino case 1:
245cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW1;
246cd1c6085SJohn Marino break;
247cd1c6085SJohn Marino case 2:
248cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW2;
249cd1c6085SJohn Marino break;
250cd1c6085SJohn Marino case 3:
251cd1c6085SJohn Marino ctn->ctype |= _CTYPE_SW3;
252cd1c6085SJohn Marino break;
253cd1c6085SJohn Marino }
254cd1c6085SJohn Marino }
255cd1c6085SJohn Marino
/*
 * Record the same display width for every character from start to end,
 * inclusive.  Nothing happens when start is greater than end.
 */
void
add_width_range(int start, int end, int width)
{
	int	wc;

	for (wc = start; wc <= end; wc++) {
		add_width(wc, width);
	}
}
263cd1c6085SJohn Marino
264cd1c6085SJohn Marino void
add_caseconv(int val,int wc)265cd1c6085SJohn Marino add_caseconv(int val, int wc)
266cd1c6085SJohn Marino {
267cd1c6085SJohn Marino ctype_node_t *ctn;
268cd1c6085SJohn Marino
269cd1c6085SJohn Marino ctn = get_ctype(val);
270cd1c6085SJohn Marino if (ctn == NULL) {
271cd1c6085SJohn Marino INTERR;
272cd1c6085SJohn Marino return;
273cd1c6085SJohn Marino }
274cd1c6085SJohn Marino
275cd1c6085SJohn Marino switch (last_kw) {
276cd1c6085SJohn Marino case T_TOUPPER:
277cd1c6085SJohn Marino ctn->toupper = wc;
278cd1c6085SJohn Marino break;
279cd1c6085SJohn Marino case T_TOLOWER:
280cd1c6085SJohn Marino ctn->tolower = wc;
281cd1c6085SJohn Marino break;
282cd1c6085SJohn Marino default:
283cd1c6085SJohn Marino INTERR;
284cd1c6085SJohn Marino break;
285cd1c6085SJohn Marino }
286cd1c6085SJohn Marino }
287cd1c6085SJohn Marino
288cd1c6085SJohn Marino void
dump_ctype(void)289cd1c6085SJohn Marino dump_ctype(void)
290cd1c6085SJohn Marino {
291cd1c6085SJohn Marino FILE *f;
292cd1c6085SJohn Marino _FileRuneLocale rl;
293cd1c6085SJohn Marino ctype_node_t *ctn, *last_ct, *last_lo, *last_up;
294cd1c6085SJohn Marino _FileRuneEntry *ct = NULL;
295cd1c6085SJohn Marino _FileRuneEntry *lo = NULL;
296cd1c6085SJohn Marino _FileRuneEntry *up = NULL;
297cd1c6085SJohn Marino wchar_t wc;
298cd1c6085SJohn Marino
299cd1c6085SJohn Marino (void) memset(&rl, 0, sizeof (rl));
300cd1c6085SJohn Marino last_ct = NULL;
301cd1c6085SJohn Marino last_lo = NULL;
302cd1c6085SJohn Marino last_up = NULL;
303cd1c6085SJohn Marino
304cd1c6085SJohn Marino if ((f = open_category()) == NULL)
305cd1c6085SJohn Marino return;
306cd1c6085SJohn Marino
307cd1c6085SJohn Marino (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
308cd1c6085SJohn Marino (void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
309cd1c6085SJohn Marino
310cd1c6085SJohn Marino /*
311cd1c6085SJohn Marino * Initialize the identity map.
312cd1c6085SJohn Marino */
313cd1c6085SJohn Marino for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) {
314cd1c6085SJohn Marino rl.maplower[wc] = wc;
315cd1c6085SJohn Marino rl.mapupper[wc] = wc;
316cd1c6085SJohn Marino }
317cd1c6085SJohn Marino
318cacd7ea7SJohn Marino RB_FOREACH(ctn, ctypes, &ctypes) {
319cd1c6085SJohn Marino int conflict = 0;
320cd1c6085SJohn Marino
321cd1c6085SJohn Marino wc = ctn->wc;
322cd1c6085SJohn Marino
323cd1c6085SJohn Marino /*
324cd1c6085SJohn Marino * POSIX requires certain portable characters have
325cd1c6085SJohn Marino * certain types. Add them if they are missing.
326cd1c6085SJohn Marino */
327cd1c6085SJohn Marino if ((wc >= 1) && (wc <= 127)) {
328cd1c6085SJohn Marino if ((wc >= 'A') && (wc <= 'Z'))
329cd1c6085SJohn Marino ctn->ctype |= _ISUPPER;
330cd1c6085SJohn Marino if ((wc >= 'a') && (wc <= 'z'))
331cd1c6085SJohn Marino ctn->ctype |= _ISLOWER;
332cd1c6085SJohn Marino if ((wc >= '0') && (wc <= '9'))
333cd1c6085SJohn Marino ctn->ctype |= _ISDIGIT;
334cd1c6085SJohn Marino if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
335cd1c6085SJohn Marino ctn->ctype |= _ISSPACE;
336cd1c6085SJohn Marino if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
337cd1c6085SJohn Marino ctn->ctype |= _ISXDIGIT;
338cd1c6085SJohn Marino if (strchr(" \t", (char)wc))
339cd1c6085SJohn Marino ctn->ctype |= _ISBLANK;
34097055fc2SJohn Marino if (wc == ' ')
34197055fc2SJohn Marino ctn->ctype |= _ISPRINT;
342cd1c6085SJohn Marino
343cd1c6085SJohn Marino /*
344cd1c6085SJohn Marino * Technically these settings are only
345cd1c6085SJohn Marino * required for the C locale. However, it
346cd1c6085SJohn Marino * turns out that because of the historical
347cd1c6085SJohn Marino * version of isprint(), we need them for all
348cd1c6085SJohn Marino * locales as well. Note that these are not
349cd1c6085SJohn Marino * necessarily valid punctation characters in
350cd1c6085SJohn Marino * the current language, but ispunct() needs
351cd1c6085SJohn Marino * to return TRUE for them.
352cd1c6085SJohn Marino */
353cd1c6085SJohn Marino if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
354cd1c6085SJohn Marino (char)wc))
355cd1c6085SJohn Marino ctn->ctype |= _ISPUNCT;
356cd1c6085SJohn Marino }
357cd1c6085SJohn Marino
358cd1c6085SJohn Marino /*
359cd1c6085SJohn Marino * POSIX also requires that certain types imply
360cd1c6085SJohn Marino * others. Add any inferred types here.
361cd1c6085SJohn Marino */
362cd1c6085SJohn Marino if (ctn->ctype & (_ISUPPER |_ISLOWER))
363cd1c6085SJohn Marino ctn->ctype |= _ISALPHA;
364cd1c6085SJohn Marino if (ctn->ctype & _ISDIGIT)
365cd1c6085SJohn Marino ctn->ctype |= _ISXDIGIT;
366cd1c6085SJohn Marino if (ctn->ctype & _ISBLANK)
367cd1c6085SJohn Marino ctn->ctype |= _ISSPACE;
368cd1c6085SJohn Marino if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT))
369cd1c6085SJohn Marino ctn->ctype |= _ISGRAPH;
370cd1c6085SJohn Marino if (ctn->ctype & _ISGRAPH)
371cd1c6085SJohn Marino ctn->ctype |= _ISPRINT;
372cd1c6085SJohn Marino
373cd1c6085SJohn Marino /*
374cd1c6085SJohn Marino * Finally, POSIX requires that certain combinations
375cd1c6085SJohn Marino * are invalid. We don't flag this as a fatal error,
376cd1c6085SJohn Marino * but we will warn about.
377cd1c6085SJohn Marino */
378cd1c6085SJohn Marino if ((ctn->ctype & _ISALPHA) &&
379cd1c6085SJohn Marino (ctn->ctype & (_ISPUNCT|_ISDIGIT)))
380cd1c6085SJohn Marino conflict++;
381cd1c6085SJohn Marino if ((ctn->ctype & _ISPUNCT) &
382cd1c6085SJohn Marino (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
383cd1c6085SJohn Marino conflict++;
384cd1c6085SJohn Marino if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
385cd1c6085SJohn Marino conflict++;
386cd1c6085SJohn Marino if ((ctn->ctype & _ISCNTRL) & _ISPRINT)
387cd1c6085SJohn Marino conflict++;
388cd1c6085SJohn Marino if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
389cd1c6085SJohn Marino conflict++;
390cd1c6085SJohn Marino
391cd1c6085SJohn Marino if (conflict) {
392cd1c6085SJohn Marino warn("conflicting classes for character 0x%x (%x)",
393cd1c6085SJohn Marino wc, ctn->ctype);
394cd1c6085SJohn Marino }
395cd1c6085SJohn Marino /*
396cd1c6085SJohn Marino * Handle the lower 256 characters using the simple
397cd1c6085SJohn Marino * optimization. Note that if we have not defined the
398cd1c6085SJohn Marino * upper/lower case, then we identity map it.
399cd1c6085SJohn Marino */
400cd1c6085SJohn Marino if ((unsigned)wc < _CACHED_RUNES) {
401cd1c6085SJohn Marino rl.runetype[wc] = ctn->ctype;
402cd1c6085SJohn Marino if (ctn->tolower)
403cd1c6085SJohn Marino rl.maplower[wc] = ctn->tolower;
404cd1c6085SJohn Marino if (ctn->toupper)
405cd1c6085SJohn Marino rl.mapupper[wc] = ctn->toupper;
406cd1c6085SJohn Marino continue;
407cd1c6085SJohn Marino }
408cd1c6085SJohn Marino
409*07ed7d32SJohn Marino if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype) &&
410*07ed7d32SJohn Marino (last_ct->wc + 1 == wc)) {
411cd1c6085SJohn Marino ct[rl.runetype_ext_nranges-1].max = wc;
412cd1c6085SJohn Marino } else {
413cd1c6085SJohn Marino rl.runetype_ext_nranges++;
414cd1c6085SJohn Marino ct = realloc(ct,
415cd1c6085SJohn Marino sizeof (*ct) * rl.runetype_ext_nranges);
416cd1c6085SJohn Marino ct[rl.runetype_ext_nranges - 1].min = wc;
417cd1c6085SJohn Marino ct[rl.runetype_ext_nranges - 1].max = wc;
418cd1c6085SJohn Marino ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
419cd1c6085SJohn Marino }
420*07ed7d32SJohn Marino last_ct = ctn;
421cd1c6085SJohn Marino if (ctn->tolower == 0) {
422cd1c6085SJohn Marino last_lo = NULL;
423cd1c6085SJohn Marino } else if ((last_lo != NULL) &&
424cd1c6085SJohn Marino (last_lo->tolower + 1 == ctn->tolower)) {
425cd1c6085SJohn Marino lo[rl.maplower_ext_nranges-1].max = wc;
426cd1c6085SJohn Marino last_lo = ctn;
427cd1c6085SJohn Marino } else {
428cd1c6085SJohn Marino rl.maplower_ext_nranges++;
429cd1c6085SJohn Marino lo = realloc(lo,
430cd1c6085SJohn Marino sizeof (*lo) * rl.maplower_ext_nranges);
431cd1c6085SJohn Marino lo[rl.maplower_ext_nranges - 1].min = wc;
432cd1c6085SJohn Marino lo[rl.maplower_ext_nranges - 1].max = wc;
433cd1c6085SJohn Marino lo[rl.maplower_ext_nranges - 1].map = ctn->tolower;
434cd1c6085SJohn Marino last_lo = ctn;
435cd1c6085SJohn Marino }
436cd1c6085SJohn Marino
437cd1c6085SJohn Marino if (ctn->toupper == 0) {
438cd1c6085SJohn Marino last_up = NULL;
439cd1c6085SJohn Marino } else if ((last_up != NULL) &&
440cd1c6085SJohn Marino (last_up->toupper + 1 == ctn->toupper)) {
441cd1c6085SJohn Marino up[rl.mapupper_ext_nranges-1].max = wc;
442cd1c6085SJohn Marino last_up = ctn;
443cd1c6085SJohn Marino } else {
444cd1c6085SJohn Marino rl.mapupper_ext_nranges++;
445cd1c6085SJohn Marino up = realloc(up,
446cd1c6085SJohn Marino sizeof (*up) * rl.mapupper_ext_nranges);
447cd1c6085SJohn Marino up[rl.mapupper_ext_nranges - 1].min = wc;
448cd1c6085SJohn Marino up[rl.mapupper_ext_nranges - 1].max = wc;
449cd1c6085SJohn Marino up[rl.mapupper_ext_nranges - 1].map = ctn->toupper;
450cd1c6085SJohn Marino last_up = ctn;
451cd1c6085SJohn Marino }
452cd1c6085SJohn Marino }
453cd1c6085SJohn Marino
454cd1c6085SJohn Marino if ((wr_category(&rl, sizeof (rl), f) < 0) ||
455cd1c6085SJohn Marino (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) ||
456cd1c6085SJohn Marino (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) ||
457cd1c6085SJohn Marino (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) {
458cd1c6085SJohn Marino return;
459cd1c6085SJohn Marino }
460cd1c6085SJohn Marino
461cd1c6085SJohn Marino close_category(f);
462cd1c6085SJohn Marino }
463