xref: /dragonfly/usr.bin/localedef/ctype.c (revision cd1c6085)
1*cd1c6085SJohn Marino /*
2*cd1c6085SJohn Marino  * This file and its contents are supplied under the terms of the
3*cd1c6085SJohn Marino  * Common Development and Distribution License ("CDDL"), version 1.0.
4*cd1c6085SJohn Marino  * You may only use this file in accordance with the terms of version
5*cd1c6085SJohn Marino  * 1.0 of the CDDL.
6*cd1c6085SJohn Marino  *
7*cd1c6085SJohn Marino  * A full copy of the text of the CDDL should have accompanied this
8*cd1c6085SJohn Marino  * source.  A copy of the CDDL is also available via the Internet at
9*cd1c6085SJohn Marino  * http://www.illumos.org/license/CDDL.
10*cd1c6085SJohn Marino  */
11*cd1c6085SJohn Marino 
12*cd1c6085SJohn Marino /*
13*cd1c6085SJohn Marino  * Copyright 2010,2011 Nexenta Systems, Inc.  All rights reserved.
14*cd1c6085SJohn Marino  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
15*cd1c6085SJohn Marino  * Copyright 2013 DEY Storage Systems, Inc.
16*cd1c6085SJohn Marino  * Copyright 2015 John Marino <draco@marino.st>
17*cd1c6085SJohn Marino  */
18*cd1c6085SJohn Marino 
19*cd1c6085SJohn Marino /*
20*cd1c6085SJohn Marino  * LC_CTYPE database generation routines for localedef.
21*cd1c6085SJohn Marino  */
22*cd1c6085SJohn Marino 
23*cd1c6085SJohn Marino #include <stdio.h>
24*cd1c6085SJohn Marino #include <stdlib.h>
25*cd1c6085SJohn Marino #include <stddef.h>
26*cd1c6085SJohn Marino #include <string.h>
27*cd1c6085SJohn Marino #include <sys/types.h>
28*cd1c6085SJohn Marino #include <wchar.h>
29*cd1c6085SJohn Marino #include <ctype.h>
30*cd1c6085SJohn Marino #include <wctype.h>
31*cd1c6085SJohn Marino #include <unistd.h>
32*cd1c6085SJohn Marino #include "localedef.h"
33*cd1c6085SJohn Marino #include "parser.h"
34*cd1c6085SJohn Marino #include "runefile.h"
35*cd1c6085SJohn Marino #include "avl.h"
36*cd1c6085SJohn Marino 
37*cd1c6085SJohn Marino 
/*
 * Local names for the _CTYPE_* rune-type bits (expected to come from the
 * system <ctype.h>), one per standard character class.
 */
#define	_ISUPPER	_CTYPE_U
#define	_ISLOWER	_CTYPE_L
#define	_ISDIGIT	_CTYPE_D
#define	_ISXDIGIT	_CTYPE_X
#define	_ISSPACE	_CTYPE_S
#define	_ISBLANK	_CTYPE_B
#define	_ISALPHA	_CTYPE_A
#define	_ISPUNCT	_CTYPE_P
#define	_ISGRAPH	_CTYPE_G
#define	_ISPRINT	_CTYPE_R
#define	_ISCNTRL	_CTYPE_C

/*
 * Extension classes.  _E3 and _E4 expand to 0, so OR-ing them into a
 * type mask contributes no bit of their own.
 */
#define	_E1		_CTYPE_Q
#define	_E2		_CTYPE_I
#define	_E3		0
#define	_E4		0
#define	_E5		_CTYPE_T
54*cd1c6085SJohn Marino 
55*cd1c6085SJohn Marino static avl_tree_t	ctypes;
56*cd1c6085SJohn Marino 
57*cd1c6085SJohn Marino static wchar_t		last_ctype;
58*cd1c6085SJohn Marino 
59*cd1c6085SJohn Marino typedef struct ctype_node {
60*cd1c6085SJohn Marino 	wchar_t wc;
61*cd1c6085SJohn Marino 	int32_t	ctype;
62*cd1c6085SJohn Marino 	int32_t	toupper;
63*cd1c6085SJohn Marino 	int32_t	tolower;
64*cd1c6085SJohn Marino 	avl_node_t avl;
65*cd1c6085SJohn Marino } ctype_node_t;
66*cd1c6085SJohn Marino 
67*cd1c6085SJohn Marino typedef struct width_node {
68*cd1c6085SJohn Marino 	wchar_t start;
69*cd1c6085SJohn Marino 	wchar_t end;
70*cd1c6085SJohn Marino 	int8_t width;
71*cd1c6085SJohn Marino 	avl_node_t avl;
72*cd1c6085SJohn Marino } width_node_t;
73*cd1c6085SJohn Marino 
74*cd1c6085SJohn Marino static int
75*cd1c6085SJohn Marino ctype_compare(const void *n1, const void *n2)
76*cd1c6085SJohn Marino {
77*cd1c6085SJohn Marino 	const ctype_node_t *c1 = n1;
78*cd1c6085SJohn Marino 	const ctype_node_t *c2 = n2;
79*cd1c6085SJohn Marino 
80*cd1c6085SJohn Marino 	return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0);
81*cd1c6085SJohn Marino }
82*cd1c6085SJohn Marino 
83*cd1c6085SJohn Marino void
84*cd1c6085SJohn Marino init_ctype(void)
85*cd1c6085SJohn Marino {
86*cd1c6085SJohn Marino 	avl_create(&ctypes, ctype_compare, sizeof (ctype_node_t),
87*cd1c6085SJohn Marino 	    offsetof(ctype_node_t, avl));
88*cd1c6085SJohn Marino }
89*cd1c6085SJohn Marino 
90*cd1c6085SJohn Marino 
91*cd1c6085SJohn Marino static void
92*cd1c6085SJohn Marino add_ctype_impl(ctype_node_t *ctn)
93*cd1c6085SJohn Marino {
94*cd1c6085SJohn Marino 	switch (last_kw) {
95*cd1c6085SJohn Marino 	case T_ISUPPER:
96*cd1c6085SJohn Marino 		ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT);
97*cd1c6085SJohn Marino 		break;
98*cd1c6085SJohn Marino 	case T_ISLOWER:
99*cd1c6085SJohn Marino 		ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT);
100*cd1c6085SJohn Marino 		break;
101*cd1c6085SJohn Marino 	case T_ISALPHA:
102*cd1c6085SJohn Marino 		ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT);
103*cd1c6085SJohn Marino 		break;
104*cd1c6085SJohn Marino 	case T_ISDIGIT:
105*cd1c6085SJohn Marino 		ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT);
106*cd1c6085SJohn Marino 		break;
107*cd1c6085SJohn Marino 	case T_ISSPACE:
108*cd1c6085SJohn Marino 		ctn->ctype |= _ISSPACE;
109*cd1c6085SJohn Marino 		break;
110*cd1c6085SJohn Marino 	case T_ISCNTRL:
111*cd1c6085SJohn Marino 		ctn->ctype |= _ISCNTRL;
112*cd1c6085SJohn Marino 		break;
113*cd1c6085SJohn Marino 	case T_ISGRAPH:
114*cd1c6085SJohn Marino 		ctn->ctype |= (_ISGRAPH | _ISPRINT);
115*cd1c6085SJohn Marino 		break;
116*cd1c6085SJohn Marino 	case T_ISPRINT:
117*cd1c6085SJohn Marino 		ctn->ctype |= _ISPRINT;
118*cd1c6085SJohn Marino 		break;
119*cd1c6085SJohn Marino 	case T_ISPUNCT:
120*cd1c6085SJohn Marino 		ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT);
121*cd1c6085SJohn Marino 		break;
122*cd1c6085SJohn Marino 	case T_ISXDIGIT:
123*cd1c6085SJohn Marino 		ctn->ctype |= (_ISXDIGIT | _ISPRINT);
124*cd1c6085SJohn Marino 		break;
125*cd1c6085SJohn Marino 	case T_ISBLANK:
126*cd1c6085SJohn Marino 		ctn->ctype |= (_ISBLANK | _ISSPACE);
127*cd1c6085SJohn Marino 		break;
128*cd1c6085SJohn Marino 	case T_ISPHONOGRAM:
129*cd1c6085SJohn Marino 		ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH);
130*cd1c6085SJohn Marino 		break;
131*cd1c6085SJohn Marino 	case T_ISIDEOGRAM:
132*cd1c6085SJohn Marino 		ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH);
133*cd1c6085SJohn Marino 		break;
134*cd1c6085SJohn Marino 	case T_ISENGLISH:
135*cd1c6085SJohn Marino 		ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH);
136*cd1c6085SJohn Marino 		break;
137*cd1c6085SJohn Marino 	case T_ISNUMBER:
138*cd1c6085SJohn Marino 		ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH);
139*cd1c6085SJohn Marino 		break;
140*cd1c6085SJohn Marino 	case T_ISSPECIAL:
141*cd1c6085SJohn Marino 		ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH);
142*cd1c6085SJohn Marino 		break;
143*cd1c6085SJohn Marino 	case T_ISALNUM:
144*cd1c6085SJohn Marino 		/*
145*cd1c6085SJohn Marino 		 * We can't do anything with this.  The character
146*cd1c6085SJohn Marino 		 * should already be specified as a digit or alpha.
147*cd1c6085SJohn Marino 		 */
148*cd1c6085SJohn Marino 		break;
149*cd1c6085SJohn Marino 	default:
150*cd1c6085SJohn Marino 		errf("not a valid character class");
151*cd1c6085SJohn Marino 	}
152*cd1c6085SJohn Marino }
153*cd1c6085SJohn Marino 
154*cd1c6085SJohn Marino static ctype_node_t *
155*cd1c6085SJohn Marino get_ctype(wchar_t wc)
156*cd1c6085SJohn Marino {
157*cd1c6085SJohn Marino 	ctype_node_t	srch;
158*cd1c6085SJohn Marino 	ctype_node_t	*ctn;
159*cd1c6085SJohn Marino 	avl_index_t	where;
160*cd1c6085SJohn Marino 
161*cd1c6085SJohn Marino 	srch.wc = wc;
162*cd1c6085SJohn Marino 	if ((ctn = avl_find(&ctypes, &srch, &where)) == NULL) {
163*cd1c6085SJohn Marino 		if ((ctn = calloc(1, sizeof (*ctn))) == NULL) {
164*cd1c6085SJohn Marino 			errf("out of memory");
165*cd1c6085SJohn Marino 			return (NULL);
166*cd1c6085SJohn Marino 		}
167*cd1c6085SJohn Marino 		ctn->wc = wc;
168*cd1c6085SJohn Marino 
169*cd1c6085SJohn Marino 		avl_insert(&ctypes, ctn, where);
170*cd1c6085SJohn Marino 	}
171*cd1c6085SJohn Marino 	return (ctn);
172*cd1c6085SJohn Marino }
173*cd1c6085SJohn Marino 
174*cd1c6085SJohn Marino void
175*cd1c6085SJohn Marino add_ctype(int val)
176*cd1c6085SJohn Marino {
177*cd1c6085SJohn Marino 	ctype_node_t	*ctn;
178*cd1c6085SJohn Marino 
179*cd1c6085SJohn Marino 	if ((ctn = get_ctype(val)) == NULL) {
180*cd1c6085SJohn Marino 		INTERR;
181*cd1c6085SJohn Marino 		return;
182*cd1c6085SJohn Marino 	}
183*cd1c6085SJohn Marino 	add_ctype_impl(ctn);
184*cd1c6085SJohn Marino 	last_ctype = ctn->wc;
185*cd1c6085SJohn Marino }
186*cd1c6085SJohn Marino 
187*cd1c6085SJohn Marino void
188*cd1c6085SJohn Marino add_ctype_range(int end)
189*cd1c6085SJohn Marino {
190*cd1c6085SJohn Marino 	ctype_node_t	*ctn;
191*cd1c6085SJohn Marino 	wchar_t		cur;
192*cd1c6085SJohn Marino 
193*cd1c6085SJohn Marino 	if (end < last_ctype) {
194*cd1c6085SJohn Marino 		errf("malformed character range (%u ... %u))",
195*cd1c6085SJohn Marino 		    last_ctype, end);
196*cd1c6085SJohn Marino 		return;
197*cd1c6085SJohn Marino 	}
198*cd1c6085SJohn Marino 	for (cur = last_ctype + 1; cur <= end; cur++) {
199*cd1c6085SJohn Marino 		if ((ctn = get_ctype(cur)) == NULL) {
200*cd1c6085SJohn Marino 			INTERR;
201*cd1c6085SJohn Marino 			return;
202*cd1c6085SJohn Marino 		}
203*cd1c6085SJohn Marino 		add_ctype_impl(ctn);
204*cd1c6085SJohn Marino 	}
205*cd1c6085SJohn Marino 	last_ctype = end;
206*cd1c6085SJohn Marino 
207*cd1c6085SJohn Marino }
208*cd1c6085SJohn Marino 
209*cd1c6085SJohn Marino /*
210*cd1c6085SJohn Marino  * A word about widths: if the width mask is specified, then libc
211*cd1c6085SJohn Marino  * unconditionally honors it.  Otherwise, it assumes printable
212*cd1c6085SJohn Marino  * characters have width 1, and non-printable characters have width
213*cd1c6085SJohn Marino  * -1 (except for NULL which is special with with 0).  Hence, we have
214*cd1c6085SJohn Marino  * no need to inject defaults here -- the "default" unset value of 0
215*cd1c6085SJohn Marino  * indicates that libc should use its own logic in wcwidth as described.
216*cd1c6085SJohn Marino  */
217*cd1c6085SJohn Marino void
218*cd1c6085SJohn Marino add_width(int wc, int width)
219*cd1c6085SJohn Marino {
220*cd1c6085SJohn Marino 	ctype_node_t	*ctn;
221*cd1c6085SJohn Marino 
222*cd1c6085SJohn Marino 	if ((ctn = get_ctype(wc)) == NULL) {
223*cd1c6085SJohn Marino 		INTERR;
224*cd1c6085SJohn Marino 		return;
225*cd1c6085SJohn Marino 	}
226*cd1c6085SJohn Marino 	ctn->ctype &= ~(_CTYPE_SWM);
227*cd1c6085SJohn Marino 	switch (width) {
228*cd1c6085SJohn Marino 	case 0:
229*cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW0;
230*cd1c6085SJohn Marino 		break;
231*cd1c6085SJohn Marino 	case 1:
232*cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW1;
233*cd1c6085SJohn Marino 		break;
234*cd1c6085SJohn Marino 	case 2:
235*cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW2;
236*cd1c6085SJohn Marino 		break;
237*cd1c6085SJohn Marino 	case 3:
238*cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW3;
239*cd1c6085SJohn Marino 		break;
240*cd1c6085SJohn Marino 	}
241*cd1c6085SJohn Marino }
242*cd1c6085SJohn Marino 
/*
 * Record the same display width for every character from start through
 * end inclusive.  Nothing happens when start is greater than end.
 */
void
add_width_range(int start, int end, int width)
{
	int	wc = start;

	while (wc <= end) {
		add_width(wc, width);
		wc++;
	}
}
250*cd1c6085SJohn Marino 
251*cd1c6085SJohn Marino void
252*cd1c6085SJohn Marino add_caseconv(int val, int wc)
253*cd1c6085SJohn Marino {
254*cd1c6085SJohn Marino 	ctype_node_t	*ctn;
255*cd1c6085SJohn Marino 
256*cd1c6085SJohn Marino 	ctn = get_ctype(val);
257*cd1c6085SJohn Marino 	if (ctn == NULL) {
258*cd1c6085SJohn Marino 		INTERR;
259*cd1c6085SJohn Marino 		return;
260*cd1c6085SJohn Marino 	}
261*cd1c6085SJohn Marino 
262*cd1c6085SJohn Marino 	switch (last_kw) {
263*cd1c6085SJohn Marino 	case T_TOUPPER:
264*cd1c6085SJohn Marino 		ctn->toupper = wc;
265*cd1c6085SJohn Marino 		break;
266*cd1c6085SJohn Marino 	case T_TOLOWER:
267*cd1c6085SJohn Marino 		ctn->tolower = wc;
268*cd1c6085SJohn Marino 		break;
269*cd1c6085SJohn Marino 	default:
270*cd1c6085SJohn Marino 		INTERR;
271*cd1c6085SJohn Marino 		break;
272*cd1c6085SJohn Marino 	}
273*cd1c6085SJohn Marino }
274*cd1c6085SJohn Marino 
275*cd1c6085SJohn Marino void
276*cd1c6085SJohn Marino dump_ctype(void)
277*cd1c6085SJohn Marino {
278*cd1c6085SJohn Marino 	FILE		*f;
279*cd1c6085SJohn Marino 	_FileRuneLocale	rl;
280*cd1c6085SJohn Marino 	ctype_node_t	*ctn, *last_ct, *last_lo, *last_up;
281*cd1c6085SJohn Marino 	_FileRuneEntry	*ct = NULL;
282*cd1c6085SJohn Marino 	_FileRuneEntry	*lo = NULL;
283*cd1c6085SJohn Marino 	_FileRuneEntry	*up = NULL;
284*cd1c6085SJohn Marino 	wchar_t		wc;
285*cd1c6085SJohn Marino 
286*cd1c6085SJohn Marino 	(void) memset(&rl, 0, sizeof (rl));
287*cd1c6085SJohn Marino 	last_ct = NULL;
288*cd1c6085SJohn Marino 	last_lo = NULL;
289*cd1c6085SJohn Marino 	last_up = NULL;
290*cd1c6085SJohn Marino 
291*cd1c6085SJohn Marino 	if ((f = open_category()) == NULL)
292*cd1c6085SJohn Marino 		return;
293*cd1c6085SJohn Marino 
294*cd1c6085SJohn Marino 	(void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
295*cd1c6085SJohn Marino 	(void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
296*cd1c6085SJohn Marino 
297*cd1c6085SJohn Marino 	/*
298*cd1c6085SJohn Marino 	 * Initialize the identity map.
299*cd1c6085SJohn Marino 	 */
300*cd1c6085SJohn Marino 	for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) {
301*cd1c6085SJohn Marino 		rl.maplower[wc] = wc;
302*cd1c6085SJohn Marino 		rl.mapupper[wc] = wc;
303*cd1c6085SJohn Marino 	}
304*cd1c6085SJohn Marino 
305*cd1c6085SJohn Marino 	for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) {
306*cd1c6085SJohn Marino 		int conflict = 0;
307*cd1c6085SJohn Marino 
308*cd1c6085SJohn Marino 
309*cd1c6085SJohn Marino 		wc = ctn->wc;
310*cd1c6085SJohn Marino 
311*cd1c6085SJohn Marino 		/*
312*cd1c6085SJohn Marino 		 * POSIX requires certain portable characters have
313*cd1c6085SJohn Marino 		 * certain types.  Add them if they are missing.
314*cd1c6085SJohn Marino 		 */
315*cd1c6085SJohn Marino 		if ((wc >= 1) && (wc <= 127)) {
316*cd1c6085SJohn Marino 			if ((wc >= 'A') && (wc <= 'Z'))
317*cd1c6085SJohn Marino 				ctn->ctype |= _ISUPPER;
318*cd1c6085SJohn Marino 			if ((wc >= 'a') && (wc <= 'z'))
319*cd1c6085SJohn Marino 				ctn->ctype |= _ISLOWER;
320*cd1c6085SJohn Marino 			if ((wc >= '0') && (wc <= '9'))
321*cd1c6085SJohn Marino 				ctn->ctype |= _ISDIGIT;
322*cd1c6085SJohn Marino 			if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
323*cd1c6085SJohn Marino 				ctn->ctype |= _ISSPACE;
324*cd1c6085SJohn Marino 			if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
325*cd1c6085SJohn Marino 				ctn->ctype |= _ISXDIGIT;
326*cd1c6085SJohn Marino 			if (strchr(" \t", (char)wc))
327*cd1c6085SJohn Marino 				ctn->ctype |= _ISBLANK;
328*cd1c6085SJohn Marino 
329*cd1c6085SJohn Marino 			/*
330*cd1c6085SJohn Marino 			 * Technically these settings are only
331*cd1c6085SJohn Marino 			 * required for the C locale.  However, it
332*cd1c6085SJohn Marino 			 * turns out that because of the historical
333*cd1c6085SJohn Marino 			 * version of isprint(), we need them for all
334*cd1c6085SJohn Marino 			 * locales as well.  Note that these are not
335*cd1c6085SJohn Marino 			 * necessarily valid punctation characters in
336*cd1c6085SJohn Marino 			 * the current language, but ispunct() needs
337*cd1c6085SJohn Marino 			 * to return TRUE for them.
338*cd1c6085SJohn Marino 			 */
339*cd1c6085SJohn Marino 			if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
340*cd1c6085SJohn Marino 			    (char)wc))
341*cd1c6085SJohn Marino 				ctn->ctype |= _ISPUNCT;
342*cd1c6085SJohn Marino 		}
343*cd1c6085SJohn Marino 
344*cd1c6085SJohn Marino 		/*
345*cd1c6085SJohn Marino 		 * POSIX also requires that certain types imply
346*cd1c6085SJohn Marino 		 * others.  Add any inferred types here.
347*cd1c6085SJohn Marino 		 */
348*cd1c6085SJohn Marino 		if (ctn->ctype & (_ISUPPER |_ISLOWER))
349*cd1c6085SJohn Marino 			ctn->ctype |= _ISALPHA;
350*cd1c6085SJohn Marino 		if (ctn->ctype & _ISDIGIT)
351*cd1c6085SJohn Marino 			ctn->ctype |= _ISXDIGIT;
352*cd1c6085SJohn Marino 		if (ctn->ctype & _ISBLANK)
353*cd1c6085SJohn Marino 			ctn->ctype |= _ISSPACE;
354*cd1c6085SJohn Marino 		if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT))
355*cd1c6085SJohn Marino 			ctn->ctype |= _ISGRAPH;
356*cd1c6085SJohn Marino 		if (ctn->ctype & _ISGRAPH)
357*cd1c6085SJohn Marino 			ctn->ctype |= _ISPRINT;
358*cd1c6085SJohn Marino 
359*cd1c6085SJohn Marino 		/*
360*cd1c6085SJohn Marino 		 * Finally, POSIX requires that certain combinations
361*cd1c6085SJohn Marino 		 * are invalid.  We don't flag this as a fatal error,
362*cd1c6085SJohn Marino 		 * but we will warn about.
363*cd1c6085SJohn Marino 		 */
364*cd1c6085SJohn Marino 		if ((ctn->ctype & _ISALPHA) &&
365*cd1c6085SJohn Marino 		    (ctn->ctype & (_ISPUNCT|_ISDIGIT)))
366*cd1c6085SJohn Marino 			conflict++;
367*cd1c6085SJohn Marino 		if ((ctn->ctype & _ISPUNCT) &
368*cd1c6085SJohn Marino 		    (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
369*cd1c6085SJohn Marino 			conflict++;
370*cd1c6085SJohn Marino 		if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
371*cd1c6085SJohn Marino 			conflict++;
372*cd1c6085SJohn Marino 		if ((ctn->ctype & _ISCNTRL) & _ISPRINT)
373*cd1c6085SJohn Marino 			conflict++;
374*cd1c6085SJohn Marino 		if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
375*cd1c6085SJohn Marino 			conflict++;
376*cd1c6085SJohn Marino 
377*cd1c6085SJohn Marino 		if (conflict) {
378*cd1c6085SJohn Marino 			warn("conflicting classes for character 0x%x (%x)",
379*cd1c6085SJohn Marino 			    wc, ctn->ctype);
380*cd1c6085SJohn Marino 		}
381*cd1c6085SJohn Marino 		/*
382*cd1c6085SJohn Marino 		 * Handle the lower 256 characters using the simple
383*cd1c6085SJohn Marino 		 * optimization.  Note that if we have not defined the
384*cd1c6085SJohn Marino 		 * upper/lower case, then we identity map it.
385*cd1c6085SJohn Marino 		 */
386*cd1c6085SJohn Marino 		if ((unsigned)wc < _CACHED_RUNES) {
387*cd1c6085SJohn Marino 			rl.runetype[wc] = ctn->ctype;
388*cd1c6085SJohn Marino 			if (ctn->tolower)
389*cd1c6085SJohn Marino 				rl.maplower[wc] = ctn->tolower;
390*cd1c6085SJohn Marino 			if (ctn->toupper)
391*cd1c6085SJohn Marino 				rl.mapupper[wc] = ctn->toupper;
392*cd1c6085SJohn Marino 			continue;
393*cd1c6085SJohn Marino 		}
394*cd1c6085SJohn Marino 
395*cd1c6085SJohn Marino 		if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) {
396*cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges-1].max = wc;
397*cd1c6085SJohn Marino 			last_ct = ctn;
398*cd1c6085SJohn Marino 		} else {
399*cd1c6085SJohn Marino 			rl.runetype_ext_nranges++;
400*cd1c6085SJohn Marino 			ct = realloc(ct,
401*cd1c6085SJohn Marino 			    sizeof (*ct) * rl.runetype_ext_nranges);
402*cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges - 1].min = wc;
403*cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges - 1].max = wc;
404*cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
405*cd1c6085SJohn Marino 			last_ct = ctn;
406*cd1c6085SJohn Marino 		}
407*cd1c6085SJohn Marino 		if (ctn->tolower == 0) {
408*cd1c6085SJohn Marino 			last_lo = NULL;
409*cd1c6085SJohn Marino 		} else if ((last_lo != NULL) &&
410*cd1c6085SJohn Marino 		    (last_lo->tolower + 1 == ctn->tolower)) {
411*cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges-1].max = wc;
412*cd1c6085SJohn Marino 			last_lo = ctn;
413*cd1c6085SJohn Marino 		} else {
414*cd1c6085SJohn Marino 			rl.maplower_ext_nranges++;
415*cd1c6085SJohn Marino 			lo = realloc(lo,
416*cd1c6085SJohn Marino 			    sizeof (*lo) * rl.maplower_ext_nranges);
417*cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges - 1].min = wc;
418*cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges - 1].max = wc;
419*cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges - 1].map = ctn->tolower;
420*cd1c6085SJohn Marino 			last_lo = ctn;
421*cd1c6085SJohn Marino 		}
422*cd1c6085SJohn Marino 
423*cd1c6085SJohn Marino 		if (ctn->toupper == 0) {
424*cd1c6085SJohn Marino 			last_up = NULL;
425*cd1c6085SJohn Marino 		} else if ((last_up != NULL) &&
426*cd1c6085SJohn Marino 		    (last_up->toupper + 1 == ctn->toupper)) {
427*cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges-1].max = wc;
428*cd1c6085SJohn Marino 			last_up = ctn;
429*cd1c6085SJohn Marino 		} else {
430*cd1c6085SJohn Marino 			rl.mapupper_ext_nranges++;
431*cd1c6085SJohn Marino 			up = realloc(up,
432*cd1c6085SJohn Marino 			    sizeof (*up) * rl.mapupper_ext_nranges);
433*cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges - 1].min = wc;
434*cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges - 1].max = wc;
435*cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges - 1].map = ctn->toupper;
436*cd1c6085SJohn Marino 			last_up = ctn;
437*cd1c6085SJohn Marino 		}
438*cd1c6085SJohn Marino 	}
439*cd1c6085SJohn Marino 
440*cd1c6085SJohn Marino 	if ((wr_category(&rl, sizeof (rl), f) < 0) ||
441*cd1c6085SJohn Marino 	    (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) ||
442*cd1c6085SJohn Marino 	    (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) ||
443*cd1c6085SJohn Marino 	    (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) {
444*cd1c6085SJohn Marino 		return;
445*cd1c6085SJohn Marino 	}
446*cd1c6085SJohn Marino 
447*cd1c6085SJohn Marino 	close_category(f);
448*cd1c6085SJohn Marino }
449