xref: /dragonfly/usr.bin/localedef/ctype.c (revision dd5ff2d3)
1cd1c6085SJohn Marino /*
28aa2b98bSJohn Marino  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
38aa2b98bSJohn Marino  * Copyright 2012 Garrett D'Amore <garrett@damore.org>  All rights reserved.
4cd1c6085SJohn Marino  * Copyright 2015 John Marino <draco@marino.st>
58aa2b98bSJohn Marino  *
68aa2b98bSJohn Marino  * This source code is derived from the illumos localedef command, and
78aa2b98bSJohn Marino  * provided under BSD-style license terms by Nexenta Systems, Inc.
88aa2b98bSJohn Marino  *
98aa2b98bSJohn Marino  * Redistribution and use in source and binary forms, with or without
108aa2b98bSJohn Marino  * modification, are permitted provided that the following conditions
118aa2b98bSJohn Marino  * are met:
128aa2b98bSJohn Marino  *
138aa2b98bSJohn Marino  * 1. Redistributions of source code must retain the above copyright
148aa2b98bSJohn Marino  *    notice, this list of conditions and the following disclaimer.
158aa2b98bSJohn Marino  * 2. Redistributions in binary form must reproduce the above copyright
168aa2b98bSJohn Marino  *    notice, this list of conditions and the following disclaimer in the
178aa2b98bSJohn Marino  *    documentation and/or other materials provided with the distribution.
188aa2b98bSJohn Marino  *
198aa2b98bSJohn Marino  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
208aa2b98bSJohn Marino  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
218aa2b98bSJohn Marino  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
228aa2b98bSJohn Marino  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
238aa2b98bSJohn Marino  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
248aa2b98bSJohn Marino  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
258aa2b98bSJohn Marino  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
268aa2b98bSJohn Marino  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
278aa2b98bSJohn Marino  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
288aa2b98bSJohn Marino  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
298aa2b98bSJohn Marino  * POSSIBILITY OF SUCH DAMAGE.
30cd1c6085SJohn Marino  */
31cd1c6085SJohn Marino 
32cd1c6085SJohn Marino /*
33cd1c6085SJohn Marino  * LC_CTYPE database generation routines for localedef.
34cd1c6085SJohn Marino  */
35cd1c6085SJohn Marino 
36cd1c6085SJohn Marino #include <stdio.h>
37cd1c6085SJohn Marino #include <stdlib.h>
38cd1c6085SJohn Marino #include <stddef.h>
39cd1c6085SJohn Marino #include <string.h>
40cd1c6085SJohn Marino #include <sys/types.h>
41cd1c6085SJohn Marino #include <wchar.h>
42cd1c6085SJohn Marino #include <ctype.h>
43cd1c6085SJohn Marino #include <wctype.h>
44cd1c6085SJohn Marino #include <unistd.h>
45cd1c6085SJohn Marino #include "localedef.h"
46cd1c6085SJohn Marino #include "parser.h"
47cd1c6085SJohn Marino #include "runefile.h"
48cd1c6085SJohn Marino #include "avl.h"
49cd1c6085SJohn Marino 
/*
 * Bootstrap fallback: _CTYPE_N was not available before 1 Sep 2015, so
 * supply the value here when the host headers do not define it.
 */
#if !defined(_CTYPE_N)
#define	_CTYPE_N	0x00400000L
#endif
54cd1c6085SJohn Marino 
/*
 * Short local names for the _CTYPE_* rune-type bits.  The _ISxxx aliases
 * correspond to the character-class keywords handled in add_ctype_impl();
 * _E1 .. _E5 are used there for the phonogram, ideogram, english, number
 * and special classes respectively.
 */
#define	_ISUPPER	_CTYPE_U
#define	_ISLOWER	_CTYPE_L
#define	_ISDIGIT	_CTYPE_D
#define	_ISXDIGIT	_CTYPE_X
#define	_ISSPACE	_CTYPE_S
#define	_ISBLANK	_CTYPE_B
#define	_ISALPHA	_CTYPE_A
#define	_ISPUNCT	_CTYPE_P
#define	_ISGRAPH	_CTYPE_G
#define	_ISPRINT	_CTYPE_R
#define	_ISCNTRL	_CTYPE_C
#define	_E1		_CTYPE_Q
#define	_E2		_CTYPE_I
#define	_E3		0	/* this class has no bit of its own */
#define	_E4		_CTYPE_N
#define	_E5		_CTYPE_T
71cd1c6085SJohn Marino 
72cd1c6085SJohn Marino static avl_tree_t	ctypes;
73cd1c6085SJohn Marino 
74cd1c6085SJohn Marino static wchar_t		last_ctype;
75cd1c6085SJohn Marino 
76cd1c6085SJohn Marino typedef struct ctype_node {
77cd1c6085SJohn Marino 	wchar_t wc;
78cd1c6085SJohn Marino 	int32_t	ctype;
79cd1c6085SJohn Marino 	int32_t	toupper;
80cd1c6085SJohn Marino 	int32_t	tolower;
81cd1c6085SJohn Marino 	avl_node_t avl;
82cd1c6085SJohn Marino } ctype_node_t;
83cd1c6085SJohn Marino 
84cd1c6085SJohn Marino typedef struct width_node {
85cd1c6085SJohn Marino 	wchar_t start;
86cd1c6085SJohn Marino 	wchar_t end;
87cd1c6085SJohn Marino 	int8_t width;
88cd1c6085SJohn Marino 	avl_node_t avl;
89cd1c6085SJohn Marino } width_node_t;
90cd1c6085SJohn Marino 
91cd1c6085SJohn Marino static int
92cd1c6085SJohn Marino ctype_compare(const void *n1, const void *n2)
93cd1c6085SJohn Marino {
94cd1c6085SJohn Marino 	const ctype_node_t *c1 = n1;
95cd1c6085SJohn Marino 	const ctype_node_t *c2 = n2;
96cd1c6085SJohn Marino 
97cd1c6085SJohn Marino 	return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0);
98cd1c6085SJohn Marino }
99cd1c6085SJohn Marino 
100cd1c6085SJohn Marino void
101cd1c6085SJohn Marino init_ctype(void)
102cd1c6085SJohn Marino {
103cd1c6085SJohn Marino 	avl_create(&ctypes, ctype_compare, sizeof (ctype_node_t),
104cd1c6085SJohn Marino 	    offsetof(ctype_node_t, avl));
105cd1c6085SJohn Marino }
106cd1c6085SJohn Marino 
107cd1c6085SJohn Marino 
108cd1c6085SJohn Marino static void
109cd1c6085SJohn Marino add_ctype_impl(ctype_node_t *ctn)
110cd1c6085SJohn Marino {
111cd1c6085SJohn Marino 	switch (last_kw) {
112cd1c6085SJohn Marino 	case T_ISUPPER:
113cd1c6085SJohn Marino 		ctn->ctype |= (_ISUPPER | _ISALPHA | _ISGRAPH | _ISPRINT);
114cd1c6085SJohn Marino 		break;
115cd1c6085SJohn Marino 	case T_ISLOWER:
116cd1c6085SJohn Marino 		ctn->ctype |= (_ISLOWER | _ISALPHA | _ISGRAPH | _ISPRINT);
117cd1c6085SJohn Marino 		break;
118cd1c6085SJohn Marino 	case T_ISALPHA:
119cd1c6085SJohn Marino 		ctn->ctype |= (_ISALPHA | _ISGRAPH | _ISPRINT);
120cd1c6085SJohn Marino 		break;
121cd1c6085SJohn Marino 	case T_ISDIGIT:
122348a405dSJohn Marino 		ctn->ctype |= (_ISDIGIT | _ISGRAPH | _ISPRINT | _ISXDIGIT | _E4);
123cd1c6085SJohn Marino 		break;
124cd1c6085SJohn Marino 	case T_ISSPACE:
125cd1c6085SJohn Marino 		ctn->ctype |= _ISSPACE;
126cd1c6085SJohn Marino 		break;
127cd1c6085SJohn Marino 	case T_ISCNTRL:
128cd1c6085SJohn Marino 		ctn->ctype |= _ISCNTRL;
129cd1c6085SJohn Marino 		break;
130cd1c6085SJohn Marino 	case T_ISGRAPH:
131cd1c6085SJohn Marino 		ctn->ctype |= (_ISGRAPH | _ISPRINT);
132cd1c6085SJohn Marino 		break;
133cd1c6085SJohn Marino 	case T_ISPRINT:
134cd1c6085SJohn Marino 		ctn->ctype |= _ISPRINT;
135cd1c6085SJohn Marino 		break;
136cd1c6085SJohn Marino 	case T_ISPUNCT:
137cd1c6085SJohn Marino 		ctn->ctype |= (_ISPUNCT | _ISGRAPH | _ISPRINT);
138cd1c6085SJohn Marino 		break;
139cd1c6085SJohn Marino 	case T_ISXDIGIT:
140*dd5ff2d3SJohn Marino 		ctn->ctype |= (_ISXDIGIT | _ISPRINT);
141cd1c6085SJohn Marino 		break;
142cd1c6085SJohn Marino 	case T_ISBLANK:
143cd1c6085SJohn Marino 		ctn->ctype |= (_ISBLANK | _ISSPACE);
144cd1c6085SJohn Marino 		break;
145cd1c6085SJohn Marino 	case T_ISPHONOGRAM:
146cd1c6085SJohn Marino 		ctn->ctype |= (_E1 | _ISPRINT | _ISGRAPH);
147cd1c6085SJohn Marino 		break;
148cd1c6085SJohn Marino 	case T_ISIDEOGRAM:
149cd1c6085SJohn Marino 		ctn->ctype |= (_E2 | _ISPRINT | _ISGRAPH);
150cd1c6085SJohn Marino 		break;
151cd1c6085SJohn Marino 	case T_ISENGLISH:
152cd1c6085SJohn Marino 		ctn->ctype |= (_E3 | _ISPRINT | _ISGRAPH);
153cd1c6085SJohn Marino 		break;
154cd1c6085SJohn Marino 	case T_ISNUMBER:
155cd1c6085SJohn Marino 		ctn->ctype |= (_E4 | _ISPRINT | _ISGRAPH);
156cd1c6085SJohn Marino 		break;
157cd1c6085SJohn Marino 	case T_ISSPECIAL:
158cd1c6085SJohn Marino 		ctn->ctype |= (_E5 | _ISPRINT | _ISGRAPH);
159cd1c6085SJohn Marino 		break;
160cd1c6085SJohn Marino 	case T_ISALNUM:
161cd1c6085SJohn Marino 		/*
162cd1c6085SJohn Marino 		 * We can't do anything with this.  The character
163cd1c6085SJohn Marino 		 * should already be specified as a digit or alpha.
164cd1c6085SJohn Marino 		 */
165cd1c6085SJohn Marino 		break;
166cd1c6085SJohn Marino 	default:
167cd1c6085SJohn Marino 		errf("not a valid character class");
168cd1c6085SJohn Marino 	}
169cd1c6085SJohn Marino }
170cd1c6085SJohn Marino 
171cd1c6085SJohn Marino static ctype_node_t *
172cd1c6085SJohn Marino get_ctype(wchar_t wc)
173cd1c6085SJohn Marino {
174cd1c6085SJohn Marino 	ctype_node_t	srch;
175cd1c6085SJohn Marino 	ctype_node_t	*ctn;
176cd1c6085SJohn Marino 	avl_index_t	where;
177cd1c6085SJohn Marino 
178cd1c6085SJohn Marino 	srch.wc = wc;
179cd1c6085SJohn Marino 	if ((ctn = avl_find(&ctypes, &srch, &where)) == NULL) {
180cd1c6085SJohn Marino 		if ((ctn = calloc(1, sizeof (*ctn))) == NULL) {
181cd1c6085SJohn Marino 			errf("out of memory");
182cd1c6085SJohn Marino 			return (NULL);
183cd1c6085SJohn Marino 		}
184cd1c6085SJohn Marino 		ctn->wc = wc;
185cd1c6085SJohn Marino 
186cd1c6085SJohn Marino 		avl_insert(&ctypes, ctn, where);
187cd1c6085SJohn Marino 	}
188cd1c6085SJohn Marino 	return (ctn);
189cd1c6085SJohn Marino }
190cd1c6085SJohn Marino 
191cd1c6085SJohn Marino void
192cd1c6085SJohn Marino add_ctype(int val)
193cd1c6085SJohn Marino {
194cd1c6085SJohn Marino 	ctype_node_t	*ctn;
195cd1c6085SJohn Marino 
196cd1c6085SJohn Marino 	if ((ctn = get_ctype(val)) == NULL) {
197cd1c6085SJohn Marino 		INTERR;
198cd1c6085SJohn Marino 		return;
199cd1c6085SJohn Marino 	}
200cd1c6085SJohn Marino 	add_ctype_impl(ctn);
201cd1c6085SJohn Marino 	last_ctype = ctn->wc;
202cd1c6085SJohn Marino }
203cd1c6085SJohn Marino 
204cd1c6085SJohn Marino void
205cd1c6085SJohn Marino add_ctype_range(int end)
206cd1c6085SJohn Marino {
207cd1c6085SJohn Marino 	ctype_node_t	*ctn;
208cd1c6085SJohn Marino 	wchar_t		cur;
209cd1c6085SJohn Marino 
210cd1c6085SJohn Marino 	if (end < last_ctype) {
211cd1c6085SJohn Marino 		errf("malformed character range (%u ... %u))",
212cd1c6085SJohn Marino 		    last_ctype, end);
213cd1c6085SJohn Marino 		return;
214cd1c6085SJohn Marino 	}
215cd1c6085SJohn Marino 	for (cur = last_ctype + 1; cur <= end; cur++) {
216cd1c6085SJohn Marino 		if ((ctn = get_ctype(cur)) == NULL) {
217cd1c6085SJohn Marino 			INTERR;
218cd1c6085SJohn Marino 			return;
219cd1c6085SJohn Marino 		}
220cd1c6085SJohn Marino 		add_ctype_impl(ctn);
221cd1c6085SJohn Marino 	}
222cd1c6085SJohn Marino 	last_ctype = end;
223cd1c6085SJohn Marino 
224cd1c6085SJohn Marino }
225cd1c6085SJohn Marino 
226cd1c6085SJohn Marino /*
227cd1c6085SJohn Marino  * A word about widths: if the width mask is specified, then libc
228cd1c6085SJohn Marino  * unconditionally honors it.  Otherwise, it assumes printable
229cd1c6085SJohn Marino  * characters have width 1, and non-printable characters have width
230cd1c6085SJohn Marino  * -1 (except for NULL which is special with with 0).  Hence, we have
231cd1c6085SJohn Marino  * no need to inject defaults here -- the "default" unset value of 0
232cd1c6085SJohn Marino  * indicates that libc should use its own logic in wcwidth as described.
233cd1c6085SJohn Marino  */
234cd1c6085SJohn Marino void
235cd1c6085SJohn Marino add_width(int wc, int width)
236cd1c6085SJohn Marino {
237cd1c6085SJohn Marino 	ctype_node_t	*ctn;
238cd1c6085SJohn Marino 
239cd1c6085SJohn Marino 	if ((ctn = get_ctype(wc)) == NULL) {
240cd1c6085SJohn Marino 		INTERR;
241cd1c6085SJohn Marino 		return;
242cd1c6085SJohn Marino 	}
243cd1c6085SJohn Marino 	ctn->ctype &= ~(_CTYPE_SWM);
244cd1c6085SJohn Marino 	switch (width) {
245cd1c6085SJohn Marino 	case 0:
246cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW0;
247cd1c6085SJohn Marino 		break;
248cd1c6085SJohn Marino 	case 1:
249cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW1;
250cd1c6085SJohn Marino 		break;
251cd1c6085SJohn Marino 	case 2:
252cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW2;
253cd1c6085SJohn Marino 		break;
254cd1c6085SJohn Marino 	case 3:
255cd1c6085SJohn Marino 		ctn->ctype |= _CTYPE_SW3;
256cd1c6085SJohn Marino 		break;
257cd1c6085SJohn Marino 	}
258cd1c6085SJohn Marino }
259cd1c6085SJohn Marino 
/*
 * Record the same width for every character from start through end,
 * inclusive.  Nothing happens when start is greater than end.
 */
void
add_width_range(int start, int end, int width)
{
	int	wc = start;

	while (wc <= end) {
		add_width(wc, width);
		wc++;
	}
}
267cd1c6085SJohn Marino 
268cd1c6085SJohn Marino void
269cd1c6085SJohn Marino add_caseconv(int val, int wc)
270cd1c6085SJohn Marino {
271cd1c6085SJohn Marino 	ctype_node_t	*ctn;
272cd1c6085SJohn Marino 
273cd1c6085SJohn Marino 	ctn = get_ctype(val);
274cd1c6085SJohn Marino 	if (ctn == NULL) {
275cd1c6085SJohn Marino 		INTERR;
276cd1c6085SJohn Marino 		return;
277cd1c6085SJohn Marino 	}
278cd1c6085SJohn Marino 
279cd1c6085SJohn Marino 	switch (last_kw) {
280cd1c6085SJohn Marino 	case T_TOUPPER:
281cd1c6085SJohn Marino 		ctn->toupper = wc;
282cd1c6085SJohn Marino 		break;
283cd1c6085SJohn Marino 	case T_TOLOWER:
284cd1c6085SJohn Marino 		ctn->tolower = wc;
285cd1c6085SJohn Marino 		break;
286cd1c6085SJohn Marino 	default:
287cd1c6085SJohn Marino 		INTERR;
288cd1c6085SJohn Marino 		break;
289cd1c6085SJohn Marino 	}
290cd1c6085SJohn Marino }
291cd1c6085SJohn Marino 
292cd1c6085SJohn Marino void
293cd1c6085SJohn Marino dump_ctype(void)
294cd1c6085SJohn Marino {
295cd1c6085SJohn Marino 	FILE		*f;
296cd1c6085SJohn Marino 	_FileRuneLocale	rl;
297cd1c6085SJohn Marino 	ctype_node_t	*ctn, *last_ct, *last_lo, *last_up;
298cd1c6085SJohn Marino 	_FileRuneEntry	*ct = NULL;
299cd1c6085SJohn Marino 	_FileRuneEntry	*lo = NULL;
300cd1c6085SJohn Marino 	_FileRuneEntry	*up = NULL;
301cd1c6085SJohn Marino 	wchar_t		wc;
302cd1c6085SJohn Marino 
303cd1c6085SJohn Marino 	(void) memset(&rl, 0, sizeof (rl));
304cd1c6085SJohn Marino 	last_ct = NULL;
305cd1c6085SJohn Marino 	last_lo = NULL;
306cd1c6085SJohn Marino 	last_up = NULL;
307cd1c6085SJohn Marino 
308cd1c6085SJohn Marino 	if ((f = open_category()) == NULL)
309cd1c6085SJohn Marino 		return;
310cd1c6085SJohn Marino 
311cd1c6085SJohn Marino 	(void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
312cd1c6085SJohn Marino 	(void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
313cd1c6085SJohn Marino 
314cd1c6085SJohn Marino 	/*
315cd1c6085SJohn Marino 	 * Initialize the identity map.
316cd1c6085SJohn Marino 	 */
317cd1c6085SJohn Marino 	for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) {
318cd1c6085SJohn Marino 		rl.maplower[wc] = wc;
319cd1c6085SJohn Marino 		rl.mapupper[wc] = wc;
320cd1c6085SJohn Marino 	}
321cd1c6085SJohn Marino 
322cd1c6085SJohn Marino 	for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) {
323cd1c6085SJohn Marino 		int conflict = 0;
324cd1c6085SJohn Marino 
325cd1c6085SJohn Marino 
326cd1c6085SJohn Marino 		wc = ctn->wc;
327cd1c6085SJohn Marino 
328cd1c6085SJohn Marino 		/*
329cd1c6085SJohn Marino 		 * POSIX requires certain portable characters have
330cd1c6085SJohn Marino 		 * certain types.  Add them if they are missing.
331cd1c6085SJohn Marino 		 */
332cd1c6085SJohn Marino 		if ((wc >= 1) && (wc <= 127)) {
333cd1c6085SJohn Marino 			if ((wc >= 'A') && (wc <= 'Z'))
334cd1c6085SJohn Marino 				ctn->ctype |= _ISUPPER;
335cd1c6085SJohn Marino 			if ((wc >= 'a') && (wc <= 'z'))
336cd1c6085SJohn Marino 				ctn->ctype |= _ISLOWER;
337cd1c6085SJohn Marino 			if ((wc >= '0') && (wc <= '9'))
338cd1c6085SJohn Marino 				ctn->ctype |= _ISDIGIT;
339cd1c6085SJohn Marino 			if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
340cd1c6085SJohn Marino 				ctn->ctype |= _ISSPACE;
341cd1c6085SJohn Marino 			if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
342cd1c6085SJohn Marino 				ctn->ctype |= _ISXDIGIT;
343cd1c6085SJohn Marino 			if (strchr(" \t", (char)wc))
344cd1c6085SJohn Marino 				ctn->ctype |= _ISBLANK;
34597055fc2SJohn Marino 			if (wc == ' ')
34697055fc2SJohn Marino 				ctn->ctype |= _ISPRINT;
347cd1c6085SJohn Marino 
348cd1c6085SJohn Marino 			/*
349cd1c6085SJohn Marino 			 * Technically these settings are only
350cd1c6085SJohn Marino 			 * required for the C locale.  However, it
351cd1c6085SJohn Marino 			 * turns out that because of the historical
352cd1c6085SJohn Marino 			 * version of isprint(), we need them for all
353cd1c6085SJohn Marino 			 * locales as well.  Note that these are not
354cd1c6085SJohn Marino 			 * necessarily valid punctation characters in
355cd1c6085SJohn Marino 			 * the current language, but ispunct() needs
356cd1c6085SJohn Marino 			 * to return TRUE for them.
357cd1c6085SJohn Marino 			 */
358cd1c6085SJohn Marino 			if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
359cd1c6085SJohn Marino 			    (char)wc))
360cd1c6085SJohn Marino 				ctn->ctype |= _ISPUNCT;
361cd1c6085SJohn Marino 		}
362cd1c6085SJohn Marino 
363cd1c6085SJohn Marino 		/*
364cd1c6085SJohn Marino 		 * POSIX also requires that certain types imply
365cd1c6085SJohn Marino 		 * others.  Add any inferred types here.
366cd1c6085SJohn Marino 		 */
367cd1c6085SJohn Marino 		if (ctn->ctype & (_ISUPPER |_ISLOWER))
368cd1c6085SJohn Marino 			ctn->ctype |= _ISALPHA;
369cd1c6085SJohn Marino 		if (ctn->ctype & _ISDIGIT)
370cd1c6085SJohn Marino 			ctn->ctype |= _ISXDIGIT;
371cd1c6085SJohn Marino 		if (ctn->ctype & _ISBLANK)
372cd1c6085SJohn Marino 			ctn->ctype |= _ISSPACE;
373cd1c6085SJohn Marino 		if (ctn->ctype & (_ISALPHA|_ISDIGIT|_ISXDIGIT))
374cd1c6085SJohn Marino 			ctn->ctype |= _ISGRAPH;
375cd1c6085SJohn Marino 		if (ctn->ctype & _ISGRAPH)
376cd1c6085SJohn Marino 			ctn->ctype |= _ISPRINT;
377cd1c6085SJohn Marino 
378cd1c6085SJohn Marino 		/*
379cd1c6085SJohn Marino 		 * Finally, POSIX requires that certain combinations
380cd1c6085SJohn Marino 		 * are invalid.  We don't flag this as a fatal error,
381cd1c6085SJohn Marino 		 * but we will warn about.
382cd1c6085SJohn Marino 		 */
383cd1c6085SJohn Marino 		if ((ctn->ctype & _ISALPHA) &&
384cd1c6085SJohn Marino 		    (ctn->ctype & (_ISPUNCT|_ISDIGIT)))
385cd1c6085SJohn Marino 			conflict++;
386cd1c6085SJohn Marino 		if ((ctn->ctype & _ISPUNCT) &
387cd1c6085SJohn Marino 		    (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
388cd1c6085SJohn Marino 			conflict++;
389cd1c6085SJohn Marino 		if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
390cd1c6085SJohn Marino 			conflict++;
391cd1c6085SJohn Marino 		if ((ctn->ctype & _ISCNTRL) & _ISPRINT)
392cd1c6085SJohn Marino 			conflict++;
393cd1c6085SJohn Marino 		if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
394cd1c6085SJohn Marino 			conflict++;
395cd1c6085SJohn Marino 
396cd1c6085SJohn Marino 		if (conflict) {
397cd1c6085SJohn Marino 			warn("conflicting classes for character 0x%x (%x)",
398cd1c6085SJohn Marino 			    wc, ctn->ctype);
399cd1c6085SJohn Marino 		}
400cd1c6085SJohn Marino 		/*
401cd1c6085SJohn Marino 		 * Handle the lower 256 characters using the simple
402cd1c6085SJohn Marino 		 * optimization.  Note that if we have not defined the
403cd1c6085SJohn Marino 		 * upper/lower case, then we identity map it.
404cd1c6085SJohn Marino 		 */
405cd1c6085SJohn Marino 		if ((unsigned)wc < _CACHED_RUNES) {
406cd1c6085SJohn Marino 			rl.runetype[wc] = ctn->ctype;
407cd1c6085SJohn Marino 			if (ctn->tolower)
408cd1c6085SJohn Marino 				rl.maplower[wc] = ctn->tolower;
409cd1c6085SJohn Marino 			if (ctn->toupper)
410cd1c6085SJohn Marino 				rl.mapupper[wc] = ctn->toupper;
411cd1c6085SJohn Marino 			continue;
412cd1c6085SJohn Marino 		}
413cd1c6085SJohn Marino 
414cd1c6085SJohn Marino 		if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) {
415cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges-1].max = wc;
416cd1c6085SJohn Marino 			last_ct = ctn;
417cd1c6085SJohn Marino 		} else {
418cd1c6085SJohn Marino 			rl.runetype_ext_nranges++;
419cd1c6085SJohn Marino 			ct = realloc(ct,
420cd1c6085SJohn Marino 			    sizeof (*ct) * rl.runetype_ext_nranges);
421cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges - 1].min = wc;
422cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges - 1].max = wc;
423cd1c6085SJohn Marino 			ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
424cd1c6085SJohn Marino 			last_ct = ctn;
425cd1c6085SJohn Marino 		}
426cd1c6085SJohn Marino 		if (ctn->tolower == 0) {
427cd1c6085SJohn Marino 			last_lo = NULL;
428cd1c6085SJohn Marino 		} else if ((last_lo != NULL) &&
429cd1c6085SJohn Marino 		    (last_lo->tolower + 1 == ctn->tolower)) {
430cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges-1].max = wc;
431cd1c6085SJohn Marino 			last_lo = ctn;
432cd1c6085SJohn Marino 		} else {
433cd1c6085SJohn Marino 			rl.maplower_ext_nranges++;
434cd1c6085SJohn Marino 			lo = realloc(lo,
435cd1c6085SJohn Marino 			    sizeof (*lo) * rl.maplower_ext_nranges);
436cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges - 1].min = wc;
437cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges - 1].max = wc;
438cd1c6085SJohn Marino 			lo[rl.maplower_ext_nranges - 1].map = ctn->tolower;
439cd1c6085SJohn Marino 			last_lo = ctn;
440cd1c6085SJohn Marino 		}
441cd1c6085SJohn Marino 
442cd1c6085SJohn Marino 		if (ctn->toupper == 0) {
443cd1c6085SJohn Marino 			last_up = NULL;
444cd1c6085SJohn Marino 		} else if ((last_up != NULL) &&
445cd1c6085SJohn Marino 		    (last_up->toupper + 1 == ctn->toupper)) {
446cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges-1].max = wc;
447cd1c6085SJohn Marino 			last_up = ctn;
448cd1c6085SJohn Marino 		} else {
449cd1c6085SJohn Marino 			rl.mapupper_ext_nranges++;
450cd1c6085SJohn Marino 			up = realloc(up,
451cd1c6085SJohn Marino 			    sizeof (*up) * rl.mapupper_ext_nranges);
452cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges - 1].min = wc;
453cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges - 1].max = wc;
454cd1c6085SJohn Marino 			up[rl.mapupper_ext_nranges - 1].map = ctn->toupper;
455cd1c6085SJohn Marino 			last_up = ctn;
456cd1c6085SJohn Marino 		}
457cd1c6085SJohn Marino 	}
458cd1c6085SJohn Marino 
459cd1c6085SJohn Marino 	if ((wr_category(&rl, sizeof (rl), f) < 0) ||
460cd1c6085SJohn Marino 	    (wr_category(ct, sizeof (*ct) * rl.runetype_ext_nranges, f) < 0) ||
461cd1c6085SJohn Marino 	    (wr_category(lo, sizeof (*lo) * rl.maplower_ext_nranges, f) < 0) ||
462cd1c6085SJohn Marino 	    (wr_category(up, sizeof (*up) * rl.mapupper_ext_nranges, f) < 0)) {
463cd1c6085SJohn Marino 		return;
464cd1c6085SJohn Marino 	}
465cd1c6085SJohn Marino 
466cd1c6085SJohn Marino 	close_category(f);
467cd1c6085SJohn Marino }
468