1 /*
2  * $Id: vl_ctype.h,v 1.40 2013/04/08 00:22:25 tom Exp $
3  *
4  * Character-type tests, like <ctype.h> for vile (vi-like-emacs).
5  *
6  * Copyright 2005-2010,2013 Thomas E. Dickey
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, distribute with modifications, sublicense, and/or sell
13  * copies of the Software, and to permit persons to whom the Software is
14  * furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be included
17  * in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
23  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
25  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  * Except as contained in this notice, the name(s) of the above copyright
28  * holders shall not be used in advertising or otherwise to promote the
29  * sale, use or other dealings in this Software without prior written
30  * authorization.
31  */
32 
33 #ifndef VL_CTYPE_H_incl
34 #define VL_CTYPE_H_incl 1
35 
/* Build-size switch; the optional features below default to its negation. */
#if !defined(SMALLER)
#define	SMALLER	0	/* strip some fluff -- not a lot smaller, but some */
#endif

/* Selects vile's own character-type macros instead of plain <ctype.h>. */
#if !defined(OPT_VILE_CTYPE)
#define OPT_VILE_CTYPE 1
#endif

#if !defined(OPT_MULTIBYTE)
#define OPT_MULTIBYTE !SMALLER		/* multibyte characters */
#endif

#if !defined(OPT_WIDE_CTYPES)
#define OPT_WIDE_CTYPES !SMALLER	/* extra char-types tests */
#endif
51 
/*
 * UCHAR: taken from the system header on OS/2, left alone on WIN32,
 * and defined here everywhere else.
 */
#ifdef __OS2__
# include <os2def.h>
#else
# ifndef WIN32
#  define UCHAR unsigned char
# endif
#endif
57 
#if !defined(N_chars)
#define N_chars    256			/* must be a power-of-2		*/
#endif

/* symbolic names for characters that are awkward to read as literals */
#define EOS        '\0'
#define BACKSLASH  '\\'
#define BQUOTE     '`'
#define DQUOTE     '"'
#define SQUOTE     '\''
#define CH_TILDE   '~'
68 
/*
 * Drop any earlier definitions of the names this header defines below,
 * so that the definitions here always win.
 */
# undef  istype
# undef  isident
# undef  isAlnum
# undef  isAlpha
# undef  isCntrl
# undef  isDigit
# undef  isLower
# undef  isPrint
# undef  isPunct
# undef  isSpace
# undef  isUpper
# undef  isXDigit
# undef  toLower
# undef  toUpper

/* reduce any integer/char value to the range of an unsigned char */
#define CharOf(c)	((unsigned char)(c))
85 
86 #include <ctype.h>
87 
88 #if OPT_VILE_CTYPE
89 
/*
 * Each enumerator below is a bit number in the vl_chartypes_ array;
 * chrBIT() turns a bit number into the mask which the macros below test.
 */
#define chrBIT(n) ((CHARTYPE)(1L<<(n)))

typedef enum {
	vl_ALPHA = 0,
	vl_UPPER,
	vl_LOWER,
	vl_DIGIT,
	vl_SPACE,
	vl_CNTRL,
	vl_PRINT,
	vl_PUNCT,
	/* first 8 comprise a byte in gnreight.h */
	vl_BSPACE,
	vl_IDENT,
	vl_PATHN,
	vl_WILD,
	vl_LINESPEC,
	vl_FENCE,
	vl_NONSPACE,
	vl_QIDENT,
#if OPT_WIDE_CTYPES
	vl_SCRTCH,
	vl_SHPIPE,
	vl_XDIGIT,
#else
	/* no dedicated bit in the narrow configuration; the name still expands */
#define vl_XDIGIT 0
#endif
	vl_UNUSED
} VL_CTYPES;
121 
/* single-bit masks, listed in the same order as the VL_CTYPES enumerators */
#define vl_alpha    chrBIT(vl_ALPHA)	/* alphabetic */
#define vl_upper    chrBIT(vl_UPPER)	/* upper case */
#define vl_lower    chrBIT(vl_LOWER)	/* lower case */
#define vl_digit    chrBIT(vl_DIGIT)	/* digits */
#define vl_space    chrBIT(vl_SPACE)	/* whitespace */
#define vl_cntrl    chrBIT(vl_CNTRL)	/* control characters, including DEL */
#define vl_print    chrBIT(vl_PRINT)	/* printable */
#define vl_punct    chrBIT(vl_PUNCT)	/* punctuation */
#define vl_bspace   chrBIT(vl_BSPACE)	/* backspace character (^H, DEL, and user's) */
#define vl_ident    chrBIT(vl_IDENT)	/* is typically legal in "normal" identifier */
#define vl_pathn    chrBIT(vl_PATHN)	/* is typically legal in a file's pathname */
#define vl_wild     chrBIT(vl_WILD)	/* is typically a shell wildcard char */
#define vl_linespec chrBIT(vl_LINESPEC)	/* ex-style line range: 1,$ or 13,15 or % etc.*/
#define vl_fence    chrBIT(vl_FENCE)	/* a fence, i.e. (, ), [, ], {, } */
#define vl_nonspace chrBIT(vl_NONSPACE)	/* non-whitespace */
#define vl_qident   chrBIT(vl_QIDENT)	/* is typically legal in "qualified" identifier */
138 
#if OPT_WIDE_CTYPES
#define vl_scrtch   chrBIT(vl_SCRTCH)	/* legal in scratch-buffer names */
#define vl_shpipe   chrBIT(vl_SHPIPE)	/* legal in shell/pipe-buffer names */
#define vl_xdigit   chrBIT(vl_XDIGIT)	/* hex digit */
#define isXDigit(c)	istype(vl_xdigit, c)

/* the extra classes push the bit count past 16, so use a wider mask type */
typedef	unsigned long CHARTYPE;
#else
/*
 * Only the sixteen basic class bits exist in this configuration.  Spell the
 * type out rather than using USHORT: this header supplies UCHAR for
 * platforms lacking it, but never USHORT, so relying on that name made the
 * header depend on whatever happened to be included before it.
 */
typedef unsigned short CHARTYPE;
#endif
149 
/* a locale name paired with an encoding name */
typedef struct {
    char *locale;		/* "en_US" */
    char *encoding;		/* "ISO-8859-1" */
} VL_CTYPE2;

/* true when the locale pointer is set and the string is not empty */
#define okCTYPE2(ct)	((ct).locale != 0 && (ct).locale[0] != '\0')
156 
/*
 * These parallel the <ctype.h> definitions, except that they force the
 * character into the valid range first.  The table is indexed with an
 * offset of one from the (unsigned) character value.
 */
#define vlCTYPE(c)	vl_chartypes_[CharOf(c) + 1]

/*
 * istype(m,c) tests whether any class bit of mask m is set for c.
 * NOTE(review): the __GNUC__ form passes the character expression twice to
 * isVlCTYPE, which is not declared in this header -- confirm its contract,
 * and avoid arguments with side effects.
 */
#if !defined(inline) && defined(__GNUC__)
#define istype(m,c)	isVlCTYPE(m, (int)(c), (int)(c))
#else
#define istype(m,c)	((vlCTYPE(c) & (m)) != 0)
#endif

/*
 * Add, clear or replace the class bits of one character.  Each expansion is
 * fully parenthesized so that it stays a single operand when used inside a
 * larger expression (for instance as an arm of ?:).
 */
#define addVlCTYPE(c,m)	(vlCTYPE(c) |= (m))
#define clrVlCTYPE(c,m)	(vlCTYPE(c) &= ~(m))
#define setVlCTYPE(c,m)	(vlCTYPE(c) = (m))
170 
/* counterparts of the <ctype.h> tests, answered from vile's class bits */
#define isAlpha(c)	istype(vl_upper | vl_lower, c)
#define isAlnum(c)	istype(vl_upper | vl_lower | vl_digit, c)
#define isUpper(c)	istype(vl_upper, c)
#define isLower(c)	istype(vl_lower, c)
#define isDigit(c)	istype(vl_digit, c)
#define isSpace(c)	istype(vl_space, c)
#define isCntrl(c)	istype(vl_cntrl, c)
#define isPrint(c)	istype(vl_print, c)
#define isPunct(c)	istype(vl_punct, c)

/* tests for the classes which have no <ctype.h> counterpart */
#define isbackspace(c)	(istype(vl_bspace, c) || (c) == backspc)
#define isident(c)	istype(vl_ident, c)
#define isqident(c)	istype(vl_qident, c)
#define ispath(c)	istype(vl_pathn, c)
#define iswild(c)	istype(vl_wild, c)
#define islinespec(c)	istype(vl_linespec, c)
#define isfence(c)	istype(vl_fence, c)
188 
/*
 * Macro for whitespace (non-return): tab or space.  The argument is
 * parenthesized so that an operator expression such as "ch & 0xff" is
 * compared as a whole.  It may still be evaluated twice, so avoid
 * arguments with side effects.
 */
#define	isBlank(c)      (((c) == '\t') || ((c) == ' '))

/* printable, and neither whitespace nor "special" (isSpecial is defined elsewhere) */
#define	isGraph(c)	(!isSpecial(c) && !isSpace(c) && isPrint(c))
193 
/*
 * DIFCASE is the difference between an upper case letter and its lower
 * case form; DIFCNTRL is the difference between an upper case letter and
 * the corresponding control character.  Both are meant to be xor'd in.
 */
#define	DIFCASE		0x20
#define	DIFCNTRL	0x40

/* case conversion via lookup tables, indexed like the class table */
#define toUpper(c)	vl_uppercase[CharOf(c) + 1]
#define toLower(c)	vl_lowercase[CharOf(c) + 1]

/* xor is its own inverse, so both directions are the same operation */
#define tocntrl(c)	((unsigned)(c) ^ DIFCNTRL)
#define toalpha(c)	tocntrl(c)
203 
204 extern CHARTYPE vl_ctype_bits(int ch, int use_locale);
205 extern void vl_ctype_init(int print_lo, int print_hi);
206 extern void vl_ctype_apply(void);
207 extern void vl_ctype_discard(void);
208 extern void vl_ctype_set(int ch, CHARTYPE cclass);
209 extern void vl_ctype_clr(int ch, CHARTYPE cclass);
210 
211 #else
212 
/*
 * Plain <ctype.h> versions, used when vile's own tables are disabled.
 * The argument is forced into unsigned-char range first, as in the
 * table-driven macros: passing a negative char value to the <ctype.h>
 * functions is undefined.
 */
# define isAlnum(c)	isalnum(CharOf(c))
# define isAlpha(c)	isalpha(CharOf(c))
# define isCntrl(c)	iscntrl(CharOf(c))
# define isDigit(c)	isdigit(CharOf(c))
# define isLower(c)	islower(CharOf(c))
# define isGraph(c)	isgraph(CharOf(c))
# define isPrint(c)	isprint(CharOf(c))
# define isPunct(c)	ispunct(CharOf(c))
# define isSpace(c)	isspace(CharOf(c))
# define isUpper(c)	isupper(CharOf(c))
# define toUpper(c)	toupper(CharOf(c))
# define toLower(c)	tolower(CharOf(c))
# define isXDigit(c)	isxdigit(CharOf(c))

/* simplified stand-ins for the table-driven identifier/pathname tests */
# define isident(c)     (isalnum(CharOf(c)) || (c) == '_')
# define ispath(c)      (isalnum(CharOf(c)) || (c) == '/' || (c) == '\\' || (c) == '~')
229 
230 #endif
231 
/*
 * Decide whether the wide-character classification functions are used,
 * unless the builder has already chosen.  Nothing is decided here when
 * multibyte support is turned off.
 */
#if !defined(USE_WIDE_CTYPE)

#if OPT_MULTIBYTE

#if defined(HAVE_WCTYPE)
#include	<wctype.h>
#define USE_WIDE_CTYPE 1
#elif (defined(WIN32) && defined(_WCTYPE_DEFINED))
#define USE_WIDE_CTYPE 1
#else
#define USE_WIDE_CTYPE 0
#endif

#endif /* OPT_MULTIBYTE */
#endif /* !defined(USE_WIDE_CTYPE) */
247 
/*
 * sys_XXX: the system's own classification/conversion functions, in their
 * wide-character or single-byte flavor depending on USE_WIDE_CTYPE.
 */
#if defined(USE_WIDE_CTYPE) && USE_WIDE_CTYPE

#define sys_WINT_T	wint_t

/* supply iswblank when the system is not known to have it */
#if !defined(iswblank) && !defined(HAVE_ISWBLANK)
#define iswblank(c) ((c) == ' ' || (c) == '\t')
#endif

/* classification */
#define sys_isalnum(n)  iswalnum((wint_t)(n))
#define sys_isalpha(n)  iswalpha((wint_t)(n))
#define sys_isblank(n)  iswblank((wint_t)(n))
#define sys_iscntrl(n)  iswcntrl((wint_t)(n))
#define sys_isdigit(n)  iswdigit((wint_t)(n))
#define sys_isgraph(n)  iswgraph((wint_t)(n))
#define sys_islower(n)  iswlower((wint_t)(n))
#define sys_isprint(n)  iswprint((wint_t)(n))
#define sys_ispunct(n)  iswpunct((wint_t)(n))
#define sys_isspace(n)  iswspace((wint_t)(n))
#define sys_isupper(n)  iswupper((wint_t)(n))
#define sys_isxdigit(n) iswxdigit((wint_t)(n))

/* conversion */
#define sys_tolower(n)  towlower((wint_t)(n))
#define sys_toupper(n)  towupper((wint_t)(n))

#else /* ! USE_WIDE_CTYPE */

#define sys_WINT_T	int

/* classification */
#define sys_isalnum(n)  isalnum(n)
#define sys_isalpha(n)  isalpha(n)
#define sys_isblank(n)  isblank(n)
#define sys_iscntrl(n)  iscntrl(n)
#define sys_isdigit(n)  isdigit(n)
#define sys_isgraph(n)  isgraph(n)
#define sys_islower(n)  islower(n)
#define sys_isprint(n)  isprint(n)
#define sys_ispunct(n)  ispunct(n)
#define sys_isspace(n)  isspace(n)
#define sys_isupper(n)  isupper(n)
#define sys_isxdigit(n) isxdigit(n)

/* conversion goes through this header's toLower/toUpper */
#define sys_tolower(n)  toLower(n)
#define sys_toupper(n)  toUpper(n)

#endif /* USE_WIDE_CTYPE */
290 
/* supply isblank when the system is not known to have it */
#if !defined(isblank) && !defined(HAVE_ISBLANK)
#define isblank(c) ((c) == ' ' || (c) == '\t')
#endif
294 
/*
 * Macro for cases where return & newline are equivalent.  The argument is
 * parenthesized so that an operator expression such as "ch & 0xff" is
 * compared as a whole.  It may still be evaluated twice, so avoid
 * arguments with side effects.
 */
#define	isreturn(c)	(((c) == '\r') || ((c) == '\n'))
297 
/* case-insensitive match: same byte, or same result from toUpper */
#define nocase_eq(bc,pc) ((CharOf(bc) == CharOf(pc)) || (toUpper(bc) == toUpper(pc)))
299 
/*
 * Built-in names to fall back on when locale information is incomplete
 * or missing: first the locale names, then the matching encoding names.
 */
#define VL_LOC_ASCII  "ascii"
#define VL_LOC_LATIN1 "8bit"

#define VL_ENC_ASCII  "ascii"
#define VL_ENC_LATIN1 "8bit"
308 
309 #endif /* VL_CTYPE_H_incl */
310