1 /*
2 * $Id: vl_ctype.c,v 1.21 2013/03/10 20:27:22 tom Exp $
3 *
4 * On Linux, the normal/wide ctypes give comparable results in the range 0-255,
5 * reflecting the fact that codes 128-255 in Unicode are the "same" as
6 * Latin-1. However, Solaris' wide ctypes give only "non-space" results for
7 * 128-255. Since we're using these functions in vile 9.6 only for the normal
8 * ctypes (the narrow 8-bit locale), just use the normal ctype functions.
9 */
10
#include <estruct.h>
#include <edef.h>

#include <stdlib.h>
#include <string.h>

#if OPT_LOCALE
#include <locale.h>
#endif /* OPT_LOCALE */
17
/*
 * Runtime customizations of the character classes, indexed by character code.
 * Each array is allocated on first use (N_chars entries) by vl_ctype_set() or
 * vl_ctype_clr(), replayed by vl_ctype_apply(), and released by
 * vl_ctype_discard().
 */
static CHARTYPE *ctype_sets;	/* class bits to force on, per character */
static CHARTYPE *ctype_clrs;	/* class bits to force off, per character */
20
21 /* initialize our version of the "chartypes" stuff normally in ctypes.h */
22 /* also called later, if charset-affecting modes change, for instance */
23 void
vl_ctype_init(int print_lo,int print_hi)24 vl_ctype_init(int print_lo, int print_hi)
25 {
26 #if OPT_LOCALE
27 char *save_ctype = setlocale(LC_CTYPE, NULL);
28 #endif
29 int c;
30
31 TRACE((T_CALLED "vl_ctype_init() lo=%d, hi=%d\n",
32 print_lo,
33 print_hi));
34
35 /* If we're using the locale functions, set our flags based on its
36 * tables. Note that just because you have 'setlocale()' doesn't mean
37 * that the tables are present or correct. But this is a start.
38 *
39 * NOTE: Solaris8 and some versions of M$ incorrectly classify tab as a
40 * printable character (ANSI C says control characters are not printable).
41 * Ignore that (the former fixes it in Solaris9).
42 */
43 #if OPT_LOCALE
44 TRACE(("wide_locale:%s\n", NonNull(vl_wide_enc.locale)));
45 TRACE(("narrow_locale:%s\n", NonNull(vl_narrow_enc.locale)));
46 TRACE(("current_locale:%s\n", NonNull(save_ctype)));
47
48 if (okCTYPE2(vl_narrow_enc))
49 setlocale(LC_CTYPE, vl_narrow_enc.locale);
50 else if (okCTYPE2(vl_wide_enc))
51 setlocale(LC_CTYPE, vl_wide_enc.locale);
52
53 for (c = 0; c < N_chars; c++) {
54 if (print_hi > 0 && c > print_hi) {
55 setVlCTYPE(c, 0);
56 } else if (!vl_8bit_builtin() && okCTYPE2(vl_narrow_enc)) {
57 setVlCTYPE(c, vl_ctype_bits(c, -TRUE));
58 vl_uppercase[c + 1] = (char) toupper(c);
59 vl_lowercase[c + 1] = (char) tolower(c);
60 } else {
61 /* fallback to built-in character tables */
62 vl_8bit_ctype_init(okCTYPE2(vl_wide_enc), c);
63 }
64 }
65 #else /* ! OPT_LOCALE */
66 (void) memset((char *) vl_chartypes_, 0, sizeof(vl_chartypes_));
67
68 /* control characters */
69 for (c = 0; c < ' '; c++)
70 addVlCTYPE(c, vl_cntrl);
71 addVlCTYPE(127, vl_cntrl);
72
73 /* lowercase */
74 for (c = 'a'; c <= 'z'; c++)
75 addVlCTYPE(c, vl_lower);
76 #if OPT_ISO_8859
77 for (c = 0xc0; c <= 0xd6; c++)
78 addVlCTYPE(c, vl_lower);
79 for (c = 0xd8; c <= 0xde; c++)
80 addVlCTYPE(c, vl_lower);
81 #endif
82 /* uppercase */
83 for (c = 'A'; c <= 'Z'; c++)
84 addVlCTYPE(c, vl_upper);
85 #if OPT_ISO_8859
86 for (c = 0xdf; c <= 0xf6; c++)
87 addVlCTYPE(c, vl_upper);
88 for (c = 0xf8; c <= 0xff; c++)
89 addVlCTYPE(c, vl_upper);
90 #endif
91
92 /*
93 * If you want to do this properly, compile-in locale support.
94 */
95 for (c = 0; c < N_chars; c++) {
96 vl_uppercase[c + 1] = (char) c;
97 vl_lowercase[c + 1] = (char) c;
98 if (isAlpha(c)) {
99 if (isUpper(c)) {
100 vl_lowercase[c + 1] = (char) (c ^ DIFCASE);
101 } else {
102 vl_uppercase[c + 1] = (char) (c ^ DIFCASE);
103 }
104 }
105 }
106
107 /* digits */
108 for (c = '0'; c <= '9'; c++)
109 addVlCTYPE(c, vl_digit);
110 #ifdef vl_xdigit
111 /* hex digits */
112 for (c = '0'; c <= '9'; c++)
113 addVlCTYPE(c, vl_xdigit);
114 for (c = 'a'; c <= 'f'; c++)
115 addVlCTYPE(c, vl_xdigit);
116 for (c = 'A'; c <= 'F'; c++)
117 addVlCTYPE(c, vl_xdigit);
118 #endif
119
120 /* punctuation */
121 for (c = '!'; c <= '/'; c++)
122 addVlCTYPE(c, vl_punct);
123 for (c = ':'; c <= '@'; c++)
124 addVlCTYPE(c, vl_punct);
125 for (c = '['; c <= '`'; c++)
126 addVlCTYPE(c, vl_punct);
127 for (c = L_CURLY; c <= '~'; c++)
128 addVlCTYPE(c, vl_punct);
129 #if OPT_ISO_8859
130 for (c = 0xa1; c <= 0xbf; c++)
131 addVlCTYPE(c, vl_punct);
132 #endif
133
134 /* printable */
135 for (c = ' '; c <= '~'; c++)
136 addVlCTYPE(c, vl_print);
137
138 /* whitespace */
139 addVlCTYPE(' ', vl_space);
140 #if OPT_ISO_8859
141 addVlCTYPE(0xa0, vl_space);
142 #endif
143 addVlCTYPE('\t', vl_space);
144 addVlCTYPE('\r', vl_space);
145 addVlCTYPE('\n', vl_space);
146 addVlCTYPE('\f', vl_space);
147
148 #endif /* OPT_LOCALE */
149
150 /* legal in pathnames */
151 addVlCTYPE('.', vl_pathn);
152 addVlCTYPE('_', vl_pathn);
153 addVlCTYPE('~', vl_pathn);
154 addVlCTYPE('-', vl_pathn);
155 addVlCTYPE('/', vl_pathn);
156
157 /* legal in "identifiers" */
158 addVlCTYPE('_', vl_ident | vl_qident);
159 addVlCTYPE(':', vl_qident);
160 #if SYS_VMS
161 addVlCTYPE('$', vl_ident | vl_qident);
162 #endif
163
164 c = print_lo;
165
166 /*
167 * Guard against setting printing-high before printing-low while we have a
168 * buffer which may be repainted and possibly trashing the display.
169 */
170 if (c == 0
171 && print_hi >= 254)
172 c = 160;
173
174 if (c < HIGHBIT)
175 c = HIGHBIT;
176 TRACE(("Forcing printable for [%d..min(%d,%d)]\n",
177 c, print_hi - 1, N_chars - 1));
178 while (c <= print_hi && c < N_chars)
179 addVlCTYPE(c++, vl_print);
180
181 #if DISP_X11
182 for (c = 0; c < N_chars; c++) {
183 if (isPrint(c) && !gui_isprint(c)) {
184 clrVlCTYPE(c, vl_print);
185 }
186 }
187 #endif
188 /* backspacers: ^H, rubout */
189 addVlCTYPE('\b', vl_bspace);
190 addVlCTYPE(127, vl_bspace);
191
192 /* wildcard chars for most shells */
193 addVlCTYPE('*', vl_wild);
194 addVlCTYPE('?', vl_wild);
195 #if !OPT_VMS_PATH
196 #if SYS_UNIX
197 addVlCTYPE('~', vl_wild);
198 #endif
199 addVlCTYPE(L_BLOCK, vl_wild);
200 addVlCTYPE(R_BLOCK, vl_wild);
201 addVlCTYPE(L_CURLY, vl_wild);
202 addVlCTYPE(R_CURLY, vl_wild);
203 addVlCTYPE('$', vl_wild);
204 addVlCTYPE('`', vl_wild);
205 #endif
206
207 /* ex mode line specifiers */
208 addVlCTYPE(',', vl_linespec);
209 addVlCTYPE('%', vl_linespec);
210 addVlCTYPE('-', vl_linespec);
211 addVlCTYPE('+', vl_linespec);
212 addVlCTYPE(';', vl_linespec);
213 addVlCTYPE('.', vl_linespec);
214 addVlCTYPE('$', vl_linespec);
215 addVlCTYPE('\'', vl_linespec);
216
217 /* fences */
218 addVlCTYPE(L_CURLY, vl_fence);
219 addVlCTYPE(R_CURLY, vl_fence);
220 addVlCTYPE(L_PAREN, vl_fence);
221 addVlCTYPE(R_PAREN, vl_fence);
222 addVlCTYPE(L_BLOCK, vl_fence);
223 addVlCTYPE(R_BLOCK, vl_fence);
224
225 #if OPT_VMS_PATH
226 addVlCTYPE(L_BLOCK, vl_pathn);
227 addVlCTYPE(R_BLOCK, vl_pathn);
228 addVlCTYPE(L_ANGLE, vl_pathn);
229 addVlCTYPE(R_ANGLE, vl_pathn);
230 addVlCTYPE('$', vl_pathn);
231 addVlCTYPE(':', vl_pathn);
232 addVlCTYPE(';', vl_pathn);
233 #endif
234
235 #if OPT_MSDOS_PATH
236 addVlCTYPE(BACKSLASH, vl_pathn);
237 addVlCTYPE(':', vl_pathn);
238 #endif
239
240 #if OPT_WIDE_CTYPES
241 /* scratch-buffer-names (usually superset of vl_pathn) */
242 addVlCTYPE(SCRTCH_LEFT[0], vl_scrtch);
243 addVlCTYPE(SCRTCH_RIGHT[0], vl_scrtch);
244 addVlCTYPE(' ', vl_scrtch); /* ...to handle "[Buffer List]" */
245 #endif
246
247 for (c = 0; c < N_chars; c++) {
248 if (!(isSpace(c)))
249 addVlCTYPE(c, vl_nonspace);
250 if (isDigit(c))
251 addVlCTYPE(c, vl_linespec);
252 if (isAlpha(c) || isDigit(c))
253 addVlCTYPE(c, vl_ident | vl_pathn | vl_qident);
254 #if OPT_WIDE_CTYPES
255 if (isSpace(c) || isPrint(c))
256 addVlCTYPE(c, vl_shpipe);
257 if (ispath(c))
258 addVlCTYPE(c, vl_scrtch);
259 #endif
260 }
261
262 #if OPT_LOCALE
263 if (save_ctype != 0)
264 (void) setlocale(LC_CTYPE, save_ctype);
265 #endif
266
267 returnVoid();
268 }
269
270 /*
271 * Return the character-type bits for the given character. There are several
272 * cases.
273 *
274 * vile supports a 256-entry table for "character classes", which are used
275 * mainly to support systems with single-byte encodings. Some of those (no all
276 * older systems) may have incorrect character types; that is the reason for
277 * having the ability to change classes at runtime.
278 *
279 * If use_locale is TRUE, this uses the system's character type functions,
280 * (wide if available) e.g., for Unicode. However, we still allow the
281 * character-classes to override. The simple case is where the wide/narrow
282 * encodings coincide (up to latin1_codes).
283 *
284 * A more complicated case is for narrow encodings such as ISO-8859-2, where
285 * latin_codes is less than 256. Then we have to check first if it corresponds
286 * to the narrow encoding before using the system's character type functions.
287 *
288 * If use_locale is -TRUE (negative), then use the system's 8-bit character
289 * tests to get the narrow locale information used as a starting point for the
290 * character classes. On some systems, this may give odd results, but that is
291 * why it is configurable.
292 *
293 * If use_locale is FALSE, then use the 256-entry table of character classes.
294 */
295 CHARTYPE
vl_ctype_bits(int ch,int use_locale GCC_UNUSED)296 vl_ctype_bits(int ch, int use_locale GCC_UNUSED)
297 {
298 CHARTYPE result = 0;
299
300 if (ch < 0) {
301 ;
302 }
303 #if OPT_LOCALE
304 else if (use_locale > 0) {
305 int check;
306
307 /* handle case where character-classes can be overridden */
308 if (ch < latin1_codes) {
309 result = vlCTYPE(ch);
310 ch = -1;
311 } else if (vl_ucs_to_8bit(&check, ch)) {
312 result = vlCTYPE(check);
313 ch = -1;
314 }
315
316 if (ch >= 0) {
317 if (sys_isalpha(ch))
318 result |= (vl_ident | vl_pathn | vl_qident);
319 if (sys_iscntrl(ch))
320 result |= (vl_cntrl);
321 if (sys_isdigit(ch))
322 result |= (vl_digit | vl_ident | vl_pathn | vl_qident);
323 if (sys_islower(ch))
324 result |= vl_lower;
325 if (sys_isprint(ch) && ch != '\t')
326 result |= vl_print;
327 if (sys_ispunct(ch))
328 result |= vl_punct;
329 if (sys_isspace(ch))
330 result |= vl_space;
331 else
332 result |= vl_nonspace;
333 if (sys_isupper(ch))
334 result |= vl_upper;
335 #ifdef vl_xdigit
336 if (sys_isxdigit(ch))
337 result |= vl_xdigit;
338 #endif
339 }
340 } else if (use_locale < 0) {
341 if (isalpha(ch))
342 result |= (vl_ident | vl_pathn | vl_qident);
343 if (iscntrl(ch))
344 result |= (vl_cntrl);
345 if (isdigit(ch))
346 result |= (vl_digit | vl_ident | vl_pathn | vl_qident);
347 if (islower(ch))
348 result |= vl_lower;
349 if (isprint(ch) && ch != '\t')
350 result |= vl_print;
351 if (ispunct(ch))
352 result |= vl_punct;
353 if (isspace(ch))
354 result |= vl_space;
355 else
356 result |= vl_nonspace;
357 if (isupper(ch))
358 result |= vl_upper;
359 #ifdef vl_xdigit
360 if (isxdigit(ch))
361 result |= vl_xdigit;
362 #endif
363 } else
364 #endif /* OPT_LOCALE */
365 if (ch < N_chars)
366 result = vlCTYPE(ch);
367 return result;
368 }
369
370 /*
371 * Reapply set/clr customizations to the character class data, e.g., after
372 * calling vl_ctype_init().
373 */
374 void
vl_ctype_apply(void)375 vl_ctype_apply(void)
376 {
377 unsigned n;
378
379 TRACE(("vl_ctype_apply\n"));
380 if (ctype_sets) {
381 for (n = 0; n < N_chars; n++) {
382 addVlCTYPE(n, ctype_sets[n]);
383 TRACE(("...set %d:%#lx\n", n, (ULONG) vlCTYPE(n)));
384 }
385 }
386 if (ctype_clrs) {
387 for (n = 0; n < N_chars; n++) {
388 clrVlCTYPE(n, ctype_clrs[n]);
389 TRACE(("...clr %d:%#lx\n", n, (ULONG) vlCTYPE(n)));
390 }
391 }
392 }
393
394 /*
395 * Discard all set/clr customizations.
396 */
397 void
vl_ctype_discard(void)398 vl_ctype_discard(void)
399 {
400 FreeAndNull(ctype_sets);
401 FreeAndNull(ctype_clrs);
402 }
403
404 /*
405 * Set the given character class for the given character.
406 */
407 void
vl_ctype_set(int ch,CHARTYPE cclass)408 vl_ctype_set(int ch, CHARTYPE cclass)
409 {
410 TRACE(("vl_ctype_set %d:%#lx\n", ch, (ULONG) cclass));
411
412 if (ctype_sets == 0) {
413 ctype_sets = typecallocn(CHARTYPE, (size_t) N_chars);
414 }
415 if (ctype_sets != 0) {
416 ctype_sets[ch] |= cclass;
417 addVlCTYPE(ch, cclass);
418 }
419 if (ctype_clrs != 0) {
420 ctype_clrs[ch] &= ~cclass;
421 }
422 }
423
424 void
vl_ctype_clr(int ch,CHARTYPE cclass)425 vl_ctype_clr(int ch, CHARTYPE cclass)
426 {
427 TRACE(("vl_ctype_clr %d:%#lx\n", ch, (ULONG) cclass));
428
429 if (ctype_clrs == 0) {
430 ctype_clrs = typecallocn(CHARTYPE, (size_t) N_chars);
431 }
432 if (ctype_clrs != 0) {
433 ctype_clrs[ch] |= cclass;
434 clrVlCTYPE(ch, cclass);
435 }
436 if (ctype_sets != 0) {
437 ctype_sets[ch] &= ~cclass;
438 }
439 }
440
#if NO_LEAKS
/*
 * Leak-checking hook: release the set/clr customization lists, which is
 * exactly what vl_ctype_discard() does.
 */
void
vl_ctype_leaks(void)
{
    vl_ctype_discard();
}
#endif
449