1 /*
2  * $Id: vl_ctype.c,v 1.21 2013/03/10 20:27:22 tom Exp $
3  *
4  * On Linux, the normal/wide ctypes give comparable results in the range 0-255,
5  * reflecting the fact that codes 128-255 in Unicode are the "same" as
6  * Latin-1.  However, Solaris' wide ctypes give only "non-space" results for
7  * 128-255.  Since we're using these functions in vile 9.6 only for the normal
8  * ctypes (the narrow 8-bit locale), just use the normal ctype functions.
9  */
10 
11 #include <estruct.h>
12 #include <edef.h>
13 
14 #if OPT_LOCALE
15 #include <locale.h>
16 #endif /* OPT_LOCALE */
17 
/*
 * User-requested overrides of the character classes, remembered so that
 * vl_ctype_apply() can reimpose them on the class table.  Each pointer stays
 * null until the first vl_ctype_set()/vl_ctype_clr() call, after which it
 * refers to N_chars entries indexed by character code.
 */
static CHARTYPE *ctype_sets;	/* class bits to add, per character */
static CHARTYPE *ctype_clrs;	/* class bits to remove, per character */
20 
21 /* initialize our version of the "chartypes" stuff normally in ctypes.h */
22 /* also called later, if charset-affecting modes change, for instance */
23 void
vl_ctype_init(int print_lo,int print_hi)24 vl_ctype_init(int print_lo, int print_hi)
25 {
26 #if OPT_LOCALE
27     char *save_ctype = setlocale(LC_CTYPE, NULL);
28 #endif
29     int c;
30 
31     TRACE((T_CALLED "vl_ctype_init() lo=%d, hi=%d\n",
32 	   print_lo,
33 	   print_hi));
34 
35     /* If we're using the locale functions, set our flags based on its
36      * tables.  Note that just because you have 'setlocale()' doesn't mean
37      * that the tables are present or correct.  But this is a start.
38      *
39      * NOTE:  Solaris8 and some versions of M$ incorrectly classify tab as a
40      * printable character (ANSI C says control characters are not printable).
41      * Ignore that (the former fixes it in Solaris9).
42      */
43 #if OPT_LOCALE
44     TRACE(("wide_locale:%s\n", NonNull(vl_wide_enc.locale)));
45     TRACE(("narrow_locale:%s\n", NonNull(vl_narrow_enc.locale)));
46     TRACE(("current_locale:%s\n", NonNull(save_ctype)));
47 
48     if (okCTYPE2(vl_narrow_enc))
49 	setlocale(LC_CTYPE, vl_narrow_enc.locale);
50     else if (okCTYPE2(vl_wide_enc))
51 	setlocale(LC_CTYPE, vl_wide_enc.locale);
52 
53     for (c = 0; c < N_chars; c++) {
54 	if (print_hi > 0 && c > print_hi) {
55 	    setVlCTYPE(c, 0);
56 	} else if (!vl_8bit_builtin() && okCTYPE2(vl_narrow_enc)) {
57 	    setVlCTYPE(c, vl_ctype_bits(c, -TRUE));
58 	    vl_uppercase[c + 1] = (char) toupper(c);
59 	    vl_lowercase[c + 1] = (char) tolower(c);
60 	} else {
61 	    /* fallback to built-in character tables */
62 	    vl_8bit_ctype_init(okCTYPE2(vl_wide_enc), c);
63 	}
64     }
65 #else /* ! OPT_LOCALE */
66     (void) memset((char *) vl_chartypes_, 0, sizeof(vl_chartypes_));
67 
68     /* control characters */
69     for (c = 0; c < ' '; c++)
70 	addVlCTYPE(c, vl_cntrl);
71     addVlCTYPE(127, vl_cntrl);
72 
73     /* lowercase */
74     for (c = 'a'; c <= 'z'; c++)
75 	addVlCTYPE(c, vl_lower);
76 #if OPT_ISO_8859
77     for (c = 0xc0; c <= 0xd6; c++)
78 	addVlCTYPE(c, vl_lower);
79     for (c = 0xd8; c <= 0xde; c++)
80 	addVlCTYPE(c, vl_lower);
81 #endif
82     /* uppercase */
83     for (c = 'A'; c <= 'Z'; c++)
84 	addVlCTYPE(c, vl_upper);
85 #if OPT_ISO_8859
86     for (c = 0xdf; c <= 0xf6; c++)
87 	addVlCTYPE(c, vl_upper);
88     for (c = 0xf8; c <= 0xff; c++)
89 	addVlCTYPE(c, vl_upper);
90 #endif
91 
92     /*
93      * If you want to do this properly, compile-in locale support.
94      */
95     for (c = 0; c < N_chars; c++) {
96 	vl_uppercase[c + 1] = (char) c;
97 	vl_lowercase[c + 1] = (char) c;
98 	if (isAlpha(c)) {
99 	    if (isUpper(c)) {
100 		vl_lowercase[c + 1] = (char) (c ^ DIFCASE);
101 	    } else {
102 		vl_uppercase[c + 1] = (char) (c ^ DIFCASE);
103 	    }
104 	}
105     }
106 
107     /* digits */
108     for (c = '0'; c <= '9'; c++)
109 	addVlCTYPE(c, vl_digit);
110 #ifdef vl_xdigit
111     /* hex digits */
112     for (c = '0'; c <= '9'; c++)
113 	addVlCTYPE(c, vl_xdigit);
114     for (c = 'a'; c <= 'f'; c++)
115 	addVlCTYPE(c, vl_xdigit);
116     for (c = 'A'; c <= 'F'; c++)
117 	addVlCTYPE(c, vl_xdigit);
118 #endif
119 
120     /* punctuation */
121     for (c = '!'; c <= '/'; c++)
122 	addVlCTYPE(c, vl_punct);
123     for (c = ':'; c <= '@'; c++)
124 	addVlCTYPE(c, vl_punct);
125     for (c = '['; c <= '`'; c++)
126 	addVlCTYPE(c, vl_punct);
127     for (c = L_CURLY; c <= '~'; c++)
128 	addVlCTYPE(c, vl_punct);
129 #if OPT_ISO_8859
130     for (c = 0xa1; c <= 0xbf; c++)
131 	addVlCTYPE(c, vl_punct);
132 #endif
133 
134     /* printable */
135     for (c = ' '; c <= '~'; c++)
136 	addVlCTYPE(c, vl_print);
137 
138     /* whitespace */
139     addVlCTYPE(' ', vl_space);
140 #if OPT_ISO_8859
141     addVlCTYPE(0xa0, vl_space);
142 #endif
143     addVlCTYPE('\t', vl_space);
144     addVlCTYPE('\r', vl_space);
145     addVlCTYPE('\n', vl_space);
146     addVlCTYPE('\f', vl_space);
147 
148 #endif /* OPT_LOCALE */
149 
150     /* legal in pathnames */
151     addVlCTYPE('.', vl_pathn);
152     addVlCTYPE('_', vl_pathn);
153     addVlCTYPE('~', vl_pathn);
154     addVlCTYPE('-', vl_pathn);
155     addVlCTYPE('/', vl_pathn);
156 
157     /* legal in "identifiers" */
158     addVlCTYPE('_', vl_ident | vl_qident);
159     addVlCTYPE(':', vl_qident);
160 #if SYS_VMS
161     addVlCTYPE('$', vl_ident | vl_qident);
162 #endif
163 
164     c = print_lo;
165 
166     /*
167      * Guard against setting printing-high before printing-low while we have a
168      * buffer which may be repainted and possibly trashing the display.
169      */
170     if (c == 0
171 	&& print_hi >= 254)
172 	c = 160;
173 
174     if (c < HIGHBIT)
175 	c = HIGHBIT;
176     TRACE(("Forcing printable for [%d..min(%d,%d)]\n",
177 	   c, print_hi - 1, N_chars - 1));
178     while (c <= print_hi && c < N_chars)
179 	addVlCTYPE(c++, vl_print);
180 
181 #if DISP_X11
182     for (c = 0; c < N_chars; c++) {
183 	if (isPrint(c) && !gui_isprint(c)) {
184 	    clrVlCTYPE(c, vl_print);
185 	}
186     }
187 #endif
188     /* backspacers: ^H, rubout */
189     addVlCTYPE('\b', vl_bspace);
190     addVlCTYPE(127, vl_bspace);
191 
192     /* wildcard chars for most shells */
193     addVlCTYPE('*', vl_wild);
194     addVlCTYPE('?', vl_wild);
195 #if !OPT_VMS_PATH
196 #if SYS_UNIX
197     addVlCTYPE('~', vl_wild);
198 #endif
199     addVlCTYPE(L_BLOCK, vl_wild);
200     addVlCTYPE(R_BLOCK, vl_wild);
201     addVlCTYPE(L_CURLY, vl_wild);
202     addVlCTYPE(R_CURLY, vl_wild);
203     addVlCTYPE('$', vl_wild);
204     addVlCTYPE('`', vl_wild);
205 #endif
206 
207     /* ex mode line specifiers */
208     addVlCTYPE(',', vl_linespec);
209     addVlCTYPE('%', vl_linespec);
210     addVlCTYPE('-', vl_linespec);
211     addVlCTYPE('+', vl_linespec);
212     addVlCTYPE(';', vl_linespec);
213     addVlCTYPE('.', vl_linespec);
214     addVlCTYPE('$', vl_linespec);
215     addVlCTYPE('\'', vl_linespec);
216 
217     /* fences */
218     addVlCTYPE(L_CURLY, vl_fence);
219     addVlCTYPE(R_CURLY, vl_fence);
220     addVlCTYPE(L_PAREN, vl_fence);
221     addVlCTYPE(R_PAREN, vl_fence);
222     addVlCTYPE(L_BLOCK, vl_fence);
223     addVlCTYPE(R_BLOCK, vl_fence);
224 
225 #if OPT_VMS_PATH
226     addVlCTYPE(L_BLOCK, vl_pathn);
227     addVlCTYPE(R_BLOCK, vl_pathn);
228     addVlCTYPE(L_ANGLE, vl_pathn);
229     addVlCTYPE(R_ANGLE, vl_pathn);
230     addVlCTYPE('$', vl_pathn);
231     addVlCTYPE(':', vl_pathn);
232     addVlCTYPE(';', vl_pathn);
233 #endif
234 
235 #if OPT_MSDOS_PATH
236     addVlCTYPE(BACKSLASH, vl_pathn);
237     addVlCTYPE(':', vl_pathn);
238 #endif
239 
240 #if OPT_WIDE_CTYPES
241     /* scratch-buffer-names (usually superset of vl_pathn) */
242     addVlCTYPE(SCRTCH_LEFT[0], vl_scrtch);
243     addVlCTYPE(SCRTCH_RIGHT[0], vl_scrtch);
244     addVlCTYPE(' ', vl_scrtch);	/* ...to handle "[Buffer List]" */
245 #endif
246 
247     for (c = 0; c < N_chars; c++) {
248 	if (!(isSpace(c)))
249 	    addVlCTYPE(c, vl_nonspace);
250 	if (isDigit(c))
251 	    addVlCTYPE(c, vl_linespec);
252 	if (isAlpha(c) || isDigit(c))
253 	    addVlCTYPE(c, vl_ident | vl_pathn | vl_qident);
254 #if OPT_WIDE_CTYPES
255 	if (isSpace(c) || isPrint(c))
256 	    addVlCTYPE(c, vl_shpipe);
257 	if (ispath(c))
258 	    addVlCTYPE(c, vl_scrtch);
259 #endif
260     }
261 
262 #if OPT_LOCALE
263     if (save_ctype != 0)
264 	(void) setlocale(LC_CTYPE, save_ctype);
265 #endif
266 
267     returnVoid();
268 }
269 
270 /*
271  * Return the character-type bits for the given character.  There are several
272  * cases.
273  *
274  * vile supports a 256-entry table for "character classes", which are used
275  * mainly to support systems with single-byte encodings.  Some of those (no all
276  * older systems) may have incorrect character types; that is the reason for
277  * having the ability to change classes at runtime.
278  *
279  * If use_locale is TRUE, this uses the system's character type functions,
280  * (wide if available) e.g., for Unicode.  However, we still allow the
281  * character-classes to override.  The simple case is where the wide/narrow
282  * encodings coincide (up to latin1_codes).
283  *
284  * A more complicated case is for narrow encodings such as ISO-8859-2, where
285  * latin_codes is less than 256.  Then we have to check first if it corresponds
286  * to the narrow encoding before using the system's character type functions.
287  *
288  * If use_locale is -TRUE (negative), then use the system's 8-bit character
289  * tests to get the narrow locale information used as a starting point for the
290  * character classes.  On some systems, this may give odd results, but that is
291  * why it is configurable.
292  *
293  * If use_locale is FALSE, then use the 256-entry table of character classes.
294  */
295 CHARTYPE
vl_ctype_bits(int ch,int use_locale GCC_UNUSED)296 vl_ctype_bits(int ch, int use_locale GCC_UNUSED)
297 {
298     CHARTYPE result = 0;
299 
300     if (ch < 0) {
301 	;
302     }
303 #if OPT_LOCALE
304     else if (use_locale > 0) {
305 	int check;
306 
307 	/* handle case where character-classes can be overridden */
308 	if (ch < latin1_codes) {
309 	    result = vlCTYPE(ch);
310 	    ch = -1;
311 	} else if (vl_ucs_to_8bit(&check, ch)) {
312 	    result = vlCTYPE(check);
313 	    ch = -1;
314 	}
315 
316 	if (ch >= 0) {
317 	    if (sys_isalpha(ch))
318 		result |= (vl_ident | vl_pathn | vl_qident);
319 	    if (sys_iscntrl(ch))
320 		result |= (vl_cntrl);
321 	    if (sys_isdigit(ch))
322 		result |= (vl_digit | vl_ident | vl_pathn | vl_qident);
323 	    if (sys_islower(ch))
324 		result |= vl_lower;
325 	    if (sys_isprint(ch) && ch != '\t')
326 		result |= vl_print;
327 	    if (sys_ispunct(ch))
328 		result |= vl_punct;
329 	    if (sys_isspace(ch))
330 		result |= vl_space;
331 	    else
332 		result |= vl_nonspace;
333 	    if (sys_isupper(ch))
334 		result |= vl_upper;
335 #ifdef vl_xdigit
336 	    if (sys_isxdigit(ch))
337 		result |= vl_xdigit;
338 #endif
339 	}
340     } else if (use_locale < 0) {
341 	if (isalpha(ch))
342 	    result |= (vl_ident | vl_pathn | vl_qident);
343 	if (iscntrl(ch))
344 	    result |= (vl_cntrl);
345 	if (isdigit(ch))
346 	    result |= (vl_digit | vl_ident | vl_pathn | vl_qident);
347 	if (islower(ch))
348 	    result |= vl_lower;
349 	if (isprint(ch) && ch != '\t')
350 	    result |= vl_print;
351 	if (ispunct(ch))
352 	    result |= vl_punct;
353 	if (isspace(ch))
354 	    result |= vl_space;
355 	else
356 	    result |= vl_nonspace;
357 	if (isupper(ch))
358 	    result |= vl_upper;
359 #ifdef vl_xdigit
360 	if (isxdigit(ch))
361 	    result |= vl_xdigit;
362 #endif
363     } else
364 #endif /* OPT_LOCALE */
365     if (ch < N_chars)
366 	result = vlCTYPE(ch);
367     return result;
368 }
369 
370 /*
371  * Reapply set/clr customizations to the character class data, e.g., after
372  * calling vl_ctype_init().
373  */
374 void
vl_ctype_apply(void)375 vl_ctype_apply(void)
376 {
377     unsigned n;
378 
379     TRACE(("vl_ctype_apply\n"));
380     if (ctype_sets) {
381 	for (n = 0; n < N_chars; n++) {
382 	    addVlCTYPE(n, ctype_sets[n]);
383 	    TRACE(("...set %d:%#lx\n", n, (ULONG) vlCTYPE(n)));
384 	}
385     }
386     if (ctype_clrs) {
387 	for (n = 0; n < N_chars; n++) {
388 	    clrVlCTYPE(n, ctype_clrs[n]);
389 	    TRACE(("...clr %d:%#lx\n", n, (ULONG) vlCTYPE(n)));
390 	}
391     }
392 }
393 
394 /*
395  * Discard all set/clr customizations.
396  */
397 void
vl_ctype_discard(void)398 vl_ctype_discard(void)
399 {
400     FreeAndNull(ctype_sets);
401     FreeAndNull(ctype_clrs);
402 }
403 
404 /*
405  * Set the given character class for the given character.
406  */
407 void
vl_ctype_set(int ch,CHARTYPE cclass)408 vl_ctype_set(int ch, CHARTYPE cclass)
409 {
410     TRACE(("vl_ctype_set %d:%#lx\n", ch, (ULONG) cclass));
411 
412     if (ctype_sets == 0) {
413 	ctype_sets = typecallocn(CHARTYPE, (size_t) N_chars);
414     }
415     if (ctype_sets != 0) {
416 	ctype_sets[ch] |= cclass;
417 	addVlCTYPE(ch, cclass);
418     }
419     if (ctype_clrs != 0) {
420 	ctype_clrs[ch] &= ~cclass;
421     }
422 }
423 
424 void
vl_ctype_clr(int ch,CHARTYPE cclass)425 vl_ctype_clr(int ch, CHARTYPE cclass)
426 {
427     TRACE(("vl_ctype_clr %d:%#lx\n", ch, (ULONG) cclass));
428 
429     if (ctype_clrs == 0) {
430 	ctype_clrs = typecallocn(CHARTYPE, (size_t) N_chars);
431     }
432     if (ctype_clrs != 0) {
433 	ctype_clrs[ch] |= cclass;
434 	clrVlCTYPE(ch, cclass);
435     }
436     if (ctype_sets != 0) {
437 	ctype_sets[ch] &= ~cclass;
438     }
439 }
440 
#if NO_LEAKS
/*
 * Leak-checking hook: release the storage owned by this module, which is
 * just the two set/clr override lists.
 */
void
vl_ctype_leaks(void)
{
    vl_ctype_discard();
}
#endif
449