xref: /minix/external/bsd/flex/dist/ccl.c (revision 0a6a1f1d)
1 /* ccl - routines for character classes */
2 
3 /*  Copyright (c) 1990 The Regents of the University of California. */
4 /*  All rights reserved. */
5 
6 /*  This code is derived from software contributed to Berkeley by */
7 /*  Vern Paxson. */
8 
9 /*  The United States Government has rights in this work pursuant */
10 /*  to contract no. DE-AC03-76SF00098 between the United States */
11  /*  Department of Energy and the University of California. */
12 
13 /*  This file is part of flex. */
14 
15 /*  Redistribution and use in source and binary forms, with or without */
16 /*  modification, are permitted provided that the following conditions */
17 /*  are met: */
18 
19 /*  1. Redistributions of source code must retain the above copyright */
20 /*     notice, this list of conditions and the following disclaimer. */
21 /*  2. Redistributions in binary form must reproduce the above copyright */
22 /*     notice, this list of conditions and the following disclaimer in the */
23 /*     documentation and/or other materials provided with the distribution. */
24 
25 /*  Neither the name of the University nor the names of its contributors */
26 /*  may be used to endorse or promote products derived from this software */
27 /*  without specific prior written permission. */
28 
29 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /*  PURPOSE. */
33 #include "flexdef.h"
34 __RCSID("$NetBSD: ccl.c,v 1.3 2014/10/30 18:44:05 christos Exp $");
35 
36 
37 /* return true if the chr is in the ccl. Takes negation into account. */
38 static bool
ccl_contains(const int cclp,const int ch)39 ccl_contains (const int cclp, const int ch)
40 {
41 	int     ind, len, i;
42 
43 	len = ccllen[cclp];
44 	ind = cclmap[cclp];
45 
46 	for (i = 0; i < len; ++i)
47 		if (ccltbl[ind + i] == ch)
48 			return !cclng[cclp];
49 
50     return cclng[cclp];
51 }
52 
53 
54 /* ccladd - add a single character to a ccl */
55 
ccladd(cclp,ch)56 void    ccladd (cclp, ch)
57      int     cclp;
58      int     ch;
59 {
60 	int     ind, len, newpos, i;
61 
62 	check_char (ch);
63 
64 	len = ccllen[cclp];
65 	ind = cclmap[cclp];
66 
67 	/* check to see if the character is already in the ccl */
68 
69 	for (i = 0; i < len; ++i)
70 		if (ccltbl[ind + i] == ch)
71 			return;
72 
73 	/* mark newlines */
74 	if (ch == nlch)
75 		ccl_has_nl[cclp] = true;
76 
77 	newpos = ind + len;
78 
79 	if (newpos >= current_max_ccl_tbl_size) {
80 		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
81 
82 		++num_reallocs;
83 
84 		ccltbl = reallocate_Character_array (ccltbl,
85 						     current_max_ccl_tbl_size);
86 	}
87 
88 	ccllen[cclp] = len + 1;
89 	ccltbl[newpos] = ch;
90 }
91 
92 /* dump_cclp - same thing as list_character_set, but for cclps.  */
93 
dump_cclp(FILE * file,int cclp)94 static void    dump_cclp (FILE* file, int cclp)
95 {
96 	register int i;
97 
98 	putc ('[', file);
99 
100 	for (i = 0; i < csize; ++i) {
101 		if (ccl_contains(cclp, i)){
102 			register int start_char = i;
103 
104 			putc (' ', file);
105 
106 			fputs (readable_form (i), file);
107 
108 			while (++i < csize && ccl_contains(cclp,i)) ;
109 
110 			if (i - 1 > start_char)
111 				/* this was a run */
112 				fprintf (file, "-%s",
113 					 readable_form (i - 1));
114 
115 			putc (' ', file);
116 		}
117 	}
118 
119 	putc (']', file);
120 }
121 
122 
123 
124 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
125 int
ccl_set_diff(int a,int b)126 ccl_set_diff (int a, int b)
127 {
128     int  d, ch;
129 
130     /* create new class  */
131     d = cclinit();
132 
133     /* In order to handle negation, we spin through all possible chars,
134      * addding each char in a that is not in b.
135      * (This could be O(n^2), but n is small and bounded.)
136      */
137 	for ( ch = 0; ch < csize; ++ch )
138         if (ccl_contains (a, ch) && !ccl_contains(b, ch))
139             ccladd (d, ch);
140 
141     /* debug */
142     if (0){
143         fprintf(stderr, "ccl_set_diff (");
144             fprintf(stderr, "\n    ");
145             dump_cclp (stderr, a);
146             fprintf(stderr, "\n    ");
147             dump_cclp (stderr, b);
148             fprintf(stderr, "\n    ");
149             dump_cclp (stderr, d);
150         fprintf(stderr, "\n)\n");
151     }
152     return d;
153 }
154 
155 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
156 int
ccl_set_union(int a,int b)157 ccl_set_union (int a, int b)
158 {
159     int  d, i;
160 
161     /* create new class  */
162     d = cclinit();
163 
164     /* Add all of a */
165     for (i = 0; i < ccllen[a]; ++i)
166 		ccladd (d, ccltbl[cclmap[a] + i]);
167 
168     /* Add all of b */
169     for (i = 0; i < ccllen[b]; ++i)
170 		ccladd (d, ccltbl[cclmap[b] + i]);
171 
172     /* debug */
173     if (0){
174         fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
175             fprintf(stderr, "\n    ");
176             dump_cclp (stderr, a);
177             fprintf(stderr, "\n    ");
178             dump_cclp (stderr, b);
179             fprintf(stderr, "\n    ");
180             dump_cclp (stderr, d);
181         fprintf(stderr, "\n)\n");
182     }
183     return d;
184 }
185 
186 
187 /* cclinit - return an empty ccl */
188 
cclinit()189 int     cclinit ()
190 {
191 	if (++lastccl >= current_maxccls) {
192 		current_maxccls += MAX_CCLS_INCREMENT;
193 
194 		++num_reallocs;
195 
196 		cclmap =
197 			reallocate_integer_array (cclmap, current_maxccls);
198 		ccllen =
199 			reallocate_integer_array (ccllen, current_maxccls);
200 		cclng = reallocate_integer_array (cclng, current_maxccls);
201 		ccl_has_nl =
202 			reallocate_bool_array (ccl_has_nl,
203 					       current_maxccls);
204 	}
205 
206 	if (lastccl == 1)
207 		/* we're making the first ccl */
208 		cclmap[lastccl] = 0;
209 
210 	else
211 		/* The new pointer is just past the end of the last ccl.
212 		 * Since the cclmap points to the \first/ character of a
213 		 * ccl, adding the length of the ccl to the cclmap pointer
214 		 * will produce a cursor to the first free space.
215 		 */
216 		cclmap[lastccl] =
217 			cclmap[lastccl - 1] + ccllen[lastccl - 1];
218 
219 	ccllen[lastccl] = 0;
220 	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
221 	ccl_has_nl[lastccl] = false;
222 
223 	return lastccl;
224 }
225 
226 
227 /* cclnegate - negate the given ccl */
228 
cclnegate(cclp)229 void    cclnegate (cclp)
230      int     cclp;
231 {
232 	cclng[cclp] = 1;
233 	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
234 }
235 
236 
237 /* list_character_set - list the members of a set of characters in CCL form
238  *
239  * Writes to the given file a character-class representation of those
240  * characters present in the given CCL.  A character is present if it
241  * has a non-zero value in the cset array.
242  */
243 
list_character_set(file,cset)244 void    list_character_set (file, cset)
245      FILE   *file;
246      int     cset[];
247 {
248 	register int i;
249 
250 	putc ('[', file);
251 
252 	for (i = 0; i < csize; ++i) {
253 		if (cset[i]) {
254 			register int start_char = i;
255 
256 			putc (' ', file);
257 
258 			fputs (readable_form (i), file);
259 
260 			while (++i < csize && cset[i]) ;
261 
262 			if (i - 1 > start_char)
263 				/* this was a run */
264 				fprintf (file, "-%s",
265 					 readable_form (i - 1));
266 
267 			putc (' ', file);
268 		}
269 	}
270 
271 	putc (']', file);
272 }
273 
274 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
275  * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
276  * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
277  * be in the range. If not, then this range is ambiguous, and the function
278  * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
279  * [a-z] will be labeled ambiguous because it does not include [A-Z].
280  *
281  * @param c1 the lower end of the range
282  * @param c2 the upper end of the range
283  * @return true if [c1-c2] is not ambiguous for a caseless scanner.
284  */
range_covers_case(int c1,int c2)285 bool range_covers_case (int c1, int c2)
286 {
287 	int     i, o;
288 
289 	for (i = c1; i <= c2; i++) {
290 		if (has_case (i)) {
291 			o = reverse_case (i);
292 			if (o < c1 || c2 < o)
293 				return false;
294 		}
295 	}
296 	return true;
297 }
298 
299 /** Reverse the case of a character, if possible.
300  * @return c if case-reversal does not apply.
301  */
reverse_case(int c)302 int reverse_case (int c)
303 {
304 	return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
305 }
306 
307 /** Return true if c is uppercase or lowercase. */
has_case(int c)308 bool has_case (int c)
309 {
310 	return (isupper (c) || islower (c)) ? true : false;
311 }
312