1 /* ccl - routines for character classes */
2
3 /* Copyright (c) 1990 The Regents of the University of California. */
4 /* All rights reserved. */
5
6 /* This code is derived from software contributed to Berkeley by */
7 /* Vern Paxson. */
8
9 /* The United States Government has rights in this work pursuant */
10 /* to contract no. DE-AC03-76SF00098 between the United States */
11 /* Department of Energy and the University of California. */
12
13 /* This file is part of flex. */
14
15 /* Redistribution and use in source and binary forms, with or without */
16 /* modification, are permitted provided that the following conditions */
17 /* are met: */
18
19 /* 1. Redistributions of source code must retain the above copyright */
20 /* notice, this list of conditions and the following disclaimer. */
21 /* 2. Redistributions in binary form must reproduce the above copyright */
22 /* notice, this list of conditions and the following disclaimer in the */
23 /* documentation and/or other materials provided with the distribution. */
24
25 /* Neither the name of the University nor the names of its contributors */
26 /* may be used to endorse or promote products derived from this software */
27 /* without specific prior written permission. */
28
29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /* PURPOSE. */
33 #include "flexdef.h"
34 __RCSID("$NetBSD: ccl.c,v 1.3 2014/10/30 18:44:05 christos Exp $");
35
36
37 /* return true if the chr is in the ccl. Takes negation into account. */
38 static bool
ccl_contains(const int cclp,const int ch)39 ccl_contains (const int cclp, const int ch)
40 {
41 int ind, len, i;
42
43 len = ccllen[cclp];
44 ind = cclmap[cclp];
45
46 for (i = 0; i < len; ++i)
47 if (ccltbl[ind + i] == ch)
48 return !cclng[cclp];
49
50 return cclng[cclp];
51 }
52
53
54 /* ccladd - add a single character to a ccl */
55
ccladd(cclp,ch)56 void ccladd (cclp, ch)
57 int cclp;
58 int ch;
59 {
60 int ind, len, newpos, i;
61
62 check_char (ch);
63
64 len = ccllen[cclp];
65 ind = cclmap[cclp];
66
67 /* check to see if the character is already in the ccl */
68
69 for (i = 0; i < len; ++i)
70 if (ccltbl[ind + i] == ch)
71 return;
72
73 /* mark newlines */
74 if (ch == nlch)
75 ccl_has_nl[cclp] = true;
76
77 newpos = ind + len;
78
79 if (newpos >= current_max_ccl_tbl_size) {
80 current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
81
82 ++num_reallocs;
83
84 ccltbl = reallocate_Character_array (ccltbl,
85 current_max_ccl_tbl_size);
86 }
87
88 ccllen[cclp] = len + 1;
89 ccltbl[newpos] = ch;
90 }
91
92 /* dump_cclp - same thing as list_character_set, but for cclps. */
93
dump_cclp(FILE * file,int cclp)94 static void dump_cclp (FILE* file, int cclp)
95 {
96 register int i;
97
98 putc ('[', file);
99
100 for (i = 0; i < csize; ++i) {
101 if (ccl_contains(cclp, i)){
102 register int start_char = i;
103
104 putc (' ', file);
105
106 fputs (readable_form (i), file);
107
108 while (++i < csize && ccl_contains(cclp,i)) ;
109
110 if (i - 1 > start_char)
111 /* this was a run */
112 fprintf (file, "-%s",
113 readable_form (i - 1));
114
115 putc (' ', file);
116 }
117 }
118
119 putc (']', file);
120 }
121
122
123
124 /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
125 int
ccl_set_diff(int a,int b)126 ccl_set_diff (int a, int b)
127 {
128 int d, ch;
129
130 /* create new class */
131 d = cclinit();
132
133 /* In order to handle negation, we spin through all possible chars,
134 * addding each char in a that is not in b.
135 * (This could be O(n^2), but n is small and bounded.)
136 */
137 for ( ch = 0; ch < csize; ++ch )
138 if (ccl_contains (a, ch) && !ccl_contains(b, ch))
139 ccladd (d, ch);
140
141 /* debug */
142 if (0){
143 fprintf(stderr, "ccl_set_diff (");
144 fprintf(stderr, "\n ");
145 dump_cclp (stderr, a);
146 fprintf(stderr, "\n ");
147 dump_cclp (stderr, b);
148 fprintf(stderr, "\n ");
149 dump_cclp (stderr, d);
150 fprintf(stderr, "\n)\n");
151 }
152 return d;
153 }
154
155 /* ccl_set_union - create a new ccl as the set union of the two given ccls. */
156 int
ccl_set_union(int a,int b)157 ccl_set_union (int a, int b)
158 {
159 int d, i;
160
161 /* create new class */
162 d = cclinit();
163
164 /* Add all of a */
165 for (i = 0; i < ccllen[a]; ++i)
166 ccladd (d, ccltbl[cclmap[a] + i]);
167
168 /* Add all of b */
169 for (i = 0; i < ccllen[b]; ++i)
170 ccladd (d, ccltbl[cclmap[b] + i]);
171
172 /* debug */
173 if (0){
174 fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
175 fprintf(stderr, "\n ");
176 dump_cclp (stderr, a);
177 fprintf(stderr, "\n ");
178 dump_cclp (stderr, b);
179 fprintf(stderr, "\n ");
180 dump_cclp (stderr, d);
181 fprintf(stderr, "\n)\n");
182 }
183 return d;
184 }
185
186
187 /* cclinit - return an empty ccl */
188
cclinit()189 int cclinit ()
190 {
191 if (++lastccl >= current_maxccls) {
192 current_maxccls += MAX_CCLS_INCREMENT;
193
194 ++num_reallocs;
195
196 cclmap =
197 reallocate_integer_array (cclmap, current_maxccls);
198 ccllen =
199 reallocate_integer_array (ccllen, current_maxccls);
200 cclng = reallocate_integer_array (cclng, current_maxccls);
201 ccl_has_nl =
202 reallocate_bool_array (ccl_has_nl,
203 current_maxccls);
204 }
205
206 if (lastccl == 1)
207 /* we're making the first ccl */
208 cclmap[lastccl] = 0;
209
210 else
211 /* The new pointer is just past the end of the last ccl.
212 * Since the cclmap points to the \first/ character of a
213 * ccl, adding the length of the ccl to the cclmap pointer
214 * will produce a cursor to the first free space.
215 */
216 cclmap[lastccl] =
217 cclmap[lastccl - 1] + ccllen[lastccl - 1];
218
219 ccllen[lastccl] = 0;
220 cclng[lastccl] = 0; /* ccl's start out life un-negated */
221 ccl_has_nl[lastccl] = false;
222
223 return lastccl;
224 }
225
226
227 /* cclnegate - negate the given ccl */
228
cclnegate(cclp)229 void cclnegate (cclp)
230 int cclp;
231 {
232 cclng[cclp] = 1;
233 ccl_has_nl[cclp] = !ccl_has_nl[cclp];
234 }
235
236
237 /* list_character_set - list the members of a set of characters in CCL form
238 *
239 * Writes to the given file a character-class representation of those
240 * characters present in the given CCL. A character is present if it
241 * has a non-zero value in the cset array.
242 */
243
list_character_set(file,cset)244 void list_character_set (file, cset)
245 FILE *file;
246 int cset[];
247 {
248 register int i;
249
250 putc ('[', file);
251
252 for (i = 0; i < csize; ++i) {
253 if (cset[i]) {
254 register int start_char = i;
255
256 putc (' ', file);
257
258 fputs (readable_form (i), file);
259
260 while (++i < csize && cset[i]) ;
261
262 if (i - 1 > start_char)
263 /* this was a run */
264 fprintf (file, "-%s",
265 readable_form (i - 1));
266
267 putc (' ', file);
268 }
269 }
270
271 putc (']', file);
272 }
273
274 /** Determines if the range [c1-c2] is unambiguous in a case-insensitive
275 * scanner. Specifically, if a lowercase or uppercase character, x, is in the
276 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
277 * be in the range. If not, then this range is ambiguous, and the function
278 * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
279 * [a-z] will be labeled ambiguous because it does not include [A-Z].
280 *
281 * @param c1 the lower end of the range
282 * @param c2 the upper end of the range
283 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
284 */
range_covers_case(int c1,int c2)285 bool range_covers_case (int c1, int c2)
286 {
287 int i, o;
288
289 for (i = c1; i <= c2; i++) {
290 if (has_case (i)) {
291 o = reverse_case (i);
292 if (o < c1 || c2 < o)
293 return false;
294 }
295 }
296 return true;
297 }
298
299 /** Reverse the case of a character, if possible.
300 * @return c if case-reversal does not apply.
301 */
reverse_case(int c)302 int reverse_case (int c)
303 {
304 return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
305 }
306
307 /** Return true if c is uppercase or lowercase. */
has_case(int c)308 bool has_case (int c)
309 {
310 return (isupper (c) || islower (c)) ? true : false;
311 }
312