1 /* $NetBSD: parse_rx.c,v 1.1.1.1 2008/12/22 00:18:37 haad Exp $ */
2
3 /*
4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of the device-mapper userspace tools.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #include "dmlib.h"
19 #include "parse_rx.h"
20
21 struct parse_sp { /* scratch pad for the parsing process */
22 struct dm_pool *mem;
23 int type; /* token type, 0 indicates a charset */
24 dm_bitset_t charset; /* The current charset */
25 const char *cursor; /* where we are in the regex */
26 const char *rx_end; /* 1pte for the expression being parsed */
27 };
28
29 static struct rx_node *_or_term(struct parse_sp *ps);
30
_single_char(struct parse_sp * ps,unsigned int c,const char * ptr)31 static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr)
32 {
33 ps->type = 0;
34 ps->cursor = ptr + 1;
35 dm_bit_clear_all(ps->charset);
36 dm_bit_set(ps->charset, c);
37 }
38
39 /*
40 * Get the next token from the regular expression.
41 * Returns: 1 success, 0 end of input, -1 error.
42 */
_rx_get_token(struct parse_sp * ps)43 static int _rx_get_token(struct parse_sp *ps)
44 {
45 int neg = 0, range = 0;
46 char c, lc = 0;
47 const char *ptr = ps->cursor;
48 if (ptr == ps->rx_end) { /* end of input ? */
49 ps->type = -1;
50 return 0;
51 }
52
53 switch (*ptr) {
54 /* charsets and ncharsets */
55 case '[':
56 ptr++;
57 if (*ptr == '^') {
58 dm_bit_set_all(ps->charset);
59
60 /* never transition on zero */
61 dm_bit_clear(ps->charset, 0);
62 neg = 1;
63 ptr++;
64
65 } else
66 dm_bit_clear_all(ps->charset);
67
68 while ((ptr < ps->rx_end) && (*ptr != ']')) {
69 if (*ptr == '\\') {
70 /* an escaped character */
71 ptr++;
72 switch (*ptr) {
73 case 'n':
74 c = '\n';
75 break;
76 case 'r':
77 c = '\r';
78 break;
79 case 't':
80 c = '\t';
81 break;
82 default:
83 c = *ptr;
84 }
85 } else if (*ptr == '-' && lc) {
86 /* we've got a range on our hands */
87 range = 1;
88 ptr++;
89 if (ptr == ps->rx_end) {
90 log_error("Incomplete range"
91 "specification");
92 return -1;
93 }
94 c = *ptr;
95 } else
96 c = *ptr;
97
98 if (range) {
99 /* add lc - c into the bitset */
100 if (lc > c) {
101 char tmp = c;
102 c = lc;
103 lc = tmp;
104 }
105
106 for (; lc <= c; lc++) {
107 if (neg)
108 dm_bit_clear(ps->charset, lc);
109 else
110 dm_bit_set(ps->charset, lc);
111 }
112 range = 0;
113 } else {
114 /* add c into the bitset */
115 if (neg)
116 dm_bit_clear(ps->charset, c);
117 else
118 dm_bit_set(ps->charset, c);
119 }
120 ptr++;
121 lc = c;
122 }
123
124 if (ptr >= ps->rx_end) {
125 ps->type = -1;
126 return -1;
127 }
128
129 ps->type = 0;
130 ps->cursor = ptr + 1;
131 break;
132
133 /* These characters are special, we just return their ASCII
134 codes as the type. Sorted into ascending order to help the
135 compiler */
136 case '(':
137 case ')':
138 case '*':
139 case '+':
140 case '?':
141 case '|':
142 ps->type = (int) *ptr;
143 ps->cursor = ptr + 1;
144 break;
145
146 case '^':
147 _single_char(ps, HAT_CHAR, ptr);
148 break;
149
150 case '$':
151 _single_char(ps, DOLLAR_CHAR, ptr);
152 break;
153
154 case '.':
155 /* The 'all but newline' character set */
156 ps->type = 0;
157 ps->cursor = ptr + 1;
158 dm_bit_set_all(ps->charset);
159 dm_bit_clear(ps->charset, (int) '\n');
160 dm_bit_clear(ps->charset, (int) '\r');
161 dm_bit_clear(ps->charset, 0);
162 break;
163
164 case '\\':
165 /* escaped character */
166 ptr++;
167 if (ptr >= ps->rx_end) {
168 log_error("Badly quoted character at end "
169 "of expression");
170 ps->type = -1;
171 return -1;
172 }
173
174 ps->type = 0;
175 ps->cursor = ptr + 1;
176 dm_bit_clear_all(ps->charset);
177 switch (*ptr) {
178 case 'n':
179 dm_bit_set(ps->charset, (int) '\n');
180 break;
181 case 'r':
182 dm_bit_set(ps->charset, (int) '\r');
183 break;
184 case 't':
185 dm_bit_set(ps->charset, (int) '\t');
186 break;
187 default:
188 dm_bit_set(ps->charset, (int) *ptr);
189 }
190 break;
191
192 default:
193 /* add a single character to the bitset */
194 ps->type = 0;
195 ps->cursor = ptr + 1;
196 dm_bit_clear_all(ps->charset);
197 dm_bit_set(ps->charset, (int) *ptr);
198 break;
199 }
200
201 return 1;
202 }
203
_node(struct dm_pool * mem,int type,struct rx_node * l,struct rx_node * r)204 static struct rx_node *_node(struct dm_pool *mem, int type,
205 struct rx_node *l, struct rx_node *r)
206 {
207 struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
208
209 if (n) {
210 if (!(n->charset = dm_bitset_create(mem, 256))) {
211 dm_pool_free(mem, n);
212 return NULL;
213 }
214
215 n->type = type;
216 n->left = l;
217 n->right = r;
218 }
219
220 return n;
221 }
222
_term(struct parse_sp * ps)223 static struct rx_node *_term(struct parse_sp *ps)
224 {
225 struct rx_node *n;
226
227 switch (ps->type) {
228 case 0:
229 if (!(n = _node(ps->mem, CHARSET, NULL, NULL))) {
230 stack;
231 return NULL;
232 }
233
234 dm_bit_copy(n->charset, ps->charset);
235 _rx_get_token(ps); /* match charset */
236 break;
237
238 case '(':
239 _rx_get_token(ps); /* match '(' */
240 n = _or_term(ps);
241 if (ps->type != ')') {
242 log_error("missing ')' in regular expression");
243 return 0;
244 }
245 _rx_get_token(ps); /* match ')' */
246 break;
247
248 default:
249 n = 0;
250 }
251
252 return n;
253 }
254
_closure_term(struct parse_sp * ps)255 static struct rx_node *_closure_term(struct parse_sp *ps)
256 {
257 struct rx_node *l, *n;
258
259 if (!(l = _term(ps)))
260 return NULL;
261
262 for (;;) {
263 switch (ps->type) {
264 case '*':
265 n = _node(ps->mem, STAR, l, NULL);
266 break;
267
268 case '+':
269 n = _node(ps->mem, PLUS, l, NULL);
270 break;
271
272 case '?':
273 n = _node(ps->mem, QUEST, l, NULL);
274 break;
275
276 default:
277 return l;
278 }
279
280 if (!n) {
281 stack;
282 return NULL;
283 }
284
285 _rx_get_token(ps);
286 l = n;
287 }
288
289 return n;
290 }
291
_cat_term(struct parse_sp * ps)292 static struct rx_node *_cat_term(struct parse_sp *ps)
293 {
294 struct rx_node *l, *r, *n;
295
296 if (!(l = _closure_term(ps)))
297 return NULL;
298
299 if (ps->type == '|')
300 return l;
301
302 if (!(r = _cat_term(ps)))
303 return l;
304
305 if (!(n = _node(ps->mem, CAT, l, r)))
306 stack;
307
308 return n;
309 }
310
_or_term(struct parse_sp * ps)311 static struct rx_node *_or_term(struct parse_sp *ps)
312 {
313 struct rx_node *l, *r, *n;
314
315 if (!(l = _cat_term(ps)))
316 return NULL;
317
318 if (ps->type != '|')
319 return l;
320
321 _rx_get_token(ps); /* match '|' */
322
323 if (!(r = _or_term(ps))) {
324 log_error("Badly formed 'or' expression");
325 return NULL;
326 }
327
328 if (!(n = _node(ps->mem, OR, l, r)))
329 stack;
330
331 return n;
332 }
333
rx_parse_tok(struct dm_pool * mem,const char * begin,const char * end)334 struct rx_node *rx_parse_tok(struct dm_pool *mem,
335 const char *begin, const char *end)
336 {
337 struct rx_node *r;
338 struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
339
340 if (!ps) {
341 stack;
342 return NULL;
343 }
344
345 ps->mem = mem;
346 ps->charset = dm_bitset_create(mem, 256);
347 ps->cursor = begin;
348 ps->rx_end = end;
349 _rx_get_token(ps); /* load the first token */
350
351 if (!(r = _or_term(ps))) {
352 log_error("Parse error in regex");
353 dm_pool_free(mem, ps);
354 }
355
356 return r;
357 }
358
rx_parse_str(struct dm_pool * mem,const char * str)359 struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
360 {
361 return rx_parse_tok(mem, str, str + strlen(str));
362 }
363