xref: /openbsd/usr.sbin/nsd/zlexer.lex (revision 91f110e0)
1 %{
2 /*
3  * zlexer.lex - lexical analyzer for (DNS) zone files
4  *
5  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
6  *
7  * See LICENSE for the license.
8  *
9  */
10 
11 #include "config.h"
12 
13 #include <ctype.h>
14 #include <errno.h>
15 #include <string.h>
16 #include <strings.h>
17 
18 #include "zonec.h"
19 #include "dname.h"
20 #include "zparser.h"
21 
22 #if 0
23 #define LEXOUT(s)  printf s /* used ONLY when debugging */
24 #else
25 #define LEXOUT(s)
26 #endif
27 
/*
 * Position of the scanner inside the line that is currently being read.
 * The rules and parse_token() use it to decide which token a piece of
 * whitespace or a word becomes.
 */
enum lexer_state {
	/* Start of a line: nothing of the record has been seen yet. */
	EXPECT_OWNER,
	/* The first token of the line has been handed to parse_token(). */
	PARSING_OWNER,
	/* Words are first tried as RR type, class or TTL. */
	PARSING_TTL_CLASS_TYPE,
	/* Set after an RR type, $TTL or $ORIGIN; words get no special lookup. */
	PARSING_RDATA
};
34 
35 static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);
36 
/*
 * $INCLUDE nesting state, filled by push_parser_state() and consumed by
 * pop_parser_state().  Slot i keeps the flex buffer and the file specific
 * parser fields (filename, line, origin) that were current when the next
 * file was entered; include_stack_ptr is the number of slots in use.
 */
static YY_BUFFER_STATE include_stack[MAXINCLUDES];
static zparser_type zparser_stack[MAXINCLUDES];
static int include_stack_ptr = 0;
40 
41 /*
42  * Saves the file specific variables on the include stack.
43  */
44 static void
45 push_parser_state(FILE *input)
46 {
47 	zparser_stack[include_stack_ptr].filename = parser->filename;
48 	zparser_stack[include_stack_ptr].line = parser->line;
49 	zparser_stack[include_stack_ptr].origin = parser->origin;
50 	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
51 	yy_switch_to_buffer(yy_create_buffer(input, YY_BUF_SIZE));
52 	++include_stack_ptr;
53 }
54 
55 /*
56  * Restores the file specific variables from the include stack.
57  */
58 static void
59 pop_parser_state(void)
60 {
61 	--include_stack_ptr;
62 	parser->filename = zparser_stack[include_stack_ptr].filename;
63 	parser->line = zparser_stack[include_stack_ptr].line;
64 	parser->origin = zparser_stack[include_stack_ptr].origin;
65 	yy_delete_buffer(YY_CURRENT_BUFFER);
66 	yy_switch_to_buffer(include_stack[include_stack_ptr]);
67 }
68 
69 static YY_BUFFER_STATE oldstate;
70 /* Start string scan */
71 void
72 parser_push_stringbuf(char* str)
73 {
74 	oldstate = YY_CURRENT_BUFFER;
75 	yy_switch_to_buffer(yy_scan_string(str));
76 }
77 
78 void
79 parser_pop_stringbuf(void)
80 {
81 	yy_delete_buffer(YY_CURRENT_BUFFER);
82 	yy_switch_to_buffer(oldstate);
83 	oldstate = NULL;
84 }
85 
#ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
/*
 * Fallback used only when the generated scanner does not provide
 * yy_set_bol.  It records the beginning-of-line flag by storing '\n'
 * (at_bol true) or ' ' (at_bol false) in the first byte of the current
 * buffer, creating a buffer on yyin first when there is none.
 *
 * Wrapped in do { } while (0) so that an invocation followed by ';' is
 * exactly one statement, also inside an unbraced if/else.
 */
#define yy_set_bol(at_bol) \
	do { \
		if ( ! yy_current_buffer ) \
			yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
		yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
	} while (0)
#endif
94 
95 %}
96 %option noinput
97 %option nounput
98 %{
99 #ifndef YY_NO_UNPUT
100 #define YY_NO_UNPUT 1
101 #endif
102 #ifndef YY_NO_INPUT
103 #define YY_NO_INPUT 1
104 #endif
105 %}
106 
/*
 * Named patterns used by the rules:
 *   SPACE    a blank or a tab
 *   LETTER   an ASCII letter
 *   NEWLINE  a line feed or a carriage return (each one matches alone)
 *   ZONESTR  any character that is not a blank, tab, line feed, carriage
 *            return, parenthesis, semicolon, dot, double quote or dollar
 *   DOLLAR   a literal dollar sign
 *   COMMENT  a semicolon
 *   DOT      a literal dot
 *   BIT      one bitlabel character: anything except a closing bracket
 *            and line feed, or a backslash followed by a character
 *   ANY      one quoted string character: anything except a double
 *            quote, line feed and backslash, or a backslash followed by
 *            a character
 */
SPACE   [ \t]
LETTER  [a-zA-Z]
NEWLINE [\n\r]
ZONESTR [^ \t\n\r();.\"\$]
DOLLAR  \$
COMMENT ;
DOT     \.
BIT	[^\]\n]|\\.
ANY     [^\"\n\\]|\\.

/*
 * Exclusive start conditions: the rest of a $INCLUDE line, the inside
 * of a bitlabel and the inside of a quoted string.
 */
%x	incl bitlabel quotedstring
118 
119 %%
	/*
	 * Scanner state kept between calls (both variables are static).
	 * paren_open is 1 between "(" and ")", where line ends are
	 * reported as SP instead of NL.  lexer_state is the position
	 * within the current line, see enum lexer_state.
	 * NOTE(review): no rule resets these before yyterminate();
	 * confirm that a later scan cannot start with stale state.
	 */
	static int paren_open = 0;
	static enum lexer_state lexer_state = EXPECT_OWNER;
	/* A comment, with any blanks in front of it, produces no token. */
{SPACE}*{COMMENT}.*	/* ignore */
	/* $TTL and $ORIGIN at the start of a line; what follows is scanned as rdata. */
^{DOLLAR}TTL            { lexer_state = PARSING_RDATA; return DOLLAR_TTL; }
^{DOLLAR}ORIGIN         { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; }
125 
126 	/*
127 	 * Handle $INCLUDE directives.  See
128 	 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
129 	 */
^{DOLLAR}INCLUDE        {
	/* The remainder of the line is matched by the <incl> rules. */
	BEGIN(incl);
}
<incl>\n 		|
<incl><<EOF>>		{
	/*
	 * $INCLUDE immediately followed by a line feed or the end of
	 * the file: there is no file name.  The error is reported
	 * before the line counter is advanced, and
	 * parser->error_occurred is put back to the value it had on
	 * entry once the message has been issued.
	 */
	int error_occurred = parser->error_occurred;
	BEGIN(INITIAL);
	zc_error("missing file name in $INCLUDE directive");
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	++parser->line;
	parser->error_occurred = error_occurred;
}
<incl>.+ 		{
	/*
	 * Remainder of a $INCLUDE line: "<file> [<origin>] [; comment]".
	 *
	 * The comment is removed, the optional origin is split off at
	 * the last blank and must be an absolute domain name, and the
	 * file is opened and pushed onto the include stack so that
	 * scanning continues inside it.  When the origin is missing or
	 * rejected the current origin is used for the include file.
	 *
	 * parser->error_occurred is put back to the value it had on
	 * entry, so whatever is reported here leaves it unchanged.
	 */
	char *tmp;
	domain_type *origin = parser->origin;
	int error_occurred = parser->error_occurred;

	BEGIN(INITIAL);
	if (include_stack_ptr >= MAXINCLUDES ) {
		zc_error("includes nested too deeply, skipped (>%d)",
			 MAXINCLUDES);
	} else {
		FILE *input;
		char *scan;

		/*
		 * Remove trailing comment.  A comment starts at the
		 * first ';', so cut there; cutting at the last ';'
		 * would keep part of a comment that contains one.
		 */
		tmp = strchr(yytext, ';');
		if (tmp) {
			*tmp = '\0';
		}
		strip_string(yytext);

		/*
		 * Parse origin for include file.  It is separated from
		 * the file name by the last blank, space or tab alike.
		 */
		tmp = NULL;
		for (scan = yytext; *scan != '\0'; ++scan) {
			if (*scan == ' ' || *scan == '\t') {
				tmp = scan;
			}
		}
		if (tmp) {
			const dname_type *dname;

			/* split the original yytext */
			*tmp = '\0';
			strip_string(yytext);

			dname = dname_parse(parser->region, tmp + 1);
			if (!dname) {
				zc_error("incorrect include origin '%s'",
					 tmp + 1);
			} else if (*(tmp + strlen(tmp + 1)) != '.') {
				/* Last character of the origin is not a dot. */
				zc_error("$INCLUDE directive requires absolute domain name");
			} else {
				origin = domain_table_insert(
					parser->db->domains, dname);
			}
		}

		if (strlen(yytext) == 0) {
			zc_error("missing file name in $INCLUDE directive");
		} else if (!(input = fopen(yytext, "r"))) {
			zc_error("cannot open include file '%s': %s",
				 yytext, strerror(errno));
		} else {
			/* Initialize parser for include file.  */
			char *filename = region_strdup(parser->region, yytext);
			push_parser_state(input); /* Destroys yytext.  */
			parser->filename = filename;
			parser->line = 1;
			parser->origin = origin;
			lexer_state = EXPECT_OWNER;
		}
	}

	parser->error_occurred = error_occurred;
}
<INITIAL><<EOF>>	{
	/*
	 * End of the current input.  With an empty include stack the
	 * scan is over; otherwise the finished stream is closed and
	 * scanning resumes in the file that contained the $INCLUDE.
	 * NOTE(review): yyin is presumably the stream opened by the
	 * $INCLUDE rule at this point - confirm.
	 */
	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
	if (include_stack_ptr == 0) {
		yyterminate();
	} else {
		fclose(yyin);
		pop_parser_state();
	}
}
^{DOLLAR}{LETTER}+	{ /* Other $WORD at line start: warn, return no token. */ zc_warning("Unknown directive: %s", yytext); }
	/*
	 * '.', '@' and "\#" are passed to parse_token() with their own
	 * token value, so they also take part in the lexer_state
	 * transitions.
	 */
{DOT}	{
	LEXOUT((". "));
	return parse_token('.', yytext, &lexer_state);
}
@	{
	LEXOUT(("@ "));
	return parse_token('@', yytext, &lexer_state);
}
\\#	{
	LEXOUT(("\\# "));
	return parse_token(URR, yytext, &lexer_state);
}
{NEWLINE}	{
	/*
	 * Every line feed or carriage return counts as one line.
	 * Outside parentheses it ends the record (NL) and the next
	 * token is expected to be an owner; inside parentheses it is
	 * reported as plain whitespace (SP).
	 */
	++parser->line;
	if (!paren_open) {
		lexer_state = EXPECT_OWNER;
		LEXOUT(("NL\n"));
		return NL;
	} else {
		LEXOUT(("SP "));
		return SP;
	}
}
\(	{
	/* Parentheses do not nest; a second "(" ends the scan. */
	if (paren_open) {
		zc_error("nested parentheses");
		yyterminate();
	}
	LEXOUT(("( "));
	paren_open = 1;
	/* The parenthesis itself is reported as whitespace. */
	return SP;
}
\)	{
	/* A ")" without a matching "(" ends the scan. */
	if (!paren_open) {
		zc_error("closing parentheses without opening parentheses");
		yyterminate();
	}
	LEXOUT((") "));
	paren_open = 0;
	return SP;
}
{SPACE}+	{
	/*
	 * Blanks where an owner is expected (and not inside
	 * parentheses) yield PREV instead of SP.
	 */
	if (!paren_open && lexer_state == EXPECT_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
		LEXOUT(("PREV "));
		return PREV;
	}
	/* Blanks after the owner: TTL, class and type come next. */
	if (lexer_state == PARSING_OWNER) {
		lexer_state = PARSING_TTL_CLASS_TYPE;
	}
	LEXOUT(("SP "));
	return SP;
}
266 
	/*
	 * Bitlabels.  Strip leading and ending brackets.
	 *
	 * "\[" enters the bitlabel start condition without yymore(), so
	 * it is not part of the text collected afterwards.  The body is
	 * accumulated with yymore(); line feeds inside it are counted
	 * and kept.  The closing "]" is overwritten with a NUL before
	 * the text is passed to parse_token() as BITLAB.
	 */
\\\[			{ BEGIN(bitlabel); }
<bitlabel><<EOF>>	{
	zc_error("EOF inside bitlabel");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<bitlabel>{BIT}*	{ yymore(); }
<bitlabel>\n		{ ++parser->line; yymore(); }
<bitlabel>\]		{
	BEGIN(INITIAL);
	yytext[yyleng - 1] = '\0';
	return parse_token(BITLAB, yytext, &lexer_state);
}
282 
	/*
	 * Quoted strings.  Strip leading and ending quotes.
	 *
	 * The opening quote enters the quotedstring start condition
	 * without yymore(), so it is not part of the text collected
	 * afterwards.  The body is accumulated with yymore(); line
	 * feeds inside it are counted and kept.  The closing quote is
	 * overwritten with a NUL before the text is passed to
	 * parse_token() as STR.
	 */
\"			{ BEGIN(quotedstring); LEXOUT(("\" ")); }
<quotedstring><<EOF>> 	{
	zc_error("EOF inside quoted string");
	BEGIN(INITIAL);
	yyrestart(yyin); /* this is so that lex does not give an internal err */
	yyterminate();
}
<quotedstring>{ANY}*	{ LEXOUT(("STR ")); yymore(); }
<quotedstring>\n 	{ ++parser->line; yymore(); }
<quotedstring>\" {
	LEXOUT(("\" "));
	BEGIN(INITIAL);
	yytext[yyleng - 1] = '\0';
	return parse_token(STR, yytext, &lexer_state);
}
299 
({ZONESTR}|\\.|\\\n)+ {
	/*
	 * Any allowed word: a run of ZONESTR characters, backslash
	 * escapes and backslash-line-feed pairs.  parse_token() decides
	 * whether it is returned as STR or as a type, class or TTL.
	 */
	return parse_token(STR, yytext, &lexer_state);
}
. {
	/* Nothing else matched this character: report it, return no token. */
	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
		 (int) yytext[0], (int) yytext[0]);
}
308 %%
309 
310 /*
311  * Analyze "word" to see if it matches an RR type, possibly by using
312  * the "TYPExxx" notation.  If it matches, the corresponding token is
313  * returned and the TYPE parameter is set to the RR type value.
314  */
315 static int
316 rrtype_to_token(const char *word, uint16_t *type)
317 {
318 	uint16_t t = rrtype_from_string(word);
319 	if (t != 0) {
320 		rrtype_descriptor_type *entry = rrtype_descriptor_by_type(t);
321 		*type = t;
322 		return entry->token;
323 	}
324 
325 	return 0;
326 }
327 
328 
/*
 * Remove \DDD constructs from the input. See RFC 1035, section 5.1.
 *
 * TEXT is rewritten in place; the result is never longer than the
 * input:
 *   - "\DDD" with three decimal digits and a value of at most 255
 *     becomes the single byte with that value;
 *   - "\DDD" with a larger value triggers a warning, the backslash is
 *     dropped and the digits are kept;
 *   - "\X" for any other character X becomes X;
 *   - a backslash at the very end triggers a warning and is dropped.
 *
 * Returns the length of the rewritten string, not counting the
 * terminating NUL.  Because \000 is a valid escape the result may
 * contain embedded NUL bytes, so callers need the returned length.
 */
static size_t
zoctet(char *text)
{
	/*
	 * s follows the string, p lags behind and rebuilds the new
	 * string.  p only ever moves forward, so it never points
	 * outside the buffer.
	 */
	char *s = text;
	char *p = text;

	while (*s != '\0') {
		assert(p <= s);
		if (s[0] != '\\') {
			/* Ordinary character.  */
			*p++ = *s++;
		} else if (isdigit((unsigned char)s[1]) &&
			   isdigit((unsigned char)s[2]) &&
			   isdigit((unsigned char)s[3])) {
			/*
			 * \DDD escape.  The <ctype.h> functions need an
			 * unsigned char value; a plain char may be
			 * negative for bytes >= 0x80.
			 */
			int val = ((s[1] - '0') * 100 +
				   (s[2] - '0') * 10 +
				   (s[3] - '0'));
			if (val <= 255) {
				*p++ = (char)val;
				s += 4;
			} else {
				/* Drop the backslash, keep the digits.  */
				zc_warning("text escape \\DDD overflow");
				*p++ = s[1];
				s += 2;
			}
		} else if (s[1] != '\0') {
			/* \X where X is any character, keep X.  */
			*p++ = s[1];
			s += 2;
		} else {
			/* Trailing backslash, ignore it.  */
			zc_warning("trailing backslash ignored");
			++s;
		}
	}
	*p = '\0';
	return (size_t)(p - text);
}
371 
372 static int
373 parse_token(int token, char *yytext, enum lexer_state *lexer_state)
374 {
375 	size_t len;
376 	char *str;
377 
378 	if (*lexer_state == EXPECT_OWNER) {
379 		*lexer_state = PARSING_OWNER;
380 	} else if (*lexer_state == PARSING_TTL_CLASS_TYPE) {
381 		const char *t;
382 		int token;
383 		uint16_t rrclass;
384 
385 		/* type */
386 		token = rrtype_to_token(yytext, &yylval.type);
387 		if (token != 0) {
388 			*lexer_state = PARSING_RDATA;
389 			LEXOUT(("%d[%s] ", token, yytext));
390 			return token;
391 		}
392 
393 		/* class */
394 		rrclass = rrclass_from_string(yytext);
395 		if (rrclass != 0) {
396 			yylval.klass = rrclass;
397 			LEXOUT(("CLASS "));
398 			return T_RRCLASS;
399 		}
400 
401 		/* ttl */
402 		yylval.ttl = strtottl(yytext, &t);
403 		if (*t == '\0') {
404 			LEXOUT(("TTL "));
405 			return T_TTL;
406 		}
407 	}
408 
409 	str = region_strdup(parser->rr_region, yytext);
410 	len = zoctet(str);
411 
412 	yylval.data.str = str;
413 	yylval.data.len = len;
414 
415 	LEXOUT(("%d[%s] ", token, yytext));
416 	return token;
417 }
418