xref: /openbsd/usr.sbin/nsd/zlexer.lex (revision 8529ddd3)
1 %{
2 /*
3  * zlexer.lex - lexical analyzer for (DNS) zone files
4  *
5  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
6  *
7  * See LICENSE for the license.
8  *
9  */
10 
11 #include "config.h"
12 
13 #include <ctype.h>
14 #include <errno.h>
15 #include <string.h>
16 #include <strings.h>
17 
18 #include "zonec.h"
19 #include "dname.h"
20 #include "zparser.h"
21 
22 #if 0
23 #define LEXOUT(s)  printf s /* used ONLY when debugging */
24 #else
25 #define LEXOUT(s)
26 #endif
27 
/*
 * Which part of a zone file line the scanner is currently in.  The
 * rules and parse_token() use it to decide when a word may be an RR
 * type, a class or a TTL.
 *
 * EXPECT_OWNER           - start of a line: set after a newline outside
 *                          parentheses and when an include file is
 *                          entered.
 * PARSING_OWNER          - the first token of the line has been seen.
 * PARSING_TTL_CLASS_TYPE - blanks followed the owner, or started the
 *                          line; words are tried as type, class and TTL.
 * PARSING_RDATA          - an RR type, $TTL or $ORIGIN has been seen;
 *                          words are returned without that lookup.
 */
28 enum lexer_state {
29 	EXPECT_OWNER,
30 	PARSING_OWNER,
31 	PARSING_TTL_CLASS_TYPE,
32 	PARSING_RDATA
33 };
34 
35 static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);
36 
/*
 * State of the files that are suspended while an $INCLUDE file is
 * read: the flex buffer of each including file and its filename, line
 * and origin.  include_stack_ptr is the number of entries in use.
 */
37 static YY_BUFFER_STATE include_stack[MAXINCLUDES];
38 static zparser_type zparser_stack[MAXINCLUDES];
39 static int include_stack_ptr = 0;
40 
/*
 * Stores the filename, line and origin of the file being parsed, and
 * its flex buffer, in the next free include stack slot.  A new buffer
 * reading from INPUT then becomes the current one.  No bounds check is
 * made here.
 */
static void
push_parser_state(FILE *input)
{
	zparser_type *saved = &zparser_stack[include_stack_ptr];
	YY_BUFFER_STATE included;

	saved->filename = parser->filename;
	saved->line = parser->line;
	saved->origin = parser->origin;

	/* Remember the suspended buffer before switching away from it.  */
	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
	included = yy_create_buffer(input, YY_BUF_SIZE);
	yy_switch_to_buffer(included);

	include_stack_ptr += 1;
}
54 
/*
 * Drops the top include stack entry: puts its filename, line and
 * origin back into the parser, deletes the buffer that was current and
 * switches back to the saved buffer.
 */
static void
pop_parser_state(void)
{
	zparser_type *saved;

	include_stack_ptr -= 1;
	saved = &zparser_stack[include_stack_ptr];

	parser->filename = saved->filename;
	parser->line = saved->line;
	parser->origin = saved->origin;

	/* The buffer of the finished include file is no longer needed.  */
	yy_delete_buffer(YY_CURRENT_BUFFER);
	yy_switch_to_buffer(include_stack[include_stack_ptr]);
}
68 
/* Buffer that was current when parser_push_stringbuf() was called.  */
static YY_BUFFER_STATE oldstate;

/*
 * Start scanning the string STR instead of the current input.  The
 * current buffer is kept in oldstate until parser_pop_stringbuf().
 */
void
parser_push_stringbuf(char* str)
{
	YY_BUFFER_STATE string_buffer;

	/* Must be saved before the string buffer is created.  */
	oldstate = YY_CURRENT_BUFFER;
	string_buffer = yy_scan_string(str);
	yy_switch_to_buffer(string_buffer);
}
77 
/*
 * Stop scanning the string: delete its buffer, go back to the buffer
 * saved in oldstate and clear oldstate.
 */
void
parser_pop_stringbuf(void)
{
	YY_BUFFER_STATE resumed = oldstate;

	yy_delete_buffer(YY_CURRENT_BUFFER);
	yy_switch_to_buffer(resumed);
	oldstate = NULL;
}
85 
/*
 * Only used when the flex in use does not define yy_set_bol() itself.
 * Creates the current buffer from yyin when there is none, then stores
 * '\n' (AT_BOL true) or ' ' (AT_BOL false) in the first byte of that
 * buffer's character array.
 */
86 #ifndef yy_set_bol /* compat definition, for flex 2.4.6 */
87 #define yy_set_bol(at_bol) \
88 	{ \
89 		if ( ! yy_current_buffer ) \
90 			yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
91 		yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
92 	}
93 #endif
94 
95 %}
/* Ask flex not to generate the input() and unput() functions.  */
96 %option noinput
97 %option nounput
98 %{
/*
 * NOTE(review): presumably for flex versions that look at these macros
 * rather than at the %option lines above -- confirm.
 */
99 #ifndef YY_NO_UNPUT
100 #define YY_NO_UNPUT 1
101 #endif
102 #ifndef YY_NO_INPUT
103 #define YY_NO_INPUT 1
104 #endif
105 %}
106 
/*
 * Named patterns used by the rules.
 *
 * ZONESTR is what may start a word and CHARSTR what may continue it.
 * Both exclude blanks, newlines, parentheses, ';' and '.', and both
 * accept a backslash followed by any character or by a newline.
 * ZONESTR additionally excludes '"' and '$'.
 *
 * BIT is what may appear inside a bit label, ANY what may appear
 * inside a quoted string; in both a backslash takes the next
 * character with it.
 */
107 SPACE   [ \t]
108 LETTER  [a-zA-Z]
109 NEWLINE [\n\r]
110 ZONESTR [^ \t\n\r();.\"\$]|\\.|\\\n
111 CHARSTR [^ \t\n\r();.]|\\.|\\\n
112 QUOTE   \"
113 DOLLAR  \$
114 COMMENT ;
115 DOT     \.
116 BIT	[^\]\n]|\\.
117 ANY     [^\"\n\\]|\\.
118 
/* Exclusive start conditions: $INCLUDE arguments, bit labels, quoted strings.  */
119 %x	incl bitlabel quotedstring
120 
121 %%
	/*
	 * Static, so the values survive between calls of the scanner:
	 * whether an opening parenthesis is still unmatched, and which
	 * part of the line is being scanned.
	 */
122 	static int paren_open = 0;
123 	static enum lexer_state lexer_state = EXPECT_OWNER;
	/* A comment and the blanks in front of it produce no token.  */
124 {SPACE}*{COMMENT}.*	/* ignore */
	/* After $TTL and $ORIGIN the state is set to PARSING_RDATA.  */
125 ^{DOLLAR}TTL            { lexer_state = PARSING_RDATA; return DOLLAR_TTL; }
126 ^{DOLLAR}ORIGIN         { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; }
127 
128 	/*
129 	 * Handle $INCLUDE directives.  See
130 	 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
131 	 */
	/* Only switches state; the <incl> rules scan the rest of the line.  */
132 ^{DOLLAR}INCLUDE        {
133 	BEGIN(incl);
134 }
	/* $INCLUDE directly followed by a newline or by the end of input.  */
135 <incl>\n 		|
136 <incl><<EOF>>		{
	/* Saved here and put back at the end of the action.  */
137 	int error_occurred = parser->error_occurred;
138 	BEGIN(INITIAL);
139 	zc_error("missing file name in $INCLUDE directive");
140 	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
141 	++parser->line;
142 	parser->error_occurred = error_occurred;
143 }
	/*
	 * The text after $INCLUDE: a file name, optionally followed by
	 * an origin and by a comment.  yytext is cut up in place.
	 */
144 <incl>.+ 		{
145 	char *tmp;
	/* The include file keeps the current origin unless it names one.  */
146 	domain_type *origin = parser->origin;
	/* Saved here and put back at the end of the action.  */
147 	int error_occurred = parser->error_occurred;
148 
149 	BEGIN(INITIAL);
150 	if (include_stack_ptr >= MAXINCLUDES ) {
151 		zc_error("includes nested too deeply, skipped (>%d)",
152 			 MAXINCLUDES);
153 	} else {
154 		FILE *input;
155 
156 		/* Remove trailing comment.  */
		/*
		 * NOTE(review): this cuts at the LAST ';' on the line, so
		 * a comment that itself contains ';' is only partly
		 * removed -- confirm whether strchr() was intended.
		 */
157 		tmp = strrchr(yytext, ';');
158 		if (tmp) {
159 			*tmp = '\0';
160 		}
161 		strip_string(yytext);
162 
163 		/* Parse origin for include file.  */
		/* The last space separates them; a tab is only tried if there is no space.  */
164 		tmp = strrchr(yytext, ' ');
165 		if (!tmp) {
166 			tmp = strrchr(yytext, '\t');
167 		}
168 		if (tmp) {
169 			const dname_type *dname;
170 
171 			/* split the original yytext */
172 			*tmp = '\0';
173 			strip_string(yytext);
174 
175 			dname = dname_parse(parser->region, tmp + 1);
			/* tmp + strlen(tmp + 1) is the last character of the origin.  */
176 			if (!dname) {
177 				zc_error("incorrect include origin '%s'",
178 					 tmp + 1);
179 			} else if (*(tmp + strlen(tmp + 1)) != '.') {
180 				zc_error("$INCLUDE directive requires absolute domain name");
181 			} else {
182 				origin = domain_table_insert(
183 					parser->db->domains, dname);
184 			}
185 		}
186 
		/* What is left of yytext is the file name.  */
187 		if (strlen(yytext) == 0) {
188 			zc_error("missing file name in $INCLUDE directive");
189 		} else if (!(input = fopen(yytext, "r"))) {
190 			zc_error("cannot open include file '%s': %s",
191 				 yytext, strerror(errno));
192 		} else {
193 			/* Initialize parser for include file.  */
			/* Copy the name first: yytext is gone after the push.  */
194 			char *filename = region_strdup(parser->region, yytext);
195 			push_parser_state(input); /* Destroys yytext.  */
196 			parser->filename = filename;
197 			parser->line = 1;
198 			parser->origin = origin;
199 			lexer_state = EXPECT_OWNER;
200 		}
201 	}
202 
203 	parser->error_occurred = error_occurred;
204 }
	/* End of input outside the incl, bitlabel and quotedstring states.  */
205 <INITIAL><<EOF>>	{
206 	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
207 	if (include_stack_ptr == 0) {
		/* No including file is waiting: scanning is finished.  */
208 		yyterminate();
209 	} else {
		/* Close the include file and resume the file that included it.  */
210 		fclose(yyin);
211 		pop_parser_state();
212 	}
213 }
214 ^{DOLLAR}{LETTER}+	{ zc_warning("Unknown directive: %s", yytext); }
215 {DOT}	{
216 	LEXOUT((". "));
217 	return parse_token('.', yytext, &lexer_state);
218 }
219 @	{
220 	LEXOUT(("@ "));
221 	return parse_token('@', yytext, &lexer_state);
222 }
223 \\#	{
224 	LEXOUT(("\\# "));
225 	return parse_token(URR, yytext, &lexer_state);
226 }
227 {NEWLINE}	{
228 	++parser->line;
229 	if (!paren_open) {
230 		lexer_state = EXPECT_OWNER;
231 		LEXOUT(("NL\n"));
232 		return NL;
233 	} else {
234 		LEXOUT(("SP "));
235 		return SP;
236 	}
237 }
238 \(	{
239 	if (paren_open) {
240 		zc_error("nested parentheses");
241 		yyterminate();
242 	}
243 	LEXOUT(("( "));
244 	paren_open = 1;
245 	return SP;
246 }
247 \)	{
248 	if (!paren_open) {
249 		zc_error("closing parentheses without opening parentheses");
250 		yyterminate();
251 	}
252 	LEXOUT((") "));
253 	paren_open = 0;
254 	return SP;
255 }
256 {SPACE}+	{
257 	if (!paren_open && lexer_state == EXPECT_OWNER) {
258 		lexer_state = PARSING_TTL_CLASS_TYPE;
259 		LEXOUT(("PREV "));
260 		return PREV;
261 	}
262 	if (lexer_state == PARSING_OWNER) {
263 		lexer_state = PARSING_TTL_CLASS_TYPE;
264 	}
265 	LEXOUT(("SP "));
266 	return SP;
267 }
268 
269 	/* Bitlabels.  Strip leading and ending brackets.  */
	/* yymore() keeps adding to yytext until the closing bracket.  */
270 \\\[			{ BEGIN(bitlabel); }
271 <bitlabel><<EOF>>	{
272 	zc_error("EOF inside bitlabel");
273 	BEGIN(INITIAL);
274 	yyrestart(yyin); /* this is so that lex does not give an internal err */
275 	yyterminate();
276 }
277 <bitlabel>{BIT}*	{ yymore(); }
278 <bitlabel>\n		{ ++parser->line; yymore(); }
279 <bitlabel>\]		{
280 	BEGIN(INITIAL);
	/* Overwrite the closing bracket with the string terminator.  */
281 	yytext[yyleng - 1] = '\0';
282 	return parse_token(BITLAB, yytext, &lexer_state);
283 }
284 
285 	/* Quoted strings.  Strip leading and ending quotes.  */
	/* yymore() keeps adding to yytext until the closing quote.  */
286 {QUOTE}			{ BEGIN(quotedstring); LEXOUT(("\" ")); }
287 <quotedstring><<EOF>> 	{
288 	zc_error("EOF inside quoted string");
289 	BEGIN(INITIAL);
290 	yyrestart(yyin); /* this is so that lex does not give an internal err */
291 	yyterminate();
292 }
293 <quotedstring>{ANY}*	{ LEXOUT(("STR ")); yymore(); }
	/* A newline is kept as part of the string; only the line count changes.  */
294 <quotedstring>\n 	{ ++parser->line; yymore(); }
295 <quotedstring>{QUOTE} {
296 	LEXOUT(("\" "));
297 	BEGIN(INITIAL);
	/* Overwrite the closing quote with the string terminator.  */
298 	yytext[yyleng - 1] = '\0';
299 	return parse_token(STR, yytext, &lexer_state);
300 }
301 
{ZONESTR}({CHARSTR})* {
	/* Any allowed word: one ZONESTR followed by CHARSTRs.  */
	return parse_token(STR, yytext, &lexer_state);
}
. {
	/*
	 * Nothing else matched this character.  Go through unsigned
	 * char so that the decimal form stays in 0..255 where char is
	 * signed.
	 */
	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
		 (int) (unsigned char) yytext[0],
		 (int) (unsigned char) yytext[0]);
}
310 %%
311 
/*
 * Look up WORD with rrtype_from_string().  A non-zero result is
 * stored in *TYPE and the token of the matching RR type descriptor is
 * returned.  Otherwise 0 is returned and *TYPE is left untouched.
 */
static int
rrtype_to_token(const char *word, uint16_t *type)
{
	uint16_t code = rrtype_from_string(word);

	if (code == 0) {
		return 0;
	}

	*type = code;
	return rrtype_descriptor_by_type(code)->token;
}
329 
330 
/*
 * Remove \DDD constructs from the input. See RFC 1035, section 5.1.
 *
 * TEXT is rewritten in place and the length of the result is
 * returned.  "\DDD" with a decimal value up to 255 becomes that byte,
 * "\X" becomes X, and a backslash at the very end is dropped with a
 * warning.  A "\DDD" above 255 gives a warning and is treated as "\X".
 */
static size_t
zoctet(char *text)
{
	/*
	 * s reads the input, p writes the result.  p never moves
	 * backwards, so it cannot end up in front of the buffer when the
	 * text is just a single backslash.
	 */
	const char *s = text;
	char *p = text;

	while (*s != '\0') {
		assert(p <= s);
		if (s[0] != '\\') {
			/* Ordinary character.  */
			*p++ = *s++;
		} else if (isdigit((unsigned char)s[1])
			   && isdigit((unsigned char)s[2])
			   && isdigit((unsigned char)s[3])) {
			/* \DDD escape.  */
			int val = (hexdigit_to_int(s[1]) * 100 +
				   hexdigit_to_int(s[2]) * 10 +
				   hexdigit_to_int(s[3]));
			if (0 <= val && val <= 255) {
				*p++ = (char) val;
				s += 4;
			} else {
				/* Keep the first digit, rescan the other two.  */
				zc_warning("text escape \\DDD overflow");
				*p++ = s[1];
				s += 2;
			}
		} else if (s[1] != '\0') {
			/* \X where X is any character, keep X.  */
			*p++ = s[1];
			s += 2;
		} else {
			/* Trailing backslash, ignore it.  */
			zc_warning("trailing backslash ignored");
			++s;
		}
	}
	*p = '\0';
	return (size_t) (p - text);
}
373 
/*
 * Decide which token to return for the text YYTEXT and advance
 * *LEXER_STATE.
 *
 * In state EXPECT_OWNER the state becomes PARSING_OWNER.  In state
 * PARSING_TTL_CLASS_TYPE the text is tried as an RR type, then as a
 * class, then as a TTL; the first that fits is stored in yylval and
 * its token returned.  A type also moves the state to PARSING_RDATA.
 *
 * In every other case the text is copied into parser->rr_region, run
 * through zoctet(), stored in yylval.data and TOKEN is returned.
 */
static int
parse_token(int token, char *yytext, enum lexer_state *lexer_state)
{
	char *unescaped;

	switch (*lexer_state) {
	case EXPECT_OWNER:
		*lexer_state = PARSING_OWNER;
		break;
	case PARSING_TTL_CLASS_TYPE: {
		const char *ttl_end;
		uint16_t klass;
		int type_token;

		/* type */
		type_token = rrtype_to_token(yytext, &yylval.type);
		if (type_token != 0) {
			*lexer_state = PARSING_RDATA;
			LEXOUT(("%d[%s] ", type_token, yytext));
			return type_token;
		}

		/* class */
		klass = rrclass_from_string(yytext);
		if (klass != 0) {
			yylval.klass = klass;
			LEXOUT(("CLASS "));
			return T_RRCLASS;
		}

		/* ttl: only accepted when the whole word was consumed */
		yylval.ttl = strtottl(yytext, &ttl_end);
		if (*ttl_end == '\0') {
			LEXOUT(("TTL "));
			return T_TTL;
		}
		break;
	}
	default:
		break;
	}

	unescaped = region_strdup(parser->rr_region, yytext);
	yylval.data.len = zoctet(unescaped);
	yylval.data.str = unescaped;

	LEXOUT(("%d[%s] ", token, yytext));
	return token;
}
420