%{
/*
 * zlexer.lex - lexical analyzer for (DNS) zone files
 *
 * Copyright (c) 2001-2006, NLnet Labs. All rights reserved
 *
 * See LICENSE for the license.
 *
 */

#include "config.h"

#include <ctype.h>
#include <errno.h>
#include <string.h>
#include <strings.h>

#include "zonec.h"
#include "dname.h"
#include "zparser.h"

#if 0
#define LEXOUT(s)  printf s /* used ONLY when debugging */
#else
#define LEXOUT(s)
#endif

/*
 * Where the scanner is within the record being read.  The rules and
 * parse_token() use it to decide how a word has to be interpreted.
 */
enum lexer_state {
	EXPECT_OWNER,
	PARSING_OWNER,
	PARSING_TTL_CLASS_TYPE,
	PARSING_RDATA
};

static int parse_token(int token, char *yytext, enum lexer_state *lexer_state);

/*
 * State of the files that are suspended while an included file is
 * being scanned.  include_stack_ptr is the number of entries in use.
 */
static YY_BUFFER_STATE include_stack[MAXINCLUDES];
static zparser_type zparser_stack[MAXINCLUDES];
static int include_stack_ptr = 0;

/*
 * Remember the file name, line number, origin and scanner buffer of
 * the file being read in the next free stack slot, then continue
 * scanning from INPUT in a freshly created buffer.
 *
 * There is no bounds check here; the caller must make sure that
 * include_stack_ptr is below MAXINCLUDES.
 */
static void
push_parser_state(FILE *input)
{
	zparser_type *slot = &zparser_stack[include_stack_ptr];

	slot->filename = parser->filename;
	slot->line = parser->line;
	slot->origin = parser->origin;

	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
	yy_switch_to_buffer(yy_create_buffer(input, YY_BUF_SIZE));
	++include_stack_ptr;
}

/*
 * Restores the file specific variables from the include stack.
57 */ 58 static void 59 pop_parser_state(void) 60 { 61 --include_stack_ptr; 62 parser->filename = zparser_stack[include_stack_ptr].filename; 63 parser->line = zparser_stack[include_stack_ptr].line; 64 parser->origin = zparser_stack[include_stack_ptr].origin; 65 yy_delete_buffer(YY_CURRENT_BUFFER); 66 yy_switch_to_buffer(include_stack[include_stack_ptr]); 67 } 68 69 static YY_BUFFER_STATE oldstate; 70 /* Start string scan */ 71 void 72 parser_push_stringbuf(char* str) 73 { 74 oldstate = YY_CURRENT_BUFFER; 75 yy_switch_to_buffer(yy_scan_string(str)); 76 } 77 78 void 79 parser_pop_stringbuf(void) 80 { 81 yy_delete_buffer(YY_CURRENT_BUFFER); 82 yy_switch_to_buffer(oldstate); 83 oldstate = NULL; 84 } 85 86 #ifndef yy_set_bol /* compat definition, for flex 2.4.6 */ 87 #define yy_set_bol(at_bol) \ 88 { \ 89 if ( ! yy_current_buffer ) \ 90 yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ 91 yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \ 92 } 93 #endif 94 95 %} 96 %option noinput 97 %option nounput 98 %{ 99 #ifndef YY_NO_UNPUT 100 #define YY_NO_UNPUT 1 101 #endif 102 #ifndef YY_NO_INPUT 103 #define YY_NO_INPUT 1 104 #endif 105 %} 106 107 SPACE [ \t] 108 LETTER [a-zA-Z] 109 NEWLINE [\n\r] 110 ZONESTR [^ \t\n\r();.\"\$]|\\.|\\\n 111 CHARSTR [^ \t\n\r();.]|\\.|\\\n 112 QUOTE \" 113 DOLLAR \$ 114 COMMENT ; 115 DOT \. 116 BIT [^\]\n]|\\. 117 ANY [^\"\n\\]|\\. 118 119 %x incl bitlabel quotedstring 120 121 %% 122 static int paren_open = 0; 123 static enum lexer_state lexer_state = EXPECT_OWNER; 124 {SPACE}*{COMMENT}.* /* ignore */ 125 ^{DOLLAR}TTL { lexer_state = PARSING_RDATA; return DOLLAR_TTL; } 126 ^{DOLLAR}ORIGIN { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; } 127 128 /* 129 * Handle $INCLUDE directives. See 130 * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12. 
131 */ 132 ^{DOLLAR}INCLUDE { 133 BEGIN(incl); 134 } 135 <incl>\n | 136 <incl><<EOF>> { 137 int error_occurred = parser->error_occurred; 138 BEGIN(INITIAL); 139 zc_error("missing file name in $INCLUDE directive"); 140 yy_set_bol(1); /* Set beginning of line, so "^" rules match. */ 141 ++parser->line; 142 parser->error_occurred = error_occurred; 143 } 144 <incl>.+ { 145 char *tmp; 146 domain_type *origin = parser->origin; 147 int error_occurred = parser->error_occurred; 148 149 BEGIN(INITIAL); 150 if (include_stack_ptr >= MAXINCLUDES ) { 151 zc_error("includes nested too deeply, skipped (>%d)", 152 MAXINCLUDES); 153 } else { 154 FILE *input; 155 156 /* Remove trailing comment. */ 157 tmp = strrchr(yytext, ';'); 158 if (tmp) { 159 *tmp = '\0'; 160 } 161 strip_string(yytext); 162 163 /* Parse origin for include file. */ 164 tmp = strrchr(yytext, ' '); 165 if (!tmp) { 166 tmp = strrchr(yytext, '\t'); 167 } 168 if (tmp) { 169 const dname_type *dname; 170 171 /* split the original yytext */ 172 *tmp = '\0'; 173 strip_string(yytext); 174 175 dname = dname_parse(parser->region, tmp + 1); 176 if (!dname) { 177 zc_error("incorrect include origin '%s'", 178 tmp + 1); 179 } else if (*(tmp + strlen(tmp + 1)) != '.') { 180 zc_error("$INCLUDE directive requires absolute domain name"); 181 } else { 182 origin = domain_table_insert( 183 parser->db->domains, dname); 184 } 185 } 186 187 if (strlen(yytext) == 0) { 188 zc_error("missing file name in $INCLUDE directive"); 189 } else if (!(input = fopen(yytext, "r"))) { 190 zc_error("cannot open include file '%s': %s", 191 yytext, strerror(errno)); 192 } else { 193 /* Initialize parser for include file. */ 194 char *filename = region_strdup(parser->region, yytext); 195 push_parser_state(input); /* Destroys yytext. 
*/ 196 parser->filename = filename; 197 parser->line = 1; 198 parser->origin = origin; 199 lexer_state = EXPECT_OWNER; 200 } 201 } 202 203 parser->error_occurred = error_occurred; 204 } 205 <INITIAL><<EOF>> { 206 yy_set_bol(1); /* Set beginning of line, so "^" rules match. */ 207 if (include_stack_ptr == 0) { 208 yyterminate(); 209 } else { 210 fclose(yyin); 211 pop_parser_state(); 212 } 213 } 214 ^{DOLLAR}{LETTER}+ { zc_warning("Unknown directive: %s", yytext); } 215 {DOT} { 216 LEXOUT((". ")); 217 return parse_token('.', yytext, &lexer_state); 218 } 219 @ { 220 LEXOUT(("@ ")); 221 return parse_token('@', yytext, &lexer_state); 222 } 223 \\# { 224 LEXOUT(("\\# ")); 225 return parse_token(URR, yytext, &lexer_state); 226 } 227 {NEWLINE} { 228 ++parser->line; 229 if (!paren_open) { 230 lexer_state = EXPECT_OWNER; 231 LEXOUT(("NL\n")); 232 return NL; 233 } else { 234 LEXOUT(("SP ")); 235 return SP; 236 } 237 } 238 \( { 239 if (paren_open) { 240 zc_error("nested parentheses"); 241 yyterminate(); 242 } 243 LEXOUT(("( ")); 244 paren_open = 1; 245 return SP; 246 } 247 \) { 248 if (!paren_open) { 249 zc_error("closing parentheses without opening parentheses"); 250 yyterminate(); 251 } 252 LEXOUT((") ")); 253 paren_open = 0; 254 return SP; 255 } 256 {SPACE}+ { 257 if (!paren_open && lexer_state == EXPECT_OWNER) { 258 lexer_state = PARSING_TTL_CLASS_TYPE; 259 LEXOUT(("PREV ")); 260 return PREV; 261 } 262 if (lexer_state == PARSING_OWNER) { 263 lexer_state = PARSING_TTL_CLASS_TYPE; 264 } 265 LEXOUT(("SP ")); 266 return SP; 267 } 268 269 /* Bitlabels. Strip leading and ending brackets. 
*/ 270 \\\[ { BEGIN(bitlabel); } 271 <bitlabel><<EOF>> { 272 zc_error("EOF inside bitlabel"); 273 BEGIN(INITIAL); 274 yyrestart(yyin); /* this is so that lex does not give an internal err */ 275 yyterminate(); 276 } 277 <bitlabel>{BIT}* { yymore(); } 278 <bitlabel>\n { ++parser->line; yymore(); } 279 <bitlabel>\] { 280 BEGIN(INITIAL); 281 yytext[yyleng - 1] = '\0'; 282 return parse_token(BITLAB, yytext, &lexer_state); 283 } 284 285 /* Quoted strings. Strip leading and ending quotes. */ 286 {QUOTE} { BEGIN(quotedstring); LEXOUT(("\" ")); } 287 <quotedstring><<EOF>> { 288 zc_error("EOF inside quoted string"); 289 BEGIN(INITIAL); 290 yyrestart(yyin); /* this is so that lex does not give an internal err */ 291 yyterminate(); 292 } 293 <quotedstring>{ANY}* { LEXOUT(("STR ")); yymore(); } 294 <quotedstring>\n { ++parser->line; yymore(); } 295 <quotedstring>{QUOTE} { 296 LEXOUT(("\" ")); 297 BEGIN(INITIAL); 298 yytext[yyleng - 1] = '\0'; 299 return parse_token(STR, yytext, &lexer_state); 300 } 301 302 {ZONESTR}({CHARSTR})* { 303 /* Any allowed word. */ 304 return parse_token(STR, yytext, &lexer_state); 305 } 306 . { 307 zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?", 308 (int) yytext[0], (int) yytext[0]); 309 } 310 %% 311 312 /* 313 * Analyze "word" to see if it matches an RR type, possibly by using 314 * the "TYPExxx" notation. If it matches, the corresponding token is 315 * returned and the TYPE parameter is set to the RR type value. 316 */ 317 static int 318 rrtype_to_token(const char *word, uint16_t *type) 319 { 320 uint16_t t = rrtype_from_string(word); 321 if (t != 0) { 322 rrtype_descriptor_type *entry = rrtype_descriptor_by_type(t); 323 *type = t; 324 return entry->token; 325 } 326 327 return 0; 328 } 329 330 331 /* 332 * Remove \DDD constructs from the input. See RFC 1035, section 5.1. 
333 */ 334 static size_t 335 zoctet(char *text) 336 { 337 /* 338 * s follows the string, p lags behind and rebuilds the new 339 * string 340 */ 341 char *s; 342 char *p; 343 344 for (s = p = text; *s; ++s, ++p) { 345 assert(p <= s); 346 if (s[0] != '\\') { 347 /* Ordinary character. */ 348 *p = *s; 349 } else if (isdigit((unsigned char)s[1]) && isdigit((unsigned char)s[2]) && isdigit((unsigned char)s[3])) { 350 /* \DDD escape. */ 351 int val = (hexdigit_to_int(s[1]) * 100 + 352 hexdigit_to_int(s[2]) * 10 + 353 hexdigit_to_int(s[3])); 354 if (0 <= val && val <= 255) { 355 s += 3; 356 *p = val; 357 } else { 358 zc_warning("text escape \\DDD overflow"); 359 *p = *++s; 360 } 361 } else if (s[1] != '\0') { 362 /* \X where X is any character, keep X. */ 363 *p = *++s; 364 } else { 365 /* Trailing backslash, ignore it. */ 366 zc_warning("trailing backslash ignored"); 367 --p; 368 } 369 } 370 *p = '\0'; 371 return p - text; 372 } 373 374 static int 375 parse_token(int token, char *yytext, enum lexer_state *lexer_state) 376 { 377 size_t len; 378 char *str; 379 380 if (*lexer_state == EXPECT_OWNER) { 381 *lexer_state = PARSING_OWNER; 382 } else if (*lexer_state == PARSING_TTL_CLASS_TYPE) { 383 const char *t; 384 int token; 385 uint16_t rrclass; 386 387 /* type */ 388 token = rrtype_to_token(yytext, &yylval.type); 389 if (token != 0) { 390 *lexer_state = PARSING_RDATA; 391 LEXOUT(("%d[%s] ", token, yytext)); 392 return token; 393 } 394 395 /* class */ 396 rrclass = rrclass_from_string(yytext); 397 if (rrclass != 0) { 398 yylval.klass = rrclass; 399 LEXOUT(("CLASS ")); 400 return T_RRCLASS; 401 } 402 403 /* ttl */ 404 yylval.ttl = strtottl(yytext, &t); 405 if (*t == '\0') { 406 LEXOUT(("TTL ")); 407 return T_TTL; 408 } 409 } 410 411 str = region_strdup(parser->rr_region, yytext); 412 len = zoctet(str); 413 414 yylval.data.str = str; 415 yylval.data.len = len; 416 417 LEXOUT(("%d[%s] ", token, yytext)); 418 return token; 419 } 420