1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * This Source Code Form is subject to the terms of the Mozilla Public 5 * License, v. 2.0. If a copy of the MPL was not distributed with this 6 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 7 * 8 * See the COPYRIGHT file distributed with this work for additional 9 * information regarding copyright ownership. 10 */ 11 12 #ifndef ISC_LEX_H 13 #define ISC_LEX_H 1 14 15 /***** 16 ***** Module Info 17 *****/ 18 19 /*! \file isc/lex.h 20 * \brief The "lex" module provides a lightweight tokenizer. It can operate 21 * on files or buffers, and can handle "include". It is designed for 22 * parsing of DNS master files and the BIND configuration file, but 23 * should be general enough to tokenize other things, e.g. HTTP. 24 * 25 * \li MP: 26 * No synchronization is provided. Clients must ensure exclusive 27 * access. 28 * 29 * \li Reliability: 30 * No anticipated impact. 31 * 32 * \li Resources: 33 * TBS 34 * 35 * \li Security: 36 * No anticipated impact. 37 * 38 * \li Standards: 39 * None. 40 */ 41 42 /*** 43 *** Imports 44 ***/ 45 46 #include <stdbool.h> 47 #include <stdio.h> 48 49 #include <isc/lang.h> 50 #include <isc/region.h> 51 #include <isc/types.h> 52 53 ISC_LANG_BEGINDECLS 54 55 /*** 56 *** Options 57 ***/ 58 59 /*@{*/ 60 /*! 61 * Various options for isc_lex_gettoken(). 62 */ 63 64 #define ISC_LEXOPT_EOL 0x0001 /*%< Want end-of-line token. */ 65 #define ISC_LEXOPT_EOF 0x0002 /*%< Want end-of-file token. */ 66 #define ISC_LEXOPT_INITIALWS 0x0004 /*%< Want initial whitespace. */ 67 #define ISC_LEXOPT_NUMBER 0x0008 /*%< Recognize numbers. */ 68 #define ISC_LEXOPT_QSTRING 0x0010 /*%< Recognize qstrings. */ 69 /*@}*/ 70 71 /*@{*/ 72 /*! 73 * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in 74 * the DNS master file format. If this option is set, then the 75 * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when 76 * the paren count is > 0. To use this option, '(' and ')' must be special 77 * characters. 78 */ 79 #define ISC_LEXOPT_DNSMULTILINE 0x0020 /*%< Handle '(' and ')'. */ 80 #define ISC_LEXOPT_NOMORE 0x0040 /*%< Want "no more" token. */ 81 82 #define ISC_LEXOPT_CNUMBER 0x0080 /*%< Recognize octal and hex. */ 83 #define ISC_LEXOPT_ESCAPE 0x0100 /*%< Recognize escapes. */ 84 #define ISC_LEXOPT_QSTRINGMULTILINE 0x0200 /*%< Allow multiline "" strings */ 85 #define ISC_LEXOPT_OCTAL 0x0400 /*%< Expect a octal number. */ 86 #define ISC_LEXOPT_BTEXT 0x0800 /*%< Bracketed text. */ 87 #define ISC_LEXOPT_VPAIR 0x1000 /*%< Recognize value pair. */ 88 #define ISC_LEXOPT_QVPAIR 0x2000 /*%< Recognize quoted value pair. */ 89 /*@}*/ 90 /*@{*/ 91 /*! 92 * Various commenting styles, which may be changed at any time with 93 * isc_lex_setcomments(). 94 */ 95 96 #define ISC_LEXCOMMENT_C 0x01 97 #define ISC_LEXCOMMENT_CPLUSPLUS 0x02 98 #define ISC_LEXCOMMENT_SHELL 0x04 99 #define ISC_LEXCOMMENT_DNSMASTERFILE 0x08 100 /*@}*/ 101 102 /*** 103 *** Types 104 ***/ 105 106 /*! Lex */ 107 108 typedef char isc_lexspecials_t[256]; 109 110 /* Tokens */ 111 112 typedef enum { 113 isc_tokentype_unknown = 0, 114 isc_tokentype_string = 1, 115 isc_tokentype_number = 2, 116 isc_tokentype_qstring = 3, 117 isc_tokentype_eol = 4, 118 isc_tokentype_eof = 5, 119 isc_tokentype_initialws = 6, 120 isc_tokentype_special = 7, 121 isc_tokentype_nomore = 8, 122 isc_tokentype_btext = 9, 123 isc_tokentype_vpair = 10, 124 isc_tokentype_qvpair = 11, 125 } isc_tokentype_t; 126 127 typedef union { 128 char as_char; 129 unsigned long as_ulong; 130 isc_region_t as_region; 131 isc_textregion_t as_textregion; 132 void * as_pointer; 133 } isc_tokenvalue_t; 134 135 typedef struct isc_token { 136 isc_tokentype_t type; 137 isc_tokenvalue_t value; 138 } isc_token_t; 139 140 /*** 141 *** Functions 142 ***/ 143 144 isc_result_t 145 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp); 146 /*%< 147 * Create a lexer. 148 * 149 * 'max_token' is a hint of the number of bytes in the largest token. 150 * 151 * Requires: 152 *\li '*lexp' is a valid lexer. 153 * 154 * Ensures: 155 *\li On success, *lexp is attached to the newly created lexer. 156 * 157 * Returns: 158 *\li #ISC_R_SUCCESS 159 *\li #ISC_R_NOMEMORY 160 */ 161 162 void 163 isc_lex_destroy(isc_lex_t **lexp); 164 /*%< 165 * Destroy the lexer. 166 * 167 * Requires: 168 *\li '*lexp' is a valid lexer. 169 * 170 * Ensures: 171 *\li *lexp == NULL 172 */ 173 174 unsigned int 175 isc_lex_getcomments(isc_lex_t *lex); 176 /*%< 177 * Return the current lexer commenting styles. 178 * 179 * Requires: 180 *\li 'lex' is a valid lexer. 181 * 182 * Returns: 183 *\li The commenting styles which are currently allowed. 184 */ 185 186 void 187 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments); 188 /*%< 189 * Set allowed lexer commenting styles. 190 * 191 * Requires: 192 *\li 'lex' is a valid lexer. 193 * 194 *\li 'comments' has meaningful values. 195 */ 196 197 void 198 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials); 199 /*%< 200 * Put the current list of specials into 'specials'. 201 * 202 * Requires: 203 *\li 'lex' is a valid lexer. 204 */ 205 206 void 207 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials); 208 /*!< 209 * The characters in 'specials' are returned as tokens. Along with 210 * whitespace, they delimit strings and numbers. 211 * 212 * Note: 213 *\li Comment processing takes precedence over special character 214 * recognition. 215 * 216 * Requires: 217 *\li 'lex' is a valid lexer. 218 */ 219 220 isc_result_t 221 isc_lex_openfile(isc_lex_t *lex, const char *filename); 222 /*%< 223 * Open 'filename' and make it the current input source for 'lex'. 224 * 225 * Requires: 226 *\li 'lex' is a valid lexer. 227 * 228 *\li filename is a valid C string. 229 * 230 * Returns: 231 *\li #ISC_R_SUCCESS 232 *\li #ISC_R_NOMEMORY Out of memory 233 *\li #ISC_R_NOTFOUND File not found 234 *\li #ISC_R_NOPERM No permission to open file 235 *\li #ISC_R_FAILURE Couldn't open file, not sure why 236 *\li #ISC_R_UNEXPECTED 237 */ 238 239 isc_result_t 240 isc_lex_openstream(isc_lex_t *lex, FILE *stream); 241 /*%< 242 * Make 'stream' the current input source for 'lex'. 243 * 244 * Requires: 245 *\li 'lex' is a valid lexer. 246 * 247 *\li 'stream' is a valid C stream. 248 * 249 * Returns: 250 *\li #ISC_R_SUCCESS 251 *\li #ISC_R_NOMEMORY Out of memory 252 */ 253 254 isc_result_t 255 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer); 256 /*%< 257 * Make 'buffer' the current input source for 'lex'. 258 * 259 * Requires: 260 *\li 'lex' is a valid lexer. 261 * 262 *\li 'buffer' is a valid buffer. 263 * 264 * Returns: 265 *\li #ISC_R_SUCCESS 266 *\li #ISC_R_NOMEMORY Out of memory 267 */ 268 269 isc_result_t 270 isc_lex_close(isc_lex_t *lex); 271 /*%< 272 * Close the most recently opened object (i.e. file or buffer). 273 * 274 * Returns: 275 *\li #ISC_R_SUCCESS 276 *\li #ISC_R_NOMORE No more input sources 277 */ 278 279 isc_result_t 280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp); 281 /*%< 282 * Get the next token. 283 * 284 * Requires: 285 *\li 'lex' is a valid lexer. 286 * 287 *\li 'lex' has an input source. 288 * 289 *\li 'options' contains valid options. 290 * 291 *\li '*tokenp' is a valid pointer. 292 * 293 * Returns: 294 *\li #ISC_R_SUCCESS 295 *\li #ISC_R_UNEXPECTEDEND 296 *\li #ISC_R_NOMEMORY 297 * 298 * These two results are returned only if their corresponding lexer 299 * options are not set. 300 * 301 *\li #ISC_R_EOF End of input source 302 *\li #ISC_R_NOMORE No more input sources 303 */ 304 305 isc_result_t 306 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token, 307 isc_tokentype_t expect, bool eol); 308 /*%< 309 * Get the next token from a DNS master file type stream. This is a 310 * convenience function that sets appropriate options and handles quoted 311 * strings and end of line correctly for master files. It also ungets 312 * unexpected tokens. If `eol` is set then expect end-of-line otherwise 313 * eol is a error. 314 * 315 * Requires: 316 *\li 'lex' is a valid lexer. 317 * 318 *\li 'token' is a valid pointer 319 * 320 * Returns: 321 * 322 * \li any return code from isc_lex_gettoken(). 323 */ 324 325 isc_result_t 326 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, bool eol); 327 /*%< 328 * Get the next token from a DNS master file type stream. This is a 329 * convenience function that sets appropriate options and handles end 330 * of line correctly for master files. It also ungets unexpected tokens. 331 * If `eol` is set then expect end-of-line otherwise eol is a error. 332 * 333 * Requires: 334 *\li 'lex' is a valid lexer. 335 * 336 *\li 'token' is a valid pointer 337 * 338 * Returns: 339 * 340 * \li any return code from isc_lex_gettoken(). 341 */ 342 343 void 344 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp); 345 /*%< 346 * Unget the current token. 347 * 348 * Requires: 349 *\li 'lex' is a valid lexer. 350 * 351 *\li 'lex' has an input source. 352 * 353 *\li 'tokenp' points to a valid token. 354 * 355 *\li There is no ungotten token already. 356 */ 357 358 void 359 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r); 360 /*%< 361 * Returns a region containing the text of the last token returned. 362 * 363 * Requires: 364 *\li 'lex' is a valid lexer. 365 * 366 *\li 'lex' has an input source. 367 * 368 *\li 'tokenp' points to a valid token. 369 * 370 *\li A token has been gotten and not ungotten. 371 */ 372 373 char * 374 isc_lex_getsourcename(isc_lex_t *lex); 375 /*%< 376 * Return the input source name. 377 * 378 * Requires: 379 *\li 'lex' is a valid lexer. 380 * 381 * Returns: 382 * \li source name or NULL if no current source. 383 *\li result valid while current input source exists. 384 */ 385 386 unsigned long 387 isc_lex_getsourceline(isc_lex_t *lex); 388 /*%< 389 * Return the input source line number. 390 * 391 * Requires: 392 *\li 'lex' is a valid lexer. 393 * 394 * Returns: 395 *\li Current line number or 0 if no current source. 396 */ 397 398 isc_result_t 399 isc_lex_setsourcename(isc_lex_t *lex, const char *name); 400 /*%< 401 * Assigns a new name to the input source. 402 * 403 * Requires: 404 * 405 * \li 'lex' is a valid lexer. 406 * 407 * Returns: 408 * \li #ISC_R_SUCCESS 409 * \li #ISC_R_NOMEMORY 410 * \li #ISC_R_NOTFOUND - there are no sources. 411 */ 412 413 isc_result_t 414 isc_lex_setsourceline(isc_lex_t *lex, unsigned long line); 415 /*%< 416 * Assigns a new line number to the input source. This can be used 417 * when parsing a buffer that's been excerpted from the middle a file, 418 * allowing logged messages to display the correct line number, 419 * rather than the line number within the buffer. 420 * 421 * Requires: 422 * 423 * \li 'lex' is a valid lexer. 424 * 425 * Returns: 426 * \li #ISC_R_SUCCESS 427 * \li #ISC_R_NOTFOUND - there are no sources. 428 */ 429 430 bool 431 isc_lex_isfile(isc_lex_t *lex); 432 /*%< 433 * Return whether the current input source is a file. 434 * 435 * Requires: 436 *\li 'lex' is a valid lexer. 437 * 438 * Returns: 439 * \li #true if the current input is a file, 440 *\li #false otherwise. 441 */ 442 443 ISC_LANG_ENDDECLS 444 445 #endif /* ISC_LEX_H */ 446