1 /* $NetBSD: lex.h,v 1.4 2014/12/10 04:38:00 christos Exp $ */ 2 3 /* 4 * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") 5 * Copyright (C) 1998-2002 Internet Software Consortium. 6 * 7 * Permission to use, copy, modify, and/or distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 17 * PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 /* Id: lex.h,v 1.37 2008/05/30 23:47:01 tbox Exp */ 21 22 #ifndef ISC_LEX_H 23 #define ISC_LEX_H 1 24 25 /***** 26 ***** Module Info 27 *****/ 28 29 /*! \file isc/lex.h 30 * \brief The "lex" module provides a lightweight tokenizer. It can operate 31 * on files or buffers, and can handle "include". It is designed for 32 * parsing of DNS master files and the BIND configuration file, but 33 * should be general enough to tokenize other things, e.g. HTTP. 34 * 35 * \li MP: 36 * No synchronization is provided. Clients must ensure exclusive 37 * access. 38 * 39 * \li Reliability: 40 * No anticipated impact. 41 * 42 * \li Resources: 43 * TBS 44 * 45 * \li Security: 46 * No anticipated impact. 47 * 48 * \li Standards: 49 * None. 50 */ 51 52 /*** 53 *** Imports 54 ***/ 55 56 #include <stdio.h> 57 58 #include <isc/lang.h> 59 #include <isc/region.h> 60 #include <isc/types.h> 61 62 ISC_LANG_BEGINDECLS 63 64 /*** 65 *** Options 66 ***/ 67 68 /*@{*/ 69 /*! 70 * Various options for isc_lex_gettoken(). 71 */ 72 73 #define ISC_LEXOPT_EOL 0x01 /*%< Want end-of-line token. */ 74 #define ISC_LEXOPT_EOF 0x02 /*%< Want end-of-file token. */ 75 #define ISC_LEXOPT_INITIALWS 0x04 /*%< Want initial whitespace. */ 76 #define ISC_LEXOPT_NUMBER 0x08 /*%< Recognize numbers. */ 77 #define ISC_LEXOPT_QSTRING 0x10 /*%< Recognize qstrings. */ 78 /*@}*/ 79 80 /*@{*/ 81 /*! 82 * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in 83 * the DNS master file format. If this option is set, then the 84 * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when 85 * the paren count is > 0. To use this option, '(' and ')' must be special 86 * characters. 87 */ 88 #define ISC_LEXOPT_DNSMULTILINE 0x20 /*%< Handle '(' and ')'. */ 89 #define ISC_LEXOPT_NOMORE 0x40 /*%< Want "no more" token. */ 90 91 #define ISC_LEXOPT_CNUMBER 0x80 /*%< Recognize octal and hex. */ 92 #define ISC_LEXOPT_ESCAPE 0x100 /*%< Recognize escapes. */ 93 #define ISC_LEXOPT_QSTRINGMULTILINE 0x200 /*%< Allow multiline "" strings */ 94 #define ISC_LEXOPT_OCTAL 0x400 /*%< Expect a octal number. */ 95 /*@}*/ 96 /*@{*/ 97 /*! 98 * Various commenting styles, which may be changed at any time with 99 * isc_lex_setcomments(). 100 */ 101 102 #define ISC_LEXCOMMENT_C 0x01 103 #define ISC_LEXCOMMENT_CPLUSPLUS 0x02 104 #define ISC_LEXCOMMENT_SHELL 0x04 105 #define ISC_LEXCOMMENT_DNSMASTERFILE 0x08 106 /*@}*/ 107 108 /*** 109 *** Types 110 ***/ 111 112 /*! Lex */ 113 114 typedef char isc_lexspecials_t[256]; 115 116 /* Tokens */ 117 118 typedef enum { 119 isc_tokentype_unknown = 0, 120 isc_tokentype_string = 1, 121 isc_tokentype_number = 2, 122 isc_tokentype_qstring = 3, 123 isc_tokentype_eol = 4, 124 isc_tokentype_eof = 5, 125 isc_tokentype_initialws = 6, 126 isc_tokentype_special = 7, 127 isc_tokentype_nomore = 8 128 } isc_tokentype_t; 129 130 typedef union { 131 char as_char; 132 unsigned long as_ulong; 133 isc_region_t as_region; 134 isc_textregion_t as_textregion; 135 void * as_pointer; 136 } isc_tokenvalue_t; 137 138 typedef struct isc_token { 139 isc_tokentype_t type; 140 isc_tokenvalue_t value; 141 } isc_token_t; 142 143 /*** 144 *** Functions 145 ***/ 146 147 isc_result_t 148 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp); 149 /*%< 150 * Create a lexer. 151 * 152 * 'max_token' is a hint of the number of bytes in the largest token. 153 * 154 * Requires: 155 *\li '*lexp' is a valid lexer. 156 * 157 *\li max_token > 0. 158 * 159 * Ensures: 160 *\li On success, *lexp is attached to the newly created lexer. 161 * 162 * Returns: 163 *\li #ISC_R_SUCCESS 164 *\li #ISC_R_NOMEMORY 165 */ 166 167 void 168 isc_lex_destroy(isc_lex_t **lexp); 169 /*%< 170 * Destroy the lexer. 171 * 172 * Requires: 173 *\li '*lexp' is a valid lexer. 174 * 175 * Ensures: 176 *\li *lexp == NULL 177 */ 178 179 unsigned int 180 isc_lex_getcomments(isc_lex_t *lex); 181 /*%< 182 * Return the current lexer commenting styles. 183 * 184 * Requires: 185 *\li 'lex' is a valid lexer. 186 * 187 * Returns: 188 *\li The commenting sytles which are currently allowed. 189 */ 190 191 void 192 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments); 193 /*%< 194 * Set allowed lexer commenting styles. 195 * 196 * Requires: 197 *\li 'lex' is a valid lexer. 198 * 199 *\li 'comments' has meaningful values. 200 */ 201 202 void 203 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials); 204 /*%< 205 * Put the current list of specials into 'specials'. 206 * 207 * Requires: 208 *\li 'lex' is a valid lexer. 209 */ 210 211 void 212 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials); 213 /*!< 214 * The characters in 'specials' are returned as tokens. Along with 215 * whitespace, they delimit strings and numbers. 216 * 217 * Note: 218 *\li Comment processing takes precedence over special character 219 * recognition. 220 * 221 * Requires: 222 *\li 'lex' is a valid lexer. 223 */ 224 225 isc_result_t 226 isc_lex_openfile(isc_lex_t *lex, const char *filename); 227 /*%< 228 * Open 'filename' and make it the current input source for 'lex'. 229 * 230 * Requires: 231 *\li 'lex' is a valid lexer. 232 * 233 *\li filename is a valid C string. 234 * 235 * Returns: 236 *\li #ISC_R_SUCCESS 237 *\li #ISC_R_NOMEMORY Out of memory 238 *\li #ISC_R_NOTFOUND File not found 239 *\li #ISC_R_NOPERM No permission to open file 240 *\li #ISC_R_FAILURE Couldn't open file, not sure why 241 *\li #ISC_R_UNEXPECTED 242 */ 243 244 isc_result_t 245 isc_lex_openstream(isc_lex_t *lex, FILE *stream); 246 /*%< 247 * Make 'stream' the current input source for 'lex'. 248 * 249 * Requires: 250 *\li 'lex' is a valid lexer. 251 * 252 *\li 'stream' is a valid C stream. 253 * 254 * Returns: 255 *\li #ISC_R_SUCCESS 256 *\li #ISC_R_NOMEMORY Out of memory 257 */ 258 259 isc_result_t 260 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer); 261 /*%< 262 * Make 'buffer' the current input source for 'lex'. 263 * 264 * Requires: 265 *\li 'lex' is a valid lexer. 266 * 267 *\li 'buffer' is a valid buffer. 268 * 269 * Returns: 270 *\li #ISC_R_SUCCESS 271 *\li #ISC_R_NOMEMORY Out of memory 272 */ 273 274 isc_result_t 275 isc_lex_close(isc_lex_t *lex); 276 /*%< 277 * Close the most recently opened object (i.e. file or buffer). 278 * 279 * Returns: 280 *\li #ISC_R_SUCCESS 281 *\li #ISC_R_NOMORE No more input sources 282 */ 283 284 isc_result_t 285 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp); 286 /*%< 287 * Get the next token. 288 * 289 * Requires: 290 *\li 'lex' is a valid lexer. 291 * 292 *\li 'lex' has an input source. 293 * 294 *\li 'options' contains valid options. 295 * 296 *\li '*tokenp' is a valid pointer. 297 * 298 * Returns: 299 *\li #ISC_R_SUCCESS 300 *\li #ISC_R_UNEXPECTEDEND 301 *\li #ISC_R_NOMEMORY 302 * 303 * These two results are returned only if their corresponding lexer 304 * options are not set. 305 * 306 *\li #ISC_R_EOF End of input source 307 *\li #ISC_R_NOMORE No more input sources 308 */ 309 310 isc_result_t 311 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token, 312 isc_tokentype_t expect, isc_boolean_t eol); 313 /*%< 314 * Get the next token from a DNS master file type stream. This is a 315 * convenience function that sets appropriate options and handles quoted 316 * strings and end of line correctly for master files. It also ungets 317 * unexpected tokens. 318 * 319 * Requires: 320 *\li 'lex' is a valid lexer. 321 * 322 *\li 'token' is a valid pointer 323 * 324 * Returns: 325 * 326 * \li any return code from isc_lex_gettoken(). 327 */ 328 329 isc_result_t 330 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol); 331 /*%< 332 * Get the next token from a DNS master file type stream. This is a 333 * convenience function that sets appropriate options and handles end 334 * of line correctly for master files. It also ungets unexpected tokens. 335 * 336 * Requires: 337 *\li 'lex' is a valid lexer. 338 * 339 *\li 'token' is a valid pointer 340 * 341 * Returns: 342 * 343 * \li any return code from isc_lex_gettoken(). 344 */ 345 346 void 347 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp); 348 /*%< 349 * Unget the current token. 350 * 351 * Requires: 352 *\li 'lex' is a valid lexer. 353 * 354 *\li 'lex' has an input source. 355 * 356 *\li 'tokenp' points to a valid token. 357 * 358 *\li There is no ungotten token already. 359 */ 360 361 void 362 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r); 363 /*%< 364 * Returns a region containing the text of the last token returned. 365 * 366 * Requires: 367 *\li 'lex' is a valid lexer. 368 * 369 *\li 'lex' has an input source. 370 * 371 *\li 'tokenp' points to a valid token. 372 * 373 *\li A token has been gotten and not ungotten. 374 */ 375 376 char * 377 isc_lex_getsourcename(isc_lex_t *lex); 378 /*%< 379 * Return the input source name. 380 * 381 * Requires: 382 *\li 'lex' is a valid lexer. 383 * 384 * Returns: 385 * \li source name or NULL if no current source. 386 *\li result valid while current input source exists. 387 */ 388 389 390 unsigned long 391 isc_lex_getsourceline(isc_lex_t *lex); 392 /*%< 393 * Return the input source line number. 394 * 395 * Requires: 396 *\li 'lex' is a valid lexer. 397 * 398 * Returns: 399 *\li Current line number or 0 if no current source. 400 */ 401 402 isc_result_t 403 isc_lex_setsourcename(isc_lex_t *lex, const char *name); 404 /*%< 405 * Assigns a new name to the input source. 406 * 407 * Requires: 408 * 409 * \li 'lex' is a valid lexer. 410 * 411 * Returns: 412 * \li #ISC_R_SUCCESS 413 * \li #ISC_R_NOMEMORY 414 * \li #ISC_R_NOTFOUND - there are no sources. 415 */ 416 417 isc_boolean_t 418 isc_lex_isfile(isc_lex_t *lex); 419 /*%< 420 * Return whether the current input source is a file. 421 * 422 * Requires: 423 *\li 'lex' is a valid lexer. 424 * 425 * Returns: 426 * \li #ISC_TRUE if the current input is a file, 427 *\li #ISC_FALSE otherwise. 428 */ 429 430 431 ISC_LANG_ENDDECLS 432 433 #endif /* ISC_LEX_H */ 434