1 /* 2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 3 * 4 * Permission to use, copy, modify, and/or distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 10 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 13 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 14 * PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* $Id: lex.h,v 1.7 2020/09/13 09:31:36 florian Exp $ */ 18 19 #ifndef ISC_LEX_H 20 #define ISC_LEX_H 1 21 22 /***** 23 ***** Module Info 24 *****/ 25 26 /*! \file isc/lex.h 27 * \brief The "lex" module provides a lightweight tokenizer. It can operate 28 * on files or buffers, and can handle "include". It is designed for 29 * parsing of DNS master files and the BIND configuration file, but 30 * should be general enough to tokenize other things, e.g. HTTP. 31 * 32 * \li MP: 33 * No synchronization is provided. Clients must ensure exclusive 34 * access. 35 * 36 * \li Reliability: 37 * No anticipated impact. 38 * 39 * \li Resources: 40 * TBS 41 * 42 * \li Security: 43 * No anticipated impact. 44 * 45 * \li Standards: 46 * None. 47 */ 48 49 /*** 50 *** Imports 51 ***/ 52 53 #include <stdio.h> 54 55 #include <isc/region.h> 56 #include <isc/types.h> 57 58 /*** 59 *** Options 60 ***/ 61 62 /*@{*/ 63 /*! 64 * Various options for isc_lex_gettoken(). 65 */ 66 67 #define ISC_LEXOPT_EOF 0x02 /*%< Want end-of-file token. */ 68 #define ISC_LEXOPT_QSTRING 0x10 /*%< Recognize qstrings. */ 69 /*@}*/ 70 71 #define ISC_LEXOPT_NOMORE 0x40 /*%< Want "no more" token. */ 72 73 #define ISC_LEXOPT_QSTRINGMULTILINE 0x200 /*%< Allow multiline "" strings */ 74 75 /*@{*/ 76 /*! 77 * Various commenting styles, which may be changed at any time with 78 * isc_lex_setcomments(). 79 */ 80 81 #define ISC_LEXCOMMENT_C 0x01 82 #define ISC_LEXCOMMENT_CPLUSPLUS 0x02 83 #define ISC_LEXCOMMENT_SHELL 0x04 84 /*@}*/ 85 86 /*** 87 *** Types 88 ***/ 89 90 /*! Lex */ 91 92 typedef char isc_lexspecials_t[256]; 93 94 /* Tokens */ 95 96 typedef enum { 97 isc_tokentype_unknown = 0, 98 isc_tokentype_string = 1, 99 isc_tokentype_number = 2, 100 isc_tokentype_qstring = 3, 101 isc_tokentype_eol = 4, 102 isc_tokentype_eof = 5, 103 isc_tokentype_initialws = 6, 104 isc_tokentype_special = 7, 105 isc_tokentype_nomore = 8 106 } isc_tokentype_t; 107 108 typedef union { 109 char as_char; 110 unsigned long as_ulong; 111 isc_region_t as_region; 112 isc_textregion_t as_textregion; 113 void * as_pointer; 114 } isc_tokenvalue_t; 115 116 typedef struct isc_token { 117 isc_tokentype_t type; 118 isc_tokenvalue_t value; 119 } isc_token_t; 120 121 /*** 122 *** Functions 123 ***/ 124 125 isc_result_t 126 isc_lex_create(size_t max_token, isc_lex_t **lexp); 127 /*%< 128 * Create a lexer. 129 * 130 * 'max_token' is a hint of the number of bytes in the largest token. 131 * 132 * Requires: 133 *\li '*lexp' is a valid lexer. 134 * 135 * Ensures: 136 *\li On success, *lexp is attached to the newly created lexer. 137 * 138 * Returns: 139 *\li #ISC_R_SUCCESS 140 *\li #ISC_R_NOMEMORY 141 */ 142 143 void 144 isc_lex_destroy(isc_lex_t **lexp); 145 /*%< 146 * Destroy the lexer. 147 * 148 * Requires: 149 *\li '*lexp' is a valid lexer. 150 * 151 * Ensures: 152 *\li *lexp == NULL 153 */ 154 155 void 156 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments); 157 /*%< 158 * Set allowed lexer commenting styles. 159 * 160 * Requires: 161 *\li 'lex' is a valid lexer. 162 * 163 *\li 'comments' has meaningful values. 164 */ 165 166 void 167 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials); 168 /*!< 169 * The characters in 'specials' are returned as tokens. Along with 170 * whitespace, they delimit strings and numbers. 171 * 172 * Note: 173 *\li Comment processing takes precedence over special character 174 * recognition. 175 * 176 * Requires: 177 *\li 'lex' is a valid lexer. 178 */ 179 180 isc_result_t 181 isc_lex_openfile(isc_lex_t *lex, const char *filename); 182 /*%< 183 * Open 'filename' and make it the current input source for 'lex'. 184 * 185 * Requires: 186 *\li 'lex' is a valid lexer. 187 * 188 *\li filename is a valid C string. 189 * 190 * Returns: 191 *\li #ISC_R_SUCCESS 192 *\li #ISC_R_NOMEMORY Out of memory 193 *\li #ISC_R_NOTFOUND File not found 194 *\li #ISC_R_NOPERM No permission to open file 195 *\li #ISC_R_FAILURE Couldn't open file, not sure why 196 *\li #ISC_R_UNEXPECTED 197 */ 198 199 isc_result_t 200 isc_lex_close(isc_lex_t *lex); 201 /*%< 202 * Close the most recently opened object (i.e. file or buffer). 203 * 204 * Returns: 205 *\li #ISC_R_SUCCESS 206 *\li #ISC_R_NOMORE No more input sources 207 */ 208 209 isc_result_t 210 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp); 211 /*%< 212 * Get the next token. 213 * 214 * Requires: 215 *\li 'lex' is a valid lexer. 216 * 217 *\li 'lex' has an input source. 218 * 219 *\li 'options' contains valid options. 220 * 221 *\li '*tokenp' is a valid pointer. 222 * 223 * Returns: 224 *\li #ISC_R_SUCCESS 225 *\li #ISC_R_UNEXPECTEDEND 226 *\li #ISC_R_NOMEMORY 227 * 228 * These two results are returned only if their corresponding lexer 229 * options are not set. 230 * 231 *\li #ISC_R_EOF End of input source 232 *\li #ISC_R_NOMORE No more input sources 233 */ 234 235 void 236 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp); 237 /*%< 238 * Unget the current token. 239 * 240 * Requires: 241 *\li 'lex' is a valid lexer. 242 * 243 *\li 'lex' has an input source. 244 * 245 *\li 'tokenp' points to a valid token. 246 * 247 *\li There is no ungotten token already. 248 */ 249 250 void 251 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r); 252 /*%< 253 * Returns a region containing the text of the last token returned. 254 * 255 * Requires: 256 *\li 'lex' is a valid lexer. 257 * 258 *\li 'lex' has an input source. 259 * 260 *\li 'tokenp' points to a valid token. 261 * 262 *\li A token has been gotten and not ungotten. 263 */ 264 265 char * 266 isc_lex_getsourcename(isc_lex_t *lex); 267 /*%< 268 * Return the input source name. 269 * 270 * Requires: 271 *\li 'lex' is a valid lexer. 272 * 273 * Returns: 274 * \li source name or NULL if no current source. 275 *\li result valid while current input source exists. 276 */ 277 278 unsigned long 279 isc_lex_getsourceline(isc_lex_t *lex); 280 /*%< 281 * Return the input source line number. 282 * 283 * Requires: 284 *\li 'lex' is a valid lexer. 285 * 286 * Returns: 287 *\li Current line number or 0 if no current source. 288 */ 289 290 #endif /* ISC_LEX_H */ 291