xref: /openbsd/usr.bin/dig/lib/isc/include/isc/lex.h (revision e6d3fd4a)
1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /* $Id: lex.h,v 1.7 2020/09/13 09:31:36 florian Exp $ */
18 
19 #ifndef ISC_LEX_H
20 #define ISC_LEX_H 1
21 
22 /*****
23  ***** Module Info
24  *****/
25 
26 /*! \file isc/lex.h
27  * \brief The "lex" module provides a lightweight tokenizer.  It can operate
28  * on files or buffers, and can handle "include".  It is designed for
29  * parsing of DNS master files and the BIND configuration file, but
30  * should be general enough to tokenize other things, e.g. HTTP.
31  *
32  * \li MP:
33  *	No synchronization is provided.  Clients must ensure exclusive
34  *	access.
35  *
36  * \li Reliability:
37  *	No anticipated impact.
38  *
39  * \li Resources:
40  *	TBS
41  *
42  * \li Security:
43  *	No anticipated impact.
44  *
45  * \li Standards:
46  * 	None.
47  */
48 
49 /***
50  *** Imports
51  ***/
52 
53 #include <stdio.h>
54 
55 #include <isc/region.h>
56 #include <isc/types.h>
57 
58 /***
59  *** Options
60  ***/
61 
62 /*@{*/
63 /*!
64  * Various options for isc_lex_gettoken(); each is a distinct bit of its 'options' argument.
65  */
66 
67 #define ISC_LEXOPT_EOF			0x02	/*%< Want end-of-file token rather than #ISC_R_EOF. */
68 #define ISC_LEXOPT_QSTRING		0x10	/*%< Recognize qstrings. */
69 /*@}*/
70 
71 #define ISC_LEXOPT_NOMORE		0x40	/*%< Want "no more" token rather than #ISC_R_NOMORE. */
72 
73 #define ISC_LEXOPT_QSTRINGMULTILINE	0x200	/*%< Allow quoted ("") strings to span multiple lines. */
74 
75 /*@{*/
76 /*!
77  * Various commenting styles, which may be changed at any time with
78  * isc_lex_setcomments().  Each style is a distinct bit of its 'comments' argument.
79  */
80 
81 #define ISC_LEXCOMMENT_C		0x01	/*%< C-style comments. */
82 #define ISC_LEXCOMMENT_CPLUSPLUS	0x02	/*%< C++-style comments. */
83 #define ISC_LEXCOMMENT_SHELL		0x04	/*%< Shell-style comments. */
84 /*@}*/
85 
86 /***
87  *** Types
88  ***/
89 
90 /*! Lex: 256-entry table passed as 'specials' to isc_lex_setspecials().  NOTE(review): presumably indexed by character code -- confirm in lex.c. */
91 
92 typedef char isc_lexspecials_t[256];
93 
94 /* Tokens: the kind of a token, stored in the 'type' member of isc_token_t. */
95 
96 typedef enum {
97 	isc_tokentype_unknown = 0,
98 	isc_tokentype_string = 1,
99 	isc_tokentype_number = 2,
100 	isc_tokentype_qstring = 3,	/*%< Recognized when ISC_LEXOPT_QSTRING is set. */
101 	isc_tokentype_eol = 4,
102 	isc_tokentype_eof = 5,		/*%< Wanted via ISC_LEXOPT_EOF. */
103 	isc_tokentype_initialws = 6,
104 	isc_tokentype_special = 7,	/*%< NOTE(review): presumably a character selected with isc_lex_setspecials() -- confirm. */
105 	isc_tokentype_nomore = 8	/*%< Wanted via ISC_LEXOPT_NOMORE. */
106 } isc_tokentype_t;
107 
108 typedef union {
109 	char				as_char;
110 	unsigned long			as_ulong;
111 	isc_region_t			as_region;
112 	isc_textregion_t		as_textregion;
113 	void *				as_pointer;
114 } isc_tokenvalue_t;	/*%< Storage for the 'value' member of isc_token_t; being a union, all members overlap. */
115 
116 typedef struct isc_token {
117 	isc_tokentype_t			type;	/*%< Kind of token. */
118 	isc_tokenvalue_t		value;	/*%< Token payload.  NOTE(review): which union member is valid presumably depends on 'type' -- confirm in lex.c. */
119 } isc_token_t;
120 
121 /***
122  *** Functions
123  ***/
124 
125 isc_result_t
126 isc_lex_create(size_t max_token, isc_lex_t **lexp);
127 /*%<
128  * Create a lexer.
129  *
130  * 'max_token' is a hint of the number of bytes in the largest token.
131  *
132  * Requires:
133  *\li	'lexp' is a valid pointer.  NOTE(review): the implementation may also require '*lexp == NULL' -- confirm in lex.c.
134  *
135  * Ensures:
136  *\li	On success, *lexp is attached to the newly created lexer.
137  *
138  * Returns:
139  *\li	#ISC_R_SUCCESS
140  *\li	#ISC_R_NOMEMORY
141  */
142 
143 void
144 isc_lex_destroy(isc_lex_t **lexp);
145 /*%<
146  * Destroy the lexer referenced by '*lexp'.
147  *
148  * Requires:
149  *\li	'*lexp' is a valid lexer.
150  *
151  * Ensures:
152  *\li	*lexp == NULL
153  */
154 
155 void
156 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
157 /*%<
158  * Set allowed lexer commenting styles.
159  *
160  * Requires:
161  *\li	'lex' is a valid lexer.
162  *
163  *\li	'comments' has meaningful values (a combination of the ISC_LEXCOMMENT_* flags).
164  */
165 
166 void
167 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
168 /*%<
169  * The characters in 'specials' are returned as tokens.  Along with
170  * whitespace, they delimit strings and numbers.
171  *
172  * Note:
173  *\li	Comment processing takes precedence over special character
174  *	recognition.
175  *
176  * Requires:
177  *\li	'lex' is a valid lexer.
178  */
179 
180 isc_result_t
181 isc_lex_openfile(isc_lex_t *lex, const char *filename);
182 /*%<
183  * Open 'filename' and make it the current input source for 'lex'.
184  *
185  * Requires:
186  *\li	'lex' is a valid lexer.
187  *
188  *\li	'filename' is a valid C string.
189  *
190  * Returns:
191  *\li	#ISC_R_SUCCESS
192  *\li	#ISC_R_NOMEMORY			Out of memory
193  *\li	#ISC_R_NOTFOUND			File not found
194  *\li	#ISC_R_NOPERM			No permission to open file
195  *\li	#ISC_R_FAILURE			Couldn't open file, not sure why
196  *\li	#ISC_R_UNEXPECTED
197  */
198 
199 isc_result_t
200 isc_lex_close(isc_lex_t *lex);
201 /*%<
202  * Close the most recently opened object (i.e. file or buffer; NOTE(review): only isc_lex_openfile() is declared in this header).
203  *
204  * Returns:
205  *\li	#ISC_R_SUCCESS
206  *\li	#ISC_R_NOMORE			No more input sources
207  */
208 
209 isc_result_t
210 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
211 /*%<
212  * Get the next token.
213  *
214  * Requires:
215  *\li	'lex' is a valid lexer.
216  *
217  *\li	'lex' has an input source.
218  *
219  *\li	'options' contains valid options.
220  *
221  *\li	'tokenp' is a valid pointer.
222  *
223  * Returns:
224  *\li	#ISC_R_SUCCESS
225  *\li	#ISC_R_UNEXPECTEDEND
226  *\li	#ISC_R_NOMEMORY
227  *
228  *	The following two results are returned only if their corresponding
229  *	lexer options (ISC_LEXOPT_EOF, ISC_LEXOPT_NOMORE) are not set.
230  *
231  *\li	#ISC_R_EOF			End of input source
232  *\li	#ISC_R_NOMORE			No more input sources
233  */
234 
235 void
236 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
237 /*%<
238  * Unget the current token ('tokenp'); only one token may be ungotten at a time.
239  *
240  * Requires:
241  *\li	'lex' is a valid lexer.
242  *
243  *\li	'lex' has an input source.
244  *
245  *\li	'tokenp' points to a valid token.
246  *
247  *\li	There is no ungotten token already.
248  */
249 
250 void
251 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
252 /*%<
253  * Returns, via 'r', a region containing the text of the last token returned.
254  *
255  * Requires:
256  *\li	'lex' is a valid lexer.
257  *
258  *\li	'lex' has an input source.
259  *
260  *\li	'tokenp' points to a valid token.
261  *
262  *\li	A token has been gotten and not ungotten.
263  */
264 
265 char *
266 isc_lex_getsourcename(isc_lex_t *lex);
267 /*%<
268  * Return the name of the current input source.
269  *
270  * Requires:
271  *\li	'lex' is a valid lexer.
272  *
273  * Returns:
274  *\li	source name or NULL if no current source.
275  *\li	result valid while current input source exists.
276  */
277 
278 unsigned long
279 isc_lex_getsourceline(isc_lex_t *lex);
280 /*%<
281  * Return the current line number of the input source.
282  *
283  * Requires:
284  *\li	'lex' is a valid lexer.
285  *
286  * Returns:
287  *\li	Current line number or 0 if no current source.
288  */
289 
290 #endif /* ISC_LEX_H */
291