1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
7  *
8  * See the COPYRIGHT file distributed with this work for additional
9  * information regarding copyright ownership.
10  */
11 
12 #ifndef ISC_LEX_H
13 #define ISC_LEX_H 1
14 
15 /*****
16 ***** Module Info
17 *****/
18 
19 /*! \file isc/lex.h
20  * \brief The "lex" module provides a lightweight tokenizer.  It can operate
21  * on files or buffers, and can handle "include".  It is designed for
22  * parsing of DNS master files and the BIND configuration file, but
23  * should be general enough to tokenize other things, e.g. HTTP.
24  *
25  * \li MP:
26  *	No synchronization is provided.  Clients must ensure exclusive
27  *	access.
28  *
29  * \li Reliability:
30  *	No anticipated impact.
31  *
32  * \li Resources:
33  *	TBS
34  *
35  * \li Security:
36  *	No anticipated impact.
37  *
38  * \li Standards:
39  * 	None.
40  */
41 
42 /***
43  *** Imports
44  ***/
45 
46 #include <stdbool.h>
47 #include <stdio.h>
48 
49 #include <isc/lang.h>
50 #include <isc/region.h>
51 #include <isc/types.h>
52 
53 ISC_LANG_BEGINDECLS
54 
55 /***
56  *** Options
57  ***/
58 
59 /*@{*/
60 /*!
61  * Option flags (one distinct bit each) accepted by isc_lex_gettoken().
62  */
63 
64 #define ISC_LEXOPT_EOL	     0x0001 /*%< Want end-of-line token. */
65 #define ISC_LEXOPT_EOF	     0x0002 /*%< Want end-of-file token. */
66 #define ISC_LEXOPT_INITIALWS 0x0004 /*%< Want initial whitespace token. */
67 #define ISC_LEXOPT_NUMBER    0x0008 /*%< Recognize numbers. */
68 #define ISC_LEXOPT_QSTRING   0x0010 /*%< Recognize quoted strings. */
69 /*@}*/
70 
71 /*@{*/
72 /*!
73  * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in
74  * the DNS master file format.  If this option is set, then the
75  * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when
76  * the paren count is > 0.  To use this option, '(' and ')' must be special
77  * characters (see isc_lex_setspecials()).
78  */
79 #define ISC_LEXOPT_DNSMULTILINE 0x0020 /*%< Handle '(' and ')'. */
80 #define ISC_LEXOPT_NOMORE	0x0040 /*%< Want "no more" token. */
81 
82 #define ISC_LEXOPT_CNUMBER	    0x0080 /*%< Recognize octal and hex. */
83 #define ISC_LEXOPT_ESCAPE	    0x0100 /*%< Recognize escapes. */
84 #define ISC_LEXOPT_QSTRINGMULTILINE 0x0200 /*%< Allow multiline "" strings. */
85 #define ISC_LEXOPT_OCTAL	    0x0400 /*%< Expect an octal number. */
86 #define ISC_LEXOPT_BTEXT	    0x0800 /*%< Bracketed text. */
87 #define ISC_LEXOPT_VPAIR	    0x1000 /*%< Recognize value pair. */
88 #define ISC_LEXOPT_QVPAIR	    0x2000 /*%< Recognize quoted value pair. */
89 /*@}*/
90 /*@{*/
91 /*!
92  * Commenting style flags (one distinct bit each).  The allowed styles may
93  * be changed at any time with isc_lex_setcomments().
94  */
95 
96 #define ISC_LEXCOMMENT_C	     0x01 /*%< C commenting style. */
97 #define ISC_LEXCOMMENT_CPLUSPLUS     0x02 /*%< C++ commenting style. */
98 #define ISC_LEXCOMMENT_SHELL	     0x04 /*%< Shell commenting style. */
99 #define ISC_LEXCOMMENT_DNSMASTERFILE 0x08 /*%< DNS master file commenting style. */
100 /*@}*/
101 
102 /***
103  *** Types
104  ***/
105 
106 /*! Lex */
107 
108 typedef char isc_lexspecials_t[256]; /*%< Set of special characters; presumably indexed by character value (TODO confirm). */
109 
110 /* Tokens */
111 
112 typedef enum { /*%< Kind of token stored in an isc_token_t. */
113 	isc_tokentype_unknown = 0,   /*%< Unknown token type. */
114 	isc_tokentype_string = 1,    /*%< String. */
115 	isc_tokentype_number = 2,    /*%< Number (see ISC_LEXOPT_NUMBER). */
116 	isc_tokentype_qstring = 3,   /*%< Quoted string (see ISC_LEXOPT_QSTRING). */
117 	isc_tokentype_eol = 4,	     /*%< End of line (see ISC_LEXOPT_EOL). */
118 	isc_tokentype_eof = 5,	     /*%< End of file (see ISC_LEXOPT_EOF). */
119 	isc_tokentype_initialws = 6, /*%< Initial whitespace (see ISC_LEXOPT_INITIALWS). */
120 	isc_tokentype_special = 7,   /*%< Special character (see isc_lex_setspecials()). */
121 	isc_tokentype_nomore = 8,    /*%< "No more" token (see ISC_LEXOPT_NOMORE). */
122 	isc_tokentype_btext = 9,     /*%< Bracketed text (see ISC_LEXOPT_BTEXT). */
123 	isc_tokentype_vpair = 10,    /*%< Value pair (see ISC_LEXOPT_VPAIR). */
124 	isc_tokentype_qvpair = 11,   /*%< Quoted value pair (see ISC_LEXOPT_QVPAIR). */
125 } isc_tokentype_t;
126 
127 typedef union { /*%< Value carried by a token; NOTE(review): which member is meaningful presumably depends on the token type -- confirm against the implementation. */
128 	char		 as_char;
129 	unsigned long	 as_ulong;
130 	isc_region_t	 as_region;
131 	isc_textregion_t as_textregion;
132 	void *		 as_pointer;
133 } isc_tokenvalue_t;
134 
135 typedef struct isc_token { /*%< A token: its type together with its value. */
136 	isc_tokentype_t	 type;	/*%< Kind of token. */
137 	isc_tokenvalue_t value; /*%< Value associated with the token. */
138 } isc_token_t;
139 
140 /***
141  *** Functions
142  ***/
143 
144 isc_result_t
145 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp);
146 /*%<
147  * Create a lexer.
148  *
149  * 'max_token' is a hint of the number of bytes in the largest token.
150  *
151  * Requires:
152  *\li	'lexp' is a valid pointer; presumably '*lexp' == NULL (TODO confirm).
153  *
154  * Ensures:
155  *\li	On success, *lexp is attached to the newly created lexer.
156  *
157  * Returns:
158  *\li	#ISC_R_SUCCESS
159  *\li	#ISC_R_NOMEMORY
160  */
161 
162 void
163 isc_lex_destroy(isc_lex_t **lexp);
164 /*%<
165  * Destroy the lexer referenced by '*lexp'.
166  *
167  * Requires:
168  *\li	'*lexp' is a valid lexer.
169  *
170  * Ensures:
171  *\li	*lexp == NULL
172  */
173 
174 unsigned int
175 isc_lex_getcomments(isc_lex_t *lex);
176 /*%<
177  * Return the current lexer commenting styles.
178  *
179  * Requires:
180  *\li	'lex' is a valid lexer.
181  *
182  * Returns:
183  *\li	The commenting styles (ISC_LEXCOMMENT_* flags) currently allowed.
184  */
185 
186 void
187 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
188 /*%<
189  * Set allowed lexer commenting styles.
190  *
191  * Requires:
192  *\li	'lex' is a valid lexer.
193  *
194  *\li	'comments' has meaningful values (ISC_LEXCOMMENT_* flags).
195  */
196 
197 void
198 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials);
199 /*%<
200  * Put the lexer's current list of special characters into 'specials'.
201  *
202  * Requires:
203  *\li	'lex' is a valid lexer; 'specials' is a writable isc_lexspecials_t.
204  */
205 
206 void
207 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
208 /*%<
209  * The characters in 'specials' are returned as tokens.  Along with
210  * whitespace, they delimit strings and numbers.
211  *
212  * Note:
213  *\li	Comment processing takes precedence over special character
214  *	recognition.
215  *
216  * Requires:
217  *\li	'lex' is a valid lexer.
218  */
219 
220 isc_result_t
221 isc_lex_openfile(isc_lex_t *lex, const char *filename);
222 /*%<
223  * Open 'filename' and make it the current input source for 'lex'.
224  *
225  * Requires:
226  *\li	'lex' is a valid lexer.
227  *
228  *\li	'filename' is a valid C string.
229  *
230  * Returns:
231  *\li	#ISC_R_SUCCESS
232  *\li	#ISC_R_NOMEMORY			Out of memory
233  *\li	#ISC_R_NOTFOUND			File not found
234  *\li	#ISC_R_NOPERM			No permission to open file
235  *\li	#ISC_R_FAILURE			Couldn't open file, not sure why
236  *\li	#ISC_R_UNEXPECTED
237  */
238 
239 isc_result_t
240 isc_lex_openstream(isc_lex_t *lex, FILE *stream);
241 /*%<
242  * Make 'stream' the current input source for 'lex'.
243  *
244  * Requires:
245  *\li	'lex' is a valid lexer.
246  *
247  *\li	'stream' is a valid C stdio stream (FILE *).
248  *
249  * Returns:
250  *\li	#ISC_R_SUCCESS
251  *\li	#ISC_R_NOMEMORY			Out of memory
252  */
253 
254 isc_result_t
255 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer);
256 /*%<
257  * Make 'buffer' the current input source for 'lex'.
258  *
259  * Requires:
260  *\li	'lex' is a valid lexer.
261  *
262  *\li	'buffer' is a valid isc_buffer_t.
263  *
264  * Returns:
265  *\li	#ISC_R_SUCCESS
266  *\li	#ISC_R_NOMEMORY			Out of memory
267  */
268 
269 isc_result_t
270 isc_lex_close(isc_lex_t *lex);
271 /*%<
272  * Close the most recently opened object (i.e. file, stream or buffer).
273  *
274  * Returns:
275  *\li	#ISC_R_SUCCESS
276  *\li	#ISC_R_NOMORE			No more input sources
277  */
278 
279 isc_result_t
280 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
281 /*%<
282  * Get the next token.
283  *
284  * Requires:
285  *\li	'lex' is a valid lexer.
286  *
287  *\li	'lex' has an input source.
288  *
289  *\li	'options' contains valid options (ISC_LEXOPT_* flags).
290  *
291  *\li	'tokenp' is a valid pointer.
292  *
293  * Returns:
294  *\li	#ISC_R_SUCCESS
295  *\li	#ISC_R_UNEXPECTEDEND
296  *\li	#ISC_R_NOMEMORY
297  *
298  *	These two results are returned only if their corresponding lexer
299  *	options (ISC_LEXOPT_EOF and ISC_LEXOPT_NOMORE) are not set.
300  *
301  *\li	#ISC_R_EOF			End of input source
302  *\li	#ISC_R_NOMORE			No more input sources
303  */
304 
305 isc_result_t
306 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
307 		       isc_tokentype_t expect, bool eol);
308 /*%<
309  * Get the next token from a DNS master file type stream.  This is a
310  * convenience function that sets appropriate options and handles quoted
311  * strings and end of line correctly for master files.  It also ungets
312  * unexpected tokens.  If `eol` is set then expect end-of-line, otherwise
313  * end-of-line is an error.
314  *
315  * Requires:
316  *\li	'lex' is a valid lexer.
317  *
318  *\li	'token' is a valid pointer.
319  *
320  * Returns:
321  *
322  *\li	any return code from isc_lex_gettoken().
323  */
324 
325 isc_result_t
326 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, bool eol);
327 /*%<
328  * Get the next token from a DNS master file type stream.  This is a
329  * convenience function that sets appropriate options and handles end
330  * of line correctly for master files.  It also ungets unexpected tokens.
331  * If `eol` is set then expect end-of-line, otherwise end-of-line is an error.
332  *
333  * Requires:
334  *\li	'lex' is a valid lexer.
335  *
336  *\li	'token' is a valid pointer.
337  *
338  * Returns:
339  *
340  *\li	any return code from isc_lex_gettoken().
341  */
342 
343 void
344 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
345 /*%<
346  * Unget the current token.
347  *
348  * Requires:
349  *\li	'lex' is a valid lexer.
350  *
351  *\li	'lex' has an input source.
352  *
353  *\li	'tokenp' points to a valid token.
354  *
355  *\li	There is no ungotten token already (at most one may be pending).
356  */
357 
358 void
359 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
360 /*%<
361  * Set 'r' to a region containing the text of the last token returned.
362  *
363  * Requires:
364  *\li	'lex' is a valid lexer.
365  *
366  *\li	'lex' has an input source.
367  *
368  *\li	'tokenp' points to a valid token.
369  *
370  *\li	A token has been gotten and not ungotten.
371  */
372 
373 char *
374 isc_lex_getsourcename(isc_lex_t *lex);
375 /*%<
376  * Return the input source name.
377  *
378  * Requires:
379  *\li	'lex' is a valid lexer.
380  *
381  * Returns:
382  *\li	The source name, or NULL if there is no current source.
383  *\li	The result is valid while the current input source exists.
384  */
385 
386 unsigned long
387 isc_lex_getsourceline(isc_lex_t *lex);
388 /*%<
389  * Return the input source line number.
390  *
391  * Requires:
392  *\li	'lex' is a valid lexer.
393  *
394  * Returns:
395  *\li	The current line number, or 0 if there is no current source.
396  */
397 
398 isc_result_t
399 isc_lex_setsourcename(isc_lex_t *lex, const char *name);
400 /*%<
401  * Assign a new name to the input source.
402  *
403  * Requires:
404  *
405  *\li	'lex' is a valid lexer.
406  *
407  * Returns:
408  *\li	#ISC_R_SUCCESS
409  *\li	#ISC_R_NOMEMORY
410  *\li	#ISC_R_NOTFOUND - there are no sources.
411  */
412 
413 isc_result_t
414 isc_lex_setsourceline(isc_lex_t *lex, unsigned long line);
415 /*%<
416  * Assign a new line number to the input source. This can be used
417  * when parsing a buffer that's been excerpted from the middle of a file,
418  * allowing logged messages to display the correct line number,
419  * rather than the line number within the buffer.
420  *
421  * Requires:
422  *
423  *\li	'lex' is a valid lexer.
424  *
425  * Returns:
426  *\li	#ISC_R_SUCCESS
427  *\li	#ISC_R_NOTFOUND - there are no sources.
428  */
429 
430 bool
431 isc_lex_isfile(isc_lex_t *lex);
432 /*%<
433  * Return whether the current input source is a file.
434  *
435  * Requires:
436  *\li	'lex' is a valid lexer.
437  *
438  * Returns:
439  *\li	true if the current input is a file,
440  *\li	false otherwise.
441  */
442 
443 ISC_LANG_ENDDECLS
444 
445 #endif /* ISC_LEX_H */
446