xref: /minix/external/bsd/bind/dist/lib/isc/include/isc/lex.h (revision 00b67f09)
1 /*	$NetBSD: lex.h,v 1.4 2014/12/10 04:38:00 christos Exp $	*/
2 
3 /*
4  * Copyright (C) 2004, 2005, 2007, 2008  Internet Systems Consortium, Inc. ("ISC")
5  * Copyright (C) 1998-2002  Internet Software Consortium.
6  *
7  * Permission to use, copy, modify, and/or distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17  * PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /* Id: lex.h,v 1.37 2008/05/30 23:47:01 tbox Exp  */
21 
22 #ifndef ISC_LEX_H
23 #define ISC_LEX_H 1
24 
25 /*****
26  ***** Module Info
27  *****/
28 
29 /*! \file isc/lex.h
30  * \brief The "lex" module provides a lightweight tokenizer.  It can operate
31  * on files or buffers, and can handle "include".  It is designed for
32  * parsing of DNS master files and the BIND configuration file, but
33  * should be general enough to tokenize other things, e.g. HTTP.
34  *
35  * \li MP:
36  *	No synchronization is provided.  Clients must ensure exclusive
37  *	access.
38  *
39  * \li Reliability:
40  *	No anticipated impact.
41  *
42  * \li Resources:
43  *	TBS
44  *
45  * \li Security:
46  *	No anticipated impact.
47  *
48  * \li Standards:
49  * 	None.
50  */
51 
52 /***
53  *** Imports
54  ***/
55 
56 #include <stdio.h>
57 
58 #include <isc/lang.h>
59 #include <isc/region.h>
60 #include <isc/types.h>
61 
62 ISC_LANG_BEGINDECLS
63 
64 /***
65  *** Options
66  ***/
67 
68 /*@{*/
69 /*!
70  * Various options for isc_lex_gettoken().
71  */
72 
73 #define ISC_LEXOPT_EOL			0x01	/*%< Want end-of-line token. */
74 #define ISC_LEXOPT_EOF			0x02	/*%< Want end-of-file token instead of an #ISC_R_EOF result. */
75 #define ISC_LEXOPT_INITIALWS		0x04	/*%< Want initial whitespace token. */
76 #define ISC_LEXOPT_NUMBER		0x08	/*%< Recognize numbers. */
77 #define ISC_LEXOPT_QSTRING		0x10	/*%< Recognize quoted strings (qstrings). */
78 /*@}*/
79 
80 /*@{*/
81 /*!
82  * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in
83  * the DNS master file format.  If this option is set, then the
84  * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when
85  * the paren count is > 0.  To use this option, '(' and ')' must be special
86  * characters.
87  */
88 #define ISC_LEXOPT_DNSMULTILINE		0x20	/*%< Handle '(' and ')'. */
89 #define ISC_LEXOPT_NOMORE		0x40	/*%< Want "no more" token instead of an #ISC_R_NOMORE result. */
90 
91 #define ISC_LEXOPT_CNUMBER		0x80    /*%< Recognize octal and hex. */
92 #define ISC_LEXOPT_ESCAPE		0x100	/*%< Recognize escapes. */
93 #define ISC_LEXOPT_QSTRINGMULTILINE	0x200	/*%< Allow multiline "" strings. */
94 #define ISC_LEXOPT_OCTAL		0x400	/*%< Expect an octal number. */
95 /*@}*/
96 /*@{*/
97 /*!
98  * Various commenting styles, which may be changed at any time with
99  * isc_lex_setcomments().
100  */
101 
102 #define ISC_LEXCOMMENT_C		0x01	/*%< C style; presumably slash-star blocks -- TODO confirm in lex.c. */
103 #define ISC_LEXCOMMENT_CPLUSPLUS	0x02	/*%< C++ style; presumably '//' to end of line -- TODO confirm. */
104 #define ISC_LEXCOMMENT_SHELL		0x04	/*%< Shell style; presumably '#' to end of line -- TODO confirm. */
105 #define ISC_LEXCOMMENT_DNSMASTERFILE	0x08	/*%< DNS master file style; presumably ';' to end of line -- TODO confirm. */
106 /*@}*/
107 
108 /***
109  *** Types
110  ***/
111 
112 /*! Lex */
113 
114 typedef char isc_lexspecials_t[256];	/*%< Specials table used by isc_lex_getspecials() and isc_lex_setspecials(); presumably indexed by character code -- TODO confirm. */
115 
116 /* Tokens */
117 
118 typedef enum {
119 	isc_tokentype_unknown = 0,
120 	isc_tokentype_string = 1,	/*%< String. */
121 	isc_tokentype_number = 2,	/*%< Number; see #ISC_LEXOPT_NUMBER. */
122 	isc_tokentype_qstring = 3,	/*%< Quoted string; see #ISC_LEXOPT_QSTRING. */
123 	isc_tokentype_eol = 4,	/*%< End of line; see #ISC_LEXOPT_EOL. */
124 	isc_tokentype_eof = 5,	/*%< End of file; see #ISC_LEXOPT_EOF. */
125 	isc_tokentype_initialws = 6,	/*%< Initial whitespace; see #ISC_LEXOPT_INITIALWS. */
126 	isc_tokentype_special = 7,	/*%< Special character; see isc_lex_setspecials(). */
127 	isc_tokentype_nomore = 8	/*%< No more input; see #ISC_LEXOPT_NOMORE. */
128 } isc_tokentype_t;	/*%< Type tag stored in the 'type' member of isc_token_t. */
129 
130 typedef union {
131 	char				as_char;
132 	unsigned long			as_ulong;
133 	isc_region_t			as_region;
134 	isc_textregion_t		as_textregion;
135 	void *				as_pointer;
136 } isc_tokenvalue_t;	/*%< Payload of isc_token_t; which member is meaningful presumably depends on the token type -- TODO confirm in lex.c. */
137 
138 typedef struct isc_token {
139 	isc_tokentype_t			type;	/*%< Kind of token. */
140 	isc_tokenvalue_t		value;	/*%< Token payload (union of possible representations). */
141 } isc_token_t;	/*%< Token filled in by isc_lex_gettoken() and the isc_lex_get*token() helpers. */
142 
143 /***
144  *** Functions
145  ***/
146 
147 isc_result_t
148 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp);
149 /*%<
150  * Create a lexer.
151  *
152  * 'max_token' is a hint of the number of bytes in the largest token.
153  *
154  * Requires:
155  *\li	'lexp' is a valid pointer (the new lexer is stored in '*lexp').
156  *
157  *\li	max_token > 0.
158  *
159  * Ensures:
160  *\li	On success, *lexp is attached to the newly created lexer.
161  *
162  * Returns:
163  *\li	#ISC_R_SUCCESS
164  *\li	#ISC_R_NOMEMORY
165  */
166 
167 void
168 isc_lex_destroy(isc_lex_t **lexp);
169 /*%<
170  * Destroy the lexer.
171  *
172  * Requires:
173  *\li	'*lexp' is a valid lexer.
174  *
175  * Ensures:
176  *\li	*lexp == NULL
177  */
178 
179 unsigned int
180 isc_lex_getcomments(isc_lex_t *lex);
181 /*%<
182  * Return the current lexer commenting styles.
183  *
184  * Requires:
185  *\li	'lex' is a valid lexer.
186  *
187  * Returns:
188  *\li	The commenting styles which are currently allowed.
189  */
190 
191 void
192 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
193 /*%<
194  * Set allowed lexer commenting styles.
195  *
196  * Requires:
197  *\li	'lex' is a valid lexer.
198  *
199  *\li	'comments' has meaningful values.
200  */
201 
202 void
203 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials);
204 /*%<
205  * Put the current list of specials into 'specials'.
206  *
207  * Requires:
208  *\li	'lex' is a valid lexer.
209  */
210 
211 void
212 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
213 /*%<
214  * The characters in 'specials' are returned as tokens.  Along with
215  * whitespace, they delimit strings and numbers.
216  *
217  * Note:
218  *\li	Comment processing takes precedence over special character
219  *	recognition.
220  *
221  * Requires:
222  *\li	'lex' is a valid lexer.
223  */
224 
225 isc_result_t
226 isc_lex_openfile(isc_lex_t *lex, const char *filename);
227 /*%<
228  * Open 'filename' and make it the current input source for 'lex'.
229  *
230  * Requires:
231  *\li	'lex' is a valid lexer.
232  *
233  *\li	filename is a valid C string.
234  *
235  * Returns:
236  *\li	#ISC_R_SUCCESS
237  *\li	#ISC_R_NOMEMORY			Out of memory
238  *\li	#ISC_R_NOTFOUND			File not found
239  *\li	#ISC_R_NOPERM			No permission to open file
240  *\li	#ISC_R_FAILURE			Couldn't open file, not sure why
241  *\li	#ISC_R_UNEXPECTED
242  */
243 
244 isc_result_t
245 isc_lex_openstream(isc_lex_t *lex, FILE *stream);
246 /*%<
247  * Make 'stream' the current input source for 'lex'.
248  *
249  * Requires:
250  *\li	'lex' is a valid lexer.
251  *
252  *\li	'stream' is a valid C stream.
253  *
254  * Returns:
255  *\li	#ISC_R_SUCCESS
256  *\li	#ISC_R_NOMEMORY			Out of memory
257  */
258 
259 isc_result_t
260 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer);
261 /*%<
262  * Make 'buffer' the current input source for 'lex'.
263  *
264  * Requires:
265  *\li	'lex' is a valid lexer.
266  *
267  *\li	'buffer' is a valid buffer.
268  *
269  * Returns:
270  *\li	#ISC_R_SUCCESS
271  *\li	#ISC_R_NOMEMORY			Out of memory
272  */
273 
274 isc_result_t
275 isc_lex_close(isc_lex_t *lex);
276 /*%<
277  * Close the most recently opened object (i.e. file or buffer).
278  *
279  * Returns:
280  *\li	#ISC_R_SUCCESS
281  *\li	#ISC_R_NOMORE			No more input sources
282  */
283 
284 isc_result_t
285 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
286 /*%<
287  * Get the next token.
288  *
289  * Requires:
290  *\li	'lex' is a valid lexer.
291  *
292  *\li	'lex' has an input source.
293  *
294  *\li	'options' contains valid options.
295  *
296  *\li	'tokenp' is a valid pointer.
297  *
298  * Returns:
299  *\li	#ISC_R_SUCCESS
300  *\li	#ISC_R_UNEXPECTEDEND
301  *\li	#ISC_R_NOMEMORY
302  *
303  *	These two results are returned only if their corresponding lexer
304  *	options are not set.
305  *
306  *\li	#ISC_R_EOF			End of input source
307  *\li	#ISC_R_NOMORE			No more input sources
308  */
309 
310 isc_result_t
311 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
312 		       isc_tokentype_t expect, isc_boolean_t eol);
313 /*%<
314  * Get the next token from a DNS master file type stream.  This is a
315  * convenience function that sets appropriate options and handles quoted
316  * strings and end of line correctly for master files.  It also ungets
317  * unexpected tokens.
318  *
319  * Requires:
320  *\li	'lex' is a valid lexer.
321  *
322  *\li	'token' is a valid pointer
323  *
324  * Returns:
325  *
326  * \li	any return code from isc_lex_gettoken().
327  */
328 
329 isc_result_t
330 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol);
331 /*%<
332  * Get the next token from a DNS master file type stream.  This is a
333  * convenience function that sets appropriate options and handles end
334  * of line correctly for master files.  It also ungets unexpected tokens.
335  *
336  * Requires:
337  *\li	'lex' is a valid lexer.
338  *
339  *\li	'token' is a valid pointer
340  *
341  * Returns:
342  *
343  * \li	any return code from isc_lex_gettoken().
344  */
345 
346 void
347 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
348 /*%<
349  * Unget the current token.
350  *
351  * Requires:
352  *\li	'lex' is a valid lexer.
353  *
354  *\li	'lex' has an input source.
355  *
356  *\li	'tokenp' points to a valid token.
357  *
358  *\li	There is no ungotten token already.
359  */
360 
361 void
362 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
363 /*%<
364  * Stores in 'r' a region containing the text of the last token returned.
365  *
366  * Requires:
367  *\li	'lex' is a valid lexer.
368  *
369  *\li	'lex' has an input source.
370  *
371  *\li	'tokenp' points to a valid token.
372  *
373  *\li	A token has been gotten and not ungotten.
374  */
375 
376 char *
377 isc_lex_getsourcename(isc_lex_t *lex);
378 /*%<
379  * Return the input source name.
380  *
381  * Requires:
382  *\li	'lex' is a valid lexer.
383  *
384  * Returns:
385  * \li	source name or NULL if no current source.
386  *\li	result valid while current input source exists.
387  */
388 
389 
390 unsigned long
391 isc_lex_getsourceline(isc_lex_t *lex);
392 /*%<
393  * Return the input source line number.
394  *
395  * Requires:
396  *\li	'lex' is a valid lexer.
397  *
398  * Returns:
399  *\li 	Current line number or 0 if no current source.
400  */
401 
402 isc_result_t
403 isc_lex_setsourcename(isc_lex_t *lex, const char *name);
404 /*%<
405  * Assigns a new name to the input source.
406  *
407  * Requires:
408  *
409  * \li	'lex' is a valid lexer.
410  *
411  * Returns:
412  * \li	#ISC_R_SUCCESS
413  * \li	#ISC_R_NOMEMORY
414  * \li	#ISC_R_NOTFOUND - there are no sources.
415  */
416 
417 isc_boolean_t
418 isc_lex_isfile(isc_lex_t *lex);
419 /*%<
420  * Return whether the current input source is a file.
421  *
422  * Requires:
423  *\li	'lex' is a valid lexer.
424  *
425  * Returns:
426  * \li	#ISC_TRUE if the current input is a file,
427  *\li	#ISC_FALSE otherwise.
428  */
429 
430 
431 ISC_LANG_ENDDECLS
432 
433 #endif /* ISC_LEX_H */
434