1 /*
2  * Copyright (c) 2002-2012 Balabit
3  * Copyright (c) 1998-2012 Balázs Scheidler
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18  *
19  * As an additional exemption you are allowed to compile & link against the
20  * OpenSSL libraries as published by the OpenSSL project. See the file
21  * COPYING for details.
22  *
23  */
24 
25 #ifndef CFG_LEXER_H_INCLUDED
26 #define CFG_LEXER_H_INCLUDED 1
27 
28 #include "syslog-ng.h"
29 #include "cfg-args.h"
30 #include "cfg-block-generator.h"
31 #include "messages.h"
32 
33 #include <stdio.h>
34 #include <setjmp.h>
35 
36 /* this module provides a higher level encapsulation for the configuration
37  * file lexer. */
38 
/* Number of CfgIncludeLevel slots in the include_stack array of
 * struct _CfgLexer, i.e. the capacity reserved for nested includes. */
39 #define MAX_INCLUDE_DEPTH 256
40 
/* Forward declarations so the types can be referenced by pointer before
 * (or without) their full definition.  struct _CfgIncludeLevel is defined
 * further down in this header; struct _CfgTokenBlock is only used through
 * pointers here. */
41 typedef struct _CfgIncludeLevel CfgIncludeLevel;
42 typedef struct _CfgTokenBlock CfgTokenBlock;
43 
44 /*
45  * YYLTYPE/YYSTYPE naming conventions
46  *
47  * We use multiple bison generated grammars (basically one for each plugin)
48  * with a single lexer and the same location/symbol types.  Earlier we could
49  * easily just define YYLTYPE and YYSTYPE here and all generated grammars
50  * and the lexer used it properly.  With the advent of the `api.prefix'
51  * option for the grammars (and the deprecation of the old `name-prefix'
52  * behaviors), we needed to complicate things somewhat.
53  *
54  * We have three contexts where we need to use proper type names:
55  *   - in our own code where we might need to use location information (e.g. YYLTYPE)
56  *   - in the generated lexer,
57  *   - in the generated grammars
58  *
59  * Our own code
60  * ============
61  * Because of the various #define/typedef games that generated code uses, I
62  * decided that our own code should not use the names YYLTYPE/YYSTYPE
63  * directly.  In those cases we use CFG_LTYPE and CFG_STYPE to indicate that
64  * these are types related to our configuration language.  None of the
65  * grammars use the "CFG_" prefix (and should not in the future either).
66  *
67  * The generated lexer
68  * ===================
69  *
70  * The lexer gets these types by us #define-ing YYLTYPE/YYSTYPE to
71  * CFG_LTYPE/CFG_STYPE but only privately, i.e. these definitions should not
72  * be published to the rest of the codebase.  We do this by defining these in
73  * implementation files and not in the headers.  This is because some of the
74  * code would try to #ifdef based on the existence of these macros.
75  *
76  * The generated grammars
77  * ======================
78  * The grammars each have an api.location.type and api.value.type options in
79  * their .y files, which use the names CFG_LTYPE and CFG_STYPE respectively.
80  * The generated code uses YYLTYPE and YYSTYPE internally (defined as
81  * macros), but because of the previous points this does not create a
82  * conflict.
83  */
84 
85 /* the location type to carry location information from the lexer to the grammar
 *
 * Judging by the field names, the four int members delimit a span by the
 * (line, column) of its start and of its end.
 * NOTE(review): the names mirror bison's default location struct; whether
 * lines/columns are 0- or 1-based is not visible here -- confirm in the lexer.
 */
86 typedef struct CFG_LTYPE
87 {
88   int first_line;
89   int first_column;
90   int last_line;
91   int last_column;
  /* presumably the include stack entry the span was read from -- confirm;
   * ownership of the pointed-to level is not expressed in this header */
92   CfgIncludeLevel *level;
93 } CFG_LTYPE;
94 
95 /* symbol type that carries token related information to the grammar
 *
 * This is a tagged union: `type' is the tag and the anonymous union holds
 * the payload.  All union members share the same storage, so only the
 * member selected by `type' is meaningful at any time.
 */
96 typedef struct CFG_STYPE
97 {
98   /* one of LL_ types that indicates which field is being used */
99   int type;
  /* anonymous union (C11): members are accessed directly as
   * value.token, value.num, etc. */
100   union
101   {
102     gint token;
103     gint64 num;
104     double fnum;
    /* NOTE(review): whether cptr/ptr/node own their pointee is not stated
     * here; see cfg_lexer_free_token() for the release path -- confirm */
105     char *cptr;
106     void *ptr;
107     gpointer node;
108   };
109 } CFG_STYPE;
110 
/* Keyword status constants; presumably the values stored in
 * CfgLexerKeyword.kw_status -- confirm against the keyword lookup code. */
111 #define KWS_NORMAL        0
112 #define KWS_OBSOLETE      1
113 
114 /* used to describe a syslog-ng keyword
 *
 * Both string members are pointers to const data, so a table of these can
 * be built from string literals.
 */
115 typedef struct _CfgLexerKeyword
116 {
  /* keyword text */
117   const gchar *kw_name;
  /* token value associated with the keyword */
118   gint  kw_token;
  /* presumably one of the KWS_* constants -- confirm */
119   gint  kw_status;
  /* explanatory text; NOTE(review): likely only relevant for
   * KWS_OBSOLETE entries -- confirm */
120   const gchar *kw_explain;
121 } CfgLexerKeyword;
122 
/* NOTE(review): looks like a sentinel kw_name that terminates a
 * CfgLexerKeyword table; the value cannot collide with a real keyword
 * made of identifier characters -- confirm against the lookup code. */
123 #define CFG_KEYWORD_STOP "@!#?"
124 
125 
126 
127 /* structure that describes a given location in the include stack
 *
 * An array of MAX_INCLUDE_DEPTH of these is embedded in struct _CfgLexer
 * (include_stack).  Each entry is either a file based or a buffer based
 * include; the anonymous union below carries the per-kind state.
 */
128 struct _CfgIncludeLevel
129 {
  /* kind of include; presumably selects which union member below is
   * valid (CFGI_FILE -> file, CFGI_BUFFER -> buffer) -- confirm */
130   enum
131   {
132     CFGI_FILE,
133     CFGI_BUFFER,
134   } include_type;
135   /* include file or block name */
136   gchar *name;
137   union
138   {
139     struct
140     {
      /* list of files; NOTE(review): presumably the files still to be
       * processed at this level (e.g. a directory/glob include) -- confirm */
141       GSList *files;
      /* stdio stream of the include file */
142       FILE *include_file;
143     } file;
144     struct
145     {
146       /* the lexer mutates content, so save it for error reporting */
147       gchar *original_content;
148       /* buffer for the lexer */
149       gchar *content;
      /* size of content; NOTE(review): whether this counts trailing
       * terminator bytes required by flex is not visible here -- confirm */
150       gsize content_length;
151     } buffer;
152   };
  /* location tracked for this level */
153   CFG_LTYPE lloc;
  /* flex buffer state of this level; struct yy_buffer_state is only
   * referenced by pointer, its definition is not needed in this header */
154   struct yy_buffer_state *yybuf;
155 };
156 
157 /* Lexer class that encapsulates a flex generated lexer. This can be
158  * instantiated multiple times in parallel, i.e.  doesn't use any global
159  * state as we're using the "reentrant" code by flex
 *
 * Note that include_stack is embedded by value, so every instance carries
 * MAX_INCLUDE_DEPTH CfgIncludeLevel entries; instances are created with
 * cfg_lexer_new()/cfg_lexer_new_buffer() and released with cfg_lexer_free().
160  */
161 struct _CfgLexer
162 {
163   /* flex state, not using yyscan_t as it is not defined */
164   gpointer state;
  /* jump buffer (see <setjmp.h>); NOTE(review): presumably the longjmp()
   * target used when the generated lexer reports a fatal error -- confirm */
165   jmp_buf fatal_error;
166   CfgIncludeLevel include_stack[MAX_INCLUDE_DEPTH];
  /* see cfg_lexer_push_context()/cfg_lexer_pop_context() */
167   GList *context_stack;
  /* presumably the index of the current entry in include_stack -- confirm */
168   gint include_depth;
  /* same shape as the block_boundary argument of
   * cfg_lexer_start_block_state() */
169   gchar block_boundary[2];
170   gint brace_count;
171   gint tokenize_eol;
  /* see cfg_lexer_inject_token_block() */
172   GList *token_blocks;
173   GString *string_buffer;
  /* passed in by the caller of cfg_lexer_new(); NOTE(review): ownership
   * (caller vs lexer) is not expressed here -- confirm */
174   GString *preprocess_output;
175   gint preprocess_suppress_tokens;
176   GString *token_pretext;
177   GString *token_text;
178   GlobalConfig *cfg;
  /* NOTE(review): gboolean is GLib's typedef of gint, so these are 1-bit
   * bit-fields of a plain int type, whose signedness is
   * implementation-defined in C; a stored TRUE may read back as -1.
   * Test them for zero/non-zero only, never with "== TRUE" -- confirm
   * that all users follow this. */
179   gboolean non_pragma_seen:1, ignore_pragma:1;
180 };
181 
182 /* pattern buffer */
/* NOTE(review): by its name this pushes the token in yylval back so that
 * it is returned again; whether yylval is copied or consumed is not
 * visible from the prototype -- confirm. */
183 void cfg_lexer_unput_token(CfgLexer *self, CFG_STYPE *yylval);
184 
/* block_boundary is declared as a 2-element array of const gchar (as a
 * parameter this is a pointer), matching the block_boundary field of
 * struct _CfgLexer.  NOTE(review): presumably the opening and closing
 * character of the block -- confirm. */
185 void cfg_lexer_start_block_state(CfgLexer *self, const gchar block_boundary[2]);
186 
/* NOTE(review): presumably these append to self->string_buffer; str is
 * passed together with an explicit length, so it is likely not required
 * to be NUL terminated -- confirm. */
187 void cfg_lexer_append_string(CfgLexer *self, int length, char *str);
188 void cfg_lexer_append_char(CfgLexer *self, char c);
189 
190 /* keyword handling */
/* keywords points to a CfgLexerKeyword table; NOTE(review): the table is
 * presumably terminated by an entry named CFG_KEYWORD_STOP and must
 * outlive its use by the lexer -- confirm. */
191 void cfg_lexer_set_current_keywords(CfgLexer *self, CfgLexerKeyword *keywords);
/* Returns a non-const string for the keyword token kw; NOTE(review):
 * who owns the returned string is not visible from the prototype -- confirm
 * before freeing or modifying it. */
192 char *cfg_lexer_get_keyword_string(CfgLexer *self, int kw);
/* Looks up `token' and returns an int; NOTE(review): presumably the
 * returned value is the token code handed to the grammar, with yylval
 * filled in and yylloc used for diagnostics -- confirm. */
193 int cfg_lexer_lookup_keyword(CfgLexer *self, CFG_STYPE *yylval, CFG_LTYPE *yylloc, const char *token);
194 
195 /* include files */
/* All include functions return gboolean; NOTE(review): presumably TRUE on
 * success and FALSE on failure -- confirm. */
196 gboolean cfg_lexer_start_next_include(CfgLexer *self);
197 gboolean cfg_lexer_include_file(CfgLexer *self, const gchar *filename);
/* NOTE(review): length is signed (gssize) here but unsigned (gsize) in the
 * _without_backtick_substitution variant below; presumably a negative
 * length means "buffer is NUL terminated" -- confirm.  The buffer is taken
 * as const, so the caller's data is not modified through this pointer. */
198 gboolean cfg_lexer_include_buffer(CfgLexer *self, const gchar *name, const gchar *buffer, gssize length);
199 gboolean cfg_lexer_include_buffer_without_backtick_substitution(CfgLexer *self,
200     const gchar *name, const gchar *buffer, gsize length);
/* Formats yylloc into the caller supplied buffer buf of buf_len bytes;
 * NOTE(review): the returned pointer is presumably buf itself -- confirm. */
201 const gchar *cfg_lexer_format_location(CfgLexer *self, CFG_LTYPE *yylloc, gchar *buf, gsize buf_len);
/* Same location information wrapped in an EVTTAG (see messages.h). */
202 EVTTAG *cfg_lexer_format_location_tag(CfgLexer *self, CFG_LTYPE *yylloc);
203 
204 /* context tracking */
/* push/pop are expected to be called in matching pairs; the entries
 * presumably live in self->context_stack -- confirm.
 * NOTE(review): whether keywords and desc are copied or merely referenced
 * is not visible from the prototype -- confirm their required lifetime. */
205 void cfg_lexer_push_context(CfgLexer *self, gint context, CfgLexerKeyword *keywords, const gchar *desc);
206 void cfg_lexer_pop_context(CfgLexer *self);
/* Accessors; presumably they report on the most recently pushed context
 * -- confirm, including what is returned when no context is pushed. */
207 const gchar *cfg_lexer_get_context_description(CfgLexer *self);
208 gint cfg_lexer_get_context_type(CfgLexer *self);
209 
210 /* token blocks */
/* NOTE(review): presumably the lexer takes ownership of block and returns
 * its tokens from cfg_lexer_lex() before reading further input -- confirm. */
211 void cfg_lexer_inject_token_block(CfgLexer *self, CfgTokenBlock *block);
212 
/* Main entry point: returns an int token code and fills yylval/yylloc
 * for the grammar (the parameter names follow the bison calling
 * convention). */
213 int cfg_lexer_lex(CfgLexer *self, CFG_STYPE *yylval, CFG_LTYPE *yylloc);
/* NOTE(review): presumably releases storage referenced by token depending
 * on token->type, not the CFG_STYPE struct itself -- confirm. */
214 void cfg_lexer_free_token(CFG_STYPE *token);
215 
/* Constructors: one reads from a stdio stream, the other from an in-memory
 * buffer of `length' bytes.  Instances are released with cfg_lexer_free().
 * NOTE(review): whether the lexer closes `file' on free is not visible
 * here -- confirm. */
216 CfgLexer *cfg_lexer_new(GlobalConfig *cfg, FILE *file, const gchar *filename, GString *preprocess_output);
217 CfgLexer *cfg_lexer_new_buffer(GlobalConfig *cfg, const gchar *buffer, gsize length);
218 void  cfg_lexer_free(CfgLexer *self);
219 
/* Mapping between context type ids and their names; these take no lexer
 * instance.  NOTE(review): the return values for unknown names/ids are not
 * visible here -- confirm. */
220 gint cfg_lexer_lookup_context_type_by_name(const gchar *name);
221 const gchar *cfg_lexer_lookup_context_name_by_type(gint id);
222 
223 /* token block objects */
224 
/* Two ways of adding a token; NOTE(review): by their names, the
 * "_and_consume_" variant presumably takes over the contents of token,
 * while the plain variant copies it -- confirm before reusing or freeing
 * the token after the call. */
225 void cfg_token_block_add_and_consume_token(CfgTokenBlock *self, CFG_STYPE *token);
226 void cfg_token_block_add_token(CfgTokenBlock *self, CFG_STYPE *token);
/* NOTE(review): presumably returns the next unread token of the block and
 * NULL when exhausted; the returned pointer likely stays owned by the
 * block -- confirm. */
227 CFG_STYPE *cfg_token_block_get_token(CfgTokenBlock *self);
228 
/* Allocation and release of token blocks. */
229 CfgTokenBlock *cfg_token_block_new(void);
230 void cfg_token_block_free(CfgTokenBlock *self);
231 
/* Registers the block generator gen in the given plugin context
 * (CfgBlockGenerator comes from cfg-block-generator.h). */
232 void cfg_lexer_register_generator_plugin(PluginContext *context, CfgBlockGenerator *gen);
233 
/* Expands to a call of cfg_lexer_error_quark(); NOTE(review): this follows
 * the GLib convention for a GError domain, with enum CfgLexerError below
 * supplying the error codes -- confirm. */
234 #define CFG_LEXER_ERROR cfg_lexer_error_quark()
235 
236 GQuark cfg_lexer_error_quark(void);
237 
/* Lexer error codes.  No explicit values are assigned, so the enumerators
 * are numbered 0, 1, 2 in declaration order; inserting a new one in the
 * middle renumbers the ones that follow. */
238 enum CfgLexerError
239 {
240   CFG_LEXER_MISSING_BACKTICK_PAIR,
241   CFG_LEXER_CANNOT_REPRESENT_APOSTROPHES_IN_QSTRINGS,
242   CFG_LEXER_BACKTICKS_CANT_BE_SUBSTITUTED_AFTER_BACKSLASH,
243 };
244 
245 #endif
246