1 /*
2 Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3 
4 Redistribution and use in source and binary forms, with or without modification,
5 are permitted provided that the following conditions are met:
6 
7 1. Redistributions of source code must retain the above copyright notice, this
8 list of conditions and the following disclaimer.
9 
10 2. Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the documentation and/or
12 other materials provided with the distribution.
13 
14 3. Neither the name of the copyright holder nor the names of its contributors
15 may be used to endorse or promote products derived from this software without
16 specific prior written permission.
17 
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #ifndef YR_COMPILER_H
31 #define YR_COMPILER_H
32 
33 #include <setjmp.h>
34 #include <stdio.h>
35 #include <yara/ahocorasick.h>
36 #include <yara/arena.h>
37 #include <yara/filemap.h>
38 #include <yara/hash.h>
39 #include <yara/utils.h>
40 
// Severity of a diagnostic. NOTE(review): presumably these are the values
// received by YR_COMPILER_CALLBACK_FUNC in its "error_level" argument --
// confirm against the code that invokes the callback.
#define YARA_ERROR_LEVEL_ERROR   0
#define YARA_ERROR_LEVEL_WARNING 1
43 
// Expression types. They are used as flags, which is why every value other
// than EXPRESSION_TYPE_UNKNOWN has exactly one bit set.
#define EXPRESSION_TYPE_UNKNOWN 0x00
#define EXPRESSION_TYPE_BOOLEAN 0x01
#define EXPRESSION_TYPE_INTEGER 0x02
#define EXPRESSION_TYPE_STRING  0x04
#define EXPRESSION_TYPE_REGEXP  0x08
#define EXPRESSION_TYPE_OBJECT  0x10
#define EXPRESSION_TYPE_FLOAT   0x20
52 
// Everything the compiler generates during compilation is stored in an
// arena. The arena is made of several buffers and each buffer holds a
// different type of data; the identifiers below name the purpose of each
// buffer.
#define YR_NAMESPACES_TABLE         0
#define YR_RULES_TABLE              1
#define YR_METAS_TABLE              2
#define YR_STRINGS_TABLE            3
#define YR_EXTERNAL_VARIABLES_TABLE 4
#define YR_SZ_POOL                  5
#define YR_CODE_SECTION             6
#define YR_RE_CODE_SECTION          7
#define YR_AC_TRANSITION_TABLE      8
#define YR_AC_STATE_MATCHES_TABLE   9
#define YR_AC_STATE_MATCHES_POOL    10
#define YR_SUMMARY_SECTION          11

// Number of buffers used by the compiler. It must be kept equal to the number
// of identifiers in the list above (i.e. the highest identifier plus one).
#define YR_NUM_SECTIONS 12
72 
// How many variables a loop uses for its own purposes. Variables defined by
// the user are not counted here.
#define YR_INTERNAL_LOOP_VARS 3
76 
77 typedef struct _YR_EXPRESSION
78 {
79   int type;
80 
81   union
82   {
83     int64_t integer;
84     YR_OBJECT* object;
85     YR_ARENA_REF sized_string_ref;
86   } value;
87 
88   // An expression can have an associated identifier, if "ptr" is not NULL it
89   // points to the identifier name, if it is NULL, then "ref" holds a reference
90   // to the identifier within YR_SZ_POOL. When the identifier is in YR_SZ_POOL
91   // a pointer can't be used as the YR_SZ_POOL can be moved to a different
92   // memory location.
93   struct
94   {
95     const char* ptr;
96     YR_ARENA_REF ref;
97   } identifier;
98 
99 } YR_EXPRESSION;
100 
101 typedef void (*YR_COMPILER_CALLBACK_FUNC)(
102     int error_level,
103     const char* file_name,
104     int line_number,
105     const YR_RULE* rule,
106     const char* message,
107     void* user_data);
108 
// Signature of the callback used for resolving includes. It receives the
// name being included, the file name and namespace of the rule file doing
// the include, and an opaque user pointer, and returns a string.
// NOTE(review): presumably the returned string is the content of the included
// file -- confirm against the implementation.
typedef const char* (*YR_COMPILER_INCLUDE_CALLBACK_FUNC)(
    const char* include_name,
    const char* calling_rule_filename,
    const char* calling_rule_namespace,
    void* user_data);
114 
// Signature of the callback that receives "callback_result_ptr" together
// with an opaque user pointer. NOTE(review): judging by the names, the
// pointer is a value previously returned by an include callback and this
// function releases it -- confirm.
typedef void (*YR_COMPILER_INCLUDE_FREE_FUNC)(
    const char* callback_result_ptr,
    void* user_data);
118 
119 typedef void (*YR_COMPILER_RE_AST_CALLBACK_FUNC)(
120     const YR_RULE* rule,
121     const char* string_identifier,
122     const RE_AST* re_ast,
123     void* user_data);
124 
125 typedef struct _YR_FIXUP
126 {
127   YR_ARENA_REF ref;
128   struct _YR_FIXUP* next;
129 
130 } YR_FIXUP;
131 
132 // Each "for" loop in the condition has an associated context which holds
133 // information about loop, like the target address for the jump instruction
134 // that goes back to the beginning of the loop and the local variables used
135 // by the loop.
136 
137 typedef struct _YR_LOOP_CONTEXT
138 {
139   // Reference indicating the the place in the code where the loop starts. The
140   // loop goes back to this address on each iteration.
141   YR_ARENA_REF start_ref;
142 
143   // vars_count is the number of local variables defined by the loop, and vars
144   // is an array of expressions with the identifier and type for each of those
145   // local variables.
146   int vars_count;
147   YR_EXPRESSION vars[YR_MAX_LOOP_VARS];
148 
149   // vars_internal_count is the number of variables used by the loop which are
150   // not defined by the rule itself but that are necessary for keeping the
151   // loop's state. One example is the iteration counter.
152   int vars_internal_count;
153 } YR_LOOP_CONTEXT;
154 
155 typedef struct _YR_COMPILER
156 {
157   // Arena that contains the data generated by the compiled. The arena has
158   // the following buffers:
159   //
160   //   YR_SUMMARY_SECTION:
161   //      A YR_SUMMARY struct.
162   //   YR_RULES_TABLE:
163   //      An array of YR_RULE structures, one per each rule.
164   //   YR_STRINGS_TABLE:
165   //      An array of YR_STRING structures, one per each string.
166   //   YR_METAS_TABLE:
167   //      An array of YR_META structures, one per each meta definition.
168   //   YR_NAMESPACES_TABLE:
169   //      An array of YR_NAMESPACE structures, one per each namespace.
170   //   YR_EXTERNAL_VARIABLES_TABLE:
171   //      An array of YR_EXTERNAL_VARIABLE structures, one per each external
172   //      variable defined.
173   //   YR_SZ_POOL:
174   //      A collection of null-terminated strings. This buffer contains
175   //      identifiers, literal strings, and in general any null-terminated
176   //      string referenced by other data structures.
177   //   YR_CODE_SECTION:
178   //      The code for the condition section of all the rules. This is the
179   //      code executed by yr_execute_code.
180   //   YR_RE_CODE_SECTION:
181   //      Similar to YR_CODE_SECTION, but it contains the code for regular
182   //      expressions. This is the code executed by yr_re_exec and
183   //      yr_re_fast_exec.
184   //   YR_AC_TRANSITION_TABLE:
185   //      An array of uint32_t containing the Aho-Corasick transition table.
186   //      See comment in _yr_ac_build_transition_table for details.
187   //   YR_AC_STATE_MATCHES_TABLE:
188   //      An array of uint32_t with the same number of items than the transition
189   //      table. If entry N in the transition table corresponds to some
190   //      Aho-Corasick state, the N-th item in this array has the index within
191   //      the matches pool where the list of matches for that state begins.
192   //   YR_AC_STATE_MATCHES_POOL:
193   //      An array of YR_AC_MATCH structures.
194   //
195   YR_ARENA* arena;
196 
197   // Index of the rule being compiled in the array of YR_RULE structures
198   // stored in YR_RULES_TABLE. If this is MAX_UINT32 the compiler is not
199   // parsing a rule.
200   uint32_t current_rule_idx;
201 
202   // Index of the rule that comes next during parsing.
203   uint32_t next_rule_idx;
204 
205   // Index of the string being compiled in the array of YR_STRING structures
206   // stored in YR_STRINGS_TABLE.
207   uint32_t current_string_idx;
208 
209   // Index of the current namespace in the array of YR_NAMESPACE structures
210   // stored in YR_NAMESPACES_TABLE.
211   uint32_t current_namespace_idx;
212 
213   // Index of the current meta in the array of YR_META structures stored in
214   // YR_METAS_TABLE.
215   uint32_t current_meta_idx;
216 
217   // Pointer to a YR_RULES structure that represents the compiled rules. This
218   // is what yr_compiler_get_rules returns. Once these rules are generated you
219   // can't call any of the yr_compiler_add_xxx functions.
220   YR_RULES* rules;
221 
222   int errors;
223   int current_line;
224   int last_error;
225   int last_error_line;
226 
227   jmp_buf error_recovery;
228 
229   YR_AC_AUTOMATON* automaton;
230   YR_HASH_TABLE* rules_table;
231   YR_HASH_TABLE* objects_table;
232   YR_HASH_TABLE* strings_table;
233 
234   // Hash table that contains all the strings that has been written to the
235   // YR_SZ_POOL buffer in the compiler's arena. Values in the hash table are
236   // the offset within the YR_SZ_POOL where the string resides. This allows to
237   // know is some string has already been written in order to reuse instead of
238   // writting it again.
239   YR_HASH_TABLE* sz_table;
240 
241   YR_FIXUP* fixup_stack_head;
242 
243   int num_namespaces;
244 
245   YR_LOOP_CONTEXT loop[YR_MAX_LOOP_NESTING];
246   int loop_index;
247   int loop_for_of_var_index;
248 
249   char* file_name_stack[YR_MAX_INCLUDE_DEPTH];
250   int file_name_stack_ptr;
251 
252   char last_error_extra_info[YR_MAX_COMPILER_ERROR_EXTRA_INFO];
253 
254   // This buffer is used by the lexer for accumulating text strings. Those
255   // strings are copied from flex's internal variables. lex_buf_ptr points to
256   // the end of the string and lex_buf_len contains the number of bytes that
257   // have been copied into lex_buf.
258   char lex_buf[YR_LEX_BUF_SIZE];
259   char* lex_buf_ptr;
260   unsigned short lex_buf_len;
261 
262   char include_base_dir[MAX_PATH];
263   void* user_data;
264   void* incl_clbk_user_data;
265   void* re_ast_clbk_user_data;
266 
267   YR_COMPILER_CALLBACK_FUNC callback;
268   YR_COMPILER_INCLUDE_CALLBACK_FUNC include_callback;
269   YR_COMPILER_INCLUDE_FREE_FUNC include_free;
270   YR_COMPILER_RE_AST_CALLBACK_FUNC re_ast_callback;
271   YR_ATOMS_CONFIG atoms_config;
272 
273 } YR_COMPILER;
274 
// Copies the string "info" into compiler->last_error_extra_info using
// strlcpy, bounded by the size of that buffer.
//
//   compiler  Expression yielding a pointer to an object that has a
//             last_error_extra_info character array (a YR_COMPILER*).
//   info      Null-terminated string to copy.
//
// Both arguments are parenthesized in the expansion, so any expression can
// be passed (e.g. "&compiler_struct" or "cond ? a : b"). "compiler" is
// evaluated once; its second appearance is inside sizeof.
//
// NOTE(review): the expansion ends with a semicolon. It is kept on purpose
// because call sites outside this header may depend on it, but it means the
// macro can't be used as the unbraced body of an "if" followed by "else".
#define yr_compiler_set_error_extra_info(compiler, info) \
  strlcpy(                                               \
      (compiler)->last_error_extra_info,                 \
      (info),                                            \
      sizeof((compiler)->last_error_extra_info));
280 
// Formats a message into compiler->last_error_extra_info using snprintf,
// bounded by the size of that buffer.
//
//   compiler  Expression yielding a pointer to an object that has a
//             last_error_extra_info character array (a YR_COMPILER*).
//   ...       printf-style format string followed by its arguments, if any.
//
// The format string is part of the variadic list so that a format without
// additional arguments also expands to a valid snprintf call; existing calls
// of the form (compiler, fmt, args...) expand exactly as before. "compiler"
// is parenthesized so that any expression can be passed, and it is evaluated
// once (its second appearance is inside sizeof).
//
// NOTE(review): the expansion ends with a semicolon. It is kept on purpose
// because call sites outside this header may depend on it, but it means the
// macro can't be used as the unbraced body of an "if" followed by "else".
#define yr_compiler_set_error_extra_info_fmt(compiler, ...) \
  snprintf(                                                 \
      (compiler)->last_error_extra_info,                    \
      sizeof((compiler)->last_error_extra_info),            \
      __VA_ARGS__);
287 
288 int _yr_compiler_push_file_name(YR_COMPILER* compiler, const char* file_name);
289 
290 void _yr_compiler_pop_file_name(YR_COMPILER* compiler);
291 
292 int _yr_compiler_get_var_frame(YR_COMPILER* compiler);
293 
294 const char* _yr_compiler_default_include_callback(
295     const char* include_name,
296     const char* calling_rule_filename,
297     const char* calling_rule_namespace,
298     void* user_data);
299 
300 YR_RULE* _yr_compiler_get_rule_by_idx(YR_COMPILER* compiler, uint32_t rule_idx);
301 
302 int _yr_compiler_store_string(
303     YR_COMPILER* compiler,
304     const char* string,
305     YR_ARENA_REF* ref);
306 
307 int _yr_compiler_store_data(
308     YR_COMPILER* compiler,
309     const void* data,
310     size_t data_length,
311     YR_ARENA_REF* ref);
312 
313 YR_API int yr_compiler_create(YR_COMPILER** compiler);
314 
315 YR_API void yr_compiler_destroy(YR_COMPILER* compiler);
316 
317 YR_API void yr_compiler_set_callback(
318     YR_COMPILER* compiler,
319     YR_COMPILER_CALLBACK_FUNC callback,
320     void* user_data);
321 
322 YR_API void yr_compiler_set_include_callback(
323     YR_COMPILER* compiler,
324     YR_COMPILER_INCLUDE_CALLBACK_FUNC include_callback,
325     YR_COMPILER_INCLUDE_FREE_FUNC include_free,
326     void* user_data);
327 
328 YR_API void yr_compiler_set_re_ast_callback(
329     YR_COMPILER* compiler,
330     YR_COMPILER_RE_AST_CALLBACK_FUNC re_ast_callback,
331     void* user_data);
332 
333 YR_API void yr_compiler_set_atom_quality_table(
334     YR_COMPILER* compiler,
335     const void* table,
336     int entries,
337     unsigned char warning_threshold);
338 
339 YR_API int yr_compiler_load_atom_quality_table(
340     YR_COMPILER* compiler,
341     const char* filename,
342     unsigned char warning_threshold);
343 
344 YR_API int yr_compiler_add_file(
345     YR_COMPILER* compiler,
346     FILE* rules_file,
347     const char* namespace_,
348     const char* file_name);
349 
350 YR_API int yr_compiler_add_fd(
351     YR_COMPILER* compiler,
352     YR_FILE_DESCRIPTOR rules_fd,
353     const char* namespace_,
354     const char* file_name);
355 
356 YR_API int yr_compiler_add_string(
357     YR_COMPILER* compiler,
358     const char* rules_string,
359     const char* namespace_);
360 
361 YR_API char* yr_compiler_get_error_message(
362     YR_COMPILER* compiler,
363     char* buffer,
364     int buffer_size);
365 
366 YR_API char* yr_compiler_get_current_file_name(YR_COMPILER* compiler);
367 
368 YR_API int yr_compiler_define_integer_variable(
369     YR_COMPILER* compiler,
370     const char* identifier,
371     int64_t value);
372 
373 YR_API int yr_compiler_define_boolean_variable(
374     YR_COMPILER* compiler,
375     const char* identifier,
376     int value);
377 
378 YR_API int yr_compiler_define_float_variable(
379     YR_COMPILER* compiler,
380     const char* identifier,
381     double value);
382 
383 YR_API int yr_compiler_define_string_variable(
384     YR_COMPILER* compiler,
385     const char* identifier,
386     const char* value);
387 
388 YR_API int yr_compiler_get_rules(YR_COMPILER* compiler, YR_RULES** rules);
389 
390 #endif
391