1 /* 2 Copyright (c) 2013. The YARA Authors. All Rights Reserved. 3 4 Redistribution and use in source and binary forms, with or without modification, 5 are permitted provided that the following conditions are met: 6 7 1. Redistributions of source code must retain the above copyright notice, this 8 list of conditions and the following disclaimer. 9 10 2. Redistributions in binary form must reproduce the above copyright notice, 11 this list of conditions and the following disclaimer in the documentation and/or 12 other materials provided with the distribution. 13 14 3. Neither the name of the copyright holder nor the names of its contributors 15 may be used to endorse or promote products derived from this software without 16 specific prior written permission. 17 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #ifndef YR_COMPILER_H 31 #define YR_COMPILER_H 32 33 #include <setjmp.h> 34 #include <stdio.h> 35 #include <yara/ahocorasick.h> 36 #include <yara/arena.h> 37 #include <yara/filemap.h> 38 #include <yara/hash.h> 39 #include <yara/utils.h> 40 41 #define YARA_ERROR_LEVEL_ERROR 0 42 #define YARA_ERROR_LEVEL_WARNING 1 43 44 // Expression type constants are powers of two because they are used as flags. 45 #define EXPRESSION_TYPE_UNKNOWN 0 46 #define EXPRESSION_TYPE_BOOLEAN 1 47 #define EXPRESSION_TYPE_INTEGER 2 48 #define EXPRESSION_TYPE_STRING 4 49 #define EXPRESSION_TYPE_REGEXP 8 50 #define EXPRESSION_TYPE_OBJECT 16 51 #define EXPRESSION_TYPE_FLOAT 32 52 53 // The compiler uses an arena to store the data it generates during the 54 // compilation. Each buffer in the arena is used for storing a different type 55 // of data. The following identifiers indicate the purpose of each buffer. 56 #define YR_NAMESPACES_TABLE 0 57 #define YR_RULES_TABLE 1 58 #define YR_METAS_TABLE 2 59 #define YR_STRINGS_TABLE 3 60 #define YR_EXTERNAL_VARIABLES_TABLE 4 61 #define YR_SZ_POOL 5 62 #define YR_CODE_SECTION 6 63 #define YR_RE_CODE_SECTION 7 64 #define YR_AC_TRANSITION_TABLE 8 65 #define YR_AC_STATE_MATCHES_TABLE 9 66 #define YR_AC_STATE_MATCHES_POOL 10 67 #define YR_SUMMARY_SECTION 11 68 69 // This is the number of buffers used by the compiler, should match the number 70 // of items in the list above. 71 #define YR_NUM_SECTIONS 12 72 73 // Number of variables used by loops. This doesn't include user defined 74 // variables. 75 #define YR_INTERNAL_LOOP_VARS 3 76 77 typedef struct _YR_EXPRESSION 78 { 79 int type; 80 81 union 82 { 83 int64_t integer; 84 YR_OBJECT* object; 85 YR_ARENA_REF sized_string_ref; 86 } value; 87 88 // An expression can have an associated identifier, if "ptr" is not NULL it 89 // points to the identifier name, if it is NULL, then "ref" holds a reference 90 // to the identifier within YR_SZ_POOL. When the identifier is in YR_SZ_POOL 91 // a pointer can't be used as the YR_SZ_POOL can be moved to a different 92 // memory location. 93 struct 94 { 95 const char* ptr; 96 YR_ARENA_REF ref; 97 } identifier; 98 99 } YR_EXPRESSION; 100 101 typedef void (*YR_COMPILER_CALLBACK_FUNC)( 102 int error_level, 103 const char* file_name, 104 int line_number, 105 const YR_RULE* rule, 106 const char* message, 107 void* user_data); 108 109 typedef const char* (*YR_COMPILER_INCLUDE_CALLBACK_FUNC)( 110 const char* include_name, 111 const char* calling_rule_filename, 112 const char* calling_rule_namespace, 113 void* user_data); 114 115 typedef void (*YR_COMPILER_INCLUDE_FREE_FUNC)( 116 const char* callback_result_ptr, 117 void* user_data); 118 119 typedef void (*YR_COMPILER_RE_AST_CALLBACK_FUNC)( 120 const YR_RULE* rule, 121 const char* string_identifier, 122 const RE_AST* re_ast, 123 void* user_data); 124 125 typedef struct _YR_FIXUP 126 { 127 YR_ARENA_REF ref; 128 struct _YR_FIXUP* next; 129 130 } YR_FIXUP; 131 132 // Each "for" loop in the condition has an associated context which holds 133 // information about loop, like the target address for the jump instruction 134 // that goes back to the beginning of the loop and the local variables used 135 // by the loop. 136 137 typedef struct _YR_LOOP_CONTEXT 138 { 139 // Reference indicating the the place in the code where the loop starts. The 140 // loop goes back to this address on each iteration. 141 YR_ARENA_REF start_ref; 142 143 // vars_count is the number of local variables defined by the loop, and vars 144 // is an array of expressions with the identifier and type for each of those 145 // local variables. 146 int vars_count; 147 YR_EXPRESSION vars[YR_MAX_LOOP_VARS]; 148 149 // vars_internal_count is the number of variables used by the loop which are 150 // not defined by the rule itself but that are necessary for keeping the 151 // loop's state. One example is the iteration counter. 152 int vars_internal_count; 153 } YR_LOOP_CONTEXT; 154 155 typedef struct _YR_COMPILER 156 { 157 // Arena that contains the data generated by the compiled. The arena has 158 // the following buffers: 159 // 160 // YR_SUMMARY_SECTION: 161 // A YR_SUMMARY struct. 162 // YR_RULES_TABLE: 163 // An array of YR_RULE structures, one per each rule. 164 // YR_STRINGS_TABLE: 165 // An array of YR_STRING structures, one per each string. 166 // YR_METAS_TABLE: 167 // An array of YR_META structures, one per each meta definition. 168 // YR_NAMESPACES_TABLE: 169 // An array of YR_NAMESPACE structures, one per each namespace. 170 // YR_EXTERNAL_VARIABLES_TABLE: 171 // An array of YR_EXTERNAL_VARIABLE structures, one per each external 172 // variable defined. 173 // YR_SZ_POOL: 174 // A collection of null-terminated strings. This buffer contains 175 // identifiers, literal strings, and in general any null-terminated 176 // string referenced by other data structures. 177 // YR_CODE_SECTION: 178 // The code for the condition section of all the rules. This is the 179 // code executed by yr_execute_code. 180 // YR_RE_CODE_SECTION: 181 // Similar to YR_CODE_SECTION, but it contains the code for regular 182 // expressions. This is the code executed by yr_re_exec and 183 // yr_re_fast_exec. 184 // YR_AC_TRANSITION_TABLE: 185 // An array of uint32_t containing the Aho-Corasick transition table. 186 // See comment in _yr_ac_build_transition_table for details. 187 // YR_AC_STATE_MATCHES_TABLE: 188 // An array of uint32_t with the same number of items than the transition 189 // table. If entry N in the transition table corresponds to some 190 // Aho-Corasick state, the N-th item in this array has the index within 191 // the matches pool where the list of matches for that state begins. 192 // YR_AC_STATE_MATCHES_POOL: 193 // An array of YR_AC_MATCH structures. 194 // 195 YR_ARENA* arena; 196 197 // Index of the rule being compiled in the array of YR_RULE structures 198 // stored in YR_RULES_TABLE. If this is MAX_UINT32 the compiler is not 199 // parsing a rule. 200 uint32_t current_rule_idx; 201 202 // Index of the rule that comes next during parsing. 203 uint32_t next_rule_idx; 204 205 // Index of the string being compiled in the array of YR_STRING structures 206 // stored in YR_STRINGS_TABLE. 207 uint32_t current_string_idx; 208 209 // Index of the current namespace in the array of YR_NAMESPACE structures 210 // stored in YR_NAMESPACES_TABLE. 211 uint32_t current_namespace_idx; 212 213 // Index of the current meta in the array of YR_META structures stored in 214 // YR_METAS_TABLE. 215 uint32_t current_meta_idx; 216 217 // Pointer to a YR_RULES structure that represents the compiled rules. This 218 // is what yr_compiler_get_rules returns. Once these rules are generated you 219 // can't call any of the yr_compiler_add_xxx functions. 220 YR_RULES* rules; 221 222 int errors; 223 int current_line; 224 int last_error; 225 int last_error_line; 226 227 jmp_buf error_recovery; 228 229 YR_AC_AUTOMATON* automaton; 230 YR_HASH_TABLE* rules_table; 231 YR_HASH_TABLE* objects_table; 232 YR_HASH_TABLE* strings_table; 233 234 // Hash table that contains all the strings that has been written to the 235 // YR_SZ_POOL buffer in the compiler's arena. Values in the hash table are 236 // the offset within the YR_SZ_POOL where the string resides. This allows to 237 // know is some string has already been written in order to reuse instead of 238 // writting it again. 239 YR_HASH_TABLE* sz_table; 240 241 YR_FIXUP* fixup_stack_head; 242 243 int num_namespaces; 244 245 YR_LOOP_CONTEXT loop[YR_MAX_LOOP_NESTING]; 246 int loop_index; 247 int loop_for_of_var_index; 248 249 char* file_name_stack[YR_MAX_INCLUDE_DEPTH]; 250 int file_name_stack_ptr; 251 252 char last_error_extra_info[YR_MAX_COMPILER_ERROR_EXTRA_INFO]; 253 254 // This buffer is used by the lexer for accumulating text strings. Those 255 // strings are copied from flex's internal variables. lex_buf_ptr points to 256 // the end of the string and lex_buf_len contains the number of bytes that 257 // have been copied into lex_buf. 258 char lex_buf[YR_LEX_BUF_SIZE]; 259 char* lex_buf_ptr; 260 unsigned short lex_buf_len; 261 262 char include_base_dir[MAX_PATH]; 263 void* user_data; 264 void* incl_clbk_user_data; 265 void* re_ast_clbk_user_data; 266 267 YR_COMPILER_CALLBACK_FUNC callback; 268 YR_COMPILER_INCLUDE_CALLBACK_FUNC include_callback; 269 YR_COMPILER_INCLUDE_FREE_FUNC include_free; 270 YR_COMPILER_RE_AST_CALLBACK_FUNC re_ast_callback; 271 YR_ATOMS_CONFIG atoms_config; 272 273 } YR_COMPILER; 274 275 #define yr_compiler_set_error_extra_info(compiler, info) \ 276 strlcpy( \ 277 compiler->last_error_extra_info, \ 278 info, \ 279 sizeof(compiler->last_error_extra_info)); 280 281 #define yr_compiler_set_error_extra_info_fmt(compiler, fmt, ...) \ 282 snprintf( \ 283 compiler->last_error_extra_info, \ 284 sizeof(compiler->last_error_extra_info), \ 285 fmt, \ 286 __VA_ARGS__); 287 288 int _yr_compiler_push_file_name(YR_COMPILER* compiler, const char* file_name); 289 290 void _yr_compiler_pop_file_name(YR_COMPILER* compiler); 291 292 int _yr_compiler_get_var_frame(YR_COMPILER* compiler); 293 294 const char* _yr_compiler_default_include_callback( 295 const char* include_name, 296 const char* calling_rule_filename, 297 const char* calling_rule_namespace, 298 void* user_data); 299 300 YR_RULE* _yr_compiler_get_rule_by_idx(YR_COMPILER* compiler, uint32_t rule_idx); 301 302 int _yr_compiler_store_string( 303 YR_COMPILER* compiler, 304 const char* string, 305 YR_ARENA_REF* ref); 306 307 int _yr_compiler_store_data( 308 YR_COMPILER* compiler, 309 const void* data, 310 size_t data_length, 311 YR_ARENA_REF* ref); 312 313 YR_API int yr_compiler_create(YR_COMPILER** compiler); 314 315 YR_API void yr_compiler_destroy(YR_COMPILER* compiler); 316 317 YR_API void yr_compiler_set_callback( 318 YR_COMPILER* compiler, 319 YR_COMPILER_CALLBACK_FUNC callback, 320 void* user_data); 321 322 YR_API void yr_compiler_set_include_callback( 323 YR_COMPILER* compiler, 324 YR_COMPILER_INCLUDE_CALLBACK_FUNC include_callback, 325 YR_COMPILER_INCLUDE_FREE_FUNC include_free, 326 void* user_data); 327 328 YR_API void yr_compiler_set_re_ast_callback( 329 YR_COMPILER* compiler, 330 YR_COMPILER_RE_AST_CALLBACK_FUNC re_ast_callback, 331 void* user_data); 332 333 YR_API void yr_compiler_set_atom_quality_table( 334 YR_COMPILER* compiler, 335 const void* table, 336 int entries, 337 unsigned char warning_threshold); 338 339 YR_API int yr_compiler_load_atom_quality_table( 340 YR_COMPILER* compiler, 341 const char* filename, 342 unsigned char warning_threshold); 343 344 YR_API int yr_compiler_add_file( 345 YR_COMPILER* compiler, 346 FILE* rules_file, 347 const char* namespace_, 348 const char* file_name); 349 350 YR_API int yr_compiler_add_fd( 351 YR_COMPILER* compiler, 352 YR_FILE_DESCRIPTOR rules_fd, 353 const char* namespace_, 354 const char* file_name); 355 356 YR_API int yr_compiler_add_string( 357 YR_COMPILER* compiler, 358 const char* rules_string, 359 const char* namespace_); 360 361 YR_API char* yr_compiler_get_error_message( 362 YR_COMPILER* compiler, 363 char* buffer, 364 int buffer_size); 365 366 YR_API char* yr_compiler_get_current_file_name(YR_COMPILER* compiler); 367 368 YR_API int yr_compiler_define_integer_variable( 369 YR_COMPILER* compiler, 370 const char* identifier, 371 int64_t value); 372 373 YR_API int yr_compiler_define_boolean_variable( 374 YR_COMPILER* compiler, 375 const char* identifier, 376 int value); 377 378 YR_API int yr_compiler_define_float_variable( 379 YR_COMPILER* compiler, 380 const char* identifier, 381 double value); 382 383 YR_API int yr_compiler_define_string_variable( 384 YR_COMPILER* compiler, 385 const char* identifier, 386 const char* value); 387 388 YR_API int yr_compiler_get_rules(YR_COMPILER* compiler, YR_RULES** rules); 389 390 #endif 391