1 /* 2 Copyright (c) 2013. The YARA Authors. All Rights Reserved. 3 4 Redistribution and use in source and binary forms, with or without modification, 5 are permitted provided that the following conditions are met: 6 7 1. Redistributions of source code must retain the above copyright notice, this 8 list of conditions and the following disclaimer. 9 10 2. Redistributions in binary form must reproduce the above copyright notice, 11 this list of conditions and the following disclaimer in the documentation and/or 12 other materials provided with the distribution. 13 14 3. Neither the name of the copyright holder nor the names of its contributors 15 may be used to endorse or promote products derived from this software without 16 specific prior written permission. 17 18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #ifndef YR_RE_H 31 #define YR_RE_H 32 33 #include <ctype.h> 34 #include <yara/arena.h> 35 #include <yara/sizedstr.h> 36 #include <yara/types.h> 37 #include <yara/utils.h> 38 39 #define RE_MAX_RANGE INT16_MAX 40 41 #define RE_NODE_LITERAL 1 42 #define RE_NODE_MASKED_LITERAL 2 43 #define RE_NODE_ANY 3 44 #define RE_NODE_CONCAT 4 45 #define RE_NODE_ALT 5 46 #define RE_NODE_RANGE 6 47 #define RE_NODE_STAR 7 48 #define RE_NODE_PLUS 8 49 #define RE_NODE_CLASS 9 50 #define RE_NODE_WORD_CHAR 10 51 #define RE_NODE_NON_WORD_CHAR 11 52 #define RE_NODE_SPACE 12 53 #define RE_NODE_NON_SPACE 13 54 #define RE_NODE_DIGIT 14 55 #define RE_NODE_NON_DIGIT 15 56 #define RE_NODE_EMPTY 16 57 #define RE_NODE_ANCHOR_START 17 58 #define RE_NODE_ANCHOR_END 18 59 #define RE_NODE_WORD_BOUNDARY 19 60 #define RE_NODE_NON_WORD_BOUNDARY 20 61 #define RE_NODE_RANGE_ANY 21 62 63 #define RE_OPCODE_ANY 0xA0 64 #define RE_OPCODE_LITERAL 0xA2 65 #define RE_OPCODE_MASKED_LITERAL 0xA4 66 #define RE_OPCODE_CLASS 0xA5 67 #define RE_OPCODE_WORD_CHAR 0xA7 68 #define RE_OPCODE_NON_WORD_CHAR 0xA8 69 #define RE_OPCODE_SPACE 0xA9 70 #define RE_OPCODE_NON_SPACE 0xAA 71 #define RE_OPCODE_DIGIT 0xAB 72 #define RE_OPCODE_NON_DIGIT 0xAC 73 #define RE_OPCODE_MATCH 0xAD 74 75 #define RE_OPCODE_MATCH_AT_END 0xB0 76 #define RE_OPCODE_MATCH_AT_START 0xB1 77 #define RE_OPCODE_WORD_BOUNDARY 0xB2 78 #define RE_OPCODE_NON_WORD_BOUNDARY 0xB3 79 #define RE_OPCODE_REPEAT_ANY_GREEDY 0xB4 80 #define RE_OPCODE_REPEAT_ANY_UNGREEDY 0xB5 81 82 #define RE_OPCODE_SPLIT_A 0xC0 83 #define RE_OPCODE_SPLIT_B 0xC1 84 #define RE_OPCODE_JUMP 0xC2 85 #define RE_OPCODE_REPEAT_START_GREEDY 0xC3 86 #define RE_OPCODE_REPEAT_END_GREEDY 0xC4 87 #define RE_OPCODE_REPEAT_START_UNGREEDY 0xC5 88 #define RE_OPCODE_REPEAT_END_UNGREEDY 0xC6 89 90 #define RE_FLAGS_FAST_REGEXP 0x02 91 #define RE_FLAGS_BACKWARDS 0x04 92 #define RE_FLAGS_EXHAUSTIVE 0x08 93 #define RE_FLAGS_WIDE 0x10 94 #define RE_FLAGS_NO_CASE 0x20 95 #define RE_FLAGS_SCAN 0x40 96 #define RE_FLAGS_DOT_ALL 0x80 97 #define RE_FLAGS_GREEDY 0x400 98 #define RE_FLAGS_UNGREEDY 0x800 99 100 typedef int RE_MATCH_CALLBACK_FUNC( 101 const uint8_t* match, 102 int match_length, 103 int flags, 104 void* args); 105 106 int yr_re_ast_create(RE_AST** re_ast); 107 108 void yr_re_ast_destroy(RE_AST* re_ast); 109 110 void yr_re_ast_print(RE_AST* re_ast); 111 112 SIZED_STRING* yr_re_ast_extract_literal(RE_AST* re_ast); 113 114 int yr_re_ast_has_unbounded_quantifier_for_dot(RE_AST* re_ast); 115 116 int yr_re_ast_split_at_chaining_point( 117 RE_AST* re_ast, 118 RE_AST** remainder_re_ast, 119 int32_t* min_gap, 120 int32_t* max_gap); 121 122 int yr_re_ast_emit_code(RE_AST* re_ast, YR_ARENA* arena, int backwards_code); 123 124 RE_NODE* yr_re_node_create(int type); 125 126 void yr_re_node_destroy(RE_NODE* node); 127 128 void yr_re_node_append_child(RE_NODE* node, RE_NODE* child); 129 130 void yr_re_node_prepend_child(RE_NODE* node, RE_NODE* child); 131 132 int yr_re_exec( 133 YR_SCAN_CONTEXT* context, 134 const uint8_t* code, 135 const uint8_t* input_data, 136 size_t input_forwards_size, 137 size_t input_backwards_size, 138 int flags, 139 RE_MATCH_CALLBACK_FUNC callback, 140 void* callback_args, 141 int* matches); 142 143 int yr_re_fast_exec( 144 YR_SCAN_CONTEXT* context, 145 const uint8_t* code, 146 const uint8_t* input_data, 147 size_t input_forwards_size, 148 size_t input_backwards_size, 149 int flags, 150 RE_MATCH_CALLBACK_FUNC callback, 151 void* callback_args, 152 int* matches); 153 154 int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error); 155 156 int yr_re_parse_hex(const char* hex_string, RE_AST** re_ast, RE_ERROR* error); 157 158 int yr_re_compile( 159 const char* re_string, 160 int flags, 161 YR_ARENA* arena, 162 YR_ARENA_REF* ref, 163 RE_ERROR* error); 164 165 int yr_re_match(YR_SCAN_CONTEXT* context, RE* re, const char* target); 166 167 #endif 168