1 /*
2 Copyright (c) 2013. The YARA Authors. All Rights Reserved.
3 
4 Redistribution and use in source and binary forms, with or without modification,
5 are permitted provided that the following conditions are met:
6 
7 1. Redistributions of source code must retain the above copyright notice, this
8 list of conditions and the following disclaimer.
9 
10 2. Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the documentation and/or
12 other materials provided with the distribution.
13 
14 3. Neither the name of the copyright holder nor the names of its contributors
15 may be used to endorse or promote products derived from this software without
16 specific prior written permission.
17 
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #ifndef YR_RE_H
31 #define YR_RE_H
32 
33 #include <ctype.h>
34 #include <yara/arena.h>
35 #include <yara/sizedstr.h>
36 #include <yara/types.h>
37 #include <yara/utils.h>
38 
// Upper limit for regexp ranges, fixed at INT16_MAX (32767).
// NOTE(review): presumably the largest bound accepted in {n,m} style
// quantifiers / hex-string jumps -- confirm against the parser and re.c.
39 #define RE_MAX_RANGE              INT16_MAX
40 
// Node type identifiers. The values are distinct, consecutive integers
// from 1 to 21.
// NOTE(review): presumably these are the values passed as `type` to
// yr_re_node_create() and stored in RE_NODE -- confirm in types.h / re.c.
41 #define RE_NODE_LITERAL           1
42 #define RE_NODE_MASKED_LITERAL    2
43 #define RE_NODE_ANY               3
44 #define RE_NODE_CONCAT            4
45 #define RE_NODE_ALT               5
46 #define RE_NODE_RANGE             6
47 #define RE_NODE_STAR              7
48 #define RE_NODE_PLUS              8
49 #define RE_NODE_CLASS             9
50 #define RE_NODE_WORD_CHAR         10
51 #define RE_NODE_NON_WORD_CHAR     11
52 #define RE_NODE_SPACE             12
53 #define RE_NODE_NON_SPACE         13
54 #define RE_NODE_DIGIT             14
55 #define RE_NODE_NON_DIGIT         15
56 #define RE_NODE_EMPTY             16
57 #define RE_NODE_ANCHOR_START      17
58 #define RE_NODE_ANCHOR_END        18
59 #define RE_NODE_WORD_BOUNDARY     19
60 #define RE_NODE_NON_WORD_BOUNDARY 20
61 #define RE_NODE_RANGE_ANY         21
62 
// Opcodes in the 0xA0-0xAD range. Each value fits in a single byte.
// The values 0xA1, 0xA3 and 0xA6 are not assigned in this header.
// NOTE(review): by their names these look like the single-character
// matching instructions plus the final MATCH instruction of the compiled
// regexp code -- confirm against the emitter/executor in re.c.
63 #define RE_OPCODE_ANY            0xA0
64 #define RE_OPCODE_LITERAL        0xA2
65 #define RE_OPCODE_MASKED_LITERAL 0xA4
66 #define RE_OPCODE_CLASS          0xA5
67 #define RE_OPCODE_WORD_CHAR      0xA7
68 #define RE_OPCODE_NON_WORD_CHAR  0xA8
69 #define RE_OPCODE_SPACE          0xA9
70 #define RE_OPCODE_NON_SPACE      0xAA
71 #define RE_OPCODE_DIGIT          0xAB
72 #define RE_OPCODE_NON_DIGIT      0xAC
73 #define RE_OPCODE_MATCH          0xAD
74 
// Opcodes in the 0xB0-0xB5 range (contiguous, no gaps).
// NOTE(review): names suggest anchors, word-boundary assertions and the
// "repeat any byte" instructions in greedy/ungreedy flavours; the operand
// layout that follows each opcode is not visible here -- see re.c.
75 #define RE_OPCODE_MATCH_AT_END        0xB0
76 #define RE_OPCODE_MATCH_AT_START      0xB1
77 #define RE_OPCODE_WORD_BOUNDARY       0xB2
78 #define RE_OPCODE_NON_WORD_BOUNDARY   0xB3
79 #define RE_OPCODE_REPEAT_ANY_GREEDY   0xB4
80 #define RE_OPCODE_REPEAT_ANY_UNGREEDY 0xB5
81 
// Opcodes in the 0xC0-0xC6 range (contiguous, no gaps).
// NOTE(review): names suggest control-flow instructions (split, jump and
// bounded-repeat start/end in greedy/ungreedy flavours); how SPLIT_A and
// SPLIT_B differ is not visible in this header -- confirm in re.c.
82 #define RE_OPCODE_SPLIT_A               0xC0
83 #define RE_OPCODE_SPLIT_B               0xC1
84 #define RE_OPCODE_JUMP                  0xC2
85 #define RE_OPCODE_REPEAT_START_GREEDY   0xC3
86 #define RE_OPCODE_REPEAT_END_GREEDY     0xC4
87 #define RE_OPCODE_REPEAT_START_UNGREEDY 0xC5
88 #define RE_OPCODE_REPEAT_END_UNGREEDY   0xC6
89 
// Regexp flags. Every value is a distinct single bit, so the flags can be
// combined with bitwise OR. Bits 0x01, 0x100 and 0x200 are not defined in
// this header.
// NOTE(review): presumably these are the values carried by the `flags`
// arguments of yr_re_exec(), yr_re_fast_exec() and yr_re_compile(), and
// passed to RE_MATCH_CALLBACK_FUNC -- confirm which flags each accepts.
90 #define RE_FLAGS_FAST_REGEXP 0x02
91 #define RE_FLAGS_BACKWARDS   0x04
92 #define RE_FLAGS_EXHAUSTIVE  0x08
93 #define RE_FLAGS_WIDE        0x10
94 #define RE_FLAGS_NO_CASE     0x20
95 #define RE_FLAGS_SCAN        0x40
96 #define RE_FLAGS_DOT_ALL     0x80
97 #define RE_FLAGS_GREEDY      0x400
98 #define RE_FLAGS_UNGREEDY    0x800
99 
// Signature of the match callback accepted by yr_re_exec() and
// yr_re_fast_exec(). This typedef names a function type (not a pointer
// type); when used as a parameter type the C language adjusts it to a
// pointer to function.
//
// Parameters:
//   match         Pointer to read-only bytes.
//   match_length  Length associated with `match`.
//   flags         Integer flags.
//   args          Opaque user pointer.
//
// NOTE(review): presumably `match` points at the matched data, `flags` are
// RE_FLAGS_* bits and `args` is the `callback_args` value given to the
// exec function; the meaning of the int return value is not visible here
// -- confirm in re.c.
100 typedef int RE_MATCH_CALLBACK_FUNC(
101     const uint8_t* match,
102     int match_length,
103     int flags,
104     void* args);
105 
// Takes the address of an RE_AST pointer and returns an int.
// NOTE(review): presumably allocates a new AST, stores it in *re_ast and
// returns a YARA error code -- confirm in re.c. The caller presumably
// releases it with yr_re_ast_destroy().
106 int yr_re_ast_create(RE_AST** re_ast);
107 
// Takes an RE_AST pointer and returns nothing.
// NOTE(review): presumably frees the AST and the nodes it owns; whether a
// NULL argument is tolerated is not visible here -- confirm in re.c.
108 void yr_re_ast_destroy(RE_AST* re_ast);
109 
// Takes an RE_AST pointer and returns nothing.
// NOTE(review): presumably a debugging aid that dumps the AST; the output
// destination and format are not visible here -- confirm in re.c.
110 void yr_re_ast_print(RE_AST* re_ast);
111 
// Takes an RE_AST pointer and returns a pointer to a SIZED_STRING.
// NOTE(review): presumably returns the literal string represented by the
// AST when the regexp is a plain literal, and NULL otherwise; ownership of
// the returned SIZED_STRING (who frees it) is not visible here -- confirm
// in re.c.
112 SIZED_STRING* yr_re_ast_extract_literal(RE_AST* re_ast);
113 
// Takes an RE_AST pointer and returns an int.
// NOTE(review): by its name, a predicate reporting whether the regexp
// applies an unbounded quantifier to `.` (e.g. `.*`, `.+`); presumably
// returns non-zero when it does -- confirm in re.c.
114 int yr_re_ast_has_unbounded_quantifier_for_dot(RE_AST* re_ast);
115 
// Takes an AST plus three output-style pointers and returns an int.
//
// Parameters:
//   re_ast            AST to operate on.
//   remainder_re_ast  Address of an RE_AST pointer.
//   min_gap           Address of an int32_t.
//   max_gap           Address of an int32_t.
//
// NOTE(review): presumably splits `re_ast` at a "chaining point", leaves
// the first part in `re_ast`, stores the rest in *remainder_re_ast, and
// reports the gap bounds between both parts through *min_gap / *max_gap.
// What qualifies as a chaining point, what is stored when no split occurs
// and the meaning of the return value are not visible here -- confirm in
// re.c.
116 int yr_re_ast_split_at_chaining_point(
117     RE_AST* re_ast,
118     RE_AST** remainder_re_ast,
119     int32_t* min_gap,
120     int32_t* max_gap);
121 
// Takes an AST, an arena and an int selector, and returns an int.
// NOTE(review): presumably writes the compiled opcode stream (RE_OPCODE_*)
// for `re_ast` into `arena`, generating code for backwards matching when
// `backwards_code` is non-zero, and returns a YARA error code -- confirm
// in re.c.
122 int yr_re_ast_emit_code(RE_AST* re_ast, YR_ARENA* arena, int backwards_code);
123 
// Takes an int `type` and returns a pointer to an RE_NODE.
// NOTE(review): presumably `type` is one of the RE_NODE_* constants and
// NULL is returned on allocation failure -- confirm in re.c.
124 RE_NODE* yr_re_node_create(int type);
125 
// Takes an RE_NODE pointer and returns nothing.
// NOTE(review): presumably frees the node; whether children are destroyed
// recursively is not visible here -- confirm in re.c.
126 void yr_re_node_destroy(RE_NODE* node);
127 
// Takes a node and a child node; returns nothing, so no failure can be
// reported to the caller.
// NOTE(review): by its name, adds `child` as the last child of `node`;
// ownership transfer of `child` is presumed but not visible here.
128 void yr_re_node_append_child(RE_NODE* node, RE_NODE* child);
129 
// Takes a node and a child node; returns nothing, so no failure can be
// reported to the caller.
// NOTE(review): by its name, adds `child` as the first child of `node`;
// ownership transfer of `child` is presumed but not visible here.
130 void yr_re_node_prepend_child(RE_NODE* node, RE_NODE* child);
131 
// Regexp execution entry point. Has the same parameter list as
// yr_re_fast_exec().
//
// Parameters:
//   context               Scan context.
//   code                  Pointer to read-only bytes.
//   input_data            Pointer to read-only input bytes.
//   input_forwards_size   A size_t byte count.
//   input_backwards_size  A size_t byte count.
//   flags                 Integer flags.
//   callback              Function of type RE_MATCH_CALLBACK_FUNC (adjusted
//                         by the language to a function pointer).
//   callback_args         Opaque pointer.
//   matches               Address of an int.
//
// NOTE(review): presumably `code` is the opcode stream produced by
// yr_re_ast_emit_code(), the two sizes bound how far the engine may read
// after / before `input_data`, `flags` are RE_FLAGS_* bits,
// `callback_args` is forwarded to `callback`, and *matches receives a
// match length or a negative "no match" value. Whether `callback` or
// `matches` may be NULL and the meaning of the return value are not
// visible here -- confirm in re.c.
132 int yr_re_exec(
133     YR_SCAN_CONTEXT* context,
134     const uint8_t* code,
135     const uint8_t* input_data,
136     size_t input_forwards_size,
137     size_t input_backwards_size,
138     int flags,
139     RE_MATCH_CALLBACK_FUNC callback,
140     void* callback_args,
141     int* matches);
142 
// Alternative regexp execution entry point. Has the same parameter list
// and return type as yr_re_exec().
//
// Parameters:
//   context               Scan context.
//   code                  Pointer to read-only bytes.
//   input_data            Pointer to read-only input bytes.
//   input_forwards_size   A size_t byte count.
//   input_backwards_size  A size_t byte count.
//   flags                 Integer flags.
//   callback              Function of type RE_MATCH_CALLBACK_FUNC (adjusted
//                         by the language to a function pointer).
//   callback_args         Opaque pointer.
//   matches               Address of an int.
//
// NOTE(review): presumably a faster executor restricted to code compiled
// with RE_FLAGS_FAST_REGEXP; the exact subset of opcodes it supports and
// the meaning of the return value are not visible here -- confirm in re.c.
143 int yr_re_fast_exec(
144     YR_SCAN_CONTEXT* context,
145     const uint8_t* code,
146     const uint8_t* input_data,
147     size_t input_forwards_size,
148     size_t input_backwards_size,
149     int flags,
150     RE_MATCH_CALLBACK_FUNC callback,
151     void* callback_args,
152     int* matches);
153 
// Takes a C string, the address of an RE_AST pointer and an RE_ERROR
// pointer; returns an int.
// NOTE(review): presumably parses the regular expression in `re_string`,
// stores the resulting AST in *re_ast, fills `error` on failure and
// returns a YARA error code. Whether *re_ast must be destroyed by the
// caller after a failed parse is not visible here -- confirm in re.c.
154 int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error);
155 
// Takes a C string, the address of an RE_AST pointer and an RE_ERROR
// pointer; returns an int. Same parameter types as yr_re_parse().
// NOTE(review): presumably parses a YARA hex string into the same AST
// representation used for regular expressions, fills `error` on failure
// and returns a YARA error code -- confirm in re.c.
156 int yr_re_parse_hex(const char* hex_string, RE_AST** re_ast, RE_ERROR* error);
157 
// Takes a regexp source string, integer flags, an arena, an arena
// reference and an error structure; returns an int.
//
// Parameters:
//   re_string  Regular expression as a C string.
//   flags      Integer flags.
//   arena      Arena pointer.
//   ref        Arena reference pointer.
//   error      RE_ERROR pointer.
//
// NOTE(review): presumably parses `re_string`, emits its code into
// `arena`, stores the location of the compiled regexp in *ref, fills
// `error` on failure and returns a YARA error code; `flags` are presumably
// RE_FLAGS_* bits -- confirm in re.c.
158 int yr_re_compile(
159     const char* re_string,
160     int flags,
161     YR_ARENA* arena,
162     YR_ARENA_REF* ref,
163     RE_ERROR* error);
164 
// Takes a scan context, an RE pointer and a C string; returns an int.
// NOTE(review): presumably runs the compiled regexp `re` against the
// NUL-terminated `target`. Whether the return value is a match length, a
// boolean or an error code is not visible here -- confirm in re.c.
165 int yr_re_match(YR_SCAN_CONTEXT* context, RE* re, const char* target);
166 
167 #endif
168