1 /*
2  * Copyright (c) 2018, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #ifndef CH_RUNTIME_H_
30 #define CH_RUNTIME_H_
31 
32 #include <stdlib.h>
33 
34 /**
35  * @file
36  * @brief The Chimera runtime API definition.
37  *
 * Chimera is a hybrid of the Hyperscan and PCRE regular expression engines.
39  *
40  * This header contains functions for using compiled Chimera databases for
41  * scanning data at runtime.
42  */
43 
44 #include "hs_common.h"
45 
46 #ifdef __cplusplus
47 extern "C"
48 {
49 #endif
50 
/**
 * A Chimera scratch space.
 *
 * Only the incomplete type struct ch_scratch is declared in this header, so
 * a scratch space is always handled through a pointer.
 */
typedef struct ch_scratch ch_scratch_t;
57 
/**
 * Value returned by a callback to tell the Chimera matcher how to proceed
 * once the current match has been processed.
 */
typedef int ch_callback_t;

/**
 * @defgroup CH_CALLBACK ch_callback_t values
 *
 * @{
 */

/** Continue matching. */
#define CH_CALLBACK_CONTINUE 0

/** Terminate matching. */
#define CH_CALLBACK_TERMINATE 1

/** Skip remaining matches for this ID and continue. */
#define CH_CALLBACK_SKIP_PATTERN 2

/** @} */
87 
88 
/**
 * Type that distinguishes the kinds of error raised through the @ref
 * ch_error_event_handler callback.
 */
typedef int ch_error_event_t;

/**
 * @defgroup CH_ERROR_EVENT ch_error_event_t values
 *
 * @{
 */

/** PCRE hits its match limit and reports PCRE_ERROR_MATCHLIMIT. */
#define CH_ERROR_MATCHLIMIT 1

/** PCRE hits its recursion limit and reports PCRE_ERROR_RECURSIONLIMIT. */
#define CH_ERROR_RECURSIONLIMIT 2

/** @} */
112 
/**
 * A captured subexpression within a match.
 *
 * The match callback receives an array of these structures, one per capture
 * group and in capture-group order. Entries that are active are identified
 * by the CH_CAPTURE_FLAG_ACTIVE flag.
 */
struct ch_capture {
    /** Flags indicating whether this structure is active. */
    unsigned int flags;

    /** Offset at which this capture group begins. */
    unsigned long long from;

    /** Offset at which this capture group ends. */
    unsigned long long to;
};

/** Convenience name for struct ch_capture. */
typedef struct ch_capture ch_capture_t;
135 
/**
 * @defgroup CH_CAPTURE ch_capture_t flags
 *
 * Values stored in @ref ch_capture_t::flags to indicate whether the
 * structure is active.
 *
 * @{
 */

/**
 * The capture group is inactive; used in @ref ch_capture_t::flags.
 */
#define CH_CAPTURE_FLAG_INACTIVE 0

/**
 * The capture group is active; used in @ref ch_capture_t::flags.
 */
#define CH_CAPTURE_FLAG_ACTIVE   1

/** @} */
158 
159 /**
160  * Definition of the match event callback function type.
161  *
162  * A callback function matching the defined type must be provided by the
163  * application calling the @ref ch_scan()
164  *
165  * This callback function will be invoked whenever a match is located in the
166  * target data during the execution of a scan. The details of the match are
167  * passed in as parameters to the callback function, and the callback function
168  * should return a value indicating whether or not matching should continue on
169  * the target data. If no callbacks are desired from a scan call, NULL may be
170  * provided in order to suppress match production.
171  *
172  * @param id
173  *      The ID number of the expression that matched. If the expression was a
174  *      single expression compiled with @ref ch_compile(), this value will be
175  *      zero.
176  *
177  * @param from
178  *      The offset of the first byte that matches the expression.
179  *
180  * @param to
181  *      The offset after the last byte that matches the expression.
182  *
183  * @param flags
184  *      This is provided for future use and is unused at present.
185  *
186  * @param size
187  *      The number of valid entries pointed to by the captured parameter.
188  *
189  * @param captured
190  *      A pointer to an array of @ref ch_capture_t structures that
191  *      contain the start and end offsets of entire pattern match and
192  *      each captured subexpression.
193  *
194  * @param ctx
195  *      The pointer supplied by the user to the @ref ch_scan() function.
196  *
197  * @return
198  *      The callback can return @ref CH_CALLBACK_TERMINATE to stop matching.
199  *      Otherwise, a return value of @ref CH_CALLBACK_CONTINUE will continue,
200  *      with the current pattern if configured to produce multiple matches per
201  *      pattern, while a return value of @ref CH_CALLBACK_SKIP_PATTERN will
202  *      cease matching this pattern but continue matching the next pattern.
203  */
204 typedef ch_callback_t (HS_CDECL *ch_match_event_handler)(unsigned int id,
205                                                 unsigned long long from,
206                                                 unsigned long long to,
207                                                 unsigned int flags,
208                                                 unsigned int size,
209                                                 const ch_capture_t *captured,
210                                                 void *ctx);
211 
212 /**
213  * Definition of the Chimera error event callback function type.
214  *
215  * A callback function matching the defined type may be provided by the
216  * application calling the @ref ch_scan function. This callback function
217  * will be invoked when an error event occurs during matching; this indicates
218  * that some matches for a given expression may not be reported.
219  *
220  * @param error_type
221  *      The type of error event that occurred. Currently these errors
222  *      correspond to resource limits on PCRE backtracking
223  *      @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT.
224  *
225  * @param id
226  *      The ID number of the expression that matched.
227  *
228  * @param info
229  *      Event-specific data, for future use. Currently unused.
230  *
231  * @param ctx
232  *      The context pointer supplied by the user to the @ref ch_scan
233  *      function.
234  *
235  * @return
236  *      The callback can return @ref CH_CALLBACK_SKIP_PATTERN to cease matching
237  *      this pattern but continue matching the next pattern. Otherwise, we stop
238  *      matching for all patterns with @ref CH_CALLBACK_TERMINATE.
239  */
240  typedef ch_callback_t (HS_CDECL *ch_error_event_handler)(
241                                                  ch_error_event_t error_type,
242                                                  unsigned int id, void *info,
243                                                  void *ctx);
244 
245 /**
246  * The block regular expression scanner.
247  *
248  * This is the function call in which the actual pattern matching takes place
249  * for block-mode pattern databases.
250  *
251  * @param db
252  *      A compiled pattern database.
253  *
254  * @param data
255  *      Pointer to the data to be scanned.
256  *
257  * @param length
258  *      The number of bytes to scan.
259  *
260  * @param flags
261  *      Flags modifying the behaviour of this function. This parameter is
262  *      provided for future use and is unused at present.
263  *
264  * @param scratch
265  *      A per-thread scratch space allocated by @ref ch_alloc_scratch() for this
266  *      database.
267  *
268  * @param onEvent
269  *      Pointer to a match event callback function. If a NULL pointer is given,
270  *      no matches will be returned.
271  *
272  * @param onError
273  *      Pointer to a error event callback function. If a NULL pointer is given,
274  *      @ref CH_ERROR_MATCHLIMIT and @ref CH_ERROR_RECURSIONLIMIT errors will
275  *      be ignored and match will continue.
276  *
277  * @param context
278  *      The user defined pointer which will be passed to the callback function.
279  *
280  * @return
281  *      Returns @ref CH_SUCCESS on success; @ref CH_SCAN_TERMINATED if the
282  *      match callback indicated that scanning should stop; other values on
283  *      error.
284  */
285 ch_error_t HS_CDECL ch_scan(const ch_database_t *db, const char *data,
286                             unsigned int length, unsigned int flags,
287                             ch_scratch_t *scratch,
288                             ch_match_event_handler onEvent,
289                             ch_error_event_handler onError,
290                             void *context);
291 
292 /**
293  * Allocate a "scratch" space for use by Chimera.
294  *
295  * This is required for runtime use, and one scratch space per thread, or
296  * concurrent caller, is required. Any allocator callback set by @ref
297  * ch_set_scratch_allocator() or @ref ch_set_allocator() will be used by this
298  * function.
299  *
300  * @param db
301  *      The database, as produced by @ref ch_compile().
302  *
303  * @param scratch
304  *      On first allocation, a pointer to NULL should be provided so a new
305  *      scratch can be allocated. If a scratch block has been previously
306  *      allocated, then a pointer to it should be passed back in to see if it
307  *      is valid for this database block. If a new scratch block is required,
308  *      the original will be freed and the new one returned, otherwise the
309  *      previous scratch block will be returned. On success, the scratch block
310  *      will be suitable for use with the provided database in addition to any
311  *      databases that original scratch space was suitable for.
312  *
313  * @return
314  *      @ref CH_SUCCESS on successful allocation; @ref CH_NOMEM if the
315  *      allocation fails.  Other errors may be returned if invalid parameters
316  *      are specified.
317  */
318 ch_error_t HS_CDECL ch_alloc_scratch(const ch_database_t *db,
319                                      ch_scratch_t **scratch);
320 
321 /**
322  * Allocate a scratch space that is a clone of an existing scratch space.
323  *
324  * This is useful when multiple concurrent threads will be using the same set
325  * of compiled databases, and another scratch space is required. Any allocator
326  * callback set by @ref ch_set_scratch_allocator() or @ref ch_set_allocator()
327  * will be used by this function.
328  *
329  * @param src
330  *      The existing @ref ch_scratch_t to be cloned.
331  *
332  * @param dest
333  *      A pointer to the new scratch space will be returned here.
334  *
335  * @return
336  *      @ref CH_SUCCESS on success; @ref CH_NOMEM if the allocation fails.
337  *      Other errors may be returned if invalid parameters are specified.
338  */
339 ch_error_t HS_CDECL ch_clone_scratch(const ch_scratch_t *src,
340                                      ch_scratch_t **dest);
341 
342 /**
343  * Provides the size of the given scratch space.
344  *
345  * @param scratch
346  *      A per-thread scratch space allocated by @ref ch_alloc_scratch() or @ref
347  *      ch_clone_scratch().
348  *
349  * @param scratch_size
350  *      On success, the size of the scratch space in bytes is placed in this
351  *      parameter.
352  *
353  * @return
354  *      @ref CH_SUCCESS on success, other values on failure.
355  */
356 ch_error_t HS_CDECL ch_scratch_size(const ch_scratch_t *scratch,
357                                     size_t *scratch_size);
358 
359 /**
360  * Free a scratch block previously allocated by @ref ch_alloc_scratch() or @ref
361  * ch_clone_scratch().
362  *
363  * The free callback set by @ref ch_set_scratch_allocator() or @ref
364  * ch_set_allocator() will be used by this function.
365  *
366  * @param scratch
367  *      The scratch block to be freed. NULL may also be safely provided.
368  *
369  * @return
370  *      @ref CH_SUCCESS on success, other values on failure.
371  */
372 ch_error_t HS_CDECL ch_free_scratch(ch_scratch_t *scratch);
373 
374 #ifdef __cplusplus
375 } /* extern "C" */
376 #endif
377 
378 #endif /* CH_RUNTIME_H_ */
379