1 /*
2  * Copyright (c) 2015-2018, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #ifndef HS_RUNTIME_H_
30 #define HS_RUNTIME_H_
31 
32 #include <stdlib.h>
33 
34 /**
35  * @file
36  * @brief The Hyperscan runtime API definition.
37  *
38  * Hyperscan is a high speed regular expression engine.
39  *
40  * This header contains functions for using compiled Hyperscan databases for
41  * scanning data at runtime.
42  */
43 
44 #include "hs_common.h"
45 
46 #ifdef __cplusplus
47 extern "C"
48 {
49 #endif
50 
51 /**
52  * Forward declaration of the stream type; its layout is not defined here.
53  */
54 struct hs_stream;
55 
56 /**
57  * The stream handle type; instances are produced by @ref hs_open_stream().
58  */
59 typedef struct hs_stream hs_stream_t;
60 
61 struct hs_scratch; /* Forward declaration; layout is not defined here. */
62 
63 /**
64  * A Hyperscan scratch space, as allocated by @ref hs_alloc_scratch().
65  */
66 typedef struct hs_scratch hs_scratch_t;
67 
68 /**
69  * Definition of the match event callback function type.
70  *
71  * A callback function matching the defined type must be provided by the
72  * application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
73  * hs_scan_stream() functions (or other streaming calls which can produce
74  * matches).
75  *
76  * This callback function will be invoked whenever a match is located in the
77  * target data during the execution of a scan. The details of the match are
78  * passed in as parameters to the callback function, and the callback function
79  * should return a value indicating whether or not matching should continue on
80  * the target data. If no callbacks are desired from a scan call, NULL may be
81  * provided in order to suppress match production.
82  *
83  * This callback function should not attempt to call Hyperscan API functions on
84  * the same stream nor should it attempt to reuse the scratch space allocated
85  * for the API calls that caused it to be triggered. Making another call to the
86  * Hyperscan library with completely independent parameters should work (for
87  * example, scanning a different database in a new stream and with new scratch
88  * space), but reusing data structures like stream state and/or scratch space
89  * will produce undefined behavior.
90  *
91  * @param id
92  *      The ID number of the expression that matched. If the expression was a
93  *      single expression compiled with @ref hs_compile(), this value will be
94  *      zero.
95  *
96  * @param from
97  *      - If a start of match flag is enabled for the current pattern, this
98  *        argument will be set to the start of match for the pattern, provided
99  *        that this start of match value lies within the current 'start of
100  *        match horizon' chosen by one of the SOM_HORIZON mode flags.
101  *
102  *      - If the start of match value lies outside this horizon (possible only
103  *        when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
104  *        the @p from value will be set to @ref HS_OFFSET_PAST_HORIZON.
105  *
106  *      - This argument will be set to zero if the Start of Match flag is not
107  *        enabled for the given pattern.
108  *
109  * @param to
110  *      The offset after the last byte that matches the expression.
111  *
112  * @param flags
113  *      This is provided for future use and is unused at present.
114  *
115  * @param context
116  *      The pointer supplied by the user to the @ref hs_scan(), @ref
117  *      hs_scan_vector() or @ref hs_scan_stream() function.
118  *
119  * @return
120  *      Non-zero if the matching should cease, else zero. If scanning is
121  *      performed in streaming mode and a non-zero value is returned, any
122  *      subsequent calls to @ref hs_scan_stream() for that stream will
123  *      immediately return with @ref HS_SCAN_TERMINATED.
124  */
125 typedef int (HS_CDECL *match_event_handler)(unsigned int id,
126                                             unsigned long long from,
127                                             unsigned long long to,
128                                             unsigned int flags,
129                                             void *context);
130 
131 /**
132  * Open and initialise a stream.
133  *
134  * @param db
135  *      A compiled pattern database.
136  *
137  * @param flags
138  *      Flags modifying the behaviour of the stream. This parameter is provided
139  *      for future use and is unused at present.
140  *
141  * @param stream
142  *      Output parameter. On success, a pointer to the generated @ref
143  *      hs_stream_t is written here; NULL on failure.
144  *
145  * @return
146  *      @ref HS_SUCCESS on success, other values on failure.
147  */
148 hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db, unsigned int flags,
149                                    hs_stream_t **stream);
150 
151 /**
152  * Write data to be scanned to the opened stream.
153  *
154  * This is the function call in which the actual pattern matching takes place
155  * as data is written to the stream. Matches are reported through the @ref
156  * match_event_handler callback supplied in @p onEvent.
157  *
158  * @param id
159  *      The stream ID (returned by @ref hs_open_stream()) to which the data
160  *      will be written.
161  *
162  * @param data
163  *      Pointer to the data to be scanned.
164  *
165  * @param length
166  *      The number of bytes of @p data to scan.
167  *
168  * @param flags
169  *      Flags modifying the behaviour of the stream. This parameter is provided
170  *      for future use and is unused at present.
171  *
172  * @param scratch
173  *      A per-thread scratch space allocated by @ref hs_alloc_scratch().
174  *
175  * @param onEvent
176  *      Pointer to a match event callback function. If a NULL pointer is given,
177  *      no matches will be returned.
178  *
179  * @param ctxt
180  *      The user defined pointer which will be passed to the callback function
181  *      when a match occurs.
182  *
183  * @return
184  *      Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
185  *      match callback indicated that scanning should stop; other values on
186  *      error.
187  */
188 hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
189                                    unsigned int length, unsigned int flags,
190                                    hs_scratch_t *scratch,
191                                    match_event_handler onEvent, void *ctxt);
192 
193 /**
194  * Close a stream.
195  *
196  * This function completes matching on the given stream and frees the memory
197  * associated with the stream state. After this call, the stream pointed to by
198  * @p id is invalid and can no longer be used. To reuse the stream state after
199  * completion, rather than closing it, the @ref hs_reset_stream() function can
200  * be used.
201  *
202  * This function must be called for any stream created with @ref
203  * hs_open_stream(), even if scanning has been terminated by a non-zero return
204  * from the match callback function.
205  *
206  * Note: This operation may result in matches being returned (via calls to the
207  * match event callback) for expressions anchored to the end of the data stream
208  * (for example, via the use of the `$` meta-character). If these matches are
209  * not desired, NULL may be provided as the @ref match_event_handler callback.
210  *
211  * If NULL is provided as the @ref match_event_handler callback, it is
212  * permissible to provide a NULL scratch.
213  *
214  * @param id
215  *      The stream ID returned by @ref hs_open_stream().
216  *
217  * @param scratch
218  *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
219  *      allowed to be NULL only if the @p onEvent callback is also NULL.
220  *
221  * @param onEvent
222  *      Pointer to a match event callback function. If a NULL pointer is given,
223  *      no matches will be returned.
224  *
225  * @param ctxt
226  *      The user defined pointer which will be passed to the callback function
227  *      when a match occurs.
228  *
229  * @return
230  *      Returns @ref HS_SUCCESS on success, other values on failure.
231  */
232 hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
233                                     match_event_handler onEvent, void *ctxt);
234 
235 /**
236  * Reset a stream to an initial state.
237  *
238  * Conceptually, this is equivalent to performing @ref hs_close_stream() on the
239  * given stream, followed by a call to @ref hs_open_stream(). This new stream
240  * replaces the original stream in memory, avoiding the overhead of freeing the
241  * old stream and allocating the new one.
242  *
243  * Note: This operation may result in matches being returned (via calls to the
244  * match event callback) for expressions anchored to the end of the original
245  * data stream (for example, via the use of the `$` meta-character). If these
246  * matches are not desired, NULL may be provided as the @ref match_event_handler
247  * callback.
248  *
249  * Note: the reset stream remains tied to the same database.
250  *
251  * @param id
252  *      The stream (as created by @ref hs_open_stream()) to be replaced.
253  *
254  * @param flags
255  *      Flags modifying the behaviour of the stream. This parameter is provided
256  *      for future use and is unused at present.
257  *
258  * @param scratch
259  *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
260  *      allowed to be NULL only if the @p onEvent callback is also NULL.
261  *
262  * @param onEvent
263  *      Pointer to a match event callback function. If a NULL pointer is given,
264  *      no matches will be returned.
265  *
266  * @param context
267  *      The user defined pointer which will be passed to the callback function
268  *      when a match occurs.
269  *
270  * @return
271  *      @ref HS_SUCCESS on success, other values on failure.
272  */
273 hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, unsigned int flags,
274                                     hs_scratch_t *scratch,
275                                     match_event_handler onEvent, void *context);
276 
277 /**
278  * Duplicate the given stream. The new stream will have the same state as the
279  * original including the current stream offset.
280  *
281  * @param to_id
282  *      Output parameter. On success, a pointer to the new, copied @ref
283  *      hs_stream_t is written here; NULL on failure.
284  *
285  * @param from_id
286  *      The stream (as created by @ref hs_open_stream()) to be copied.
287  *
288  * @return
289  *      @ref HS_SUCCESS on success, other values on failure.
290  */
291 hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
292                                    const hs_stream_t *from_id);
293 
294 /**
295  * Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
296  * will first be reset (reporting any EOD matches if a non-NULL @p onEvent
297  * callback handler is provided).
298  *
299  * Note: the 'to' stream and the 'from' stream must be open against the same
300  * database.
301  *
302  * @param to_id
303  *      The existing stream (as created by @ref hs_open_stream()) that is
304  *      reset and then overwritten with the state of @p from_id.
305  *
306  * @param from_id
307  *      The stream (as created by @ref hs_open_stream()) to be copied.
308  *
309  * @param scratch
310  *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
311  *      allowed to be NULL only if the @p onEvent callback is also NULL.
312  *
313  * @param onEvent
314  *      Pointer to a match event callback function. If a NULL pointer is given,
315  *      no matches will be returned.
316  *
317  * @param context
318  *      The user defined pointer which will be passed to the callback function
319  *      when a match occurs.
320  *
321  * @return
322  *      @ref HS_SUCCESS on success, other values on failure.
323  */
324 hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
325                                              const hs_stream_t *from_id,
326                                              hs_scratch_t *scratch,
327                                              match_event_handler onEvent,
328                                              void *context);
329 
330 /**
331  * Creates a compressed representation of the provided stream in the buffer
332  * provided. This compressed representation can be converted back into a stream
333  * state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream().
334  * The size of the compressed representation will be placed into @p used_space.
335  *
336  * If there is not sufficient space in the buffer to hold the compressed
337  * representation, @ref HS_INSUFFICIENT_SPACE will be returned and @p used_space
338  * will be populated with the amount of space required.
339  *
340  * Note: this function does not close the provided stream; you may continue to
341  * use the stream or to free it with @ref hs_close_stream().
342  *
343  * @param stream
344  *      The stream (as created by @ref hs_open_stream()) to be compressed.
345  *
346  * @param buf
347  *      Buffer to write the compressed representation into. Note: if the call is
348  *      just being used to determine the amount of space required, it is allowed
349  *      to pass NULL here and @p buf_space as 0.
350  *
351  * @param buf_space
352  *      The number of bytes in @p buf. If buf_space is too small, the call will
353  *      fail with @ref HS_INSUFFICIENT_SPACE.
354  *
355  * @param used_space
356  *      Pointer to where the amount of used space will be written to. On
357  *      success, the used buffer space is less than or equal to @p buf_space.
358  *      If the call fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be
359  *      used to write out the amount of buffer space required.
360  *
361  * @return
362  *      @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided
363  *      buffer is too small.
364  */
365 hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,
366                                        size_t buf_space, size_t *used_space);
367 
368 /**
369  * Decompresses a compressed representation created by @ref hs_compress_stream()
370  * into a new stream.
371  *
372  * Note: @p buf must correspond to a complete compressed representation created
373  * by @ref hs_compress_stream() of a stream that was opened against @p db. It is
374  * not always possible to detect misuse of this API and behaviour is undefined
375  * if these properties are not satisfied.
376  *
377  * @param db
378  *      The compiled pattern database that the compressed stream was opened
379  *      against.
380  *
381  * @param stream
382  *      Output parameter. On success, a pointer to the expanded @ref
383  *      hs_stream_t is written here; NULL on failure.
384  *
385  * @param buf
386  *      A compressed representation of a stream. These compressed forms are
387  *      created by @ref hs_compress_stream().
388  *
389  * @param buf_size
390  *      The size in bytes of the compressed representation.
391  *
392  * @return
393  *      @ref HS_SUCCESS on success, other values on failure.
394  */
395 hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db,
396                                      hs_stream_t **stream, const char *buf,
397                                      size_t buf_size);
398 
399 /**
400  * Decompresses a compressed representation created by @ref hs_compress_stream()
401  * on top of the 'to' stream. The 'to' stream will first be reset (reporting
402  * any EOD matches if a non-NULL @p onEvent callback handler is provided).
403  *
404  * Note: the 'to' stream must be opened against the same database as the
405  * compressed stream.
406  *
407  * Note: @p buf must correspond to a complete compressed representation created
408  * by @ref hs_compress_stream() of a stream that was opened against the same
409  * database as @p to_stream. It is not always possible to detect misuse of this
410  * API and behaviour is undefined if these properties are not satisfied.
411  *
412  * @param to_stream
413  *      A pointer to a valid stream state, which is reset and then overwritten
414  *      with the expanded stream state.
415  *
416  * @param buf
417  *      A compressed representation of a stream. These compressed forms are
418  *      created by @ref hs_compress_stream().
419  *
420  * @param buf_size
421  *      The size in bytes of the compressed representation.
422  *
423  * @param scratch
424  *      A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
425  *      allowed to be NULL only if the @p onEvent callback is also NULL.
426  *
427  * @param onEvent
428  *      Pointer to a match event callback function. If a NULL pointer is given,
429  *      no matches will be returned.
430  *
431  * @param context
432  *      The user defined pointer which will be passed to the callback function
433  *      when a match occurs.
434  *
435  * @return
436  *      @ref HS_SUCCESS on success, other values on failure.
437  */
438 hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream,
439                                                const char *buf, size_t buf_size,
440                                                hs_scratch_t *scratch,
441                                                match_event_handler onEvent,
442                                                void *context);
443 
444 /**
445  * The block (non-streaming) regular expression scanner.
446  *
447  * This is the function call in which the actual pattern matching takes place
448  * for block-mode pattern databases.
449  *
450  * @param db
451  *      A compiled pattern database.
452  *
453  * @param data
454  *      Pointer to the data to be scanned.
455  *
456  * @param length
457  *      The number of bytes of @p data to scan.
458  *
459  * @param flags
460  *      Flags modifying the behaviour of this function. This parameter is
461  *      provided for future use and is unused at present.
462  *
463  * @param scratch
464  *      A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
465  *      database.
466  *
467  * @param onEvent
468  *      Pointer to a match event callback function. If a NULL pointer is given,
469  *      no matches will be returned.
470  *
471  * @param context
472  *      The user-defined pointer passed to the callback when a match occurs.
473  *
474  * @return
475  *      Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
476  *      match callback indicated that scanning should stop; other values on
477  *      error.
478  */
479 hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
480                             unsigned int length, unsigned int flags,
481                             hs_scratch_t *scratch, match_event_handler onEvent,
482                             void *context);
483 
484 /**
485  * The vectored regular expression scanner.
486  *
487  * This is the function call in which the actual pattern matching takes place
488  * for vectoring-mode pattern databases.
489  *
490  * @param db
491  *      A compiled pattern database.
492  *
493  * @param data
494  *      An array of pointers to the data blocks to be scanned.
495  *
496  * @param length
497  *      An array of lengths (in bytes) of each data block to scan.
498  *
499  * @param count
500  *      Number of data blocks to scan. This should correspond to the size of
501  *      the @p data and @p length arrays.
502  *
503  * @param flags
504  *      Flags modifying the behaviour of this function. This parameter is
505  *      provided for future use and is unused at present.
506  *
507  * @param scratch
508  *      A per-thread scratch space allocated by @ref hs_alloc_scratch() for
509  *      this database.
510  *
511  * @param onEvent
512  *      Pointer to a match event callback function. If a NULL pointer is given,
513  *      no matches will be returned.
514  *
515  * @param context
516  *      The user-defined pointer passed to the callback when a match occurs.
517  *
518  * @return
519  *      Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
520  *      callback indicated that scanning should stop; other values on error.
521  */
522 hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
523                                    const char *const *data,
524                                    const unsigned int *length,
525                                    unsigned int count, unsigned int flags,
526                                    hs_scratch_t *scratch,
527                                    match_event_handler onEvent, void *context);
528 
529 /**
530  * Allocate a "scratch" space for use by Hyperscan.
531  *
532  * This is required for runtime use, and one scratch space per thread, or
533  * concurrent caller, is required. Any allocator callback set by @ref
534  * hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
535  * function.
536  *
537  * @param db
538  *      The database, as produced by @ref hs_compile().
539  *
540  * @param scratch
541  *      On first allocation, a pointer to NULL should be provided so a new
542  *      scratch can be allocated. If a scratch block has been previously
543  *      allocated, then a pointer to it should be passed back in to see if it
544  *      is valid for this database block. If a new scratch block is required,
545  *      the original will be freed and the new one returned, otherwise the
546  *      previous scratch block will be returned. On success, the scratch block
547  *      will be suitable for use with the provided database in addition to any
548  *      databases that the original scratch space was suitable for.
549  *
550  * @return
551  *      @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
552  *      allocation fails.  Other errors may be returned if invalid parameters
553  *      are specified.
554  */
555 hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db,
556                                      hs_scratch_t **scratch);
557 
558 /**
559  * Allocate a scratch space that is a clone of an existing scratch space.
560  *
561  * This is useful when multiple concurrent threads will be using the same set
562  * of compiled databases, and another scratch space is required. Any allocator
563  * callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
564  * will be used by this function.
565  *
566  * @param src
567  *      The existing @ref hs_scratch_t to be cloned.
568  *
569  * @param dest
570  *      Output parameter: a pointer to the new scratch space is written here.
571  *
572  * @return
573  *      @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
574  *      Other errors may be returned if invalid parameters are specified.
575  */
576 hs_error_t HS_CDECL hs_clone_scratch(const hs_scratch_t *src,
577                                      hs_scratch_t **dest);
578 
579 /**
580  * Provides the size of the given scratch space.
581  *
582  * @param scratch
583  *      A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
584  *      hs_clone_scratch().
585  *
586  * @param scratch_size
587  *      Output parameter. On success, the size of the scratch space in bytes
588  *      is written here.
589  *
590  * @return
591  *      @ref HS_SUCCESS on success, other values on failure.
592  */
593 hs_error_t HS_CDECL hs_scratch_size(const hs_scratch_t *scratch,
594                                     size_t *scratch_size);
595 
596 /**
597  * Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
598  * hs_clone_scratch().
599  *
600  * The free callback set by @ref hs_set_scratch_allocator() or @ref
601  * hs_set_allocator() will be used by this function.
602  *
603  * @param scratch
604  *      The scratch block to be freed. It is also safe to pass NULL.
605  *
606  * @return
607  *      @ref HS_SUCCESS on success, other values on failure.
608  */
609 hs_error_t HS_CDECL hs_free_scratch(hs_scratch_t *scratch);
610 
611 /**
612  * Match callback 'from' argument value, indicating that the start of this
613  * match was too early to be tracked with the requested SOM_HORIZON precision.
614  */
615 #define HS_OFFSET_PAST_HORIZON    (~0ULL)
616 
617 #ifdef __cplusplus
618 } /* extern "C" */
619 #endif
620 
621 #endif /* HS_RUNTIME_H_ */
622