1 /*
2  * Copyright (c) 2015-2019, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #ifndef HS_COMMON_H_
30 #define HS_COMMON_H_
31 
32 #if defined(_WIN32)
33 #define HS_CDECL    __cdecl /* Windows: API functions are declared __cdecl */
34 #else
35 #define HS_CDECL            /* other platforms: expands to nothing */
36 #endif /* defined(_WIN32) */
37 #include <stdlib.h>
38 
39 /**
40  * @file
41  * @brief The Hyperscan common API definition.
42  *
43  * Hyperscan is a high speed regular expression engine.
44  *
45  * This header contains functions available to both the Hyperscan compiler and
46  * runtime.
47  */
48 
49 #ifdef __cplusplus
50 extern "C"
51 {
52 #endif
53 
54 struct hs_database; /* forward declaration; the layout is not defined here */
55 
56 /**
57  * Opaque type representing a Hyperscan pattern database.
58  *
59  * Generated by one of the Hyperscan compiler functions:
60  *  - @ref hs_compile()
61  *  - @ref hs_compile_multi()
62  *  - @ref hs_compile_ext_multi()
63  */
64 typedef struct hs_database hs_database_t;
65 
66 /**
67  * A type for errors returned by Hyperscan functions; see @ref HS_ERROR.
68  */
69 typedef int hs_error_t;
70 
71 /**
72  * Free a compiled pattern database.
73  *
74  * The free callback set by @ref hs_set_database_allocator() (or @ref
75  * hs_set_allocator()) will be used by this function.
76  *
77  * @param db
78  *      A compiled pattern database, or NULL (in which case the function does
79  *      nothing). Not for use with @ref hs_deserialize_database_at() output.
80  *
81  * @return
82  *      @ref HS_SUCCESS on success, other values on failure.
83  */
84 hs_error_t HS_CDECL hs_free_database(hs_database_t *db);
85 
86 /**
87  * Serialize a pattern database to a stream of bytes.
88  *
89  * The byte array is allocated with the allocator callback set by @ref
90  * hs_set_misc_allocator() (or @ref hs_set_allocator()).
91  *
92  * @param db
93  *      A compiled pattern database.
94  *
95  * @param bytes
96  *      On success, a pointer to an array of bytes will be returned here.
97  *      These bytes can be subsequently relocated or written to disk. The
98  *      caller is responsible for freeing this block.
99  *
100  * @param length
101  *      On success, the number of bytes in the generated byte array will be
102  *      returned here.
103  *
104  * @return
105  *      @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be
106  *      allocated; other values may be returned if errors are detected.
107  */
108 hs_error_t HS_CDECL hs_serialize_database(const hs_database_t *db, char **bytes,
109                                           size_t *length);
110 
111 /**
112  * Reconstruct a pattern database from a stream of bytes previously generated
113  * by @ref hs_serialize_database().
114  *
115  * This function will allocate sufficient space for the database using the
116  * allocator set with @ref hs_set_database_allocator() (or @ref
117  * hs_set_allocator()); to use a pre-allocated region of memory, use the @ref
118  * hs_deserialize_database_at() function.
119  *
120  * @param bytes
121  *      A byte array generated by @ref hs_serialize_database() representing a
122  *      compiled pattern database.
123  *
124  * @param length
125  *      The length, in bytes, of the byte array. This should be the same value
126  *      as that returned through the @p length parameter of @ref
127  *      hs_serialize_database().
128  *
129  * @param db
130  *      On success, a pointer to a newly allocated @ref hs_database_t will be
131  *      returned here. This database can then be used for scanning, and
132  *      eventually freed by the caller using @ref hs_free_database().
133  *
134  * @return
135  *      @ref HS_SUCCESS on success, other values on failure.
136  */
137 hs_error_t HS_CDECL hs_deserialize_database(const char *bytes,
138                                             size_t length,
139                                             hs_database_t **db);
140 
141 /**
142  * Reconstruct a pattern database from a stream of bytes previously generated
143  * by @ref hs_serialize_database() at a given memory location.
144  *
145  * This function (unlike @ref hs_deserialize_database()) will write the
146  * reconstructed database to the memory location given in the @p db parameter.
147  * The amount of space required at this location can be determined with the
148  * @ref hs_serialized_database_size() function.
149  *
150  * @param bytes
151  *      A byte array generated by @ref hs_serialize_database() representing a
152  *      compiled pattern database.
153  *
154  * @param length
155  *      The length, in bytes, of the byte array. This should be the same value
156  *      as that returned through the @p length parameter of @ref
157  *      hs_serialize_database().
158  *
159  * @param db
160  *      Pointer to an 8-byte aligned block of memory of sufficient size to hold
161  *      the deserialized database. On success, the reconstructed database will
162  *      be written to this location. This database can then be used for pattern
163  *      matching. The user is responsible for freeing this memory; the @ref
164  *      hs_free_database() call should not be used.
165  *
166  * @return
167  *      @ref HS_SUCCESS on success, other values on failure.
168  */
169 hs_error_t HS_CDECL hs_deserialize_database_at(const char *bytes,
170                                                size_t length,
171                                                hs_database_t *db);
172 
173 /**
174  * Provides the size, in bytes, of the stream state allocated by a single
175  * stream opened against the given database.
176  *
177  * @param database
178  *      Pointer to a compiled (streaming mode) pattern database.
179  *
180  * @param stream_size
181  *      On success, the size in bytes of an individual stream opened against the
182  *      given database is placed in this parameter.
183  *
184  * @return
185  *      @ref HS_SUCCESS on success, other values on failure.
186  */
187 hs_error_t HS_CDECL hs_stream_size(const hs_database_t *database,
188                                    size_t *stream_size);
189 
190 /**
191  * Provides the size of the given database in bytes.
192  *
193  * @param database
194  *      Pointer to a compiled pattern database.
195  *
196  * @param database_size
197  *      On success, the size of the compiled database in bytes is placed in this
198  *      parameter.
199  *
200  * @return
201  *      @ref HS_SUCCESS on success, other values on failure.
202  */
203 hs_error_t HS_CDECL hs_database_size(const hs_database_t *database,
204                                      size_t *database_size);
205 
206 /**
207  * Utility function for reporting the size that would be required by a
208  * database if it were deserialized.
209  *
210  * This can be used to allocate a shared memory region or other "special"
211  * allocation prior to deserializing with the @ref hs_deserialize_database_at()
212  * function.
213  *
214  * @param bytes
215  *      Pointer to a byte array generated by @ref hs_serialize_database()
216  *      representing a compiled pattern database.
217  *
218  * @param length
219  *      The length, in bytes, of the byte array. This should be the same value
220  *      as that returned through the @p length parameter of @ref
221  *      hs_serialize_database().
222  *
223  * @param deserialized_size
224  *      On success, the size of the compiled database that would be generated
225  *      by @ref hs_deserialize_database_at() is returned here.
226  *
227  * @return
228  *      @ref HS_SUCCESS on success, other values on failure.
229  */
230 hs_error_t HS_CDECL hs_serialized_database_size(const char *bytes,
231                                                 size_t length,
232                                                 size_t *deserialized_size);
233 
234 /**
235  * Utility function providing information about a database.
236  *
237  * @param database
238  *      Pointer to a compiled database.
239  *
240  * @param info
241  *      On success, a string containing the version and platform information for
242  *      the supplied database is placed in this parameter. The string is
243  *      allocated using the allocator supplied in @ref hs_set_misc_allocator()
244  *      (or malloc() if no allocator was set) and should be freed by the caller.
245  *
246  * @return
247  *      @ref HS_SUCCESS on success, other values on failure.
248  */
249 hs_error_t HS_CDECL hs_database_info(const hs_database_t *database,
250                                      char **info);
251 
252 /**
253  * Utility function providing information about a serialized database.
254  *
255  * @param bytes
256  *      Pointer to a serialized database.
257  *
258  * @param length
259  *      Length in bytes of the serialized database.
260  *
261  * @param info
262  *      On success, a string containing the version and platform information
263  *      for the supplied serialized database is placed in this parameter. The
264  *      string is allocated using the allocator supplied in @ref
265  *      hs_set_misc_allocator() (or malloc() if no allocator was set) and
266  *      should be freed by the caller.
267  *
268  * @return
269  *      @ref HS_SUCCESS on success, other values on failure.
270  */
271 hs_error_t HS_CDECL hs_serialized_database_info(const char *bytes,
272                                                 size_t length, char **info);
273 
274 /**
275  * The type of the callback function that will be used by Hyperscan to allocate
276  * more memory at runtime as required, for example in @ref hs_open_stream() to
277  * allocate stream state.
278  *
279  * If Hyperscan is to be used in a multi-threaded (or similarly concurrent)
280  * environment, the allocation function will need to be re-entrant, or
281  * similarly safe for concurrent use.
282  *
283  * @param size
284  *      The number of bytes to allocate.
285  * @return
286  *      A pointer to the region of memory allocated, or NULL on error.
287  */
288 typedef void *(HS_CDECL *hs_alloc_t)(size_t size);
289 
290 /**
291  * The type of the callback function that will be used by Hyperscan to free
292  * memory regions previously allocated using the @ref hs_alloc_t function.
293  *
294  * @param ptr
295  *      The region of memory to be freed; the callback returns no value.
296  */
297 typedef void (HS_CDECL *hs_free_t)(void *ptr);
298 
299 /**
300  * Set the allocate and free functions used by Hyperscan for allocating
301  * memory at runtime for stream state, scratch space, database bytecode,
302  * and various other data structures returned by the Hyperscan API.
303  *
304  * The function is equivalent to calling @ref hs_set_stream_allocator(),
305  * @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and
306  * @ref hs_set_misc_allocator() with the provided parameters.
307  *
308  * This call will override any previous allocators that have been set.
309  *
310  * Note: there is no way to change the allocator used for temporary objects
311  * created during the various compile calls (@ref hs_compile(), @ref
312  * hs_compile_multi(), @ref hs_compile_ext_multi()).
313  *
314  * @param alloc_func
315  *      A callback function pointer that allocates memory. This function must
316  *      return memory suitably aligned for the largest representable data type
317  *      on this platform.
318  *
319  * @param free_func
320  *      A callback function pointer that frees allocated memory.
321  *
322  * @return
323  *      @ref HS_SUCCESS on success, other values on failure.
324  */
325 hs_error_t HS_CDECL hs_set_allocator(hs_alloc_t alloc_func,
326                                      hs_free_t free_func);
327 
328 /**
329  * Set the allocate and free functions used by Hyperscan for allocating memory
330  * for database bytecode produced by the compile calls (@ref hs_compile(), @ref
331  * hs_compile_multi(), @ref hs_compile_ext_multi()) and by database
332  * deserialization (@ref hs_deserialize_database()).
333  *
334  * If no database allocation functions are set, or if NULL is used in place of
335  * both parameters, then memory allocation will default to standard methods
336  * (such as the system malloc() and free() calls).
337  *
338  * This call will override any previous database allocators that have been set.
339  *
340  * Note: the database allocator may also be set by calling @ref
341  * hs_set_allocator(), which sets all of the allocators at once.
342  *
343  * Note: there is no way to change how temporary objects created during the
344  * various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref
345  * hs_compile_ext_multi()) are allocated.
346  *
347  * @param alloc_func
348  *      A callback function pointer that allocates memory. This function must
349  *      return memory suitably aligned for the largest representable data type
350  *      on this platform.
351  *
352  * @param free_func
353  *      A callback function pointer that frees allocated memory.
354  *
355  * @return
356  *      @ref HS_SUCCESS on success, other values on failure.
357  */
358 hs_error_t HS_CDECL hs_set_database_allocator(hs_alloc_t alloc_func,
359                                               hs_free_t free_func);
360 
361 /**
362  * Set the allocate and free functions used by Hyperscan for allocating memory
363  * for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref
364  * hs_expr_info_t, serialized databases and database info strings.
365  *
366  * If no misc allocation functions are set, or if NULL is used in place of both
367  * parameters, then memory allocation will default to standard methods (such as
368  * the system malloc() and free() calls).
369  *
370  * This call will override any previous misc allocators that have been set.
371  *
372  * Note: the misc allocator may also be set by calling @ref hs_set_allocator().
373  *
374  * @param alloc_func
375  *      A callback function pointer that allocates memory. This function must
376  *      return memory suitably aligned for the largest representable data type
377  *      on this platform.
378  *
379  * @param free_func
380  *      A callback function pointer that frees allocated memory.
381  *
382  * @return
383  *      @ref HS_SUCCESS on success, other values on failure.
384  */
385 hs_error_t HS_CDECL hs_set_misc_allocator(hs_alloc_t alloc_func,
386                                           hs_free_t free_func);
387 
388 /**
389  * Set the allocate and free functions used by Hyperscan for allocating memory
390  * for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch().
391  *
392  * If no scratch allocation functions are set, or if NULL is used in place of
393  * both parameters, then memory allocation will default to standard methods
394  * (such as the system malloc() and free() calls).
395  *
396  * This call will override any previous scratch allocators that have been set.
397  *
398  * Note: the scratch allocator may also be set by calling @ref
399  * hs_set_allocator(), which sets all of the allocators at once.
400  *
401  * @param alloc_func
402  *      A callback function pointer that allocates memory. This function must
403  *      return memory suitably aligned for the largest representable data type
404  *      on this platform.
405  *
406  * @param free_func
407  *      A callback function pointer that frees allocated memory.
408  *
409  * @return
410  *      @ref HS_SUCCESS on success, other values on failure.
411  */
412 hs_error_t HS_CDECL hs_set_scratch_allocator(hs_alloc_t alloc_func,
413                                              hs_free_t free_func);
414 
415 /**
416  * Set the allocate and free functions used by Hyperscan for allocating memory
417  * for stream state by @ref hs_open_stream().
418  *
419  * If no stream allocation functions are set, or if NULL is used in place of
420  * both parameters, then memory allocation will default to standard methods
421  * (such as the system malloc() and free() calls).
422  *
423  * This call will override any previous stream allocators that have been set.
424  *
425  * Note: the stream allocator may also be set by calling @ref
426  * hs_set_allocator(), which sets all of the allocators at once.
427  *
428  * @param alloc_func
429  *      A callback function pointer that allocates memory. This function must
430  *      return memory suitably aligned for the largest representable data type
431  *      on this platform.
432  *
433  * @param free_func
434  *      A callback function pointer that frees allocated memory.
435  *
436  * @return
437  *      @ref HS_SUCCESS on success, other values on failure.
438  */
439 hs_error_t HS_CDECL hs_set_stream_allocator(hs_alloc_t alloc_func,
440                                             hs_free_t free_func);
441 
442 /**
443  * Utility function for identifying this release version.
444  *
445  * @return
446  *      A string containing the version number of this release build and the
447  *      date of the build. It is allocated statically, so it must not be
448  *      freed by the caller.
449  */
450 const char * HS_CDECL hs_version(void);
451 
452 /**
453  * Utility function to test the current system architecture.
454  *
455  * Hyperscan requires the Supplemental Streaming SIMD Extensions 3 (SSSE3)
456  * instruction set. This function can be called on any x86 platform to
457  * determine if the system provides the required instruction set.
458  *
459  * This function does not test for more advanced features if Hyperscan has
460  * been built for a more specific architecture, for example the AVX2
461  * instruction set.
462  *
463  * @return
464  *      @ref HS_SUCCESS on success, @ref HS_ARCH_ERROR if system does not
465  *      support Hyperscan.
466  */
467 hs_error_t HS_CDECL hs_valid_platform(void);
468 
469 /**
470  * @defgroup HS_ERROR hs_error_t values
471  *
472  * @{
473  */
474 
475 /**
476  * The engine completed normally.
477  */
478 #define HS_SUCCESS              0
479 
480 /**
481  * A parameter passed to this function was invalid.
482  *
483  * This error is only returned in cases where the function can detect an
484  * invalid parameter -- it cannot be relied upon to detect (for example)
485  * pointers to freed memory or other invalid data.
486  */
487 #define HS_INVALID              (-1)
488 
489 /**
490  * A memory allocation failed.
491  */
492 #define HS_NOMEM                (-2)
493 
494 /**
495  * The engine was terminated by callback.
496  *
497  * This return value indicates that the target buffer was partially scanned,
498  * but that the callback function requested that scanning cease after a match
499  * was located.
500  */
501 #define HS_SCAN_TERMINATED      (-3)
502 
503 /**
504  * The pattern compiler failed, and the @ref hs_compile_error_t should be
505  * inspected for more detail.
506  */
507 #define HS_COMPILER_ERROR       (-4)
508 
509 /**
510  * The given database was built for a different version of Hyperscan.
511  */
512 #define HS_DB_VERSION_ERROR     (-5)
513 
514 /**
515  * The given database was built for a different platform (i.e., CPU type).
516  */
517 #define HS_DB_PLATFORM_ERROR    (-6)
518 
519 /**
520  * The given database was built for a different mode of operation. This error
521  * is returned when streaming calls are used with a block or vectored database
522  * and vice versa.
523  */
524 #define HS_DB_MODE_ERROR        (-7)
525 
526 /**
527  * A parameter passed to this function was not correctly aligned.
528  */
529 #define HS_BAD_ALIGN            (-8)
530 
531 /**
532  * The memory allocator (either malloc() or the allocator set with @ref
533  * hs_set_allocator()) did not correctly return memory suitably aligned for the
534  * largest representable data type on this platform.
535  */
536 #define HS_BAD_ALLOC            (-9)
537 
538 /**
539  * The scratch region was already in use.
540  *
541  * This error is returned when Hyperscan is able to detect that the scratch
542  * region given is already in use by another Hyperscan API call.
543  *
544  * A separate scratch region, allocated with @ref hs_alloc_scratch() or @ref
545  * hs_clone_scratch(), is required for every concurrent caller of the Hyperscan
546  * API.
547  *
548  * For example, this error might be returned when @ref hs_scan() has been
549  * called inside a callback delivered by a currently-executing @ref hs_scan()
550  * call using the same scratch region.
551  *
552  * Note: Not all concurrent uses of scratch regions may be detected. This error
553  * is intended as a best-effort debugging tool, not a guarantee.
554  */
555 #define HS_SCRATCH_IN_USE       (-10)
556 
557 /**
558  * Unsupported CPU architecture.
559  *
560  * This error is returned when Hyperscan is able to detect that the current
561  * system does not support the required instruction set.
562  *
563  * At a minimum, Hyperscan requires Supplemental Streaming SIMD Extensions 3
564  * (SSSE3).
565  */
566 #define HS_ARCH_ERROR           (-11)
567 
568 /**
569  * Provided buffer was too small.
570  *
571  * This error indicates that there was insufficient space in the buffer. The
572  * call should be repeated with a larger provided buffer.
573  *
574  * Note: in this situation, it is normal for the amount of space required to be
575  * returned in the same manner as the used space would have been returned if the
576  * call was successful.
577  */
578 #define HS_INSUFFICIENT_SPACE   (-12)
579 
580 /**
581  * Unexpected internal error.
582  *
583  * This error indicates that unexpected matching behavior was detected.
584  * This could be related to invalid usage of stream and scratch space or
585  * invalid memory operations by users.
586  *
587  */
588 #define HS_UNKNOWN_ERROR   (-13)
589 
590 /** @} */
591 
592 #ifdef __cplusplus
593 } /* extern "C" */
594 #endif
595 
596 #endif /* HS_COMMON_H_ */
597