1 /* Copyright (C) 2007-2014 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18 /**
19 * \file
20 *
21 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22 *
23 * Boyer Moore simple pattern matcher implementation
24 *
25 * Boyer Moore algorithm has a really good performance. It need two arrays
26 * of context for each pattern that hold applicable shifts on the text
27 * to seach in, based on characters not available in the pattern
28 * and combinations of characters that start a sufix of the pattern.
29 * If possible, we should store the context of patterns that we are going
30 * to search for multiple times, so we don't spend time on rebuilding them.
31 */
32
33 #include "suricata-common.h"
34 #include "suricata.h"
35
36 #include "util-spm-bm.h"
37 #include "util-spm.h"
38 #include "util-debug.h"
39 #include "util-error.h"
40 #include "util-memcpy.h"
41
42 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
43 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
44 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
45 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
46 uint16_t *suff);
47 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
48
49 /**
50 * \brief Given a BmCtx structure, recreate the pre/suffixes for
51 * nocase
52 *
53 * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
54 * \param str pointer to the pattern string
55 * \param size length of the string
56 */
BoyerMooreCtxToNocase(BmCtx * bm_ctx,uint8_t * needle,uint16_t needle_len)57 void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
58 {
59 /* Store the content as lower case to make searching faster */
60 memcpy_tolower(needle, needle, needle_len);
61
62 /* Prepare bad chars with nocase chars */
63 PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
64
65 /* Prepare good Suffixes with nocase chars */
66 PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
67 }
68
69 /**
70 * \brief Setup a Booyer Moore context.
71 *
72 * \param str pointer to the pattern string
73 * \param size length of the string
74 * \retval BmCtx pointer to the newly created Context for the pattern
75 * \initonly BoyerMoore contexts should be created at init
76 */
BoyerMooreCtxInit(const uint8_t * needle,uint16_t needle_len)77 BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
78 {
79 BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
80 if (unlikely(new == NULL)) {
81 FatalError(SC_ERR_FATAL,
82 "Fatal error encountered in BoyerMooreCtxInit. Exiting...");
83 }
84
85 /* Prepare bad chars */
86 PreBmBc(needle, needle_len, new->bmBc);
87
88 /* Prepare good Suffixes */
89 if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
90 FatalError(SC_ERR_FATAL,
91 "Fatal error encountered in BooyerMooreCtxInit. Exiting...");
92 }
93
94
95 return new;
96 }
97
98 /**
99 * \brief Setup a Booyer Moore context for nocase search
100 *
101 * \param str pointer to the pattern string
102 * \param size length of the string
103 * \retval BmCtx pointer to the newly created Context for the pattern
104 * \initonly BoyerMoore contexts should be created at init
105 */
BoyerMooreNocaseCtxInit(uint8_t * needle,uint16_t needle_len)106 BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
107 {
108 BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
109
110 BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
111
112 return bm_ctx;
113 }
114
115 /**
116 * \brief Free the memory allocated to Booyer Moore context.
117 *
118 * \param bmCtx pointer to the Context for the pattern
119 */
BoyerMooreCtxDeInit(BmCtx * bmctx)120 void BoyerMooreCtxDeInit(BmCtx *bmctx)
121 {
122 SCEnter();
123 if (bmctx == NULL)
124 SCReturn;
125
126 SCFree(bmctx);
127
128 SCReturn;
129 }
130 /**
131 * \brief Array setup function for bad characters that split the pattern
132 * Remember that the result array should be the length of ALPHABET_SIZE
133 *
134 * \param str pointer to the pattern string
135 * \param size length of the string
136 * \param result pointer to an empty array that will hold the badchars
137 */
PreBmBc(const uint8_t * x,uint16_t m,uint16_t * bmBc)138 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
139 {
140 int32_t i;
141
142 for (i = 0; i < 256; ++i) {
143 bmBc[i] = m;
144 }
145 for (i = 0; i < m - 1; ++i) {
146 bmBc[(unsigned char)x[i]] = m - i - 1;
147 }
148 }
149
150 /**
151 * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
152 *
153 * \param x pointer to the pattern string
154 * \param m length of the string
155 * \param suff pointer to an empty array that will hold the prefixes (shifts)
156 */
BoyerMooreSuffixes(const uint8_t * x,uint16_t m,uint16_t * suff)157 static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
158 {
159 int32_t f = 0, g, i;
160 suff[m - 1] = m;
161 g = m - 1;
162 for (i = m - 2; i >= 0; --i) {
163 if (i > g && suff[i + m - 1 - f] < i - g)
164 suff[i] = suff[i + m - 1 - f];
165 else {
166 if (i < g)
167 g = i;
168 f = i;
169 while (g >= 0 && x[g] == x[g + m - 1 - f])
170 --g;
171 suff[i] = f - g;
172 }
173 }
174 }
175
176 /**
177 * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
178 *
179 * \param x pointer to the pattern string
180 * \param m length of the string
181 * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
182 * \retval 0 ok, -1 failed
183 */
PreBmGs(const uint8_t * x,uint16_t m,uint16_t * bmGs)184 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
185 {
186 int32_t i, j;
187 uint16_t suff[m + 1];
188
189 BoyerMooreSuffixes(x, m, suff);
190
191 for (i = 0; i < m; ++i)
192 bmGs[i] = m;
193
194 j = 0;
195
196 for (i = m - 1; i >= -1; --i)
197 if (i == -1 || suff[i] == i + 1)
198 for (; j < m - 1 - i; ++j)
199 if (bmGs[j] == m)
200 bmGs[j] = m - 1 - i;
201
202 for (i = 0; i <= m - 2; ++i)
203 bmGs[m - 1 - suff[i]] = m - 1 - i;
204 return 0;
205 }
206
207 /**
208 * \brief Array setup function for bad characters that split the pattern
209 * Remember that the result array should be the length of ALPHABET_SIZE
210 *
211 * \param str pointer to the pattern string
212 * \param size length of the string
213 * \param result pointer to an empty array that will hold the badchars
214 */
PreBmBcNocase(const uint8_t * x,uint16_t m,uint16_t * bmBc)215 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
216 {
217 int32_t i;
218
219 for (i = 0; i < 256; ++i) {
220 bmBc[i] = m;
221 }
222 for (i = 0; i < m - 1; ++i) {
223 bmBc[u8_tolower((unsigned char)x[i])] = m - 1 - i;
224 bmBc[u8_toupper((unsigned char)x[i])] = m - 1 - i;
225 }
226 }
227
BoyerMooreSuffixesNocase(const uint8_t * x,uint16_t m,uint16_t * suff)228 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
229 uint16_t *suff)
230 {
231 int32_t f = 0, g, i;
232
233 suff[m - 1] = m;
234 g = m - 1;
235 for (i = m - 2; i >= 0; --i) {
236 if (i > g && suff[i + m - 1 - f] < i - g) {
237 suff[i] = suff[i + m - 1 - f];
238 } else {
239 if (i < g) {
240 g = i;
241 }
242 f = i;
243 while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
244 --g;
245 }
246 suff[i] = f - g;
247 }
248 }
249 }
250
251 /**
252 * \brief Array setup function for building prefixes (shift for valid prefixes)
253 * for boyermoore context case less
254 *
255 * \param x pointer to the pattern string
256 * \param m length of the string
257 * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
258 */
PreBmGsNocase(const uint8_t * x,uint16_t m,uint16_t * bmGs)259 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
260 {
261 int32_t i, j;
262 uint16_t suff[m + 1];
263
264 BoyerMooreSuffixesNocase(x, m, suff);
265
266 for (i = 0; i < m; ++i) {
267 bmGs[i] = m;
268 }
269 j = 0;
270 for (i = m - 1; i >= 0; --i) {
271 if (i == -1 || suff[i] == i + 1) {
272 for (; j < m - 1 - i; ++j) {
273 if (bmGs[j] == m) {
274 bmGs[j] = m - 1 - i;
275 }
276 }
277 }
278 }
279 for (i = 0; i <= m - 2; ++i) {
280 bmGs[m - 1 - suff[i]] = m - 1 - i;
281 }
282 }
283
284 /**
285 * \brief Boyer Moore search algorithm
286 * Is better as the pattern length increases and for big buffers to search in.
287 * The algorithm needs a context of two arrays already prepared
288 * by prep_bad_chars() and prep_good_suffix()
289 *
290 * \param y pointer to the buffer to search in
291 * \param n length limit of the buffer
292 * \param x pointer to the pattern we ar searching for
293 * \param m length limit of the needle
294 * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
295 * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
296 *
297 * \retval ptr to start of the match; NULL if no match
298 */
BoyerMoore(const uint8_t * x,uint16_t m,const uint8_t * y,uint32_t n,BmCtx * bm_ctx)299 uint8_t *BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
300 {
301 uint16_t *bmGs = bm_ctx->bmGs;
302 uint16_t *bmBc = bm_ctx->bmBc;
303
304 int i, j, m1, m2;
305 int32_t int_n;
306 #if 0
307 printf("\nBad:\n");
308 for (i=0;i<ALPHABET_SIZE;i++)
309 printf("%c,%d ", i, bmBc[i]);
310
311 printf("\ngood:\n");
312 for (i=0;i<m;i++)
313 printf("%c, %d ", x[i],bmBc[i]);
314 printf("\n");
315 #endif
316 // force casting to int32_t (if possible)
317 int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
318 j = 0;
319 while (j <= int_n - m ) {
320 for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
321
322 if (i < 0) {
323 return (uint8_t *)(y + j);
324 //j += bmGs[0];
325 } else {
326 // printf("%c", y[i+j]);
327 j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)? m1: m2;
328 // printf("%d, %d\n", m1, m2);
329 }
330 }
331 return NULL;
332 }
333
334
335 /**
336 * \brief Boyer Moore search algorithm
337 * Is better as the pattern length increases and for big buffers to search in.
338 * The algorithm needs a context of two arrays already prepared
339 * by prep_bad_chars() and prep_good_suffix()
340 *
341 * \param y pointer to the buffer to search in
342 * \param n length limit of the buffer
343 * \param x pointer to the pattern we ar searching for
344 * \param m length limit of the needle
345 * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
346 * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
347 *
348 * \retval ptr to start of the match; NULL if no match
349 */
BoyerMooreNocase(const uint8_t * x,uint16_t m,const uint8_t * y,uint32_t n,BmCtx * bm_ctx)350 uint8_t *BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
351 {
352 uint16_t *bmGs = bm_ctx->bmGs;
353 uint16_t *bmBc = bm_ctx->bmBc;
354 int i, j, m1, m2;
355 int32_t int_n;
356 #if 0
357 printf("\nBad:\n");
358 for (i=0;i<ALPHABET_SIZE;i++)
359 printf("%c,%d ", i, bmBc[i]);
360
361 printf("\ngood:\n");
362 for (i=0;i<m;i++)
363 printf("%c, %d ", x[i],bmBc[i]);
364 printf("\n");
365 #endif
366 // force casting to int32_t (if possible)
367 int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
368 j = 0;
369 while (j <= int_n - m ) {
370 /* x is stored in lowercase. */
371 for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
372
373 if (i < 0) {
374 return (uint8_t *)(y + j);
375 } else {
376 j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)?
377 m1: m2;
378 }
379 }
380 return NULL;
381 }
382
383 typedef struct SpmBmCtx_ {
384 BmCtx *bm_ctx;
385 uint8_t *needle;
386 uint16_t needle_len;
387 int nocase;
388 } SpmBmCtx;
389
BMInitCtx(const uint8_t * needle,uint16_t needle_len,int nocase,SpmGlobalThreadCtx * global_thread_ctx)390 static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
391 SpmGlobalThreadCtx *global_thread_ctx)
392 {
393 SpmCtx *ctx = SCMalloc(sizeof(SpmCtx));
394 if (ctx == NULL) {
395 SCLogDebug("Unable to alloc SpmCtx.");
396 return NULL;
397 }
398 memset(ctx, 0, sizeof(*ctx));
399 ctx->matcher = SPM_BM;
400
401 SpmBmCtx *sctx = SCMalloc(sizeof(SpmBmCtx));
402 if (sctx == NULL) {
403 SCLogDebug("Unable to alloc SpmBmCtx.");
404 SCFree(ctx);
405 return NULL;
406 }
407 memset(sctx, 0, sizeof(*sctx));
408
409 sctx->needle = SCMalloc(needle_len);
410 if (sctx->needle == NULL) {
411 SCLogDebug("Unable to alloc string.");
412 SCFree(sctx);
413 SCFree(ctx);
414 return NULL;
415 }
416 memcpy(sctx->needle, needle, needle_len);
417 sctx->needle_len = needle_len;
418
419 if (nocase) {
420 sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
421 sctx->nocase = 1;
422 } else {
423 sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
424 sctx->nocase = 0;
425 }
426
427 ctx->ctx = sctx;
428 return ctx;
429 }
430
BMDestroyCtx(SpmCtx * ctx)431 static void BMDestroyCtx(SpmCtx *ctx)
432 {
433 if (ctx == NULL) {
434 return;
435 }
436
437 SpmBmCtx *sctx = ctx->ctx;
438 if (sctx != NULL) {
439 BoyerMooreCtxDeInit(sctx->bm_ctx);
440 if (sctx->needle != NULL) {
441 SCFree(sctx->needle);
442 }
443 SCFree(sctx);
444 }
445
446 SCFree(ctx);
447 }
448
BMScan(const SpmCtx * ctx,SpmThreadCtx * thread_ctx,const uint8_t * haystack,uint32_t haystack_len)449 static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
450 const uint8_t *haystack, uint32_t haystack_len)
451 {
452 const SpmBmCtx *sctx = ctx->ctx;
453
454 if (sctx->nocase) {
455 return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
456 haystack_len, sctx->bm_ctx);
457 } else {
458 return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
459 haystack_len, sctx->bm_ctx);
460 }
461 }
462
BMInitGlobalThreadCtx(void)463 static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
464 {
465 SpmGlobalThreadCtx *global_thread_ctx = SCMalloc(sizeof(SpmGlobalThreadCtx));
466 if (global_thread_ctx == NULL) {
467 SCLogDebug("Unable to alloc SpmThreadCtx.");
468 return NULL;
469 }
470 memset(global_thread_ctx, 0, sizeof(*global_thread_ctx));
471 global_thread_ctx->matcher = SPM_BM;
472 return global_thread_ctx;
473 }
474
BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx * global_thread_ctx)475 static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
476 {
477 if (global_thread_ctx == NULL) {
478 return;
479 }
480 SCFree(global_thread_ctx);
481 }
482
BMDestroyThreadCtx(SpmThreadCtx * thread_ctx)483 static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
484 {
485 if (thread_ctx == NULL) {
486 return;
487 }
488 SCFree(thread_ctx);
489 }
490
BMMakeThreadCtx(const SpmGlobalThreadCtx * global_thread_ctx)491 static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
492 SpmThreadCtx *thread_ctx = SCMalloc(sizeof(SpmThreadCtx));
493 if (thread_ctx == NULL) {
494 SCLogDebug("Unable to alloc SpmThreadCtx.");
495 return NULL;
496 }
497 memset(thread_ctx, 0, sizeof(*thread_ctx));
498 thread_ctx->matcher = SPM_BM;
499 return thread_ctx;
500 }
501
SpmBMRegister(void)502 void SpmBMRegister(void)
503 {
504 spm_table[SPM_BM].name = "bm";
505 spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
506 spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
507 spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
508 spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
509 spm_table[SPM_BM].InitCtx = BMInitCtx;
510 spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
511 spm_table[SPM_BM].Scan = BMScan;
512 }
513