1 /* Copyright (C) 2007-2014 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22  *
23  * Boyer Moore simple pattern matcher implementation
24  *
25  * Boyer Moore algorithm has a really good performance. It need two arrays
26  * of context for each pattern that hold applicable shifts on the text
27  * to seach in, based on characters not available in the pattern
28  * and combinations of characters that start a sufix of the pattern.
29  * If possible, we should store the context of patterns that we are going
30  * to search for multiple times, so we don't spend time on rebuilding them.
31  */
32 
33 #include "suricata-common.h"
34 #include "suricata.h"
35 
36 #include "util-spm-bm.h"
37 #include "util-spm.h"
38 #include "util-debug.h"
39 #include "util-error.h"
40 #include "util-memcpy.h"
41 
42 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
43 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
44 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
45 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
46                                      uint16_t *suff);
47 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
48 
49 /**
50  * \brief Given a BmCtx structure, recreate the pre/suffixes for
51  *        nocase
52  *
53  * \retval BmCtx pointer to the already created BmCtx (with BoyerMooreCtxInit())
54  * \param str pointer to the pattern string
55  * \param size length of the string
56  */
BoyerMooreCtxToNocase(BmCtx * bm_ctx,uint8_t * needle,uint16_t needle_len)57 void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len)
58 {
59     /* Store the content as lower case to make searching faster */
60     memcpy_tolower(needle, needle, needle_len);
61 
62     /* Prepare bad chars with nocase chars */
63     PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
64 
65     /* Prepare good Suffixes with nocase chars */
66     PreBmGsNocase(needle, needle_len, bm_ctx->bmGs);
67 }
68 
69 /**
70  * \brief Setup a Booyer Moore context.
71  *
72  * \param str pointer to the pattern string
73  * \param size length of the string
74  * \retval BmCtx pointer to the newly created Context for the pattern
75  * \initonly BoyerMoore contexts should be created at init
76  */
BoyerMooreCtxInit(const uint8_t * needle,uint16_t needle_len)77 BmCtx *BoyerMooreCtxInit(const uint8_t *needle, uint16_t needle_len)
78 {
79     BmCtx *new = SCMalloc(sizeof(BmCtx) + sizeof(uint16_t) * (needle_len + 1));
80     if (unlikely(new == NULL)) {
81         FatalError(SC_ERR_FATAL,
82                    "Fatal error encountered in BoyerMooreCtxInit. Exiting...");
83     }
84 
85     /* Prepare bad chars */
86     PreBmBc(needle, needle_len, new->bmBc);
87 
88     /* Prepare good Suffixes */
89     if (PreBmGs(needle, needle_len, new->bmGs) == -1) {
90         FatalError(SC_ERR_FATAL,
91                    "Fatal error encountered in BooyerMooreCtxInit. Exiting...");
92     }
93 
94 
95     return new;
96 }
97 
98 /**
99  * \brief Setup a Booyer Moore context for nocase search
100  *
101  * \param str pointer to the pattern string
102  * \param size length of the string
103  * \retval BmCtx pointer to the newly created Context for the pattern
104  * \initonly BoyerMoore contexts should be created at init
105  */
BoyerMooreNocaseCtxInit(uint8_t * needle,uint16_t needle_len)106 BmCtx *BoyerMooreNocaseCtxInit(uint8_t *needle, uint16_t needle_len)
107 {
108     BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needle_len);
109 
110     BoyerMooreCtxToNocase(bm_ctx, needle, needle_len);
111 
112     return bm_ctx;
113 }
114 
115 /**
116  * \brief Free the memory allocated to Booyer Moore context.
117  *
118  * \param bmCtx pointer to the Context for the pattern
119  */
BoyerMooreCtxDeInit(BmCtx * bmctx)120 void BoyerMooreCtxDeInit(BmCtx *bmctx)
121 {
122     SCEnter();
123     if (bmctx == NULL)
124         SCReturn;
125 
126     SCFree(bmctx);
127 
128     SCReturn;
129 }
130 /**
131  * \brief Array setup function for bad characters that split the pattern
132  *        Remember that the result array should be the length of ALPHABET_SIZE
133  *
134  * \param str pointer to the pattern string
135  * \param size length of the string
136  * \param result pointer to an empty array that will hold the badchars
137  */
PreBmBc(const uint8_t * x,uint16_t m,uint16_t * bmBc)138 static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
139 {
140     int32_t i;
141 
142     for (i = 0; i < 256; ++i) {
143         bmBc[i] = m;
144     }
145     for (i = 0; i < m - 1; ++i) {
146         bmBc[(unsigned char)x[i]] = m - i - 1;
147     }
148 }
149 
150 /**
151  * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
152  *
153  * \param x pointer to the pattern string
154  * \param m length of the string
155  * \param suff pointer to an empty array that will hold the prefixes (shifts)
156  */
BoyerMooreSuffixes(const uint8_t * x,uint16_t m,uint16_t * suff)157 static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff)
158 {
159     int32_t f = 0, g, i;
160     suff[m - 1] = m;
161     g = m - 1;
162     for (i = m - 2; i >= 0; --i) {
163         if (i > g && suff[i + m - 1 - f] < i - g)
164             suff[i] = suff[i + m - 1 - f];
165         else {
166             if (i < g)
167                 g = i;
168             f = i;
169             while (g >= 0 && x[g] == x[g + m - 1 - f])
170                 --g;
171             suff[i] = f - g;
172         }
173     }
174 }
175 
176 /**
177  * \brief Array setup function for building prefixes (shift for valid prefixes) for boyermoore context
178  *
179  * \param x pointer to the pattern string
180  * \param m length of the string
181  * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
182  * \retval 0 ok, -1 failed
183  */
PreBmGs(const uint8_t * x,uint16_t m,uint16_t * bmGs)184 static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
185 {
186     int32_t i, j;
187     uint16_t suff[m + 1];
188 
189     BoyerMooreSuffixes(x, m, suff);
190 
191     for (i = 0; i < m; ++i)
192         bmGs[i] = m;
193 
194     j = 0;
195 
196     for (i = m - 1; i >= -1; --i)
197         if (i == -1 || suff[i] == i + 1)
198             for (; j < m - 1 - i; ++j)
199                 if (bmGs[j] == m)
200                     bmGs[j] = m - 1 - i;
201 
202     for (i = 0; i <= m - 2; ++i)
203         bmGs[m - 1 - suff[i]] = m - 1 - i;
204     return 0;
205 }
206 
207 /**
208  * \brief Array setup function for bad characters that split the pattern
209  *        Remember that the result array should be the length of ALPHABET_SIZE
210  *
211  * \param str pointer to the pattern string
212  * \param size length of the string
213  * \param result pointer to an empty array that will hold the badchars
214  */
PreBmBcNocase(const uint8_t * x,uint16_t m,uint16_t * bmBc)215 static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
216 {
217     int32_t i;
218 
219     for (i = 0; i < 256; ++i) {
220         bmBc[i] = m;
221     }
222     for (i = 0; i < m - 1; ++i) {
223         bmBc[u8_tolower((unsigned char)x[i])] = m - 1 - i;
224         bmBc[u8_toupper((unsigned char)x[i])] = m - 1 - i;
225     }
226 }
227 
BoyerMooreSuffixesNocase(const uint8_t * x,uint16_t m,uint16_t * suff)228 static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
229                                      uint16_t *suff)
230 {
231     int32_t f = 0, g, i;
232 
233     suff[m - 1] = m;
234     g = m - 1;
235     for (i = m - 2; i >= 0; --i) {
236         if (i > g && suff[i + m - 1 - f] < i - g) {
237             suff[i] = suff[i + m - 1 - f];
238         } else {
239             if (i < g) {
240                 g = i;
241             }
242             f = i;
243             while (g >= 0 && u8_tolower(x[g]) == u8_tolower(x[g + m - 1 - f])) {
244                 --g;
245             }
246             suff[i] = f - g;
247         }
248     }
249 }
250 
251 /**
252  * \brief Array setup function for building prefixes (shift for valid prefixes)
253  *        for boyermoore context case less
254  *
255  * \param x pointer to the pattern string
256  * \param m length of the string
257  * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
258  */
PreBmGsNocase(const uint8_t * x,uint16_t m,uint16_t * bmGs)259 static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
260 {
261     int32_t i, j;
262     uint16_t suff[m + 1];
263 
264     BoyerMooreSuffixesNocase(x, m, suff);
265 
266     for (i = 0; i < m; ++i) {
267         bmGs[i] = m;
268     }
269     j = 0;
270     for (i = m - 1; i >= 0; --i) {
271         if (i == -1 || suff[i] == i + 1) {
272             for (; j < m - 1 - i; ++j) {
273                 if (bmGs[j] == m) {
274                     bmGs[j] = m - 1 - i;
275                 }
276             }
277         }
278     }
279     for (i = 0; i <= m - 2; ++i) {
280         bmGs[m - 1 - suff[i]] = m - 1 - i;
281     }
282 }
283 
284 /**
285  * \brief Boyer Moore search algorithm
286  *        Is better as the pattern length increases and for big buffers to search in.
287  *        The algorithm needs a context of two arrays already prepared
288  *        by prep_bad_chars() and prep_good_suffix()
289  *
290  * \param y pointer to the buffer to search in
291  * \param n length limit of the buffer
292  * \param x pointer to the pattern we ar searching for
293  * \param m length limit of the needle
294  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
295  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
296  *
297  * \retval ptr to start of the match; NULL if no match
298  */
BoyerMoore(const uint8_t * x,uint16_t m,const uint8_t * y,uint32_t n,BmCtx * bm_ctx)299 uint8_t *BoyerMoore(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
300 {
301     uint16_t *bmGs = bm_ctx->bmGs;
302     uint16_t *bmBc = bm_ctx->bmBc;
303 
304     int i, j, m1, m2;
305     int32_t int_n;
306 #if 0
307     printf("\nBad:\n");
308     for (i=0;i<ALPHABET_SIZE;i++)
309         printf("%c,%d ", i, bmBc[i]);
310 
311     printf("\ngood:\n");
312     for (i=0;i<m;i++)
313         printf("%c, %d ", x[i],bmBc[i]);
314     printf("\n");
315 #endif
316     // force casting to int32_t (if possible)
317     int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
318     j = 0;
319     while (j <= int_n - m ) {
320         for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i);
321 
322         if (i < 0) {
323             return (uint8_t *)(y + j);
324             //j += bmGs[0];
325         } else {
326 //          printf("%c", y[i+j]);
327             j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)? m1: m2;
328 //          printf("%d, %d\n", m1, m2);
329         }
330     }
331     return NULL;
332 }
333 
334 
335 /**
336  * \brief Boyer Moore search algorithm
337  *        Is better as the pattern length increases and for big buffers to search in.
338  *        The algorithm needs a context of two arrays already prepared
339  *        by prep_bad_chars() and prep_good_suffix()
340  *
341  * \param y pointer to the buffer to search in
342  * \param n length limit of the buffer
343  * \param x pointer to the pattern we ar searching for
344  * \param m length limit of the needle
345  * \param bmBc pointer to an array of BoyerMooreSuffixes prepared by prep_good_suffix()
346  * \param bmGs pointer to an array of bachars prepared by prep_bad_chars()
347  *
348  * \retval ptr to start of the match; NULL if no match
349  */
BoyerMooreNocase(const uint8_t * x,uint16_t m,const uint8_t * y,uint32_t n,BmCtx * bm_ctx)350 uint8_t *BoyerMooreNocase(const uint8_t *x, uint16_t m, const uint8_t *y, uint32_t n, BmCtx *bm_ctx)
351 {
352     uint16_t *bmGs = bm_ctx->bmGs;
353     uint16_t *bmBc = bm_ctx->bmBc;
354     int i, j, m1, m2;
355     int32_t int_n;
356 #if 0
357     printf("\nBad:\n");
358     for (i=0;i<ALPHABET_SIZE;i++)
359         printf("%c,%d ", i, bmBc[i]);
360 
361     printf("\ngood:\n");
362     for (i=0;i<m;i++)
363         printf("%c, %d ", x[i],bmBc[i]);
364     printf("\n");
365 #endif
366     // force casting to int32_t (if possible)
367     int_n = unlikely(n > INT32_MAX) ? INT32_MAX : n;
368     j = 0;
369     while (j <= int_n - m ) {
370          /* x is stored in lowercase. */
371         for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
372 
373         if (i < 0) {
374             return (uint8_t *)(y + j);
375         } else {
376             j += (m1 = bmGs[i]) > (m2 = bmBc[y[i + j]] - m + 1 + i)?
377                 m1: m2;
378         }
379     }
380     return NULL;
381 }
382 
383 typedef struct SpmBmCtx_ {
384     BmCtx *bm_ctx;
385     uint8_t *needle;
386     uint16_t needle_len;
387     int nocase;
388 } SpmBmCtx;
389 
BMInitCtx(const uint8_t * needle,uint16_t needle_len,int nocase,SpmGlobalThreadCtx * global_thread_ctx)390 static SpmCtx *BMInitCtx(const uint8_t *needle, uint16_t needle_len, int nocase,
391                          SpmGlobalThreadCtx *global_thread_ctx)
392 {
393     SpmCtx *ctx = SCMalloc(sizeof(SpmCtx));
394     if (ctx == NULL) {
395         SCLogDebug("Unable to alloc SpmCtx.");
396         return NULL;
397     }
398     memset(ctx, 0, sizeof(*ctx));
399     ctx->matcher = SPM_BM;
400 
401     SpmBmCtx *sctx = SCMalloc(sizeof(SpmBmCtx));
402     if (sctx == NULL) {
403         SCLogDebug("Unable to alloc SpmBmCtx.");
404         SCFree(ctx);
405         return NULL;
406     }
407     memset(sctx, 0, sizeof(*sctx));
408 
409     sctx->needle = SCMalloc(needle_len);
410     if (sctx->needle == NULL) {
411         SCLogDebug("Unable to alloc string.");
412         SCFree(sctx);
413         SCFree(ctx);
414         return NULL;
415     }
416     memcpy(sctx->needle, needle, needle_len);
417     sctx->needle_len = needle_len;
418 
419     if (nocase) {
420         sctx->bm_ctx = BoyerMooreNocaseCtxInit(sctx->needle, sctx->needle_len);
421         sctx->nocase = 1;
422     } else {
423         sctx->bm_ctx = BoyerMooreCtxInit(sctx->needle, sctx->needle_len);
424         sctx->nocase = 0;
425     }
426 
427     ctx->ctx = sctx;
428     return ctx;
429 }
430 
BMDestroyCtx(SpmCtx * ctx)431 static void BMDestroyCtx(SpmCtx *ctx)
432 {
433     if (ctx == NULL) {
434         return;
435     }
436 
437     SpmBmCtx *sctx = ctx->ctx;
438     if (sctx != NULL) {
439         BoyerMooreCtxDeInit(sctx->bm_ctx);
440         if (sctx->needle != NULL) {
441             SCFree(sctx->needle);
442         }
443         SCFree(sctx);
444     }
445 
446     SCFree(ctx);
447 }
448 
BMScan(const SpmCtx * ctx,SpmThreadCtx * thread_ctx,const uint8_t * haystack,uint32_t haystack_len)449 static uint8_t *BMScan(const SpmCtx *ctx, SpmThreadCtx *thread_ctx,
450                        const uint8_t *haystack, uint32_t haystack_len)
451 {
452     const SpmBmCtx *sctx = ctx->ctx;
453 
454     if (sctx->nocase) {
455         return BoyerMooreNocase(sctx->needle, sctx->needle_len, haystack,
456                                 haystack_len, sctx->bm_ctx);
457     } else {
458         return BoyerMoore(sctx->needle, sctx->needle_len, haystack,
459                           haystack_len, sctx->bm_ctx);
460     }
461 }
462 
BMInitGlobalThreadCtx(void)463 static SpmGlobalThreadCtx *BMInitGlobalThreadCtx(void)
464 {
465     SpmGlobalThreadCtx *global_thread_ctx = SCMalloc(sizeof(SpmGlobalThreadCtx));
466     if (global_thread_ctx == NULL) {
467         SCLogDebug("Unable to alloc SpmThreadCtx.");
468         return NULL;
469     }
470     memset(global_thread_ctx, 0, sizeof(*global_thread_ctx));
471     global_thread_ctx->matcher = SPM_BM;
472     return global_thread_ctx;
473 }
474 
BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx * global_thread_ctx)475 static void BMDestroyGlobalThreadCtx(SpmGlobalThreadCtx *global_thread_ctx)
476 {
477     if (global_thread_ctx == NULL) {
478         return;
479     }
480     SCFree(global_thread_ctx);
481 }
482 
BMDestroyThreadCtx(SpmThreadCtx * thread_ctx)483 static void BMDestroyThreadCtx(SpmThreadCtx *thread_ctx)
484 {
485     if (thread_ctx == NULL) {
486         return;
487     }
488     SCFree(thread_ctx);
489 }
490 
BMMakeThreadCtx(const SpmGlobalThreadCtx * global_thread_ctx)491 static SpmThreadCtx *BMMakeThreadCtx(const SpmGlobalThreadCtx *global_thread_ctx) {
492     SpmThreadCtx *thread_ctx = SCMalloc(sizeof(SpmThreadCtx));
493     if (thread_ctx == NULL) {
494         SCLogDebug("Unable to alloc SpmThreadCtx.");
495         return NULL;
496     }
497     memset(thread_ctx, 0, sizeof(*thread_ctx));
498     thread_ctx->matcher = SPM_BM;
499     return thread_ctx;
500 }
501 
SpmBMRegister(void)502 void SpmBMRegister(void)
503 {
504     spm_table[SPM_BM].name = "bm";
505     spm_table[SPM_BM].InitGlobalThreadCtx = BMInitGlobalThreadCtx;
506     spm_table[SPM_BM].DestroyGlobalThreadCtx = BMDestroyGlobalThreadCtx;
507     spm_table[SPM_BM].MakeThreadCtx = BMMakeThreadCtx;
508     spm_table[SPM_BM].DestroyThreadCtx = BMDestroyThreadCtx;
509     spm_table[SPM_BM].InitCtx = BMInitCtx;
510     spm_table[SPM_BM].DestroyCtx = BMDestroyCtx;
511     spm_table[SPM_BM].Scan = BMScan;
512 }
513