1 /* Copyright (C) 2007-2010 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22  *
23  * Bs2Bm use a simple context array to determine the charactes
24  * that are not present on the pattern. This way on partial matches
25  * broken by a char not present, we can skip to the next character
26  * making less checks
27  */
28 
29 #include "suricata-common.h"
30 #include "suricata.h"
31 
32 #include "util-spm-bs2bm.h"
33 
34 /**
35  * \brief Array setup function for Bs2Bm of bad characters index (not found at the needle)
36  *
37  * \param neddle pointer to the pattern we ar searching for
38  * \param needle_len length limit of the needle
39  * \param badchars pointer to an empty array of bachars. The array prepared contains
40  *                 characters that can't be inside the needle_len. So the skips can be
41  *                 faster
42  */
Bs2BmBadchars(const uint8_t * needle,uint16_t needle_len,uint8_t * badchars)43 void Bs2BmBadchars(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
44 {
45     uint32_t i;
46     for (i = 0; i < ALPHABET_SIZE; i++)
47         badchars[i] = 1;
48 
49     /* set to 0 the values where index as ascii is present
50      * because they are not badchars
51      */
52     for (i = 0; i < needle_len; i++)
53         badchars[needle[i]] = 0;
54 }
55 
56 /**
57  * \brief Array setup function for Bs2BmNocase of bad characters index (not found at the needle)
58  *
59  * \param neddle pointer to the pattern we ar searching for
60  * \param needle_len length limit of the needle
61  * \param badchars pointer to an empty array of bachars. The array prepared contains
62  *                 characters that can't be inside the needle_len. So the skips can be
63  *                 faster
64  */
Bs2BmBadcharsNocase(const uint8_t * needle,uint16_t needle_len,uint8_t * badchars)65 void Bs2BmBadcharsNocase(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
66 {
67     uint32_t i;
68     for (i = 0; i < ALPHABET_SIZE; i++)
69         badchars[i] = 1;
70 
71     /* set to 0 the values where index as ascii is present
72      * because they are not badchars
73      */
74     for (i = 0; i < needle_len; i++) {
75         badchars[u8_tolower(needle[i])] = 0;
76     }
77 }
78 
79 
80 /**
81  * \brief Basic search with a bad characters array. The array badchars contains
82  *        flags at character's ascii index that can't be inside the needle. So the skips can be
83  *        faster
84  *
85  * \param haystack pointer to the buffer to search in
86  * \param haystack_len length limit of the buffer
87  * \param neddle pointer to the pattern we ar searching for
88  * \param needle_len length limit of the needle
89  * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
90  *
91  * \retval ptr to start of the match; NULL if no match
92  */
Bs2Bm(const uint8_t * haystack,uint32_t haystack_len,const uint8_t * needle,uint16_t needle_len,uint8_t badchars[])93 uint8_t * Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, uint8_t badchars[])
94 {
95     const uint8_t *h, *n;
96     const uint8_t *hmax = haystack + haystack_len;
97     const uint8_t *nmax = needle + needle_len;
98 
99     if (needle_len == 0 || needle_len > haystack_len)
100         return NULL;
101 
102     for (n = needle; nmax - n <= hmax - haystack; haystack++) {
103         if (*haystack != *n) {
104             continue;
105         }
106         /* one byte needles */
107         if (needle_len == 1)
108             return (uint8_t *)haystack;
109 
110         for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
111             if (*h != *n) {
112                 if (badchars[*h] == 1) {
113                     /* skip it! */
114                     haystack = h;
115                 }
116                 break;
117             }
118             /* if we run out of needle we fully matched */
119             if (n == nmax - 1 ) {
120                 return (uint8_t *)haystack;
121             }
122         }
123         n = needle;
124     }
125 
126     return NULL;
127 }
128 
129 /**
130  * \brief Basic search case less with a bad characters array. The array badchars contains
131  *        flags at character's ascii index that can't be inside the needle. So the skips can be
132  *        faster
133  *
134  * \param haystack pointer to the buffer to search in
135  * \param haystack_len length limit of the buffer
136  * \param neddle pointer to the pattern we ar searching for
137  * \param needle_len length limit of the needle
138  * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
139  *
140  * \retval ptr to start of the match; NULL if no match
141  */
Bs2BmNocase(const uint8_t * haystack,uint32_t haystack_len,const uint8_t * needle,uint16_t needle_len,uint8_t badchars[])142 uint8_t *Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, uint8_t badchars[])
143 {
144     const uint8_t *h, *n;
145     const uint8_t *hmax = haystack + haystack_len;
146     const uint8_t *nmax = needle + needle_len;
147 
148     if (needle_len == 0 || needle_len > haystack_len)
149         return NULL;
150 
151     for (n = needle; nmax - n <= hmax - haystack; haystack++) {
152         if (u8_tolower(*haystack) != u8_tolower(*n)) {
153             continue;
154         }
155         /* one byte needles */
156         if (needle_len == 1)
157             return (uint8_t *)haystack;
158 
159         for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
160             if (u8_tolower(*h) != u8_tolower(*n)) {
161                 if (badchars[u8_tolower(*h)] == 1) {
162                     /* skip it! */
163                     haystack = h;
164                 }
165                 break;
166             }
167             /* if we run out of needle we fully matched */
168             if (n == nmax - 1) {
169                 return (uint8_t *)haystack;
170             }
171         }
172         n = needle;
173     }
174 
175     return NULL;
176 }
177