1 /* Copyright (C) 2007-2010 Open Information Security Foundation
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
18 /**
19 * \file
20 *
21 * \author Pablo Rincon Crespo <pablo.rincon.crespo@gmail.com>
22 *
23 * Bs2Bm use a simple context array to determine the charactes
24 * that are not present on the pattern. This way on partial matches
25 * broken by a char not present, we can skip to the next character
26 * making less checks
27 */
28
29 #include "suricata-common.h"
30 #include "suricata.h"
31
32 #include "util-spm-bs2bm.h"
33
34 /**
35 * \brief Array setup function for Bs2Bm of bad characters index (not found at the needle)
36 *
37 * \param neddle pointer to the pattern we ar searching for
38 * \param needle_len length limit of the needle
39 * \param badchars pointer to an empty array of bachars. The array prepared contains
40 * characters that can't be inside the needle_len. So the skips can be
41 * faster
42 */
Bs2BmBadchars(const uint8_t * needle,uint16_t needle_len,uint8_t * badchars)43 void Bs2BmBadchars(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
44 {
45 uint32_t i;
46 for (i = 0; i < ALPHABET_SIZE; i++)
47 badchars[i] = 1;
48
49 /* set to 0 the values where index as ascii is present
50 * because they are not badchars
51 */
52 for (i = 0; i < needle_len; i++)
53 badchars[needle[i]] = 0;
54 }
55
56 /**
57 * \brief Array setup function for Bs2BmNocase of bad characters index (not found at the needle)
58 *
59 * \param neddle pointer to the pattern we ar searching for
60 * \param needle_len length limit of the needle
61 * \param badchars pointer to an empty array of bachars. The array prepared contains
62 * characters that can't be inside the needle_len. So the skips can be
63 * faster
64 */
Bs2BmBadcharsNocase(const uint8_t * needle,uint16_t needle_len,uint8_t * badchars)65 void Bs2BmBadcharsNocase(const uint8_t *needle, uint16_t needle_len, uint8_t *badchars)
66 {
67 uint32_t i;
68 for (i = 0; i < ALPHABET_SIZE; i++)
69 badchars[i] = 1;
70
71 /* set to 0 the values where index as ascii is present
72 * because they are not badchars
73 */
74 for (i = 0; i < needle_len; i++) {
75 badchars[u8_tolower(needle[i])] = 0;
76 }
77 }
78
79
80 /**
81 * \brief Basic search with a bad characters array. The array badchars contains
82 * flags at character's ascii index that can't be inside the needle. So the skips can be
83 * faster
84 *
85 * \param haystack pointer to the buffer to search in
86 * \param haystack_len length limit of the buffer
87 * \param neddle pointer to the pattern we ar searching for
88 * \param needle_len length limit of the needle
89 * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
90 *
91 * \retval ptr to start of the match; NULL if no match
92 */
Bs2Bm(const uint8_t * haystack,uint32_t haystack_len,const uint8_t * needle,uint16_t needle_len,uint8_t badchars[])93 uint8_t * Bs2Bm(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, uint8_t badchars[])
94 {
95 const uint8_t *h, *n;
96 const uint8_t *hmax = haystack + haystack_len;
97 const uint8_t *nmax = needle + needle_len;
98
99 if (needle_len == 0 || needle_len > haystack_len)
100 return NULL;
101
102 for (n = needle; nmax - n <= hmax - haystack; haystack++) {
103 if (*haystack != *n) {
104 continue;
105 }
106 /* one byte needles */
107 if (needle_len == 1)
108 return (uint8_t *)haystack;
109
110 for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
111 if (*h != *n) {
112 if (badchars[*h] == 1) {
113 /* skip it! */
114 haystack = h;
115 }
116 break;
117 }
118 /* if we run out of needle we fully matched */
119 if (n == nmax - 1 ) {
120 return (uint8_t *)haystack;
121 }
122 }
123 n = needle;
124 }
125
126 return NULL;
127 }
128
129 /**
130 * \brief Basic search case less with a bad characters array. The array badchars contains
131 * flags at character's ascii index that can't be inside the needle. So the skips can be
132 * faster
133 *
134 * \param haystack pointer to the buffer to search in
135 * \param haystack_len length limit of the buffer
136 * \param neddle pointer to the pattern we ar searching for
137 * \param needle_len length limit of the needle
138 * \param badchars pointer to an array of bachars prepared by Bs2BmBadchars()
139 *
140 * \retval ptr to start of the match; NULL if no match
141 */
Bs2BmNocase(const uint8_t * haystack,uint32_t haystack_len,const uint8_t * needle,uint16_t needle_len,uint8_t badchars[])142 uint8_t *Bs2BmNocase(const uint8_t *haystack, uint32_t haystack_len, const uint8_t *needle, uint16_t needle_len, uint8_t badchars[])
143 {
144 const uint8_t *h, *n;
145 const uint8_t *hmax = haystack + haystack_len;
146 const uint8_t *nmax = needle + needle_len;
147
148 if (needle_len == 0 || needle_len > haystack_len)
149 return NULL;
150
151 for (n = needle; nmax - n <= hmax - haystack; haystack++) {
152 if (u8_tolower(*haystack) != u8_tolower(*n)) {
153 continue;
154 }
155 /* one byte needles */
156 if (needle_len == 1)
157 return (uint8_t *)haystack;
158
159 for (h = haystack+1, n++; nmax - n <= hmax - haystack; h++, n++) {
160 if (u8_tolower(*h) != u8_tolower(*n)) {
161 if (badchars[u8_tolower(*h)] == 1) {
162 /* skip it! */
163 haystack = h;
164 }
165 break;
166 }
167 /* if we run out of needle we fully matched */
168 if (n == nmax - 1) {
169 return (uint8_t *)haystack;
170 }
171 }
172 n = needle;
173 }
174
175 return NULL;
176 }
177