1 /*
2 * Copyright (c) 2015, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30 #include "gtest/gtest.h"
31 #include "hs.h"
32
33 #include <fstream>
34 #include <string>
35 #include <boost/algorithm/string/trim.hpp>
36
37 #include "util/expressions.h"
38 #include "util/ExpressionParser.h"
39
40 using namespace std;
41 using namespace testing;
42
43 class HyperscanFailBadPattern
44 : public TestWithParam<tuple<bool, const char*>> {
45 protected:
SetUp()46 virtual void SetUp() {
47 // compiles a database for this test instantiation
48 bool isStreaming;
49 tie(isStreaming, pattern) = GetParam();
50 err = hs_compile(pattern, 0,
51 isStreaming ? HS_MODE_STREAM : HS_MODE_BLOCK, nullptr,
52 &db, &compile_err);
53 }
54
TearDown()55 virtual void TearDown() {
56 hs_free_database(db);
57 hs_free_compile_error(compile_err);
58 }
59
60 hs_error_t err;
61 const char *pattern;
62 hs_compile_error_t *compile_err;
63 hs_database_t *db;
64 };
65
66 static const char *badPatterns[] = {
67 // vacuous patterns
68 "",
69 "a?",
70 "a*",
71 "(foo)?",
72 "(foo)*(bar)*",
73 "^arg|(foo)*(bar)*",
74 "foo|bar|",
75 "a*(b?)*c*",
76 "a{0,3}",
77 "[\\s\\S]*",
78 "[^\\s\\S]*",
79 // too big for our largest NFA
80 "(ewh|m?uit|f|snmv.g.gx[yofl]|.[^g][hbd])((.h|((y|vypfw|dfg{4}|x+|o.|y{8,}))+|k{9}t|cgp...gsk+)){17,}",
81 // illegal bounds
82 "fooa{0}",
83 "a{4,3}",
84 "a{2,1}",
85 // nothing to repeat
86 "a++",
87 "a+?+",
88 "a??",
89 "a?+",
90 "?qa",
91 "*abc",
92 "+abc",
93 // repeating boundaries is not allowed (UE-1007)
94 "^?0",
95 "^*0",
96 "^+0",
97 "^{1,3}0",
98 "0$?",
99 "0$*",
100 "0$+",
101 "0${1,3}",
102 // zero width asserts ("lookarounds")
103 "[a-z]+(?=;)", // positive lookahead
104 ".((?!\\x00\\x00)..)*?foo", // negative lookahead
105 "(?<=bullock|donkey)", // positive lookbehind
106 "(?<!foo)bar", // negative lookbehind
107 // embedded anchors
108 "foo^bar",
109 "foo$bar",
110 "(foo^bar|other)",
111 "(foo$bar|other)",
112 "$test",
113 "test^",
114 "foo(a|^)bar",
115 "a$^b",
116 "(^foo)+bar",
117 "foo(bar$)+",
118 "a^{3}=",
119 // atomic groups
120 "foobar(?>.{3,})bar",
121 // possessive quantifiers
122 "\\d++foo",
123 "(abc|xyz){2,3}+",
124 // back-reference inside a repeat (also too big, actually)
125 "^..\x02.{10,522}([^\00])\1{16}",
126 // char classes
127 "[]",
128 "[]foobar",
129 "[`-\\80",
130 "[A-\\K]",
131 // bad named classes
132 "[[:foo:]]",
133 "[[:1234:]]",
134 "[[:f\\oo:]]",
135 "[[: :]]",
136 "[[:...:]]",
137 "[[:l\\ower:]]",
138 "[[:abc\\:]]",
139 "[abc[:x\\]pqr:]]",
140 "[[:a\\dz:]]",
141 // unhandled subroutines and backrefs
142 "foo\\g''bar",
143 "foo\\g'45'bar",
144 "foo\\g'hatstand'bar",
145 "foo\\g<>bar",
146 "foo\\g<45>bar",
147 "foo\\g<teakettle>bar",
148 "((?i)rah)\\s+\\1",
149 "(?<p1>(?i)rah)\\s+\\k<p1>",
150 "(?'p1'(?i)rah)\\s+\\k{p1}",
151 "(?P<p1>(?i)rah)\\s+(?P=p1)",
152 "(?<p1>(?i)rah)\\s+\\g{p1}",
153 // truly enormous and with complicated assert resolution (UE-1107)
154 "((c(p|p)h{2,}bh.|p|((((cq|j|c|(\\b)|.[^nbgn]|(\\B)[qfh]a)){10,12}|ih|a|mnde[pa].|.g)){5,8})){21,29}",
155 // conditional subpatterns
156 "(?(?=[^a-z]*[a-z])\\d{2}-[a-z]{3}-\\d{2}|\\d{2}-\\d{2}-\\d{2)}",
157 // unmatched parens
158 "(foo",
159 "foo)",
160 "((foo)",
161 "(foo))",
162 // unterminated comment
163 "/foo(?#comment/",
164 // bogus \g backrefs
165 "A\\g",
166 "A(.*)\\ga",
167 // malformed \g backrefs (see UE-950)
168 "^(a)\\g",
169 "^(a)\\g{3",
170 "\\g{A",
171 "[\\g6666666666]",
172 "(^(a|b\\g<-1'c))",
173 // oniguruma subroutine calls (UE-950 as well)
174 "^(?<name>a|b\\g'name'c)",
175 "^(a|b\\g'1'c)",
176 "^(a|b\\g'-1'c)",
177 // backtracking control verbs
178 "A((?:A|B(*ACCEPT)|C)D)",
179 "(*FAIL)",
180 "(*F)",
181 "a+(*COMMIT)b",
182 "(*PRUNE)",
183 "a+(*SKIP)b",
184 // other unsupported PCRE features
185 "\\R",
186 "foo\\Kbar",
187 "\\Gfoo",
188 "(?|(Sat)ur|(Sun))day", // duplicate subpatterns, see UE-958
189 "foobar\\", // trailing unescaped backslash
190 "(?x)abc(#i)def" // unterminated extended-mode comment
191 };
192
193 // Did we correctly fail the compile?
TEST_P(HyperscanFailBadPattern,Compile)194 TEST_P(HyperscanFailBadPattern, Compile) {
195 ASSERT_NE(HS_SUCCESS, err) << "Compile should have failed for expr: " << pattern;
196 EXPECT_EQ(HS_COMPILER_ERROR, err);
197 EXPECT_TRUE(db == nullptr);
198 EXPECT_TRUE(compile_err != nullptr);
199 // We shouldn't fail with the following messagess
200 EXPECT_STRNE("An invalid flag was specified.", compile_err->message);
201 EXPECT_STRNE("Unable to allocate memory.", compile_err->message);
202 EXPECT_STRNE("Internal error.", compile_err->message);
203 EXPECT_STRNE("Match can be raised on EOD", compile_err->message);
204 }
205
206 INSTANTIATE_TEST_CASE_P(CompileBadPatterns,
207 HyperscanFailBadPattern,
208 Combine(Bool(), ValuesIn(badPatterns)));
209
210 struct BadPatternParam {
BadPatternParamBadPatternParam211 BadPatternParam(const string &expr_in, unsigned int flags_in,
212 const hs_expr_ext &ext_in,
213 const string &expected_error_in)
214 : expr(expr_in), flags(flags_in), ext(ext_in),
215 expected_error(expected_error_in) {}
216 string expr;
217 unsigned int flags;
218 hs_expr_ext ext;
219 string expected_error;
220
221 // Wrap hs_compile_ext_multi for single patterns.
compileBadPatternParam222 hs_error_t compile(unsigned int mode, hs_database_t **db,
223 hs_compile_error_t **compile_err) const {
224 const char *regex = expr.c_str();
225 const hs_expr_ext *extp = &ext;
226 return hs_compile_ext_multi(®ex, &flags, nullptr, &extp, 1,
227 mode, nullptr, db, compile_err);
228 }
229 };
230
PrintTo(const BadPatternParam & p,::std::ostream * os)231 void PrintTo(const BadPatternParam &p, ::std::ostream *os) {
232 *os << "expr: \"" << p.expr << "\", expected error: " << p.expected_error;
233 }
234
235 class BadPattern : public TestWithParam<BadPatternParam> {
236 };
237
TEST_P(BadPattern,Block)238 TEST_P(BadPattern, Block) {
239 const BadPatternParam &p = GetParam();
240 SCOPED_TRACE(p.expr);
241
242 hs_compile_error_t *compile_err;
243 hs_database_t *db;
244 hs_error_t err = p.compile(HS_MODE_NOSTREAM, &db, &compile_err);
245 EXPECT_EQ(HS_COMPILER_ERROR, err);
246 EXPECT_TRUE(db == nullptr);
247 EXPECT_TRUE(compile_err != nullptr);
248 if (compile_err) {
249 EXPECT_STREQ(p.expected_error.c_str(), compile_err->message);
250 }
251
252 hs_free_database(db);
253 hs_free_compile_error(compile_err);
254 }
255
TEST_P(BadPattern,Stream)256 TEST_P(BadPattern, Stream) {
257 const BadPatternParam &p = GetParam();
258 SCOPED_TRACE(p.expr);
259
260 hs_compile_error_t *compile_err;
261 hs_database_t *db;
262 hs_error_t err = p.compile(HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE, &db,
263 &compile_err);
264 EXPECT_EQ(HS_COMPILER_ERROR, err);
265 EXPECT_TRUE(db == nullptr);
266 EXPECT_TRUE(compile_err != nullptr);
267 if (compile_err) {
268 EXPECT_STREQ(p.expected_error.c_str(), compile_err->message);
269 }
270
271 hs_free_database(db);
272 hs_free_compile_error(compile_err);
273 }
274
275 // happy fun preprocessor hoop jumping
276 #define xstr(s) str(s)
277 #define str(s) #s
278
279 #define SRCDIR_PREFIX xstr(SRCDIR)
280
281 static
getBadPatterns()282 vector<BadPatternParam> getBadPatterns() {
283 string filename = "unit/hyperscan/bad_patterns.txt";
284
285 ifstream f;
286 f.open(filename.c_str(), ifstream::in);
287 if (!f.good()) {
288 // try it with the src prefix
289 f.open((string(SRCDIR_PREFIX) + "/" + filename).c_str(), ifstream::in);
290 }
291
292 vector<BadPatternParam> rv;
293 if (!f.good()) {
294 string expr("couldn't find input file:" + filename);
295 cerr << expr << endl;
296 abort();
297 return rv;
298 }
299
300 string line;
301 while (f.good()) {
302 getline(f, line);
303 if (line.empty()) {
304 continue;
305 }
306
307 size_t hashIdx = line.find_first_of('#');
308 size_t colonIdx = line.find_first_of(':');
309 assert(hashIdx != string::npos);
310 assert(colonIdx != string::npos);
311 if (!hashIdx) {
312 continue;
313 }
314
315 string error = line.substr(hashIdx + 1);
316 string expr = line.substr(colonIdx + 1, hashIdx - colonIdx - 1);
317 boost::trim(expr);
318
319 unsigned int flags;
320 string regex;
321 hs_expr_ext ext;
322 if (!readExpression(expr, regex, &flags, &ext)) {
323 cerr << expr << " failed in readExpression" << endl;
324 abort();
325 }
326 rv.push_back(BadPatternParam(regex, flags, ext, error));
327 }
328 f.close();
329
330 return rv;
331 }
332
333 INSTANTIATE_TEST_CASE_P(Hyperscan, BadPattern, ValuesIn(getBadPatterns()));
334
TEST(ResourceLimits,longPattern)335 TEST(ResourceLimits, longPattern) {
336 #define LONG_PATTERN_LEN 16384
337
338 hs_database_t *db = nullptr;
339 hs_compile_error_t *compile_err = nullptr;
340
341 char pattern[LONG_PATTERN_LEN];
342 memset(pattern, 'a', LONG_PATTERN_LEN);
343 pattern[LONG_PATTERN_LEN - 1] = 0;
344
345 hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, nullptr,
346 &db, &compile_err);
347
348 EXPECT_EQ(HS_COMPILER_ERROR, err);
349 EXPECT_TRUE(db == nullptr);
350 EXPECT_TRUE(compile_err != nullptr);
351 if (compile_err) {
352 EXPECT_STREQ("Pattern length exceeds limit.", compile_err->message);
353 }
354
355 hs_free_database(db);
356 hs_free_compile_error(compile_err);
357 }
358
TEST(ResourceLimits,longPatternInfo)359 TEST(ResourceLimits, longPatternInfo) {
360 #define LONG_PATTERN_LEN 16384
361
362 hs_expr_info_t *info = nullptr;
363 hs_compile_error_t *compile_err = nullptr;
364
365 char pattern[LONG_PATTERN_LEN];
366 memset(pattern, 'a', LONG_PATTERN_LEN);
367 pattern[LONG_PATTERN_LEN - 1] = 0;
368
369 hs_error_t err =
370 hs_expression_info(pattern, HS_FLAG_DOTALL, &info, &compile_err);
371
372 EXPECT_EQ(HS_COMPILER_ERROR, err);
373 EXPECT_TRUE(compile_err != nullptr);
374 if (compile_err) {
375 EXPECT_STREQ("Pattern length exceeds limit.", compile_err->message);
376 }
377
378 free(info);
379 hs_free_compile_error(compile_err);
380 }
381
TEST(ResourceLimits,longLiteral)382 TEST(ResourceLimits, longLiteral) {
383 #define LONG_PATTERN_LEN 16384
384
385 hs_database_t *db = nullptr;
386 hs_compile_error_t *compile_err = nullptr;
387
388 const char *pattern = "(abcd){4096}";
389
390 hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, nullptr,
391 &db, &compile_err);
392
393 EXPECT_EQ(HS_COMPILER_ERROR, err);
394 EXPECT_TRUE(db == nullptr);
395 EXPECT_TRUE(compile_err != nullptr);
396 if (compile_err) {
397 EXPECT_STREQ("Resource limit exceeded.", compile_err->message);
398 }
399
400 hs_free_database(db);
401 hs_free_compile_error(compile_err);
402 }
403