1 /*
2  * Copyright (c) 2015, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 #include "gtest/gtest.h"
31 #include "hs.h"
32 
33 #include <fstream>
34 #include <string>
35 #include <boost/algorithm/string/trim.hpp>
36 
37 #include "util/expressions.h"
38 #include "util/ExpressionParser.h"
39 
40 using namespace std;
41 using namespace testing;
42 
43 class HyperscanFailBadPattern
44     : public TestWithParam<tuple<bool, const char*>> {
45 protected:
SetUp()46     virtual void SetUp() {
47         // compiles a database for this test instantiation
48         bool isStreaming;
49         tie(isStreaming, pattern) = GetParam();
50         err = hs_compile(pattern, 0,
51                          isStreaming ? HS_MODE_STREAM : HS_MODE_BLOCK, nullptr,
52                          &db, &compile_err);
53     }
54 
TearDown()55     virtual void TearDown() {
56         hs_free_database(db);
57         hs_free_compile_error(compile_err);
58     }
59 
60     hs_error_t err;
61     const char *pattern;
62     hs_compile_error_t *compile_err;
63     hs_database_t *db;
64 };
65 
66 static const char *badPatterns[] = {
67     // vacuous patterns
68     "",
69     "a?",
70     "a*",
71     "(foo)?",
72     "(foo)*(bar)*",
73     "^arg|(foo)*(bar)*",
74     "foo|bar|",
75     "a*(b?)*c*",
76     "a{0,3}",
77     "[\\s\\S]*",
78     "[^\\s\\S]*",
79     // too big for our largest NFA
80     "(ewh|m?uit|f|snmv.g.gx[yofl]|.[^g][hbd])((.h|((y|vypfw|dfg{4}|x+|o.|y{8,}))+|k{9}t|cgp...gsk+)){17,}",
81     // illegal bounds
82     "fooa{0}",
83     "a{4,3}",
84     "a{2,1}",
85     // nothing to repeat
86     "a++",
87     "a+?+",
88     "a??",
89     "a?+",
90     "?qa",
91     "*abc",
92     "+abc",
93     // repeating boundaries is not allowed (UE-1007)
94     "^?0",
95     "^*0",
96     "^+0",
97     "^{1,3}0",
98     "0$?",
99     "0$*",
100     "0$+",
101     "0${1,3}",
102     // zero width asserts ("lookarounds")
103     "[a-z]+(?=;)", // positive lookahead
104     ".((?!\\x00\\x00)..)*?foo", // negative lookahead
105     "(?<=bullock|donkey)", // positive lookbehind
106     "(?<!foo)bar", // negative lookbehind
107     // embedded anchors
108     "foo^bar",
109     "foo$bar",
110     "(foo^bar|other)",
111     "(foo$bar|other)",
112     "$test",
113     "test^",
114     "foo(a|^)bar",
115     "a$^b",
116     "(^foo)+bar",
117     "foo(bar$)+",
118     "a^{3}=",
119     // atomic groups
120     "foobar(?>.{3,})bar",
121     // possessive quantifiers
122     "\\d++foo",
123     "(abc|xyz){2,3}+",
124     // back-reference inside a repeat (also too big, actually)
125     "^..\x02.{10,522}([^\00])\1{16}",
126     // char classes
127     "[]",
128     "[]foobar",
129     "[`-\\80",
130     "[A-\\K]",
131     // bad named classes
132     "[[:foo:]]",
133     "[[:1234:]]",
134     "[[:f\\oo:]]",
135     "[[: :]]",
136     "[[:...:]]",
137     "[[:l\\ower:]]",
138     "[[:abc\\:]]",
139     "[abc[:x\\]pqr:]]",
140     "[[:a\\dz:]]",
141     // unhandled subroutines and backrefs
142     "foo\\g''bar",
143     "foo\\g'45'bar",
144     "foo\\g'hatstand'bar",
145     "foo\\g<>bar",
146     "foo\\g<45>bar",
147     "foo\\g<teakettle>bar",
148     "((?i)rah)\\s+\\1",
149     "(?<p1>(?i)rah)\\s+\\k<p1>",
150     "(?'p1'(?i)rah)\\s+\\k{p1}",
151     "(?P<p1>(?i)rah)\\s+(?P=p1)",
152     "(?<p1>(?i)rah)\\s+\\g{p1}",
153     // truly enormous and with complicated assert resolution (UE-1107)
154     "((c(p|p)h{2,}bh.|p|((((cq|j|c|(\\b)|.[^nbgn]|(\\B)[qfh]a)){10,12}|ih|a|mnde[pa].|.g)){5,8})){21,29}",
155     // conditional subpatterns
156     "(?(?=[^a-z]*[a-z])\\d{2}-[a-z]{3}-\\d{2}|\\d{2}-\\d{2}-\\d{2)}",
157     // unmatched parens
158     "(foo",
159     "foo)",
160     "((foo)",
161     "(foo))",
162     // unterminated comment
163     "/foo(?#comment/",
164     // bogus \g backrefs
165     "A\\g",
166     "A(.*)\\ga",
167     // malformed \g backrefs (see UE-950)
168     "^(a)\\g",
169     "^(a)\\g{3",
170     "\\g{A",
171     "[\\g6666666666]",
172     "(^(a|b\\g<-1'c))",
173     // oniguruma subroutine calls (UE-950 as well)
174     "^(?<name>a|b\\g'name'c)",
175     "^(a|b\\g'1'c)",
176     "^(a|b\\g'-1'c)",
177     // backtracking control verbs
178     "A((?:A|B(*ACCEPT)|C)D)",
179     "(*FAIL)",
180     "(*F)",
181     "a+(*COMMIT)b",
182     "(*PRUNE)",
183     "a+(*SKIP)b",
184     // other unsupported PCRE features
185     "\\R",
186     "foo\\Kbar",
187     "\\Gfoo",
188     "(?|(Sat)ur|(Sun))day", // duplicate subpatterns, see UE-958
189     "foobar\\", // trailing unescaped backslash
190     "(?x)abc(#i)def" // unterminated extended-mode comment
191 };
192 
193 // Did we correctly fail the compile?
TEST_P(HyperscanFailBadPattern,Compile)194 TEST_P(HyperscanFailBadPattern, Compile) {
195     ASSERT_NE(HS_SUCCESS, err) << "Compile should have failed for expr: " << pattern;
196     EXPECT_EQ(HS_COMPILER_ERROR, err);
197     EXPECT_TRUE(db == nullptr);
198     EXPECT_TRUE(compile_err != nullptr);
199     // We shouldn't fail with the following messagess
200     EXPECT_STRNE("An invalid flag was specified.", compile_err->message);
201     EXPECT_STRNE("Unable to allocate memory.", compile_err->message);
202     EXPECT_STRNE("Internal error.", compile_err->message);
203     EXPECT_STRNE("Match can be raised on EOD", compile_err->message);
204 }
205 
206 INSTANTIATE_TEST_CASE_P(CompileBadPatterns,
207                         HyperscanFailBadPattern,
208                         Combine(Bool(), ValuesIn(badPatterns)));
209 
210 struct BadPatternParam {
BadPatternParamBadPatternParam211     BadPatternParam(const string &expr_in, unsigned int flags_in,
212                     const hs_expr_ext &ext_in,
213                     const string &expected_error_in)
214         : expr(expr_in), flags(flags_in), ext(ext_in),
215           expected_error(expected_error_in) {}
216     string expr;
217     unsigned int flags;
218     hs_expr_ext ext;
219     string expected_error;
220 
221     // Wrap hs_compile_ext_multi for single patterns.
compileBadPatternParam222     hs_error_t compile(unsigned int mode, hs_database_t **db,
223                        hs_compile_error_t **compile_err) const {
224         const char *regex = expr.c_str();
225         const hs_expr_ext *extp = &ext;
226         return hs_compile_ext_multi(&regex, &flags, nullptr, &extp, 1,
227                                     mode, nullptr, db, compile_err);
228     }
229 };
230 
PrintTo(const BadPatternParam & p,::std::ostream * os)231 void PrintTo(const BadPatternParam &p, ::std::ostream *os) {
232     *os << "expr: \"" << p.expr << "\", expected error: " << p.expected_error;
233 }
234 
235 class BadPattern : public TestWithParam<BadPatternParam> {
236 };
237 
TEST_P(BadPattern,Block)238 TEST_P(BadPattern, Block) {
239     const BadPatternParam &p = GetParam();
240     SCOPED_TRACE(p.expr);
241 
242     hs_compile_error_t *compile_err;
243     hs_database_t *db;
244     hs_error_t err = p.compile(HS_MODE_NOSTREAM, &db, &compile_err);
245     EXPECT_EQ(HS_COMPILER_ERROR, err);
246     EXPECT_TRUE(db == nullptr);
247     EXPECT_TRUE(compile_err != nullptr);
248     if (compile_err) {
249         EXPECT_STREQ(p.expected_error.c_str(), compile_err->message);
250     }
251 
252     hs_free_database(db);
253     hs_free_compile_error(compile_err);
254 }
255 
TEST_P(BadPattern,Stream)256 TEST_P(BadPattern, Stream) {
257     const BadPatternParam &p = GetParam();
258     SCOPED_TRACE(p.expr);
259 
260     hs_compile_error_t *compile_err;
261     hs_database_t *db;
262     hs_error_t err = p.compile(HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE, &db,
263                                &compile_err);
264     EXPECT_EQ(HS_COMPILER_ERROR, err);
265     EXPECT_TRUE(db == nullptr);
266     EXPECT_TRUE(compile_err != nullptr);
267     if (compile_err) {
268         EXPECT_STREQ(p.expected_error.c_str(), compile_err->message);
269     }
270 
271     hs_free_database(db);
272     hs_free_compile_error(compile_err);
273 }
274 
275 // happy fun preprocessor hoop jumping
276 #define xstr(s) str(s)
277 #define str(s) #s
278 
279 #define SRCDIR_PREFIX xstr(SRCDIR)
280 
281 static
getBadPatterns()282 vector<BadPatternParam> getBadPatterns() {
283     string filename = "unit/hyperscan/bad_patterns.txt";
284 
285     ifstream f;
286     f.open(filename.c_str(), ifstream::in);
287     if (!f.good()) {
288         // try it with the src prefix
289         f.open((string(SRCDIR_PREFIX) + "/" + filename).c_str(), ifstream::in);
290     }
291 
292     vector<BadPatternParam> rv;
293     if (!f.good()) {
294         string expr("couldn't find input file:" + filename);
295         cerr << expr << endl;
296         abort();
297         return rv;
298     }
299 
300     string line;
301     while (f.good()) {
302         getline(f, line);
303         if (line.empty()) {
304             continue;
305         }
306 
307         size_t hashIdx = line.find_first_of('#');
308         size_t colonIdx = line.find_first_of(':');
309         assert(hashIdx != string::npos);
310         assert(colonIdx != string::npos);
311         if (!hashIdx) {
312             continue;
313         }
314 
315         string error = line.substr(hashIdx + 1);
316         string expr = line.substr(colonIdx + 1, hashIdx - colonIdx - 1);
317         boost::trim(expr);
318 
319         unsigned int flags;
320         string regex;
321         hs_expr_ext ext;
322         if (!readExpression(expr, regex, &flags, &ext)) {
323             cerr << expr << " failed in readExpression" << endl;
324             abort();
325         }
326         rv.push_back(BadPatternParam(regex, flags, ext, error));
327     }
328     f.close();
329 
330     return rv;
331 }
332 
333 INSTANTIATE_TEST_CASE_P(Hyperscan, BadPattern, ValuesIn(getBadPatterns()));
334 
TEST(ResourceLimits,longPattern)335 TEST(ResourceLimits, longPattern) {
336 #define LONG_PATTERN_LEN 16384
337 
338     hs_database_t *db = nullptr;
339     hs_compile_error_t *compile_err = nullptr;
340 
341     char pattern[LONG_PATTERN_LEN];
342     memset(pattern, 'a', LONG_PATTERN_LEN);
343     pattern[LONG_PATTERN_LEN - 1] = 0;
344 
345     hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, nullptr,
346                                 &db, &compile_err);
347 
348     EXPECT_EQ(HS_COMPILER_ERROR, err);
349     EXPECT_TRUE(db == nullptr);
350     EXPECT_TRUE(compile_err != nullptr);
351     if (compile_err) {
352         EXPECT_STREQ("Pattern length exceeds limit.", compile_err->message);
353     }
354 
355     hs_free_database(db);
356     hs_free_compile_error(compile_err);
357 }
358 
TEST(ResourceLimits,longPatternInfo)359 TEST(ResourceLimits, longPatternInfo) {
360 #define LONG_PATTERN_LEN 16384
361 
362     hs_expr_info_t *info = nullptr;
363     hs_compile_error_t *compile_err = nullptr;
364 
365     char pattern[LONG_PATTERN_LEN];
366     memset(pattern, 'a', LONG_PATTERN_LEN);
367     pattern[LONG_PATTERN_LEN - 1] = 0;
368 
369     hs_error_t err =
370         hs_expression_info(pattern, HS_FLAG_DOTALL, &info, &compile_err);
371 
372     EXPECT_EQ(HS_COMPILER_ERROR, err);
373     EXPECT_TRUE(compile_err != nullptr);
374     if (compile_err) {
375         EXPECT_STREQ("Pattern length exceeds limit.", compile_err->message);
376     }
377 
378     free(info);
379     hs_free_compile_error(compile_err);
380 }
381 
TEST(ResourceLimits,longLiteral)382 TEST(ResourceLimits, longLiteral) {
383 #define LONG_PATTERN_LEN 16384
384 
385     hs_database_t *db = nullptr;
386     hs_compile_error_t *compile_err = nullptr;
387 
388     const char *pattern = "(abcd){4096}";
389 
390     hs_error_t err = hs_compile(pattern, HS_FLAG_DOTALL, HS_MODE_BLOCK, nullptr,
391                                 &db, &compile_err);
392 
393     EXPECT_EQ(HS_COMPILER_ERROR, err);
394     EXPECT_TRUE(db == nullptr);
395     EXPECT_TRUE(compile_err != nullptr);
396     if (compile_err) {
397         EXPECT_STREQ("Resource limit exceeded.", compile_err->message);
398     }
399 
400     hs_free_database(db);
401     hs_free_compile_error(compile_err);
402 }
403