1 /*
2  * Copyright (c) 2015-2016, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
#include "config.h"

#include <memory>
#include <ostream>
#include <set>
#include <string>
#include <vector>

#include "gtest/gtest.h"
#include "test_util.h"
#include "hs.h"
34 
35 namespace {
36 
// One parameterised test case: the expression to compile, the flags to
// compile it with, the data to scan, and the offset every match must report.
struct PatternInfo {
    std::string expr;         // expression passed to pattern() for every id
    unsigned int flags;       // flags passed to pattern() alongside expr
    std::string corpus;       // data handed to the scan call of each test
    unsigned long long match; // expected `to` value of every recorded match
};
43 
44 class IdenticalTest : public testing::TestWithParam<PatternInfo> {};
45 
// Compiles the same expression under 100 distinct ids into a block-mode
// database, scans the corpus once, and checks that every id is reported
// exactly once with the expected `to` offset.
TEST_P(IdenticalTest, Block) {
    const PatternInfo &info = GetParam();

    const unsigned kNumPatterns = 100;
    std::vector<pattern> patterns;
    patterns.reserve(kNumPatterns);
    for (unsigned i = 0; i < kNumPatterns; i++) {
        patterns.push_back(pattern(info.expr, info.flags, i));
    }

    // Owning handles: a fatal ASSERT_* returns from the test body, so raw
    // pointers would leak the database and scratch on any early failure.
    std::unique_ptr<hs_database_t, decltype(&hs_free_database)> db(
        buildDB(patterns, HS_MODE_BLOCK), hs_free_database);
    ASSERT_NE(nullptr, db.get());

    hs_scratch_t *raw_scratch = nullptr;
    hs_error_t err = hs_alloc_scratch(db.get(), &raw_scratch);
    std::unique_ptr<hs_scratch_t, decltype(&hs_free_scratch)> scratch(
        raw_scratch, hs_free_scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_NE(nullptr, scratch.get());

    CallBackContext cb;
    err = hs_scan(db.get(), info.corpus.c_str(), info.corpus.size(), 0,
                  scratch.get(), record_cb, &cb);
    ASSERT_EQ(HS_SUCCESS, err);

    // Free explicitly on the success path so that the return value of
    // hs_free_scratch() is still checked.
    err = hs_free_scratch(scratch.release());
    ASSERT_EQ(HS_SUCCESS, err);
    db.reset();

    // One match per pattern...
    ASSERT_EQ(patterns.size(), cb.matches.size());

    // ...all at the expected offset...
    std::set<unsigned> ids;
    for (const auto &m : cb.matches) {
        ASSERT_EQ(info.match, m.to);
        ids.insert(m.id);
    }

    // ...and the ids seen are exactly 0 .. patterns.size() - 1.
    ASSERT_EQ(patterns.size(), ids.size());
    ASSERT_EQ(0U, *ids.begin());
    ASSERT_EQ(patterns.size() - 1, *ids.rbegin());
}
83 
// Compiles the same expression under 100 distinct ids into a streaming
// database (large SOM horizon), writes the corpus to a single stream, closes
// it, and checks that every id is reported exactly once with the expected
// `to` offset.
TEST_P(IdenticalTest, Stream) {
    // Closes a stream that is abandoned by an early fatal assertion. No
    // callback is supplied, which is the only case in which hs_close_stream()
    // accepts a null scratch.
    struct StreamCloser {
        void operator()(hs_stream_t *s) const {
            static_cast<void>(hs_close_stream(s, nullptr, nullptr, nullptr));
        }
    };

    const PatternInfo &info = GetParam();

    const unsigned kNumPatterns = 100;
    std::vector<pattern> patterns;
    patterns.reserve(kNumPatterns);
    for (unsigned i = 0; i < kNumPatterns; i++) {
        patterns.push_back(pattern(info.expr, info.flags, i));
    }

    // Owning handles: a fatal ASSERT_* returns from the test body, so raw
    // pointers would leak the database, scratch and stream on early failure.
    // Declaration order matters: the stream is destroyed first, the database
    // last.
    std::unique_ptr<hs_database_t, decltype(&hs_free_database)> db(
        buildDB(patterns, HS_MODE_STREAM | HS_MODE_SOM_HORIZON_LARGE),
        hs_free_database);
    ASSERT_NE(nullptr, db.get());

    hs_scratch_t *raw_scratch = nullptr;
    hs_error_t err = hs_alloc_scratch(db.get(), &raw_scratch);
    std::unique_ptr<hs_scratch_t, decltype(&hs_free_scratch)> scratch(
        raw_scratch, hs_free_scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_NE(nullptr, scratch.get());

    CallBackContext cb;

    hs_stream_t *raw_stream = nullptr;
    err = hs_open_stream(db.get(), 0, &raw_stream);
    std::unique_ptr<hs_stream_t, StreamCloser> stream(raw_stream);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_NE(nullptr, stream.get());

    err = hs_scan_stream(stream.get(), info.corpus.c_str(),
                         info.corpus.size(), 0, scratch.get(), record_cb,
                         &cb);
    ASSERT_EQ(HS_SUCCESS, err);

    // Close with the callback so matches raised at close are recorded too.
    err = hs_close_stream(stream.release(), scratch.get(), record_cb, &cb);
    ASSERT_EQ(HS_SUCCESS, err);

    // Free explicitly on the success path so that the return value of
    // hs_free_scratch() is still checked.
    err = hs_free_scratch(scratch.release());
    ASSERT_EQ(HS_SUCCESS, err);
    db.reset();

    // One match per pattern...
    ASSERT_EQ(patterns.size(), cb.matches.size());

    // ...all at the expected offset...
    std::set<unsigned> ids;
    for (const auto &m : cb.matches) {
        ASSERT_EQ(info.match, m.to);
        ids.insert(m.id);
    }

    // ...and the ids seen are exactly 0 .. patterns.size() - 1.
    ASSERT_EQ(patterns.size(), ids.size());
    ASSERT_EQ(0U, *ids.begin());
    ASSERT_EQ(patterns.size() - 1, *ids.rbegin());
}
131 
// Compiles the same expression under 100 distinct ids into a vectored-mode
// database, scans the corpus as a single-element vector, and checks that
// every id is reported exactly once with the expected `to` offset.
TEST_P(IdenticalTest, Vectored) {
    const PatternInfo &info = GetParam();

    const unsigned kNumPatterns = 100;
    std::vector<pattern> patterns;
    patterns.reserve(kNumPatterns);
    for (unsigned i = 0; i < kNumPatterns; i++) {
        patterns.push_back(pattern(info.expr, info.flags, i));
    }

    // Owning handles: a fatal ASSERT_* returns from the test body, so raw
    // pointers would leak the database and scratch on any early failure.
    std::unique_ptr<hs_database_t, decltype(&hs_free_database)> db(
        buildDB(patterns, HS_MODE_VECTORED), hs_free_database);
    ASSERT_NE(nullptr, db.get());

    hs_scratch_t *raw_scratch = nullptr;
    hs_error_t err = hs_alloc_scratch(db.get(), &raw_scratch);
    std::unique_ptr<hs_scratch_t, decltype(&hs_free_scratch)> scratch(
        raw_scratch, hs_free_scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    ASSERT_NE(nullptr, scratch.get());

    CallBackContext cb;

    // The whole corpus is presented as one buffer of a one-element vector.
    const char * const data[] = { info.corpus.c_str() };
    const unsigned datalen[] = { static_cast<unsigned>(info.corpus.size()) };

    err = hs_scan_vector(db.get(), data, datalen, 1, 0, scratch.get(),
                         record_cb, &cb);
    ASSERT_EQ(HS_SUCCESS, err);

    // Free explicitly on the success path so that the return value of
    // hs_free_scratch() is still checked.
    err = hs_free_scratch(scratch.release());
    ASSERT_EQ(HS_SUCCESS, err);
    db.reset();

    // One match per pattern...
    ASSERT_EQ(patterns.size(), cb.matches.size());

    // ...all at the expected offset...
    std::set<unsigned> ids;
    for (const auto &m : cb.matches) {
        ASSERT_EQ(info.match, m.to);
        ids.insert(m.id);
    }

    // ...and the ids seen are exactly 0 .. patterns.size() - 1.
    ASSERT_EQ(patterns.size(), ids.size());
    ASSERT_EQ(0U, *ids.begin());
    ASSERT_EQ(patterns.size() - 1, *ids.rbegin());
}
172 
173 static const PatternInfo patterns[] = {
174     { "a", 0, "a", 1 },
175     { "a", HS_FLAG_SINGLEMATCH, "a", 1 },
176     { "handbasket", 0, "__handbasket__", 12 },
177     { "handbasket", HS_FLAG_SINGLEMATCH, "__handbasket__", 12 },
178     { "handbasket", HS_FLAG_SOM_LEFTMOST, "__handbasket__", 12 },
179     { "foo.*bar", 0, "a foolish embarrassment", 15 },
180     { "foo.*bar", HS_FLAG_SINGLEMATCH, "a foolish embarrassment", 15 },
181     { "foo.*bar", HS_FLAG_SOM_LEFTMOST, "a foolish embarrassment", 15 },
182     { "\\bword\\b(..)+\\d{3,7}", 0, "    word    012", 15 },
183     { "\\bword\\b(..)+\\d{3,7}", HS_FLAG_SINGLEMATCH, "    word    012", 15 },
184     { "\\bword\\b(..)+\\d{3,7}", HS_FLAG_SOM_LEFTMOST, "    word    012", 15 },
185     { "eod\\z", 0, "eod", 3 },
186     { "eod\\z", HS_FLAG_SINGLEMATCH, "eod", 3 },
187     { "eod\\z", HS_FLAG_SOM_LEFTMOST, "eod", 3 },
188 };
189 
190 INSTANTIATE_TEST_CASE_P(Identical, IdenticalTest, testing::ValuesIn(patterns));
191 
192 // teach google-test how to print a param
PrintTo(const PatternInfo & p,::std::ostream * os)193 void PrintTo(const PatternInfo &p, ::std::ostream *os) {
194     *os << p.expr << ":" << p.flags << ", " << p.corpus;
195 }
196 
197 } // namespace
198