1 /*
2  * Copyright (c) 2015, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <algorithm>
30 #include <array>
31 #include <iostream>
32 #include <vector>
33 
34 #include "gtest/gtest.h"
35 #include "hs.h"
36 #include "config.h"
37 #include "test_util.h"
38 
39 using namespace std;
40 
// Scans "aooAaooAbarZ" against two patterns (ids 30 and 31) with the callback
// context's halt field cleared, and expects three matches in a fixed order.
// MatchRecord(x, y) is presumably (end offset, pattern id) -- confirm against
// test_util.h.
TEST(MMAdaptor, norm_cont1) { // UE-901
    const std::string input = "aooAaooAbarZ";
    const char *patterns[] = {"aoo[A-K]", "bar[L-Z]"};
    const unsigned pattern_flags[] = {0, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    // halt == 0: the scan is expected to run to completion (HS_SUCCESS).
    CallBackContext ctx;
    ctx.halt = 0;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SUCCESS, scan_rc);

    // The recorded matches must equal this list, element by element.
    const MatchRecord expected[] = {MatchRecord(4, 30), MatchRecord(8, 30),
                                    MatchRecord(12, 31)};
    ASSERT_EQ(3U, ctx.matches.size());
    for (unsigned idx = 0; idx < 3U; ++idx) {
        ASSERT_EQ(expected[idx], ctx.matches[idx]);
    }

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
73 
// Scans a space-padded input against two patterns that each require 16
// trailing non-newline characters, with the callback context's halt field
// cleared. Exactly three matches are expected; their order is not checked.
// MatchRecord(x, y) is presumably (end offset, pattern id) -- confirm against
// test_util.h.
TEST(MMAdaptor, norm_cont2) {
    const std::string input = "aooAaooAbarZ                      ";
    const char *patterns[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"};
    const unsigned pattern_flags[] = {0, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    // halt == 0: the scan is expected to run to completion (HS_SUCCESS).
    CallBackContext ctx;
    ctx.halt = 0;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SUCCESS, scan_rc);
    ASSERT_EQ(3U, ctx.matches.size());

    // Membership check only: position within ctx.matches is irrelevant here.
    const auto reported = [&ctx](const MatchRecord &wanted) {
        return ctx.matches.end() !=
               std::find(ctx.matches.begin(), ctx.matches.end(), wanted);
    };
    ASSERT_TRUE(reported(MatchRecord(20, 30)));
    ASSERT_TRUE(reported(MatchRecord(24, 30)));
    ASSERT_TRUE(reported(MatchRecord(28, 31)));

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
106 
// Scans "aooAaooAbarZ" against two patterns (ids 30 and 31) with the callback
// context's halt field set. The scan is expected to end with
// HS_SCAN_TERMINATED after exactly one recorded match.
// NOTE(review): halt presumably makes record_cb request termination; record_cb
// is defined outside this file -- confirm.
TEST(MMAdaptor, norm_halt1) {
    const std::string input = "aooAaooAbarZ";
    const char *patterns[] = {"aoo[A-K]", "bar[L-Z]"};
    const unsigned pattern_flags[] = {0, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    CallBackContext ctx;
    ctx.halt = 1;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SCAN_TERMINATED, scan_rc);

    // Only the first match may have been recorded before termination.
    ASSERT_EQ(1U, ctx.matches.size());
    ASSERT_EQ(MatchRecord(4, 30), ctx.matches.front());

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
137 
// Scans a space-padded input against two patterns that each require 16
// trailing non-newline characters, with the callback context's halt field
// set. The scan is expected to end with HS_SCAN_TERMINATED after a single
// recorded match, MatchRecord(20, 30).
// NOTE(review): halt presumably makes record_cb request termination; record_cb
// is defined outside this file -- confirm.
TEST(MMAdaptor, norm_halt2) { // UE-901
    const std::string input = "aooAaooAbarZ                      ";
    const char *patterns[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"};
    const unsigned pattern_flags[] = {0, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    CallBackContext ctx;
    ctx.halt = 1;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SCAN_TERMINATED, scan_rc);

    // Only the first match may have been recorded before termination.
    ASSERT_EQ(1U, ctx.matches.size());
    ASSERT_EQ(MatchRecord(20, 30), ctx.matches.front());

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
168 
// Scans "aooAaooAbarZ" with pattern id 30 compiled under HS_FLAG_SINGLEMATCH
// and pattern id 31 with no flags, halt cleared. Exactly two matches are
// expected -- one per id -- and their order is not checked.
// MatchRecord(x, y) is presumably (end offset, pattern id) -- confirm against
// test_util.h.
TEST(MMAdaptor, high_cont1) { // UE-901
    const std::string input = "aooAaooAbarZ";
    const char *patterns[] = {"aoo[A-K]", "bar[L-Z]"};
    const unsigned pattern_flags[] = {HS_FLAG_SINGLEMATCH, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    // halt == 0: the scan is expected to run to completion (HS_SUCCESS).
    CallBackContext ctx;
    ctx.halt = 0;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SUCCESS, scan_rc);
    ASSERT_EQ(2U, ctx.matches.size());

    // Membership check only: position within ctx.matches is irrelevant here.
    const auto reported = [&ctx](const MatchRecord &wanted) {
        return ctx.matches.end() !=
               std::find(ctx.matches.begin(), ctx.matches.end(), wanted);
    };
    ASSERT_TRUE(reported(MatchRecord(4, 30)));
    ASSERT_TRUE(reported(MatchRecord(12, 31)));

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
200 
// Scans a space-padded input against two patterns that each require 16
// trailing non-newline characters; pattern id 30 is compiled under
// HS_FLAG_SINGLEMATCH, pattern id 31 with no flags, halt cleared. Exactly two
// matches are expected -- one per id -- and their order is not checked.
// MatchRecord(x, y) is presumably (end offset, pattern id) -- confirm against
// test_util.h.
TEST(MMAdaptor, high_cont2) {
    const std::string input = "aooAaooAbarZ                      ";
    const char *patterns[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"};
    const unsigned pattern_flags[] = {HS_FLAG_SINGLEMATCH, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    // halt == 0: the scan is expected to run to completion (HS_SUCCESS).
    CallBackContext ctx;
    ctx.halt = 0;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SUCCESS, scan_rc);
    ASSERT_EQ(2U, ctx.matches.size());

    // Membership check only: position within ctx.matches is irrelevant here.
    const auto reported = [&ctx](const MatchRecord &wanted) {
        return ctx.matches.end() !=
               std::find(ctx.matches.begin(), ctx.matches.end(), wanted);
    };
    ASSERT_TRUE(reported(MatchRecord(20, 30)));
    ASSERT_TRUE(reported(MatchRecord(28, 31)));

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
232 
// Scans "aooAaooAbarZ" with pattern id 30 compiled under HS_FLAG_SINGLEMATCH
// and pattern id 31 with no flags, with the callback context's halt field
// set. The scan is expected to end with HS_SCAN_TERMINATED after a single
// recorded match, MatchRecord(4, 30).
// NOTE(review): halt presumably makes record_cb request termination; record_cb
// is defined outside this file -- confirm.
TEST(MMAdaptor, high_halt1) {
    const std::string input = "aooAaooAbarZ";
    const char *patterns[] = {"aoo[A-K]", "bar[L-Z]"};
    const unsigned pattern_flags[] = {HS_FLAG_SINGLEMATCH, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    CallBackContext ctx;
    ctx.halt = 1;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SCAN_TERMINATED, scan_rc);

    // Only the first match may have been recorded before termination.
    ASSERT_EQ(1U, ctx.matches.size());
    ASSERT_EQ(MatchRecord(4, 30), ctx.matches.front());

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
263 
// Scans a space-padded input against two patterns that each require 16
// trailing non-newline characters; pattern id 30 is compiled under
// HS_FLAG_SINGLEMATCH, pattern id 31 with no flags, and the callback
// context's halt field is set. The scan is expected to end with
// HS_SCAN_TERMINATED after a single recorded match, which may be either
// MatchRecord(20, 30) or MatchRecord(28, 31) -- the test accepts both.
// NOTE(review): halt presumably makes record_cb request termination; record_cb
// is defined outside this file -- confirm.
TEST(MMAdaptor, high_halt2) {
    const std::string input = "aooAaooAbarZbarZaooA                      ";
    const char *patterns[] = {"aoo[A-K][^\n]{16}", "bar[L-Z][^\n]{16}"};
    const unsigned pattern_flags[] = {HS_FLAG_SINGLEMATCH, 0};
    const unsigned pattern_ids[] = {30, 31};

    // Compile both expressions into a single database.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, pattern_ids, 2,
                         HS_MODE_NOSTREAM, nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    CallBackContext ctx;
    ctx.halt = 1;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SCAN_TERMINATED, scan_rc);
    ASSERT_EQ(1U, ctx.matches.size());

    // Either pattern may legitimately be the one reported before termination.
    const MatchRecord &first_hit = ctx.matches.front();
    const bool acceptable = MatchRecord(20, 30) == first_hit ||
                            MatchRecord(28, 31) == first_hit;
    ASSERT_TRUE(acceptable);

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
295 
// Regression test named after UE-2395. Builds a block-mode database from
// three DOTALL patterns and scans a 300-byte buffer filled with 'a':
//   id 1  "^.{200}"  -- every match reported for it must end at offset 200;
//   id 2  ".{40,}"   -- its matches must end at consecutive offsets, the
//                       first at 40 and the last at 300;
//   id 3  "aaa"      -- present in the database, but its matches are ignored.
TEST(MPV, UE_2395) {
    vector<pattern> patterns;
    patterns.push_back(pattern("^.{200}", HS_FLAG_DOTALL, 1));
    patterns.push_back(pattern(".{40,}", HS_FLAG_DOTALL, 2));
    patterns.push_back(pattern("aaa", HS_FLAG_DOTALL, 3));

    hs_database_t *db = buildDB(patterns, HS_MODE_BLOCK);
    ASSERT_NE(nullptr, db);

    hs_scratch_t *scratch = nullptr;
    hs_error_t err = hs_alloc_scratch(db, &scratch);
    ASSERT_EQ(HS_SUCCESS, err);
    // Same non-null check the other tests in this file make after allocation.
    ASSERT_TRUE(scratch != nullptr);

    array<char, 300> data;
    data.fill('a');

    CallBackContext c;
    err = hs_scan(db, data.data(), data.size(), 0, scratch, record_cb,
                  (void *)&c);
    ASSERT_EQ(HS_SUCCESS, err);

    // Last end offset seen for id 2; starts one below the first expected
    // offset (40) so the "previous + 1" check also covers the first match.
    unsigned seen = 39;
    for (const MatchRecord &match : c.matches) {
        if (match.id != 2) {
            if (match.id == 1) {
                // Unsigned literal: avoids a signed/unsigned comparison and
                // matches the 3U/1U style used by the other tests.
                ASSERT_EQ(200U, match.to);
            }
            continue;
        }
        ASSERT_EQ(seen + 1, match.to);
        seen = match.to;
    }

    // If id 2 never matched, seen is still 39 and this fails.
    ASSERT_EQ(300U, seen);

    hs_free_database(db);
    err = hs_free_scratch(scratch);
    ASSERT_EQ(HS_SUCCESS, err);
}
336 
// Test named after issue 141. Compiles three literal expressions in block
// mode, all with HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH and no id array, then
// scans an input identical to the first expression. Exactly one match is
// expected: MatchRecord(19, 0).
// MatchRecord(x, y) is presumably (end offset, pattern id) -- confirm against
// test_util.h.
TEST(MMRoseLiteralPath, issue_141) {
    const std::string input = "/odezhda-dlya-bega/";
    const char *patterns[] = {"/odezhda-dlya-bega/",
                              "kurtki-i-vetrovki-dlya-bega",
                              "futbolki-i-mayki-dlya-bega"};
    // Every expression is compiled with the same flag combination.
    const unsigned common_flags = HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH;
    const unsigned pattern_flags[] = {common_flags, common_flags,
                                      common_flags};

    // No ids are supplied (nullptr); the expected match carries id 0.
    hs_database_t *database = nullptr;
    hs_compile_error_t *build_error = nullptr;
    const hs_error_t build_rc =
        hs_compile_multi(patterns, pattern_flags, nullptr, 3, HS_MODE_BLOCK,
                         nullptr, &database, &build_error);
    ASSERT_EQ(HS_SUCCESS, build_rc);
    ASSERT_TRUE(nullptr != database);

    hs_scratch_t *scratch_space = nullptr;
    const hs_error_t alloc_rc = hs_alloc_scratch(database, &scratch_space);
    ASSERT_EQ(HS_SUCCESS, alloc_rc);
    ASSERT_TRUE(nullptr != scratch_space);

    // halt == 0: the scan is expected to run to completion (HS_SUCCESS).
    CallBackContext ctx;
    ctx.halt = 0;
    const hs_error_t scan_rc =
        hs_scan(database, input.c_str(), input.size(), 0, scratch_space,
                record_cb, static_cast<void *>(&ctx));
    ASSERT_EQ(HS_SUCCESS, scan_rc);
    ASSERT_EQ(1U, ctx.matches.size());
    ASSERT_EQ(MatchRecord(19, 0), ctx.matches.front());

    hs_free_database(database);
    const hs_error_t release_rc = hs_free_scratch(scratch_space);
    ASSERT_EQ(HS_SUCCESS, release_rc);
}
370