1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #ifdef _WIN32
30 #define PCRE_STATIC
31 #endif
32 #include "config.h"
33
34 #include "common.h"
35 #include "engine_pcre.h"
36 #include "heapstats.h"
37 #include "huge.h"
38 #include "sqldb.h"
39 #include "timer.h"
40
41 #include "util/make_unique.h"
42 #include "util/unicode_def.h"
43
44 #include <algorithm>
45
46 using namespace std;
47
EnginePCREContext(int capture_cnt)48 EnginePCREContext::EnginePCREContext(int capture_cnt) {
49 ovec = (int *)malloc((capture_cnt + 1)* sizeof(int) * 3);
50 }
51
~EnginePCREContext()52 EnginePCREContext::~EnginePCREContext() {
53 free(ovec);
54 }
55
56 namespace /* anonymous */ {
57
58 /** Scan context structure passed to the onMatch callback function. */
59 struct ScanPCREContext {
ScanPCREContext__anon7e26c13d0111::ScanPCREContext60 ScanPCREContext(unsigned id_in, ResultEntry &result_in)
61 : id(id_in), result(result_in) {}
62 unsigned id;
63 ResultEntry &result;
64 };
65
66 } // namespace
67
68 /**
69 * Function called for every match that PCRE produces, used when
70 * "echo matches" is off.
71 */
72 static
onMatch(ScanPCREContext * sc)73 int onMatch(ScanPCREContext *sc) {
74 assert(sc);
75 sc->result.matches++;
76
77 return 0;
78 }
79
80 /**
81 * Function called for every match that PCRE produces when "echo
82 * matches" is enabled.
83 */
84 static
onMatchEcho(unsigned int id,unsigned long long,unsigned long long to,ScanPCREContext * sc)85 int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
86 ScanPCREContext *sc) {
87 assert(sc);
88 sc->result.matches++;
89
90 printf("Match @%u:%llu for %u\n", sc->id, to, id);
91
92 return 0;
93 }
94
EnginePCRE(vector<unique_ptr<PcreDB>> dbs_in,CompilePCREStats cs,int capture_cnt_in)95 EnginePCRE::EnginePCRE(vector<unique_ptr<PcreDB>> dbs_in, CompilePCREStats cs,
96 int capture_cnt_in)
97 : dbs(move(dbs_in)), compile_stats(move(cs)),
98 capture_cnt(capture_cnt_in) {}
99
~EnginePCRE()100 EnginePCRE::~EnginePCRE() {
101 for (auto &pcreDB : dbs) {
102 free(pcreDB->extra);
103 free(pcreDB->db);
104 }
105 }
106
makeContext() const107 unique_ptr<EngineContext> EnginePCRE::makeContext() const {
108 return ue2::make_unique<EnginePCREContext>(capture_cnt);
109 }
110
scan(const char * data,unsigned int len,unsigned int id,ResultEntry & result,EngineContext & ectx) const111 void EnginePCRE::scan(const char *data, unsigned int len, unsigned int id,
112 ResultEntry &result, EngineContext &ectx) const {
113 assert(data);
114
115 ScanPCREContext sc(id, result);
116 auto &ctx = static_cast<EnginePCREContext &>(ectx);
117 int *ovec = ctx.ovec;
118 int ovec_size = (capture_cnt + 1) * 3;
119 for (const auto &pcreDB : dbs) {
120 int startoffset = 0;
121 bool utf8 = pcreDB->utf8;
122 bool highlander = pcreDB->highlander;
123
124 int flags = 0;
125 int ret;
126 do {
127 ret = pcre_exec(pcreDB->db, pcreDB->extra, data, len,
128 startoffset, flags, ovec, ovec_size);
129 if (ret <= PCRE_ERROR_NOMATCH) {
130 break;
131 }
132
133 int from = ovec[0];
134 int to = ovec[1];
135 assert(from <= to);
136
137 if (echo_matches) {
138 onMatchEcho(pcreDB->id, from, to, &sc);
139 } else {
140 onMatch(&sc);
141 }
142
143 // If we only wanted a single match, we're done.
144 if (highlander) {
145 break;
146 }
147
148 // Next scan starts at the first codepoint after the match. It's
149 // possible that we have a vacuous match, in which case we must step
150 // past it to ensure that we always progress.
151 if (from != to) {
152 startoffset = to;
153 } else if (utf8) {
154 startoffset = to + 1;
155 while (startoffset < (int)len &&
156 ((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
157 ++startoffset;
158 }
159 } else {
160 startoffset = to + 1;
161 }
162 } while (startoffset <= (int)len);
163
164 if (ret < PCRE_ERROR_NOMATCH) {
165 printf("Fatal error: pcre returned error %d\n", ret);
166 abort();
167 }
168 }
169 }
170
171 // vectoring scan
scan_vectored(UNUSED const char * const * data,UNUSED const unsigned int * len,UNUSED unsigned int count,UNUSED unsigned int streamId,UNUSED ResultEntry & result,UNUSED EngineContext & ectx) const172 void EnginePCRE::scan_vectored(UNUSED const char *const *data,
173 UNUSED const unsigned int *len,
174 UNUSED unsigned int count,
175 UNUSED unsigned int streamId,
176 UNUSED ResultEntry &result,
177 UNUSED EngineContext &ectx) const {
178 printf("PCRE matcher can't support vectored mode.\n");
179 abort();
180 }
181
streamOpen(UNUSED EngineContext & ectx,UNUSED unsigned id) const182 unique_ptr<EngineStream> EnginePCRE::streamOpen(UNUSED EngineContext &ectx,
183 UNUSED unsigned id) const {
184 printf("PCRE matcher can't stream.\n");
185 abort();
186 }
187
streamClose(UNUSED unique_ptr<EngineStream> stream,UNUSED ResultEntry & result) const188 void EnginePCRE::streamClose(UNUSED unique_ptr<EngineStream> stream,
189 UNUSED ResultEntry &result) const {
190 printf("PCRE matcher can't stream.\n");
191 abort();
192 }
193
streamScan(UNUSED EngineStream & stream,UNUSED const char * data,UNUSED unsigned len,UNUSED unsigned id,UNUSED ResultEntry & result) const194 void EnginePCRE::streamScan(UNUSED EngineStream &stream,
195 UNUSED const char *data,
196 UNUSED unsigned len, UNUSED unsigned id,
197 UNUSED ResultEntry &result) const {
198 printf("PCRE matcher can't stream.\n");
199 abort();
200 }
201
streamCompressExpand(UNUSED EngineStream & stream,UNUSED vector<char> & temp) const202 void EnginePCRE::streamCompressExpand(UNUSED EngineStream &stream,
203 UNUSED vector<char> &temp) const {
204 printf("PCRE matcher can't stream.\n");
205 abort();
206 }
207
printStats() const208 void EnginePCRE::printStats() const {
209 // Output summary information.
210 if (!compile_stats.sigs_name.empty()) {
211 printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
212 }
213 printf("Signatures: %s\n", compile_stats.signatures.c_str());
214 printf("PCRE info: %s\n", compile_stats.db_info.c_str());
215 #ifndef _WIN32
216 printf("Expression count: %'zu\n", compile_stats.expressionCount);
217 printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
218 printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
219 printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
220 printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
221 #else
222 printf("Expression count: %zu\n", compile_stats.expressionCount);
223 printf("Bytecode size: %zu bytes\n", compile_stats.compiledSize);
224 printf("Scratch size: %zu bytes\n", compile_stats.scratchSize);
225 printf("Compile time: %0.3Lf seconds\n", compile_stats.compileSecs);
226 printf("Peak heap usage: %u bytes\n", compile_stats.peakMemorySize);
227 #endif
228 }
229
printCsvStats() const230 void EnginePCRE::printCsvStats() const {
231 printf(",\"%s\"", compile_stats.signatures.c_str());
232 printf(",\"%zu\"", compile_stats.expressionCount);
233 printf(",\"%zu\"", compile_stats.compiledSize);
234 printf(",\"%zu\"", compile_stats.scratchSize);
235 printf(",\"%0.3Lf\"", compile_stats.compileSecs);
236 printf(",\"%u\"", compile_stats.peakMemorySize);
237 }
238
sqlStats(SqlDB & sqldb) const239 void EnginePCRE::sqlStats(SqlDB &sqldb) const {
240 ostringstream crc;
241
242 static const string Q =
243 "INSERT INTO Compile ("
244 "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
245 "scratchSize, compileSecs, peakMemory) "
246 "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";
247
248 sqldb.insert_all(Q, compile_stats.sigs_name, compile_stats.signatures,
249 compile_stats.db_info, compile_stats.expressionCount,
250 compile_stats.compiledSize, crc.str(),
251 compile_stats.scratchSize, compile_stats.compileSecs,
252 compile_stats.peakMemorySize);
253 }
254
255 static
decodeExprPCRE(string & expr,unsigned * flags,struct PcreDB & db)256 bool decodeExprPCRE(string &expr, unsigned *flags, struct PcreDB &db) {
257 if (expr[0] != '/') {
258 return false;
259 }
260
261 size_t end = expr.find_last_of('/');
262 if (end == string::npos) {
263 return false;
264 }
265 string strFlags = expr.substr(end + 1, expr.length() - end - 1);
266
267 // strip starting and trailing slashes and the flags
268 expr.erase(end, expr.length() - end);
269 expr.erase(0, 1);
270
271 // decode the flags
272 *flags = 0;
273 for (size_t i = 0; i != strFlags.length(); ++i) {
274 switch (strFlags[i]) {
275 case 's':
276 *flags |= PCRE_DOTALL;
277 break;
278 case 'm':
279 *flags |= PCRE_MULTILINE;
280 break;
281 case 'i':
282 *flags |= PCRE_CASELESS;
283 break;
284 case '8':
285 *flags |= PCRE_UTF8;
286 db.utf8 = true;
287 break;
288 case 'W':
289 *flags |= PCRE_UCP;
290 break;
291 case 'H':
292 db.highlander = true;
293 break;
294 default:
295 return false;
296 }
297 }
298
299 return true;
300 }
301
302 unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap & expressions,const string & name,const string & sigs_name)303 buildEnginePcre(const ExpressionMap &expressions, const string &name,
304 const string &sigs_name) {
305 if (expressions.empty()) {
306 assert(0);
307 return nullptr;
308 }
309
310 long double compileSecs = 0.0;
311 size_t compiledSize = 0.0;
312 unsigned int peakMemorySize = 0;
313 string db_info("Version: ");
314 db_info += string(pcre_version());
315
316 vector<unique_ptr<PcreDB>> dbs;
317 int capture_cnt = 0;
318
319 Timer timer;
320 timer.start();
321
322 for (const auto &m : expressions) {
323 string expr(m.second);
324 unsigned int flags = 0;
325 auto pcreDB = ue2::make_unique<PcreDB>();
326 if (!decodeExprPCRE(expr, &flags, *pcreDB)) {
327 printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
328 m.first);
329 return nullptr;
330 }
331
332 const char *errp;
333 int erro;
334 pcre *db = pcre_compile(expr.c_str(), flags, &errp, &erro, NULL);
335
336 if (!db) {
337 printf("Compile error %s\n", errp);
338 return nullptr;
339 }
340
341 pcre_extra *extra = pcre_study(db, PCRE_STUDY_JIT_COMPILE, &errp);
342 if (errp) {
343 printf("PCRE could not be studied: %s\n", errp);
344 return nullptr;
345 }
346 if (!extra) {
347 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
348 }
349 int cap = 0; // PCRE_INFO_CAPTURECOUNT demands an int
350 if (pcre_fullinfo(db, extra, PCRE_INFO_CAPTURECOUNT, &cap)) {
351 printf("PCRE fullinfo error\n");
352 free(extra);
353 free(db);
354 return nullptr;
355 }
356 assert(cap >= 0);
357 capture_cnt = max(capture_cnt, cap);
358
359 size_t db_size = 0;
360 if (pcre_fullinfo(db, extra, PCRE_INFO_SIZE, &db_size)) {
361 printf("PCRE fullinfo error\n");
362 free(extra);
363 free(db);
364 return nullptr;
365 }
366
367 size_t study_size = 0;
368 if (pcre_fullinfo(db, extra, PCRE_INFO_STUDYSIZE,
369 &study_size)) {
370 printf("PCRE fullinfo error\n");
371 free(extra);
372 free(db);
373 return nullptr;
374 }
375 compiledSize += db_size + study_size;
376
377 pcreDB->id = m.first;
378 pcreDB->db = db;
379
380 extra->flags =
381 PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
382 extra->match_limit = 10000000;
383 extra->match_limit_recursion = 1500;
384
385 pcreDB->extra = extra;
386 dbs.push_back(move(pcreDB));
387 }
388
389 timer.complete();
390 compileSecs = timer.seconds();
391 peakMemorySize = getPeakHeap();
392
393 // Collect summary information.
394 CompilePCREStats cs;
395 cs.sigs_name = sigs_name;
396 if (!sigs_name.empty()) {
397 const auto pos = name.find_last_of('/');
398 cs.signatures = name.substr(pos + 1);
399 } else {
400 cs.signatures = name;
401 }
402 cs.db_info = db_info;
403 cs.expressionCount = expressions.size();
404 cs.compiledSize = compiledSize;
405 cs.scratchSize = (capture_cnt + 1) * sizeof(int) * 3;
406 cs.compileSecs = compileSecs;
407 cs.peakMemorySize = peakMemorySize;
408
409 return ue2::make_unique<EnginePCRE>(move(dbs), move(cs), capture_cnt);
410 }
411