1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Benchmarks for regular expression implementations.
6
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string>
11 #include <thread>
12 #include <unordered_map>
13 #include <utility>
14
15 #include "util/benchmark.h"
16 #include "util/test.h"
17 #include "util/flags.h"
18 #include "util/logging.h"
19 #include "util/malloc_counter.h"
20 #include "util/strutil.h"
21 #include "re2/prog.h"
22 #include "re2/re2.h"
23 #include "re2/regexp.h"
24 #include "util/mutex.h"
25 #include "util/pcre.h"
26
27 namespace re2 {
28 void Test();
29 void MemoryUsage();
30 } // namespace re2
31
32 typedef testing::MallocCounter MallocCounter;
33
34 namespace re2 {
35
Test()36 void Test() {
37 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
38 CHECK(re);
39 Prog* prog = re->CompileToProg(0);
40 CHECK(prog);
41 CHECK(prog->IsOnePass());
42 CHECK(prog->CanBitState());
43 const char* text = "650-253-0001";
44 StringPiece sp[4];
45 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
46 CHECK_EQ(sp[0], "650-253-0001");
47 CHECK_EQ(sp[1], "650");
48 CHECK_EQ(sp[2], "253");
49 CHECK_EQ(sp[3], "0001");
50 delete prog;
51 re->Decref();
52 LOG(INFO) << "test passed\n";
53 }
54
MemoryUsage()55 void MemoryUsage() {
56 const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
57 const char* text = "650-253-0001";
58 {
59 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
60 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
61 CHECK(re);
62 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
63 // because LOG(INFO) might do a big allocation before they get evaluated.
64 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
65 mc.HeapGrowth(), mc.PeakHeapGrowth());
66 mc.Reset();
67
68 Prog* prog = re->CompileToProg(0);
69 CHECK(prog);
70 CHECK(prog->IsOnePass());
71 CHECK(prog->CanBitState());
72 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n",
73 mc.HeapGrowth(), mc.PeakHeapGrowth());
74 mc.Reset();
75
76 StringPiece sp[4];
77 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
78 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
79 mc.HeapGrowth(), mc.PeakHeapGrowth());
80 delete prog;
81 re->Decref();
82 }
83
84 {
85 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
86
87 PCRE re(regexp, PCRE::UTF8);
88 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
89 mc.HeapGrowth(), mc.PeakHeapGrowth());
90 PCRE::FullMatch(text, re);
91 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
92 mc.HeapGrowth(), mc.PeakHeapGrowth());
93 }
94
95 {
96 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
97
98 PCRE* re = new PCRE(regexp, PCRE::UTF8);
99 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
100 mc.HeapGrowth(), mc.PeakHeapGrowth());
101 PCRE::FullMatch(text, *re);
102 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
103 mc.HeapGrowth(), mc.PeakHeapGrowth());
104 delete re;
105 }
106
107 {
108 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
109
110 RE2 re(regexp);
111 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
112 mc.HeapGrowth(), mc.PeakHeapGrowth());
113 RE2::FullMatch(text, re);
114 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
115 mc.HeapGrowth(), mc.PeakHeapGrowth());
116 }
117
118 fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
119 sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
120 }
121
NumCPUs()122 int NumCPUs() {
123 return static_cast<int>(std::thread::hardware_concurrency());
124 }
125
126 // Regular expression implementation wrappers.
127 // Defined at bottom of file, but they are repetitive
128 // and not interesting.
129
130 typedef void SearchImpl(benchmark::State& state, const char* regexp,
131 const StringPiece& text, Prog::Anchor anchor,
132 bool expect_match);
133
134 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
135 SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass,
136 SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
137
138 typedef void ParseImpl(benchmark::State& state, const char* regexp,
139 const StringPiece& text);
140
141 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
142 Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
143 Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack;
144
145 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2,
146 Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
147 Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack;
148
149 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
150
151 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
152
153 // Benchmark: failed search for regexp in random text.
154
155 // Generate random text that won't contain the search string,
156 // to test worst-case search behavior.
RandomText(int64_t nbytes)157 std::string RandomText(int64_t nbytes) {
158 static const std::string* const text = []() {
159 std::string* text = new std::string;
160 srand(1);
161 text->resize(16<<20);
162 for (int64_t i = 0; i < 16<<20; i++) {
163 // Generate a one-byte rune that isn't a control character (e.g. '\n').
164 // Clipping to 0x20 introduces some bias, but we don't need uniformity.
165 int byte = rand() & 0x7F;
166 if (byte < 0x20)
167 byte = 0x20;
168 (*text)[i] = byte;
169 }
170 return text;
171 }();
172 CHECK_LE(nbytes, 16<<20);
173 return text->substr(0, nbytes);
174 }
175
176 // Makes text of size nbytes, then calls run to search
177 // the text for regexp iters times.
Search(benchmark::State & state,const char * regexp,SearchImpl * search)178 void Search(benchmark::State& state, const char* regexp, SearchImpl* search) {
179 std::string s = RandomText(state.range(0));
180 search(state, regexp, s, Prog::kUnanchored, false);
181 state.SetBytesProcessed(state.iterations() * state.range(0));
182 }
183
184 // These two are easy because they start with an A,
185 // giving the search loop something to memchr for.
186 #define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
187 #define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
188
189 // This is a little harder, since it starts with a character class
190 // and thus can't be memchr'ed. Could look for ABC and work backward,
191 // but no one does that.
192 #define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
193
194 // This is a fair amount harder, because of the leading [ -~]*.
195 // A bad backtracking implementation will take O(text^2) time to
196 // figure out there's no match.
197 #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
198
199 // This has quite a high degree of fanout.
200 // NFA execution will be particularly slow.
201 #define FANOUT "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
202
203 // This stresses engines that are trying to track parentheses.
204 #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
205 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
206
Search_Easy0_CachedDFA(benchmark::State & state)207 void Search_Easy0_CachedDFA(benchmark::State& state) { Search(state, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(benchmark::State & state)208 void Search_Easy0_CachedNFA(benchmark::State& state) { Search(state, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(benchmark::State & state)209 void Search_Easy0_CachedPCRE(benchmark::State& state) { Search(state, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(benchmark::State & state)210 void Search_Easy0_CachedRE2(benchmark::State& state) { Search(state, EASY0, SearchCachedRE2); }
211
212 BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
213 BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
214 #ifdef USEPCRE
215 BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
216 #endif
217 BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
218
Search_Easy1_CachedDFA(benchmark::State & state)219 void Search_Easy1_CachedDFA(benchmark::State& state) { Search(state, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(benchmark::State & state)220 void Search_Easy1_CachedNFA(benchmark::State& state) { Search(state, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(benchmark::State & state)221 void Search_Easy1_CachedPCRE(benchmark::State& state) { Search(state, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(benchmark::State & state)222 void Search_Easy1_CachedRE2(benchmark::State& state) { Search(state, EASY1, SearchCachedRE2); }
223
224 BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
225 BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
226 #ifdef USEPCRE
227 BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
228 #endif
229 BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
230
Search_Medium_CachedDFA(benchmark::State & state)231 void Search_Medium_CachedDFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(benchmark::State & state)232 void Search_Medium_CachedNFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(benchmark::State & state)233 void Search_Medium_CachedPCRE(benchmark::State& state) { Search(state, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(benchmark::State & state)234 void Search_Medium_CachedRE2(benchmark::State& state) { Search(state, MEDIUM, SearchCachedRE2); }
235
236 BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
237 BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
238 #ifdef USEPCRE
239 BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
240 #endif
241 BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
242
Search_Hard_CachedDFA(benchmark::State & state)243 void Search_Hard_CachedDFA(benchmark::State& state) { Search(state, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(benchmark::State & state)244 void Search_Hard_CachedNFA(benchmark::State& state) { Search(state, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(benchmark::State & state)245 void Search_Hard_CachedPCRE(benchmark::State& state) { Search(state, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(benchmark::State & state)246 void Search_Hard_CachedRE2(benchmark::State& state) { Search(state, HARD, SearchCachedRE2); }
247
248 BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
249 BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
250 #ifdef USEPCRE
251 BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
252 #endif
253 BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
254
Search_Fanout_CachedDFA(benchmark::State & state)255 void Search_Fanout_CachedDFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedDFA); }
Search_Fanout_CachedNFA(benchmark::State & state)256 void Search_Fanout_CachedNFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedNFA); }
Search_Fanout_CachedPCRE(benchmark::State & state)257 void Search_Fanout_CachedPCRE(benchmark::State& state) { Search(state, FANOUT, SearchCachedPCRE); }
Search_Fanout_CachedRE2(benchmark::State & state)258 void Search_Fanout_CachedRE2(benchmark::State& state) { Search(state, FANOUT, SearchCachedRE2); }
259
260 BENCHMARK_RANGE(Search_Fanout_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
261 BENCHMARK_RANGE(Search_Fanout_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
262 #ifdef USEPCRE
263 BENCHMARK_RANGE(Search_Fanout_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
264 #endif
265 BENCHMARK_RANGE(Search_Fanout_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
266
Search_Parens_CachedDFA(benchmark::State & state)267 void Search_Parens_CachedDFA(benchmark::State& state) { Search(state, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(benchmark::State & state)268 void Search_Parens_CachedNFA(benchmark::State& state) { Search(state, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(benchmark::State & state)269 void Search_Parens_CachedPCRE(benchmark::State& state) { Search(state, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(benchmark::State & state)270 void Search_Parens_CachedRE2(benchmark::State& state) { Search(state, PARENS, SearchCachedRE2); }
271
272 BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
273 BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
274 #ifdef USEPCRE
275 BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
276 #endif
277 BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
278
SearchBigFixed(benchmark::State & state,SearchImpl * search)279 void SearchBigFixed(benchmark::State& state, SearchImpl* search) {
280 std::string s;
281 s.append(state.range(0)/2, 'x');
282 std::string regexp = "^" + s + ".*$";
283 std::string t = RandomText(state.range(0)/2);
284 s += t;
285 search(state, regexp.c_str(), s, Prog::kUnanchored, true);
286 state.SetBytesProcessed(state.iterations() * state.range(0));
287 }
288
Search_BigFixed_CachedDFA(benchmark::State & state)289 void Search_BigFixed_CachedDFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedDFA); }
Search_BigFixed_CachedNFA(benchmark::State & state)290 void Search_BigFixed_CachedNFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(benchmark::State & state)291 void Search_BigFixed_CachedPCRE(benchmark::State& state) { SearchBigFixed(state, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(benchmark::State & state)292 void Search_BigFixed_CachedRE2(benchmark::State& state) { SearchBigFixed(state, SearchCachedRE2); }
293
294 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
295 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
296 #ifdef USEPCRE
297 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
298 #endif
299 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
300
301 // Benchmark: FindAndConsume
302
FindAndConsume(benchmark::State & state)303 void FindAndConsume(benchmark::State& state) {
304 std::string s = RandomText(state.range(0));
305 s.append("Hello World");
306 RE2 re("((Hello World))");
307 for (auto _ : state) {
308 StringPiece t = s;
309 StringPiece u;
310 CHECK(RE2::FindAndConsume(&t, re, &u));
311 CHECK_EQ(u, "Hello World");
312 }
313 state.SetBytesProcessed(state.iterations() * state.range(0));
314 }
315
316 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
317
318 // Benchmark: successful anchored search.
319
SearchSuccess(benchmark::State & state,const char * regexp,SearchImpl * search)320 void SearchSuccess(benchmark::State& state, const char* regexp,
321 SearchImpl* search) {
322 std::string s = RandomText(state.range(0));
323 search(state, regexp, s, Prog::kAnchored, true);
324 state.SetBytesProcessed(state.iterations() * state.range(0));
325 }
326
327 // Unambiguous search (RE2 can use OnePass).
328
Search_Success_DFA(benchmark::State & state)329 void Search_Success_DFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchDFA); }
Search_Success_NFA(benchmark::State & state)330 void Search_Success_NFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchNFA); }
Search_Success_PCRE(benchmark::State & state)331 void Search_Success_PCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchPCRE); }
Search_Success_RE2(benchmark::State & state)332 void Search_Success_RE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchRE2); }
Search_Success_OnePass(benchmark::State & state)333 void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchOnePass); }
334
335 BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
336 BENCHMARK_RANGE(Search_Success_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
337 #ifdef USEPCRE
338 BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
339 #endif
340 BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
341 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
342
Search_Success_CachedDFA(benchmark::State & state)343 void Search_Success_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedDFA); }
Search_Success_CachedNFA(benchmark::State & state)344 void Search_Success_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedNFA); }
Search_Success_CachedPCRE(benchmark::State & state)345 void Search_Success_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(benchmark::State & state)346 void Search_Success_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedRE2); }
Search_Success_CachedOnePass(benchmark::State & state)347 void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedOnePass); }
348
349 BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
350 BENCHMARK_RANGE(Search_Success_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
351 #ifdef USEPCRE
352 BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
353 #endif
354 BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
355 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
356
357 // Ambiguous search (RE2 cannot use OnePass).
358 // Used to be ".*.$", but that is coalesced to ".+$" these days.
359
Search_Success1_DFA(benchmark::State & state)360 void Search_Success1_DFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchDFA); }
Search_Success1_NFA(benchmark::State & state)361 void Search_Success1_NFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchNFA); }
Search_Success1_PCRE(benchmark::State & state)362 void Search_Success1_PCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchPCRE); }
Search_Success1_RE2(benchmark::State & state)363 void Search_Success1_RE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchRE2); }
Search_Success1_BitState(benchmark::State & state)364 void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchBitState); }
365
366 BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
367 BENCHMARK_RANGE(Search_Success1_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
368 #ifdef USEPCRE
369 BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
370 #endif
371 BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
372 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
373
Search_Success1_CachedDFA(benchmark::State & state)374 void Search_Success1_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedDFA); }
Search_Success1_CachedNFA(benchmark::State & state)375 void Search_Success1_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedNFA); }
Search_Success1_CachedPCRE(benchmark::State & state)376 void Search_Success1_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedPCRE); }
Search_Success1_CachedRE2(benchmark::State & state)377 void Search_Success1_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedRE2); }
Search_Success1_CachedBitState(benchmark::State & state)378 void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedBitState); }
379
380 BENCHMARK_RANGE(Search_Success1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
381 BENCHMARK_RANGE(Search_Success1_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
382 #ifdef USEPCRE
383 BENCHMARK_RANGE(Search_Success1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
384 #endif
385 BENCHMARK_RANGE(Search_Success1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
386 BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
387
388 // Benchmark: AltMatch optimisation (just to verify that it works)
389 // Note that OnePass doesn't implement it!
390
SearchAltMatch(benchmark::State & state,SearchImpl * search)391 void SearchAltMatch(benchmark::State& state, SearchImpl* search) {
392 std::string s = RandomText(state.range(0));
393 search(state, "\\C*", s, Prog::kAnchored, true);
394 state.SetBytesProcessed(state.iterations() * state.range(0));
395 }
396
Search_AltMatch_DFA(benchmark::State & state)397 void Search_AltMatch_DFA(benchmark::State& state) { SearchAltMatch(state, SearchDFA); }
Search_AltMatch_NFA(benchmark::State & state)398 void Search_AltMatch_NFA(benchmark::State& state) { SearchAltMatch(state, SearchNFA); }
Search_AltMatch_OnePass(benchmark::State & state)399 void Search_AltMatch_OnePass(benchmark::State& state) { SearchAltMatch(state, SearchOnePass); }
Search_AltMatch_BitState(benchmark::State & state)400 void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); }
Search_AltMatch_PCRE(benchmark::State & state)401 void Search_AltMatch_PCRE(benchmark::State& state) { SearchAltMatch(state, SearchPCRE); }
Search_AltMatch_RE2(benchmark::State & state)402 void Search_AltMatch_RE2(benchmark::State& state) { SearchAltMatch(state, SearchRE2); }
403
404 BENCHMARK_RANGE(Search_AltMatch_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
405 BENCHMARK_RANGE(Search_AltMatch_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
406 BENCHMARK_RANGE(Search_AltMatch_OnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
407 BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
408 #ifdef USEPCRE
409 BENCHMARK_RANGE(Search_AltMatch_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
410 #endif
411 BENCHMARK_RANGE(Search_AltMatch_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
412
Search_AltMatch_CachedDFA(benchmark::State & state)413 void Search_AltMatch_CachedDFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedDFA); }
Search_AltMatch_CachedNFA(benchmark::State & state)414 void Search_AltMatch_CachedNFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedNFA); }
Search_AltMatch_CachedOnePass(benchmark::State & state)415 void Search_AltMatch_CachedOnePass(benchmark::State& state) { SearchAltMatch(state, SearchCachedOnePass); }
Search_AltMatch_CachedBitState(benchmark::State & state)416 void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); }
Search_AltMatch_CachedPCRE(benchmark::State & state)417 void Search_AltMatch_CachedPCRE(benchmark::State& state) { SearchAltMatch(state, SearchCachedPCRE); }
Search_AltMatch_CachedRE2(benchmark::State & state)418 void Search_AltMatch_CachedRE2(benchmark::State& state) { SearchAltMatch(state, SearchCachedRE2); }
419
420 BENCHMARK_RANGE(Search_AltMatch_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
421 BENCHMARK_RANGE(Search_AltMatch_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
422 BENCHMARK_RANGE(Search_AltMatch_CachedOnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
423 BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
424 #ifdef USEPCRE
425 BENCHMARK_RANGE(Search_AltMatch_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
426 #endif
427 BENCHMARK_RANGE(Search_AltMatch_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
428
429 // Benchmark: use regexp to find phone number.
430
SearchDigits(benchmark::State & state,SearchImpl * search)431 void SearchDigits(benchmark::State& state, SearchImpl* search) {
432 StringPiece s("650-253-0001");
433 search(state, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
434 state.SetItemsProcessed(state.iterations());
435 }
436
Search_Digits_DFA(benchmark::State & state)437 void Search_Digits_DFA(benchmark::State& state) { SearchDigits(state, SearchDFA); }
Search_Digits_NFA(benchmark::State & state)438 void Search_Digits_NFA(benchmark::State& state) { SearchDigits(state, SearchNFA); }
Search_Digits_OnePass(benchmark::State & state)439 void Search_Digits_OnePass(benchmark::State& state) { SearchDigits(state, SearchOnePass); }
Search_Digits_PCRE(benchmark::State & state)440 void Search_Digits_PCRE(benchmark::State& state) { SearchDigits(state, SearchPCRE); }
Search_Digits_RE2(benchmark::State & state)441 void Search_Digits_RE2(benchmark::State& state) { SearchDigits(state, SearchRE2); }
Search_Digits_BitState(benchmark::State & state)442 void Search_Digits_BitState(benchmark::State& state) { SearchDigits(state, SearchBitState); }
443
444 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
445 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
446 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
447 #ifdef USEPCRE
448 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
449 #endif
450 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
451 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
452
453 // Benchmark: use regexp to parse digit fields in phone number.
454
Parse3Digits(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))455 void Parse3Digits(benchmark::State& state,
456 void (*parse3)(benchmark::State&, const char*,
457 const StringPiece&)) {
458 parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
459 state.SetItemsProcessed(state.iterations());
460 }
461
Parse_Digits_NFA(benchmark::State & state)462 void Parse_Digits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3NFA); }
Parse_Digits_OnePass(benchmark::State & state)463 void Parse_Digits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3OnePass); }
Parse_Digits_PCRE(benchmark::State & state)464 void Parse_Digits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3PCRE); }
Parse_Digits_RE2(benchmark::State & state)465 void Parse_Digits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3RE2); }
Parse_Digits_Backtrack(benchmark::State & state)466 void Parse_Digits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3Backtrack); }
Parse_Digits_BitState(benchmark::State & state)467 void Parse_Digits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3BitState); }
468
469 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
470 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
471 #ifdef USEPCRE
472 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
473 #endif
474 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
475 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
476 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
477
Parse_CachedDigits_NFA(benchmark::State & state)478 void Parse_CachedDigits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(benchmark::State & state)479 void Parse_CachedDigits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(benchmark::State & state)480 void Parse_CachedDigits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(benchmark::State & state)481 void Parse_CachedDigits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(benchmark::State & state)482 void Parse_CachedDigits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(benchmark::State & state)483 void Parse_CachedDigits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3CachedBitState); }
484
485 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
486 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
487 #ifdef USEPCRE
488 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
489 #endif
490 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
491 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
492 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
493
Parse3DigitDs(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))494 void Parse3DigitDs(benchmark::State& state,
495 void (*parse3)(benchmark::State&, const char*,
496 const StringPiece&)) {
497 parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
498 state.SetItemsProcessed(state.iterations());
499 }
500
Parse_DigitDs_NFA(benchmark::State & state)501 void Parse_DigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3NFA); }
Parse_DigitDs_OnePass(benchmark::State & state)502 void Parse_DigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3OnePass); }
Parse_DigitDs_PCRE(benchmark::State & state)503 void Parse_DigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3PCRE); }
Parse_DigitDs_RE2(benchmark::State & state)504 void Parse_DigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3RE2); }
Parse_DigitDs_Backtrack(benchmark::State & state)505 void Parse_DigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(benchmark::State & state)506 void Parse_DigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
507
508 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
509 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
510 #ifdef USEPCRE
511 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
512 #endif
513 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
514 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
515 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
516
Parse_CachedDigitDs_NFA(benchmark::State & state)517 void Parse_CachedDigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(benchmark::State & state)518 void Parse_CachedDigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(benchmark::State & state)519 void Parse_CachedDigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(benchmark::State & state)520 void Parse_CachedDigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(benchmark::State & state)521 void Parse_CachedDigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(benchmark::State & state)522 void Parse_CachedDigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
523
524 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
525 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
526 #ifdef USEPCRE
527 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
528 #endif
529 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
530 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
531 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
532
533 // Benchmark: splitting off leading number field.
534
Parse1Split(benchmark::State & state,void (* parse1)(benchmark::State &,const char *,const StringPiece &))535 void Parse1Split(benchmark::State& state,
536 void (*parse1)(benchmark::State&, const char*,
537 const StringPiece&)) {
538 parse1(state, "[0-9]+-(.*)", "650-253-0001");
539 state.SetItemsProcessed(state.iterations());
540 }
541
Parse_Split_NFA(benchmark::State & state)542 void Parse_Split_NFA(benchmark::State& state) { Parse1Split(state, Parse1NFA); }
Parse_Split_OnePass(benchmark::State & state)543 void Parse_Split_OnePass(benchmark::State& state) { Parse1Split(state, Parse1OnePass); }
Parse_Split_PCRE(benchmark::State & state)544 void Parse_Split_PCRE(benchmark::State& state) { Parse1Split(state, Parse1PCRE); }
Parse_Split_RE2(benchmark::State & state)545 void Parse_Split_RE2(benchmark::State& state) { Parse1Split(state, Parse1RE2); }
Parse_Split_BitState(benchmark::State & state)546 void Parse_Split_BitState(benchmark::State& state) { Parse1Split(state, Parse1BitState); }
547
548 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
549 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
550 #ifdef USEPCRE
551 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
552 #endif
553 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
554 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
555
Parse_CachedSplit_NFA(benchmark::State & state)556 void Parse_CachedSplit_NFA(benchmark::State& state) { Parse1Split(state, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(benchmark::State & state)557 void Parse_CachedSplit_OnePass(benchmark::State& state) { Parse1Split(state, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(benchmark::State & state)558 void Parse_CachedSplit_PCRE(benchmark::State& state) { Parse1Split(state, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(benchmark::State & state)559 void Parse_CachedSplit_RE2(benchmark::State& state) { Parse1Split(state, Parse1CachedRE2); }
Parse_CachedSplit_BitState(benchmark::State & state)560 void Parse_CachedSplit_BitState(benchmark::State& state) { Parse1Split(state, Parse1CachedBitState); }
561
562 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
563 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
564 #ifdef USEPCRE
565 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
566 #endif
567 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
568 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
569
570 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
571
Parse1SplitHard(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))572 void Parse1SplitHard(benchmark::State& state,
573 void (*run)(benchmark::State&, const char*,
574 const StringPiece&)) {
575 run(state, "[0-9]+.(.*)", "650-253-0001");
576 state.SetItemsProcessed(state.iterations());
577 }
578
Parse_SplitHard_NFA(benchmark::State & state)579 void Parse_SplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1NFA); }
Parse_SplitHard_PCRE(benchmark::State & state)580 void Parse_SplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1PCRE); }
Parse_SplitHard_RE2(benchmark::State & state)581 void Parse_SplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1RE2); }
Parse_SplitHard_BitState(benchmark::State & state)582 void Parse_SplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1BitState); }
583
584 #ifdef USEPCRE
585 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
586 #endif
587 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
588 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
589 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
590
Parse_CachedSplitHard_NFA(benchmark::State & state)591 void Parse_CachedSplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(benchmark::State & state)592 void Parse_CachedSplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(benchmark::State & state)593 void Parse_CachedSplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(benchmark::State & state)594 void Parse_CachedSplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(benchmark::State & state)595 void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); }
596
597 #ifdef USEPCRE
598 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
599 #endif
600 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
601 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
602 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
603 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
604
605 // Benchmark: Parse1SplitHard, big text, small match.
606
Parse1SplitBig1(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))607 void Parse1SplitBig1(benchmark::State& state,
608 void (*run)(benchmark::State&, const char*,
609 const StringPiece&)) {
610 std::string s;
611 s.append(100000, 'x');
612 s.append("650-253-0001");
613 run(state, "[0-9]+.(.*)", s);
614 state.SetItemsProcessed(state.iterations());
615 }
616
Parse_CachedSplitBig1_PCRE(benchmark::State & state)617 void Parse_CachedSplitBig1_PCRE(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(benchmark::State & state)618 void Parse_CachedSplitBig1_RE2(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedRE2); }
619
620 #ifdef USEPCRE
621 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
622 #endif
623 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
624
625 // Benchmark: Parse1SplitHard, big text, big match.
626
Parse1SplitBig2(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))627 void Parse1SplitBig2(benchmark::State& state,
628 void (*run)(benchmark::State&, const char*,
629 const StringPiece&)) {
630 std::string s;
631 s.append("650-253-");
632 s.append(100000, '0');
633 run(state, "[0-9]+.(.*)", s);
634 state.SetItemsProcessed(state.iterations());
635 }
636
Parse_CachedSplitBig2_PCRE(benchmark::State & state)637 void Parse_CachedSplitBig2_PCRE(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(benchmark::State & state)638 void Parse_CachedSplitBig2_RE2(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedRE2); }
639
640 #ifdef USEPCRE
641 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
642 #endif
643 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
644
645 // Benchmark: measure time required to parse (but not execute)
646 // a simple regular expression.
647
ParseRegexp(benchmark::State & state,const std::string & regexp)648 void ParseRegexp(benchmark::State& state, const std::string& regexp) {
649 for (auto _ : state) {
650 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
651 CHECK(re);
652 re->Decref();
653 }
654 }
655
SimplifyRegexp(benchmark::State & state,const std::string & regexp)656 void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
657 for (auto _ : state) {
658 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
659 CHECK(re);
660 Regexp* sre = re->Simplify();
661 CHECK(sre);
662 sre->Decref();
663 re->Decref();
664 }
665 }
666
NullWalkRegexp(benchmark::State & state,const std::string & regexp)667 void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
668 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
669 CHECK(re);
670 for (auto _ : state) {
671 re->NullWalk();
672 }
673 re->Decref();
674 }
675
SimplifyCompileRegexp(benchmark::State & state,const std::string & regexp)676 void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
677 for (auto _ : state) {
678 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
679 CHECK(re);
680 Regexp* sre = re->Simplify();
681 CHECK(sre);
682 Prog* prog = sre->CompileToProg(0);
683 CHECK(prog);
684 delete prog;
685 sre->Decref();
686 re->Decref();
687 }
688 }
689
CompileRegexp(benchmark::State & state,const std::string & regexp)690 void CompileRegexp(benchmark::State& state, const std::string& regexp) {
691 for (auto _ : state) {
692 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
693 CHECK(re);
694 Prog* prog = re->CompileToProg(0);
695 CHECK(prog);
696 delete prog;
697 re->Decref();
698 }
699 }
700
CompileToProg(benchmark::State & state,const std::string & regexp)701 void CompileToProg(benchmark::State& state, const std::string& regexp) {
702 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
703 CHECK(re);
704 for (auto _ : state) {
705 Prog* prog = re->CompileToProg(0);
706 CHECK(prog);
707 delete prog;
708 }
709 re->Decref();
710 }
711
CompileByteMap(benchmark::State & state,const std::string & regexp)712 void CompileByteMap(benchmark::State& state, const std::string& regexp) {
713 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
714 CHECK(re);
715 Prog* prog = re->CompileToProg(0);
716 CHECK(prog);
717 for (auto _ : state) {
718 prog->ComputeByteMap();
719 }
720 delete prog;
721 re->Decref();
722 }
723
CompilePCRE(benchmark::State & state,const std::string & regexp)724 void CompilePCRE(benchmark::State& state, const std::string& regexp) {
725 for (auto _ : state) {
726 PCRE re(regexp, PCRE::UTF8);
727 CHECK_EQ(re.error(), "");
728 }
729 }
730
CompileRE2(benchmark::State & state,const std::string & regexp)731 void CompileRE2(benchmark::State& state, const std::string& regexp) {
732 for (auto _ : state) {
733 RE2 re(regexp);
734 CHECK_EQ(re.error(), "");
735 }
736 }
737
RunBuild(benchmark::State & state,const std::string & regexp,void (* run)(benchmark::State &,const std::string &))738 void RunBuild(benchmark::State& state, const std::string& regexp,
739 void (*run)(benchmark::State&, const std::string&)) {
740 run(state, regexp);
741 state.SetItemsProcessed(state.iterations());
742 }
743
744 } // namespace re2
745
746 DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
747 "regexp for compile benchmarks");
748
749 namespace re2 {
750
BM_PCRE_Compile(benchmark::State & state)751 void BM_PCRE_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
BM_Regexp_Parse(benchmark::State & state)752 void BM_Regexp_Parse(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
BM_Regexp_Simplify(benchmark::State & state)753 void BM_Regexp_Simplify(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
BM_CompileToProg(benchmark::State & state)754 void BM_CompileToProg(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
BM_CompileByteMap(benchmark::State & state)755 void BM_CompileByteMap(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
BM_Regexp_Compile(benchmark::State & state)756 void BM_Regexp_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
BM_Regexp_SimplifyCompile(benchmark::State & state)757 void BM_Regexp_SimplifyCompile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
BM_Regexp_NullWalk(benchmark::State & state)758 void BM_Regexp_NullWalk(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
BM_RE2_Compile(benchmark::State & state)759 void BM_RE2_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
760
761 #ifdef USEPCRE
762 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
763 #endif
764 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
765 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
766 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
767 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
768 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
769 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
770 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
771 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
772
773 // Makes text of size nbytes, then calls run to search
774 // the text for regexp iters times.
SearchPhone(benchmark::State & state,ParseImpl * search)775 void SearchPhone(benchmark::State& state, ParseImpl* search) {
776 std::string s = RandomText(state.range(0));
777 s.append("(650) 253-0001");
778 search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
779 state.SetBytesProcessed(state.iterations() * state.range(0));
780 }
781
SearchPhone_CachedPCRE(benchmark::State & state)782 void SearchPhone_CachedPCRE(benchmark::State& state) {
783 SearchPhone(state, SearchParse2CachedPCRE);
784 }
785
SearchPhone_CachedRE2(benchmark::State & state)786 void SearchPhone_CachedRE2(benchmark::State& state) {
787 SearchPhone(state, SearchParse2CachedRE2);
788 }
789
790 #ifdef USEPCRE
791 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
792 #endif
793 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
794
795 /*
796 TODO(rsc): Make this work again.
797 void CacheFill(int iters, int n, SearchImpl *srch) {
798 std::string s = DeBruijnString(n+1);
799 std::string t;
800 for (int i = n+1; i < 20; i++) {
801 t = s + s;
802 using std::swap;
803 swap(s, t);
804 }
805 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
806 Prog::kUnanchored, true);
807 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
808 }
809
810 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
811 void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
812 void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
813 void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
814
815 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
816 // for the static BenchmarkRegisterer, which makes it unusable inside
817 // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
818 // to make the identifiers distinct (only possible when 'a' is a simple
819 // expression like 2, not like 1+1).
820 #define MY_BENCHMARK_WITH_ARG(n, a) \
821 bool __benchmark_ ## n ## a = \
822 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
823
824 #define DO24(A, B) \
825 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
826 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
827 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
828 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
829
830 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
831 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
832 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
833 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
834
835 #undef DO24
836 #undef MY_BENCHMARK_WITH_ARG
837 */
838
839 ////////////////////////////////////////////////////////////////////////
840 //
841 // Implementation routines. Sad that there are so many,
842 // but all the interfaces are slightly different.
843
844 // Runs implementation to search for regexp in text, iters times.
845 // Expect_match says whether the regexp should be found.
846 // Anchored says whether to run an anchored search.
847
SearchDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)848 void SearchDFA(benchmark::State& state, const char* regexp,
849 const StringPiece& text, Prog::Anchor anchor,
850 bool expect_match) {
851 for (auto _ : state) {
852 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
853 CHECK(re);
854 Prog* prog = re->CompileToProg(0);
855 CHECK(prog);
856 bool failed = false;
857 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
858 NULL, &failed, NULL),
859 expect_match);
860 CHECK(!failed);
861 delete prog;
862 re->Decref();
863 }
864 }
865
SearchNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)866 void SearchNFA(benchmark::State& state, const char* regexp,
867 const StringPiece& text, Prog::Anchor anchor,
868 bool expect_match) {
869 for (auto _ : state) {
870 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
871 CHECK(re);
872 Prog* prog = re->CompileToProg(0);
873 CHECK(prog);
874 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
875 NULL, 0),
876 expect_match);
877 delete prog;
878 re->Decref();
879 }
880 }
881
SearchOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)882 void SearchOnePass(benchmark::State& state, const char* regexp,
883 const StringPiece& text, Prog::Anchor anchor,
884 bool expect_match) {
885 for (auto _ : state) {
886 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
887 CHECK(re);
888 Prog* prog = re->CompileToProg(0);
889 CHECK(prog);
890 CHECK(prog->IsOnePass());
891 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
892 expect_match);
893 delete prog;
894 re->Decref();
895 }
896 }
897
SearchBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)898 void SearchBitState(benchmark::State& state, const char* regexp,
899 const StringPiece& text, Prog::Anchor anchor,
900 bool expect_match) {
901 for (auto _ : state) {
902 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
903 CHECK(re);
904 Prog* prog = re->CompileToProg(0);
905 CHECK(prog);
906 CHECK(prog->CanBitState());
907 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
908 expect_match);
909 delete prog;
910 re->Decref();
911 }
912 }
913
SearchPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)914 void SearchPCRE(benchmark::State& state, const char* regexp,
915 const StringPiece& text, Prog::Anchor anchor,
916 bool expect_match) {
917 for (auto _ : state) {
918 PCRE re(regexp, PCRE::UTF8);
919 CHECK_EQ(re.error(), "");
920 if (anchor == Prog::kAnchored)
921 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
922 else
923 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
924 }
925 }
926
SearchRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)927 void SearchRE2(benchmark::State& state, const char* regexp,
928 const StringPiece& text, Prog::Anchor anchor,
929 bool expect_match) {
930 for (auto _ : state) {
931 RE2 re(regexp);
932 CHECK_EQ(re.error(), "");
933 if (anchor == Prog::kAnchored)
934 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
935 else
936 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
937 }
938 }
939
940 // SearchCachedXXX is like SearchXXX but only does the
941 // regexp parsing and compiling once. This lets us measure
942 // search time without the per-regexp overhead.
943
GetCachedProg(const char * regexp)944 Prog* GetCachedProg(const char* regexp) {
945 static auto& mutex = *new Mutex;
946 MutexLock lock(&mutex);
947 static auto& cache = *new std::unordered_map<std::string, Prog*>;
948 Prog* prog = cache[regexp];
949 if (prog == NULL) {
950 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
951 CHECK(re);
952 prog = re->CompileToProg(int64_t{1}<<31); // mostly for the DFA
953 CHECK(prog);
954 cache[regexp] = prog;
955 re->Decref();
956 }
957 return prog;
958 }
959
GetCachedPCRE(const char * regexp)960 PCRE* GetCachedPCRE(const char* regexp) {
961 static auto& mutex = *new Mutex;
962 MutexLock lock(&mutex);
963 static auto& cache = *new std::unordered_map<std::string, PCRE*>;
964 PCRE* re = cache[regexp];
965 if (re == NULL) {
966 re = new PCRE(regexp, PCRE::UTF8);
967 CHECK_EQ(re->error(), "");
968 cache[regexp] = re;
969 }
970 return re;
971 }
972
GetCachedRE2(const char * regexp)973 RE2* GetCachedRE2(const char* regexp) {
974 static auto& mutex = *new Mutex;
975 MutexLock lock(&mutex);
976 static auto& cache = *new std::unordered_map<std::string, RE2*>;
977 RE2* re = cache[regexp];
978 if (re == NULL) {
979 re = new RE2(regexp);
980 CHECK_EQ(re->error(), "");
981 cache[regexp] = re;
982 }
983 return re;
984 }
985
SearchCachedDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)986 void SearchCachedDFA(benchmark::State& state, const char* regexp,
987 const StringPiece& text, Prog::Anchor anchor,
988 bool expect_match) {
989 Prog* prog = GetCachedProg(regexp);
990 for (auto _ : state) {
991 bool failed = false;
992 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
993 NULL, &failed, NULL),
994 expect_match);
995 CHECK(!failed);
996 }
997 }
998
SearchCachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)999 void SearchCachedNFA(benchmark::State& state, const char* regexp,
1000 const StringPiece& text, Prog::Anchor anchor,
1001 bool expect_match) {
1002 Prog* prog = GetCachedProg(regexp);
1003 for (auto _ : state) {
1004 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
1005 NULL, 0),
1006 expect_match);
1007 }
1008 }
1009
SearchCachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1010 void SearchCachedOnePass(benchmark::State& state, const char* regexp,
1011 const StringPiece& text, Prog::Anchor anchor,
1012 bool expect_match) {
1013 Prog* prog = GetCachedProg(regexp);
1014 CHECK(prog->IsOnePass());
1015 for (auto _ : state) {
1016 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1017 expect_match);
1018 }
1019 }
1020
SearchCachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1021 void SearchCachedBitState(benchmark::State& state, const char* regexp,
1022 const StringPiece& text, Prog::Anchor anchor,
1023 bool expect_match) {
1024 Prog* prog = GetCachedProg(regexp);
1025 CHECK(prog->CanBitState());
1026 for (auto _ : state) {
1027 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1028 expect_match);
1029 }
1030 }
1031
SearchCachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1032 void SearchCachedPCRE(benchmark::State& state, const char* regexp,
1033 const StringPiece& text, Prog::Anchor anchor,
1034 bool expect_match) {
1035 PCRE& re = *GetCachedPCRE(regexp);
1036 for (auto _ : state) {
1037 if (anchor == Prog::kAnchored)
1038 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1039 else
1040 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1041 }
1042 }
1043
SearchCachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1044 void SearchCachedRE2(benchmark::State& state, const char* regexp,
1045 const StringPiece& text, Prog::Anchor anchor,
1046 bool expect_match) {
1047 RE2& re = *GetCachedRE2(regexp);
1048 for (auto _ : state) {
1049 if (anchor == Prog::kAnchored)
1050 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1051 else
1052 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1053 }
1054 }
1055
1056 // Runs implementation to full match regexp against text,
1057 // extracting three submatches. Expects match always.
1058
Parse3NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1059 void Parse3NFA(benchmark::State& state, const char* regexp,
1060 const StringPiece& text) {
1061 for (auto _ : state) {
1062 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1063 CHECK(re);
1064 Prog* prog = re->CompileToProg(0);
1065 CHECK(prog);
1066 StringPiece sp[4]; // 4 because sp[0] is whole match.
1067 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1068 Prog::kFullMatch, sp, 4));
1069 delete prog;
1070 re->Decref();
1071 }
1072 }
1073
Parse3OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1074 void Parse3OnePass(benchmark::State& state, const char* regexp,
1075 const StringPiece& text) {
1076 for (auto _ : state) {
1077 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1078 CHECK(re);
1079 Prog* prog = re->CompileToProg(0);
1080 CHECK(prog);
1081 CHECK(prog->IsOnePass());
1082 StringPiece sp[4]; // 4 because sp[0] is whole match.
1083 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1084 delete prog;
1085 re->Decref();
1086 }
1087 }
1088
Parse3BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1089 void Parse3BitState(benchmark::State& state, const char* regexp,
1090 const StringPiece& text) {
1091 for (auto _ : state) {
1092 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1093 CHECK(re);
1094 Prog* prog = re->CompileToProg(0);
1095 CHECK(prog);
1096 CHECK(prog->CanBitState());
1097 StringPiece sp[4]; // 4 because sp[0] is whole match.
1098 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1099 delete prog;
1100 re->Decref();
1101 }
1102 }
1103
Parse3Backtrack(benchmark::State & state,const char * regexp,const StringPiece & text)1104 void Parse3Backtrack(benchmark::State& state, const char* regexp,
1105 const StringPiece& text) {
1106 for (auto _ : state) {
1107 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1108 CHECK(re);
1109 Prog* prog = re->CompileToProg(0);
1110 CHECK(prog);
1111 StringPiece sp[4]; // 4 because sp[0] is whole match.
1112 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1113 delete prog;
1114 re->Decref();
1115 }
1116 }
1117
Parse3PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1118 void Parse3PCRE(benchmark::State& state, const char* regexp,
1119 const StringPiece& text) {
1120 for (auto _ : state) {
1121 PCRE re(regexp, PCRE::UTF8);
1122 CHECK_EQ(re.error(), "");
1123 StringPiece sp1, sp2, sp3;
1124 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1125 }
1126 }
1127
Parse3RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1128 void Parse3RE2(benchmark::State& state, const char* regexp,
1129 const StringPiece& text) {
1130 for (auto _ : state) {
1131 RE2 re(regexp);
1132 CHECK_EQ(re.error(), "");
1133 StringPiece sp1, sp2, sp3;
1134 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1135 }
1136 }
1137
Parse3CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1138 void Parse3CachedNFA(benchmark::State& state, const char* regexp,
1139 const StringPiece& text) {
1140 Prog* prog = GetCachedProg(regexp);
1141 StringPiece sp[4]; // 4 because sp[0] is whole match.
1142 for (auto _ : state) {
1143 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1144 Prog::kFullMatch, sp, 4));
1145 }
1146 }
1147
Parse3CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1148 void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
1149 const StringPiece& text) {
1150 Prog* prog = GetCachedProg(regexp);
1151 CHECK(prog->IsOnePass());
1152 StringPiece sp[4]; // 4 because sp[0] is whole match.
1153 for (auto _ : state) {
1154 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1155 }
1156 }
1157
Parse3CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1158 void Parse3CachedBitState(benchmark::State& state, const char* regexp,
1159 const StringPiece& text) {
1160 Prog* prog = GetCachedProg(regexp);
1161 CHECK(prog->CanBitState());
1162 StringPiece sp[4]; // 4 because sp[0] is whole match.
1163 for (auto _ : state) {
1164 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1165 }
1166 }
1167
Parse3CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1168 void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
1169 const StringPiece& text) {
1170 Prog* prog = GetCachedProg(regexp);
1171 StringPiece sp[4]; // 4 because sp[0] is whole match.
1172 for (auto _ : state) {
1173 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1174 }
1175 }
1176
Parse3CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1177 void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
1178 const StringPiece& text) {
1179 PCRE& re = *GetCachedPCRE(regexp);
1180 StringPiece sp1, sp2, sp3;
1181 for (auto _ : state) {
1182 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1183 }
1184 }
1185
Parse3CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1186 void Parse3CachedRE2(benchmark::State& state, const char* regexp,
1187 const StringPiece& text) {
1188 RE2& re = *GetCachedRE2(regexp);
1189 StringPiece sp1, sp2, sp3;
1190 for (auto _ : state) {
1191 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1192 }
1193 }
1194
1195 // Runs implementation to full match regexp against text,
1196 // extracting three submatches. Expects match always.
1197
Parse1NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1198 void Parse1NFA(benchmark::State& state, const char* regexp,
1199 const StringPiece& text) {
1200 for (auto _ : state) {
1201 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1202 CHECK(re);
1203 Prog* prog = re->CompileToProg(0);
1204 CHECK(prog);
1205 StringPiece sp[2]; // 2 because sp[0] is whole match.
1206 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1207 Prog::kFullMatch, sp, 2));
1208 delete prog;
1209 re->Decref();
1210 }
1211 }
1212
Parse1OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1213 void Parse1OnePass(benchmark::State& state, const char* regexp,
1214 const StringPiece& text) {
1215 for (auto _ : state) {
1216 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1217 CHECK(re);
1218 Prog* prog = re->CompileToProg(0);
1219 CHECK(prog);
1220 CHECK(prog->IsOnePass());
1221 StringPiece sp[2]; // 2 because sp[0] is whole match.
1222 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1223 delete prog;
1224 re->Decref();
1225 }
1226 }
1227
Parse1BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1228 void Parse1BitState(benchmark::State& state, const char* regexp,
1229 const StringPiece& text) {
1230 for (auto _ : state) {
1231 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1232 CHECK(re);
1233 Prog* prog = re->CompileToProg(0);
1234 CHECK(prog);
1235 CHECK(prog->CanBitState());
1236 StringPiece sp[2]; // 2 because sp[0] is whole match.
1237 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1238 delete prog;
1239 re->Decref();
1240 }
1241 }
1242
Parse1PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1243 void Parse1PCRE(benchmark::State& state, const char* regexp,
1244 const StringPiece& text) {
1245 for (auto _ : state) {
1246 PCRE re(regexp, PCRE::UTF8);
1247 CHECK_EQ(re.error(), "");
1248 StringPiece sp1;
1249 CHECK(PCRE::FullMatch(text, re, &sp1));
1250 }
1251 }
1252
Parse1RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1253 void Parse1RE2(benchmark::State& state, const char* regexp,
1254 const StringPiece& text) {
1255 for (auto _ : state) {
1256 RE2 re(regexp);
1257 CHECK_EQ(re.error(), "");
1258 StringPiece sp1;
1259 CHECK(RE2::FullMatch(text, re, &sp1));
1260 }
1261 }
1262
Parse1CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1263 void Parse1CachedNFA(benchmark::State& state, const char* regexp,
1264 const StringPiece& text) {
1265 Prog* prog = GetCachedProg(regexp);
1266 StringPiece sp[2]; // 2 because sp[0] is whole match.
1267 for (auto _ : state) {
1268 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1269 Prog::kFullMatch, sp, 2));
1270 }
1271 }
1272
Parse1CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1273 void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
1274 const StringPiece& text) {
1275 Prog* prog = GetCachedProg(regexp);
1276 CHECK(prog->IsOnePass());
1277 StringPiece sp[2]; // 2 because sp[0] is whole match.
1278 for (auto _ : state) {
1279 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1280 }
1281 }
1282
Parse1CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1283 void Parse1CachedBitState(benchmark::State& state, const char* regexp,
1284 const StringPiece& text) {
1285 Prog* prog = GetCachedProg(regexp);
1286 CHECK(prog->CanBitState());
1287 StringPiece sp[2]; // 2 because sp[0] is whole match.
1288 for (auto _ : state) {
1289 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1290 }
1291 }
1292
Parse1CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1293 void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
1294 const StringPiece& text) {
1295 Prog* prog = GetCachedProg(regexp);
1296 StringPiece sp[2]; // 2 because sp[0] is whole match.
1297 for (auto _ : state) {
1298 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1299 }
1300 }
1301
Parse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1302 void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
1303 const StringPiece& text) {
1304 PCRE& re = *GetCachedPCRE(regexp);
1305 StringPiece sp1;
1306 for (auto _ : state) {
1307 CHECK(PCRE::FullMatch(text, re, &sp1));
1308 }
1309 }
1310
Parse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1311 void Parse1CachedRE2(benchmark::State& state, const char* regexp,
1312 const StringPiece& text) {
1313 RE2& re = *GetCachedRE2(regexp);
1314 StringPiece sp1;
1315 for (auto _ : state) {
1316 CHECK(RE2::FullMatch(text, re, &sp1));
1317 }
1318 }
1319
SearchParse2CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1320 void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
1321 const StringPiece& text) {
1322 PCRE& re = *GetCachedPCRE(regexp);
1323 for (auto _ : state) {
1324 StringPiece sp1, sp2;
1325 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1326 }
1327 }
1328
SearchParse2CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1329 void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
1330 const StringPiece& text) {
1331 RE2& re = *GetCachedRE2(regexp);
1332 for (auto _ : state) {
1333 StringPiece sp1, sp2;
1334 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1335 }
1336 }
1337
SearchParse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1338 void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
1339 const StringPiece& text) {
1340 PCRE& re = *GetCachedPCRE(regexp);
1341 for (auto _ : state) {
1342 StringPiece sp1;
1343 CHECK(PCRE::PartialMatch(text, re, &sp1));
1344 }
1345 }
1346
SearchParse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1347 void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
1348 const StringPiece& text) {
1349 RE2& re = *GetCachedRE2(regexp);
1350 for (auto _ : state) {
1351 StringPiece sp1;
1352 CHECK(RE2::PartialMatch(text, re, &sp1));
1353 }
1354 }
1355
EmptyPartialMatchPCRE(benchmark::State & state)1356 void EmptyPartialMatchPCRE(benchmark::State& state) {
1357 PCRE re("");
1358 for (auto _ : state) {
1359 PCRE::PartialMatch("", re);
1360 }
1361 }
1362
EmptyPartialMatchRE2(benchmark::State & state)1363 void EmptyPartialMatchRE2(benchmark::State& state) {
1364 RE2 re("");
1365 for (auto _ : state) {
1366 RE2::PartialMatch("", re);
1367 }
1368 }
1369 #ifdef USEPCRE
1370 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1371 #endif
1372 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1373
SimplePartialMatchPCRE(benchmark::State & state)1374 void SimplePartialMatchPCRE(benchmark::State& state) {
1375 PCRE re("abcdefg");
1376 for (auto _ : state) {
1377 PCRE::PartialMatch("abcdefg", re);
1378 }
1379 }
1380
SimplePartialMatchRE2(benchmark::State & state)1381 void SimplePartialMatchRE2(benchmark::State& state) {
1382 RE2 re("abcdefg");
1383 for (auto _ : state) {
1384 RE2::PartialMatch("abcdefg", re);
1385 }
1386 }
1387 #ifdef USEPCRE
1388 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1389 #endif
1390 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1391
1392 static std::string http_text =
1393 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1394 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1395
HTTPPartialMatchPCRE(benchmark::State & state)1396 void HTTPPartialMatchPCRE(benchmark::State& state) {
1397 StringPiece a;
1398 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1399 for (auto _ : state) {
1400 PCRE::PartialMatch(http_text, re, &a);
1401 }
1402 }
1403
HTTPPartialMatchRE2(benchmark::State & state)1404 void HTTPPartialMatchRE2(benchmark::State& state) {
1405 StringPiece a;
1406 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1407 for (auto _ : state) {
1408 RE2::PartialMatch(http_text, re, &a);
1409 }
1410 }
1411
1412 #ifdef USEPCRE
1413 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1414 #endif
1415 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1416
1417 static std::string smallhttp_text =
1418 "GET /abc HTTP/1.1";
1419
SmallHTTPPartialMatchPCRE(benchmark::State & state)1420 void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
1421 StringPiece a;
1422 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1423 for (auto _ : state) {
1424 PCRE::PartialMatch(smallhttp_text, re, &a);
1425 }
1426 }
1427
SmallHTTPPartialMatchRE2(benchmark::State & state)1428 void SmallHTTPPartialMatchRE2(benchmark::State& state) {
1429 StringPiece a;
1430 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1431 for (auto _ : state) {
1432 RE2::PartialMatch(smallhttp_text, re, &a);
1433 }
1434 }
1435
1436 #ifdef USEPCRE
1437 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1438 #endif
1439 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1440
DotMatchPCRE(benchmark::State & state)1441 void DotMatchPCRE(benchmark::State& state) {
1442 StringPiece a;
1443 PCRE re("(?-s)^(.+)");
1444 for (auto _ : state) {
1445 PCRE::PartialMatch(http_text, re, &a);
1446 }
1447 }
1448
DotMatchRE2(benchmark::State & state)1449 void DotMatchRE2(benchmark::State& state) {
1450 StringPiece a;
1451 RE2 re("(?-s)^(.+)");
1452 for (auto _ : state) {
1453 RE2::PartialMatch(http_text, re, &a);
1454 }
1455 }
1456
1457 #ifdef USEPCRE
1458 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1459 #endif
1460 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1461
ASCIIMatchPCRE(benchmark::State & state)1462 void ASCIIMatchPCRE(benchmark::State& state) {
1463 StringPiece a;
1464 PCRE re("(?-s)^([ -~]+)");
1465 for (auto _ : state) {
1466 PCRE::PartialMatch(http_text, re, &a);
1467 }
1468 }
1469
ASCIIMatchRE2(benchmark::State & state)1470 void ASCIIMatchRE2(benchmark::State& state) {
1471 StringPiece a;
1472 RE2 re("(?-s)^([ -~]+)");
1473 for (auto _ : state) {
1474 RE2::PartialMatch(http_text, re, &a);
1475 }
1476 }
1477
1478 #ifdef USEPCRE
1479 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1480 #endif
1481 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1482
FullMatchPCRE(benchmark::State & state,const char * regexp)1483 void FullMatchPCRE(benchmark::State& state, const char *regexp) {
1484 std::string s = RandomText(state.range(0));
1485 s += "ABCDEFGHIJ";
1486 PCRE re(regexp);
1487 for (auto _ : state) {
1488 CHECK(PCRE::FullMatch(s, re));
1489 }
1490 state.SetBytesProcessed(state.iterations() * state.range(0));
1491 }
1492
FullMatchRE2(benchmark::State & state,const char * regexp)1493 void FullMatchRE2(benchmark::State& state, const char *regexp) {
1494 std::string s = RandomText(state.range(0));
1495 s += "ABCDEFGHIJ";
1496 RE2 re(regexp, RE2::Latin1);
1497 for (auto _ : state) {
1498 CHECK(RE2::FullMatch(s, re));
1499 }
1500 state.SetBytesProcessed(state.iterations() * state.range(0));
1501 }
1502
FullMatch_DotStar_CachedPCRE(benchmark::State & state)1503 void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
FullMatch_DotStar_CachedRE2(benchmark::State & state)1504 void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); }
1505
FullMatch_DotStarDollar_CachedPCRE(benchmark::State & state)1506 void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(benchmark::State & state)1507 void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$"); }
1508
FullMatch_DotStarCapture_CachedPCRE(benchmark::State & state)1509 void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(benchmark::State & state)1510 void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))"); }
1511
1512 #ifdef USEPCRE
1513 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1514 #endif
1515 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
1516
1517 #ifdef USEPCRE
1518 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1519 #endif
1520 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
1521
1522 #ifdef USEPCRE
1523 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1524 #endif
1525 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
1526
PossibleMatchRangeCommon(benchmark::State & state,const char * regexp)1527 void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
1528 RE2 re(regexp);
1529 std::string min;
1530 std::string max;
1531 const int kMaxLen = 16;
1532 for (auto _ : state) {
1533 CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1534 }
1535 }
1536
PossibleMatchRange_Trivial(benchmark::State & state)1537 void PossibleMatchRange_Trivial(benchmark::State& state) {
1538 PossibleMatchRangeCommon(state, ".*");
1539 }
PossibleMatchRange_Complex(benchmark::State & state)1540 void PossibleMatchRange_Complex(benchmark::State& state) {
1541 PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*");
1542 }
PossibleMatchRange_Prefix(benchmark::State & state)1543 void PossibleMatchRange_Prefix(benchmark::State& state) {
1544 PossibleMatchRangeCommon(state, "^some_random_prefix.*");
1545 }
PossibleMatchRange_NoProg(benchmark::State & state)1546 void PossibleMatchRange_NoProg(benchmark::State& state) {
1547 PossibleMatchRangeCommon(state, "^some_random_string$");
1548 }
1549
1550 BENCHMARK(PossibleMatchRange_Trivial);
1551 BENCHMARK(PossibleMatchRange_Complex);
1552 BENCHMARK(PossibleMatchRange_Prefix);
1553 BENCHMARK(PossibleMatchRange_NoProg);
1554
1555 } // namespace re2
1556