1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Benchmarks for regular expression implementations.
6
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string>
11 #include <thread>
12 #include <unordered_map>
13 #include <utility>
14
15 #include "util/benchmark.h"
16 #include "util/test.h"
17 #include "util/flags.h"
18 #include "util/logging.h"
19 #include "util/malloc_counter.h"
20 #include "util/strutil.h"
21 #include "re2/prog.h"
22 #include "re2/re2.h"
23 #include "re2/regexp.h"
24 #include "util/mutex.h"
25 #include "util/pcre.h"
26
27 namespace re2 {
28 void Test();
29 void MemoryUsage();
30 } // namespace re2
31
32 typedef testing::MallocCounter MallocCounter;
33
34 namespace re2 {
35
Test()36 void Test() {
37 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
38 CHECK(re);
39 Prog* prog = re->CompileToProg(0);
40 CHECK(prog);
41 CHECK(prog->IsOnePass());
42 CHECK(prog->CanBitState());
43 const char* text = "650-253-0001";
44 StringPiece sp[4];
45 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
46 CHECK_EQ(sp[0], "650-253-0001");
47 CHECK_EQ(sp[1], "650");
48 CHECK_EQ(sp[2], "253");
49 CHECK_EQ(sp[3], "0001");
50 delete prog;
51 re->Decref();
52 LOG(INFO) << "test passed\n";
53 }
54
MemoryUsage()55 void MemoryUsage() {
56 const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
57 const char* text = "650-253-0001";
58 {
59 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
60 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
61 CHECK(re);
62 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
63 // because LOG(INFO) might do a big allocation before they get evaluated.
64 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
65 mc.HeapGrowth(), mc.PeakHeapGrowth());
66 mc.Reset();
67
68 Prog* prog = re->CompileToProg(0);
69 CHECK(prog);
70 CHECK(prog->IsOnePass());
71 CHECK(prog->CanBitState());
72 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n",
73 mc.HeapGrowth(), mc.PeakHeapGrowth());
74 mc.Reset();
75
76 StringPiece sp[4];
77 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
78 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
79 mc.HeapGrowth(), mc.PeakHeapGrowth());
80 delete prog;
81 re->Decref();
82 }
83
84 {
85 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
86
87 PCRE re(regexp, PCRE::UTF8);
88 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
89 mc.HeapGrowth(), mc.PeakHeapGrowth());
90 PCRE::FullMatch(text, re);
91 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
92 mc.HeapGrowth(), mc.PeakHeapGrowth());
93 }
94
95 {
96 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
97
98 PCRE* re = new PCRE(regexp, PCRE::UTF8);
99 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
100 mc.HeapGrowth(), mc.PeakHeapGrowth());
101 PCRE::FullMatch(text, *re);
102 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
103 mc.HeapGrowth(), mc.PeakHeapGrowth());
104 delete re;
105 }
106
107 {
108 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
109
110 RE2 re(regexp);
111 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
112 mc.HeapGrowth(), mc.PeakHeapGrowth());
113 RE2::FullMatch(text, re);
114 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
115 mc.HeapGrowth(), mc.PeakHeapGrowth());
116 }
117
118 fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
119 sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
120 }
121
NumCPUs()122 int NumCPUs() {
123 return static_cast<int>(std::thread::hardware_concurrency());
124 }
125
126 // Regular expression implementation wrappers.
127 // Defined at bottom of file, but they are repetitive
128 // and not interesting.
129
130 typedef void SearchImpl(benchmark::State& state, const char* regexp,
131 const StringPiece& text, Prog::Anchor anchor,
132 bool expect_match);
133
134 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
135 SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass,
136 SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
137
138 typedef void ParseImpl(benchmark::State& state, const char* regexp,
139 const StringPiece& text);
140
141 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
142 Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
143 Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack;
144
145 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2,
146 Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
147 Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack;
148
149 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
150
151 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
152
153 // Benchmark: failed search for regexp in random text.
154
155 // Generate random text that won't contain the search string,
156 // to test worst-case search behavior.
RandomText(int64_t nbytes)157 std::string RandomText(int64_t nbytes) {
158 static const std::string* const text = []() {
159 std::string* text = new std::string;
160 srand(1);
161 text->resize(16<<20);
162 for (int64_t i = 0; i < 16<<20; i++) {
163 // Generate a one-byte rune that isn't a control character (e.g. '\n').
164 // Clipping to 0x20 introduces some bias, but we don't need uniformity.
165 int byte = rand() & 0x7F;
166 if (byte < 0x20)
167 byte = 0x20;
168 (*text)[i] = byte;
169 }
170 return text;
171 }();
172 CHECK_LE(nbytes, 16<<20);
173 return text->substr(0, nbytes);
174 }
175
176 // Makes text of size nbytes, then calls run to search
177 // the text for regexp iters times.
Search(benchmark::State & state,const char * regexp,SearchImpl * search)178 void Search(benchmark::State& state, const char* regexp, SearchImpl* search) {
179 std::string s = RandomText(state.range(0));
180 search(state, regexp, s, Prog::kUnanchored, false);
181 state.SetBytesProcessed(state.iterations() * state.range(0));
182 }
183
184 // These three are easy because they have prefixes,
185 // giving the search loop something to prefix accel.
186 #define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
187 #define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
188 #define EASY2 "(?i)" EASY0
189
190 // This is a little harder, since it starts with a character class
191 // and thus can't be memchr'ed. Could look for ABC and work backward,
192 // but no one does that.
193 #define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
194
195 // This is a fair amount harder, because of the leading [ -~]*.
196 // A bad backtracking implementation will take O(text^2) time to
197 // figure out there's no match.
198 #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
199
200 // This has quite a high degree of fanout.
201 // NFA execution will be particularly slow.
202 #define FANOUT "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
203
204 // This stresses engines that are trying to track parentheses.
205 #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
206 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
207
Search_Easy0_CachedDFA(benchmark::State & state)208 void Search_Easy0_CachedDFA(benchmark::State& state) { Search(state, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(benchmark::State & state)209 void Search_Easy0_CachedNFA(benchmark::State& state) { Search(state, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(benchmark::State & state)210 void Search_Easy0_CachedPCRE(benchmark::State& state) { Search(state, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(benchmark::State & state)211 void Search_Easy0_CachedRE2(benchmark::State& state) { Search(state, EASY0, SearchCachedRE2); }
212
213 BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
214 BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
215 #ifdef USEPCRE
216 BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
217 #endif
218 BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
219
Search_Easy1_CachedDFA(benchmark::State & state)220 void Search_Easy1_CachedDFA(benchmark::State& state) { Search(state, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(benchmark::State & state)221 void Search_Easy1_CachedNFA(benchmark::State& state) { Search(state, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(benchmark::State & state)222 void Search_Easy1_CachedPCRE(benchmark::State& state) { Search(state, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(benchmark::State & state)223 void Search_Easy1_CachedRE2(benchmark::State& state) { Search(state, EASY1, SearchCachedRE2); }
224
225 BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
226 BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
227 #ifdef USEPCRE
228 BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
229 #endif
230 BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
231
Search_Easy2_CachedDFA(benchmark::State & state)232 void Search_Easy2_CachedDFA(benchmark::State& state) { Search(state, EASY2, SearchCachedDFA); }
Search_Easy2_CachedNFA(benchmark::State & state)233 void Search_Easy2_CachedNFA(benchmark::State& state) { Search(state, EASY2, SearchCachedNFA); }
Search_Easy2_CachedPCRE(benchmark::State & state)234 void Search_Easy2_CachedPCRE(benchmark::State& state) { Search(state, EASY2, SearchCachedPCRE); }
Search_Easy2_CachedRE2(benchmark::State & state)235 void Search_Easy2_CachedRE2(benchmark::State& state) { Search(state, EASY2, SearchCachedRE2); }
236
237 BENCHMARK_RANGE(Search_Easy2_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
238 BENCHMARK_RANGE(Search_Easy2_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
239 #ifdef USEPCRE
240 BENCHMARK_RANGE(Search_Easy2_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
241 #endif
242 BENCHMARK_RANGE(Search_Easy2_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
243
Search_Medium_CachedDFA(benchmark::State & state)244 void Search_Medium_CachedDFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(benchmark::State & state)245 void Search_Medium_CachedNFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(benchmark::State & state)246 void Search_Medium_CachedPCRE(benchmark::State& state) { Search(state, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(benchmark::State & state)247 void Search_Medium_CachedRE2(benchmark::State& state) { Search(state, MEDIUM, SearchCachedRE2); }
248
249 BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
250 BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
251 #ifdef USEPCRE
252 BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
253 #endif
254 BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
255
Search_Hard_CachedDFA(benchmark::State & state)256 void Search_Hard_CachedDFA(benchmark::State& state) { Search(state, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(benchmark::State & state)257 void Search_Hard_CachedNFA(benchmark::State& state) { Search(state, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(benchmark::State & state)258 void Search_Hard_CachedPCRE(benchmark::State& state) { Search(state, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(benchmark::State & state)259 void Search_Hard_CachedRE2(benchmark::State& state) { Search(state, HARD, SearchCachedRE2); }
260
261 BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
262 BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
263 #ifdef USEPCRE
264 BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
265 #endif
266 BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
267
Search_Fanout_CachedDFA(benchmark::State & state)268 void Search_Fanout_CachedDFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedDFA); }
Search_Fanout_CachedNFA(benchmark::State & state)269 void Search_Fanout_CachedNFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedNFA); }
Search_Fanout_CachedPCRE(benchmark::State & state)270 void Search_Fanout_CachedPCRE(benchmark::State& state) { Search(state, FANOUT, SearchCachedPCRE); }
Search_Fanout_CachedRE2(benchmark::State & state)271 void Search_Fanout_CachedRE2(benchmark::State& state) { Search(state, FANOUT, SearchCachedRE2); }
272
273 BENCHMARK_RANGE(Search_Fanout_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
274 BENCHMARK_RANGE(Search_Fanout_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
275 #ifdef USEPCRE
276 BENCHMARK_RANGE(Search_Fanout_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
277 #endif
278 BENCHMARK_RANGE(Search_Fanout_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
279
Search_Parens_CachedDFA(benchmark::State & state)280 void Search_Parens_CachedDFA(benchmark::State& state) { Search(state, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(benchmark::State & state)281 void Search_Parens_CachedNFA(benchmark::State& state) { Search(state, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(benchmark::State & state)282 void Search_Parens_CachedPCRE(benchmark::State& state) { Search(state, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(benchmark::State & state)283 void Search_Parens_CachedRE2(benchmark::State& state) { Search(state, PARENS, SearchCachedRE2); }
284
285 BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
286 BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
287 #ifdef USEPCRE
288 BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
289 #endif
290 BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
291
SearchBigFixed(benchmark::State & state,SearchImpl * search)292 void SearchBigFixed(benchmark::State& state, SearchImpl* search) {
293 std::string s;
294 s.append(state.range(0)/2, 'x');
295 std::string regexp = "^" + s + ".*$";
296 std::string t = RandomText(state.range(0)/2);
297 s += t;
298 search(state, regexp.c_str(), s, Prog::kUnanchored, true);
299 state.SetBytesProcessed(state.iterations() * state.range(0));
300 }
301
Search_BigFixed_CachedDFA(benchmark::State & state)302 void Search_BigFixed_CachedDFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedDFA); }
Search_BigFixed_CachedNFA(benchmark::State & state)303 void Search_BigFixed_CachedNFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(benchmark::State & state)304 void Search_BigFixed_CachedPCRE(benchmark::State& state) { SearchBigFixed(state, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(benchmark::State & state)305 void Search_BigFixed_CachedRE2(benchmark::State& state) { SearchBigFixed(state, SearchCachedRE2); }
306
307 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
308 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
309 #ifdef USEPCRE
310 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
311 #endif
312 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
313
314 // Benchmark: FindAndConsume
315
FindAndConsume(benchmark::State & state)316 void FindAndConsume(benchmark::State& state) {
317 std::string s = RandomText(state.range(0));
318 s.append("Hello World");
319 RE2 re("((Hello World))");
320 for (auto _ : state) {
321 StringPiece t = s;
322 StringPiece u;
323 CHECK(RE2::FindAndConsume(&t, re, &u));
324 CHECK_EQ(u, "Hello World");
325 }
326 state.SetBytesProcessed(state.iterations() * state.range(0));
327 }
328
329 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
330
331 // Benchmark: successful anchored search.
332
SearchSuccess(benchmark::State & state,const char * regexp,SearchImpl * search)333 void SearchSuccess(benchmark::State& state, const char* regexp,
334 SearchImpl* search) {
335 std::string s = RandomText(state.range(0));
336 search(state, regexp, s, Prog::kAnchored, true);
337 state.SetBytesProcessed(state.iterations() * state.range(0));
338 }
339
340 // Unambiguous search (RE2 can use OnePass).
341
Search_Success_DFA(benchmark::State & state)342 void Search_Success_DFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchDFA); }
Search_Success_NFA(benchmark::State & state)343 void Search_Success_NFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchNFA); }
Search_Success_PCRE(benchmark::State & state)344 void Search_Success_PCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchPCRE); }
Search_Success_RE2(benchmark::State & state)345 void Search_Success_RE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchRE2); }
Search_Success_OnePass(benchmark::State & state)346 void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchOnePass); }
347
348 BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
349 BENCHMARK_RANGE(Search_Success_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
350 #ifdef USEPCRE
351 BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
352 #endif
353 BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
354 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
355
Search_Success_CachedDFA(benchmark::State & state)356 void Search_Success_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedDFA); }
Search_Success_CachedNFA(benchmark::State & state)357 void Search_Success_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedNFA); }
Search_Success_CachedPCRE(benchmark::State & state)358 void Search_Success_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(benchmark::State & state)359 void Search_Success_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedRE2); }
Search_Success_CachedOnePass(benchmark::State & state)360 void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedOnePass); }
361
362 BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
363 BENCHMARK_RANGE(Search_Success_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
364 #ifdef USEPCRE
365 BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
366 #endif
367 BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
368 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
369
370 // Ambiguous search (RE2 cannot use OnePass).
371 // Used to be ".*.$", but that is coalesced to ".+$" these days.
372
Search_Success1_DFA(benchmark::State & state)373 void Search_Success1_DFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchDFA); }
Search_Success1_NFA(benchmark::State & state)374 void Search_Success1_NFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchNFA); }
Search_Success1_PCRE(benchmark::State & state)375 void Search_Success1_PCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchPCRE); }
Search_Success1_RE2(benchmark::State & state)376 void Search_Success1_RE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchRE2); }
Search_Success1_BitState(benchmark::State & state)377 void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchBitState); }
378
379 BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
380 BENCHMARK_RANGE(Search_Success1_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
381 #ifdef USEPCRE
382 BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
383 #endif
384 BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
385 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
386
Search_Success1_CachedDFA(benchmark::State & state)387 void Search_Success1_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedDFA); }
Search_Success1_CachedNFA(benchmark::State & state)388 void Search_Success1_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedNFA); }
Search_Success1_CachedPCRE(benchmark::State & state)389 void Search_Success1_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedPCRE); }
Search_Success1_CachedRE2(benchmark::State & state)390 void Search_Success1_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedRE2); }
Search_Success1_CachedBitState(benchmark::State & state)391 void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedBitState); }
392
393 BENCHMARK_RANGE(Search_Success1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
394 BENCHMARK_RANGE(Search_Success1_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
395 #ifdef USEPCRE
396 BENCHMARK_RANGE(Search_Success1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
397 #endif
398 BENCHMARK_RANGE(Search_Success1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
399 BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
400
401 // Benchmark: AltMatch optimisation (just to verify that it works)
402 // Note that OnePass doesn't implement it!
403
SearchAltMatch(benchmark::State & state,SearchImpl * search)404 void SearchAltMatch(benchmark::State& state, SearchImpl* search) {
405 std::string s = RandomText(state.range(0));
406 search(state, "\\C*", s, Prog::kAnchored, true);
407 state.SetBytesProcessed(state.iterations() * state.range(0));
408 }
409
Search_AltMatch_DFA(benchmark::State & state)410 void Search_AltMatch_DFA(benchmark::State& state) { SearchAltMatch(state, SearchDFA); }
Search_AltMatch_NFA(benchmark::State & state)411 void Search_AltMatch_NFA(benchmark::State& state) { SearchAltMatch(state, SearchNFA); }
Search_AltMatch_OnePass(benchmark::State & state)412 void Search_AltMatch_OnePass(benchmark::State& state) { SearchAltMatch(state, SearchOnePass); }
Search_AltMatch_BitState(benchmark::State & state)413 void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); }
Search_AltMatch_PCRE(benchmark::State & state)414 void Search_AltMatch_PCRE(benchmark::State& state) { SearchAltMatch(state, SearchPCRE); }
Search_AltMatch_RE2(benchmark::State & state)415 void Search_AltMatch_RE2(benchmark::State& state) { SearchAltMatch(state, SearchRE2); }
416
417 BENCHMARK_RANGE(Search_AltMatch_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
418 BENCHMARK_RANGE(Search_AltMatch_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
419 BENCHMARK_RANGE(Search_AltMatch_OnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
420 BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
421 #ifdef USEPCRE
422 BENCHMARK_RANGE(Search_AltMatch_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
423 #endif
424 BENCHMARK_RANGE(Search_AltMatch_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
425
Search_AltMatch_CachedDFA(benchmark::State & state)426 void Search_AltMatch_CachedDFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedDFA); }
Search_AltMatch_CachedNFA(benchmark::State & state)427 void Search_AltMatch_CachedNFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedNFA); }
Search_AltMatch_CachedOnePass(benchmark::State & state)428 void Search_AltMatch_CachedOnePass(benchmark::State& state) { SearchAltMatch(state, SearchCachedOnePass); }
Search_AltMatch_CachedBitState(benchmark::State & state)429 void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); }
Search_AltMatch_CachedPCRE(benchmark::State & state)430 void Search_AltMatch_CachedPCRE(benchmark::State& state) { SearchAltMatch(state, SearchCachedPCRE); }
Search_AltMatch_CachedRE2(benchmark::State & state)431 void Search_AltMatch_CachedRE2(benchmark::State& state) { SearchAltMatch(state, SearchCachedRE2); }
432
433 BENCHMARK_RANGE(Search_AltMatch_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
434 BENCHMARK_RANGE(Search_AltMatch_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
435 BENCHMARK_RANGE(Search_AltMatch_CachedOnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
436 BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
437 #ifdef USEPCRE
438 BENCHMARK_RANGE(Search_AltMatch_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
439 #endif
440 BENCHMARK_RANGE(Search_AltMatch_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
441
442 // Benchmark: use regexp to find phone number.
443
SearchDigits(benchmark::State & state,SearchImpl * search)444 void SearchDigits(benchmark::State& state, SearchImpl* search) {
445 StringPiece s("650-253-0001");
446 search(state, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
447 state.SetItemsProcessed(state.iterations());
448 }
449
Search_Digits_DFA(benchmark::State & state)450 void Search_Digits_DFA(benchmark::State& state) { SearchDigits(state, SearchDFA); }
Search_Digits_NFA(benchmark::State & state)451 void Search_Digits_NFA(benchmark::State& state) { SearchDigits(state, SearchNFA); }
Search_Digits_OnePass(benchmark::State & state)452 void Search_Digits_OnePass(benchmark::State& state) { SearchDigits(state, SearchOnePass); }
Search_Digits_PCRE(benchmark::State & state)453 void Search_Digits_PCRE(benchmark::State& state) { SearchDigits(state, SearchPCRE); }
Search_Digits_RE2(benchmark::State & state)454 void Search_Digits_RE2(benchmark::State& state) { SearchDigits(state, SearchRE2); }
Search_Digits_BitState(benchmark::State & state)455 void Search_Digits_BitState(benchmark::State& state) { SearchDigits(state, SearchBitState); }
456
457 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
458 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
459 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
460 #ifdef USEPCRE
461 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
462 #endif
463 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
464 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
465
466 // Benchmark: use regexp to parse digit fields in phone number.
467
Parse3Digits(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))468 void Parse3Digits(benchmark::State& state,
469 void (*parse3)(benchmark::State&, const char*,
470 const StringPiece&)) {
471 parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
472 state.SetItemsProcessed(state.iterations());
473 }
474
Parse_Digits_NFA(benchmark::State & state)475 void Parse_Digits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3NFA); }
Parse_Digits_OnePass(benchmark::State & state)476 void Parse_Digits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3OnePass); }
Parse_Digits_PCRE(benchmark::State & state)477 void Parse_Digits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3PCRE); }
Parse_Digits_RE2(benchmark::State & state)478 void Parse_Digits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3RE2); }
Parse_Digits_Backtrack(benchmark::State & state)479 void Parse_Digits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3Backtrack); }
Parse_Digits_BitState(benchmark::State & state)480 void Parse_Digits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3BitState); }
481
482 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
483 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
484 #ifdef USEPCRE
485 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
486 #endif
487 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
488 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
489 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
490
Parse_CachedDigits_NFA(benchmark::State & state)491 void Parse_CachedDigits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(benchmark::State & state)492 void Parse_CachedDigits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(benchmark::State & state)493 void Parse_CachedDigits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(benchmark::State & state)494 void Parse_CachedDigits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(benchmark::State & state)495 void Parse_CachedDigits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(benchmark::State & state)496 void Parse_CachedDigits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3CachedBitState); }
497
498 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
499 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
500 #ifdef USEPCRE
501 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
502 #endif
503 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
504 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
505 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
506
Parse3DigitDs(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))507 void Parse3DigitDs(benchmark::State& state,
508 void (*parse3)(benchmark::State&, const char*,
509 const StringPiece&)) {
510 parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
511 state.SetItemsProcessed(state.iterations());
512 }
513
Parse_DigitDs_NFA(benchmark::State & state)514 void Parse_DigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3NFA); }
Parse_DigitDs_OnePass(benchmark::State & state)515 void Parse_DigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3OnePass); }
Parse_DigitDs_PCRE(benchmark::State & state)516 void Parse_DigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3PCRE); }
Parse_DigitDs_RE2(benchmark::State & state)517 void Parse_DigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3RE2); }
Parse_DigitDs_Backtrack(benchmark::State & state)518 void Parse_DigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(benchmark::State & state)519 void Parse_DigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
520
521 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
522 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
523 #ifdef USEPCRE
524 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
525 #endif
526 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
527 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
528 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
529
Parse_CachedDigitDs_NFA(benchmark::State & state)530 void Parse_CachedDigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(benchmark::State & state)531 void Parse_CachedDigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(benchmark::State & state)532 void Parse_CachedDigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(benchmark::State & state)533 void Parse_CachedDigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(benchmark::State & state)534 void Parse_CachedDigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(benchmark::State & state)535 void Parse_CachedDigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
536
537 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
538 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
539 #ifdef USEPCRE
540 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
541 #endif
542 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
543 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
544 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
545
546 // Benchmark: splitting off leading number field.
547
Parse1Split(benchmark::State & state,void (* parse1)(benchmark::State &,const char *,const StringPiece &))548 void Parse1Split(benchmark::State& state,
549 void (*parse1)(benchmark::State&, const char*,
550 const StringPiece&)) {
551 parse1(state, "[0-9]+-(.*)", "650-253-0001");
552 state.SetItemsProcessed(state.iterations());
553 }
554
Parse_Split_NFA(benchmark::State & state)555 void Parse_Split_NFA(benchmark::State& state) { Parse1Split(state, Parse1NFA); }
Parse_Split_OnePass(benchmark::State & state)556 void Parse_Split_OnePass(benchmark::State& state) { Parse1Split(state, Parse1OnePass); }
Parse_Split_PCRE(benchmark::State & state)557 void Parse_Split_PCRE(benchmark::State& state) { Parse1Split(state, Parse1PCRE); }
Parse_Split_RE2(benchmark::State & state)558 void Parse_Split_RE2(benchmark::State& state) { Parse1Split(state, Parse1RE2); }
Parse_Split_BitState(benchmark::State & state)559 void Parse_Split_BitState(benchmark::State& state) { Parse1Split(state, Parse1BitState); }
560
561 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
562 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
563 #ifdef USEPCRE
564 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
565 #endif
566 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
567 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
568
Parse_CachedSplit_NFA(benchmark::State & state)569 void Parse_CachedSplit_NFA(benchmark::State& state) { Parse1Split(state, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(benchmark::State & state)570 void Parse_CachedSplit_OnePass(benchmark::State& state) { Parse1Split(state, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(benchmark::State & state)571 void Parse_CachedSplit_PCRE(benchmark::State& state) { Parse1Split(state, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(benchmark::State & state)572 void Parse_CachedSplit_RE2(benchmark::State& state) { Parse1Split(state, Parse1CachedRE2); }
Parse_CachedSplit_BitState(benchmark::State & state)573 void Parse_CachedSplit_BitState(benchmark::State& state) { Parse1Split(state, Parse1CachedBitState); }
574
575 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
576 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
577 #ifdef USEPCRE
578 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
579 #endif
580 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
581 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
582
583 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
584
Parse1SplitHard(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))585 void Parse1SplitHard(benchmark::State& state,
586 void (*run)(benchmark::State&, const char*,
587 const StringPiece&)) {
588 run(state, "[0-9]+.(.*)", "650-253-0001");
589 state.SetItemsProcessed(state.iterations());
590 }
591
Parse_SplitHard_NFA(benchmark::State & state)592 void Parse_SplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1NFA); }
Parse_SplitHard_PCRE(benchmark::State & state)593 void Parse_SplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1PCRE); }
Parse_SplitHard_RE2(benchmark::State & state)594 void Parse_SplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1RE2); }
Parse_SplitHard_BitState(benchmark::State & state)595 void Parse_SplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1BitState); }
596
597 #ifdef USEPCRE
598 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
599 #endif
600 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
601 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
602 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
603
Parse_CachedSplitHard_NFA(benchmark::State & state)604 void Parse_CachedSplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(benchmark::State & state)605 void Parse_CachedSplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(benchmark::State & state)606 void Parse_CachedSplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(benchmark::State & state)607 void Parse_CachedSplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(benchmark::State & state)608 void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); }
609
610 #ifdef USEPCRE
611 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
612 #endif
613 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
614 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
615 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
616 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
617
618 // Benchmark: Parse1SplitHard, big text, small match.
619
Parse1SplitBig1(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))620 void Parse1SplitBig1(benchmark::State& state,
621 void (*run)(benchmark::State&, const char*,
622 const StringPiece&)) {
623 std::string s;
624 s.append(100000, 'x');
625 s.append("650-253-0001");
626 run(state, "[0-9]+.(.*)", s);
627 state.SetItemsProcessed(state.iterations());
628 }
629
Parse_CachedSplitBig1_PCRE(benchmark::State & state)630 void Parse_CachedSplitBig1_PCRE(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(benchmark::State & state)631 void Parse_CachedSplitBig1_RE2(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedRE2); }
632
633 #ifdef USEPCRE
634 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
635 #endif
636 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
637
638 // Benchmark: Parse1SplitHard, big text, big match.
639
Parse1SplitBig2(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))640 void Parse1SplitBig2(benchmark::State& state,
641 void (*run)(benchmark::State&, const char*,
642 const StringPiece&)) {
643 std::string s;
644 s.append("650-253-");
645 s.append(100000, '0');
646 run(state, "[0-9]+.(.*)", s);
647 state.SetItemsProcessed(state.iterations());
648 }
649
Parse_CachedSplitBig2_PCRE(benchmark::State & state)650 void Parse_CachedSplitBig2_PCRE(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(benchmark::State & state)651 void Parse_CachedSplitBig2_RE2(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedRE2); }
652
653 #ifdef USEPCRE
654 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
655 #endif
656 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
657
658 // Benchmark: measure time required to parse (but not execute)
659 // a simple regular expression.
660
ParseRegexp(benchmark::State & state,const std::string & regexp)661 void ParseRegexp(benchmark::State& state, const std::string& regexp) {
662 for (auto _ : state) {
663 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
664 CHECK(re);
665 re->Decref();
666 }
667 }
668
SimplifyRegexp(benchmark::State & state,const std::string & regexp)669 void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
670 for (auto _ : state) {
671 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
672 CHECK(re);
673 Regexp* sre = re->Simplify();
674 CHECK(sre);
675 sre->Decref();
676 re->Decref();
677 }
678 }
679
NullWalkRegexp(benchmark::State & state,const std::string & regexp)680 void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
681 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
682 CHECK(re);
683 for (auto _ : state) {
684 re->NullWalk();
685 }
686 re->Decref();
687 }
688
SimplifyCompileRegexp(benchmark::State & state,const std::string & regexp)689 void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
690 for (auto _ : state) {
691 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
692 CHECK(re);
693 Regexp* sre = re->Simplify();
694 CHECK(sre);
695 Prog* prog = sre->CompileToProg(0);
696 CHECK(prog);
697 delete prog;
698 sre->Decref();
699 re->Decref();
700 }
701 }
702
CompileRegexp(benchmark::State & state,const std::string & regexp)703 void CompileRegexp(benchmark::State& state, const std::string& regexp) {
704 for (auto _ : state) {
705 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
706 CHECK(re);
707 Prog* prog = re->CompileToProg(0);
708 CHECK(prog);
709 delete prog;
710 re->Decref();
711 }
712 }
713
CompileToProg(benchmark::State & state,const std::string & regexp)714 void CompileToProg(benchmark::State& state, const std::string& regexp) {
715 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
716 CHECK(re);
717 for (auto _ : state) {
718 Prog* prog = re->CompileToProg(0);
719 CHECK(prog);
720 delete prog;
721 }
722 re->Decref();
723 }
724
CompileByteMap(benchmark::State & state,const std::string & regexp)725 void CompileByteMap(benchmark::State& state, const std::string& regexp) {
726 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
727 CHECK(re);
728 Prog* prog = re->CompileToProg(0);
729 CHECK(prog);
730 for (auto _ : state) {
731 prog->ComputeByteMap();
732 }
733 delete prog;
734 re->Decref();
735 }
736
CompilePCRE(benchmark::State & state,const std::string & regexp)737 void CompilePCRE(benchmark::State& state, const std::string& regexp) {
738 for (auto _ : state) {
739 PCRE re(regexp, PCRE::UTF8);
740 CHECK_EQ(re.error(), "");
741 }
742 }
743
CompileRE2(benchmark::State & state,const std::string & regexp)744 void CompileRE2(benchmark::State& state, const std::string& regexp) {
745 for (auto _ : state) {
746 RE2 re(regexp);
747 CHECK_EQ(re.error(), "");
748 }
749 }
750
RunBuild(benchmark::State & state,const std::string & regexp,void (* run)(benchmark::State &,const std::string &))751 void RunBuild(benchmark::State& state, const std::string& regexp,
752 void (*run)(benchmark::State&, const std::string&)) {
753 run(state, regexp);
754 state.SetItemsProcessed(state.iterations());
755 }
756
757 } // namespace re2
758
759 DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
760 "regexp for compile benchmarks");
761
762 namespace re2 {
763
BM_PCRE_Compile(benchmark::State & state)764 void BM_PCRE_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
BM_Regexp_Parse(benchmark::State & state)765 void BM_Regexp_Parse(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
BM_Regexp_Simplify(benchmark::State & state)766 void BM_Regexp_Simplify(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
BM_CompileToProg(benchmark::State & state)767 void BM_CompileToProg(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
BM_CompileByteMap(benchmark::State & state)768 void BM_CompileByteMap(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
BM_Regexp_Compile(benchmark::State & state)769 void BM_Regexp_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
BM_Regexp_SimplifyCompile(benchmark::State & state)770 void BM_Regexp_SimplifyCompile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
BM_Regexp_NullWalk(benchmark::State & state)771 void BM_Regexp_NullWalk(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
BM_RE2_Compile(benchmark::State & state)772 void BM_RE2_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
773
774 #ifdef USEPCRE
775 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
776 #endif
777 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
778 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
779 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
780 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
781 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
782 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
783 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
784 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
785
786 // Makes text of size nbytes, then calls run to search
787 // the text for regexp iters times.
SearchPhone(benchmark::State & state,ParseImpl * search)788 void SearchPhone(benchmark::State& state, ParseImpl* search) {
789 std::string s = RandomText(state.range(0));
790 s.append("(650) 253-0001");
791 search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
792 state.SetBytesProcessed(state.iterations() * state.range(0));
793 }
794
SearchPhone_CachedPCRE(benchmark::State & state)795 void SearchPhone_CachedPCRE(benchmark::State& state) {
796 SearchPhone(state, SearchParse2CachedPCRE);
797 }
798
SearchPhone_CachedRE2(benchmark::State & state)799 void SearchPhone_CachedRE2(benchmark::State& state) {
800 SearchPhone(state, SearchParse2CachedRE2);
801 }
802
803 #ifdef USEPCRE
804 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
805 #endif
806 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
807
808 /*
809 TODO(rsc): Make this work again.
810 void CacheFill(int iters, int n, SearchImpl *srch) {
811 std::string s = DeBruijnString(n+1);
812 std::string t;
813 for (int i = n+1; i < 20; i++) {
814 t = s + s;
815 using std::swap;
816 swap(s, t);
817 }
818 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
819 Prog::kUnanchored, true);
820 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
821 }
822
823 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
824 void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
825 void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
826 void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
827
828 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
829 // for the static BenchmarkRegisterer, which makes it unusable inside
830 // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
831 // to make the identifiers distinct (only possible when 'a' is a simple
832 // expression like 2, not like 1+1).
833 #define MY_BENCHMARK_WITH_ARG(n, a) \
834 bool __benchmark_ ## n ## a = \
835 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
836
837 #define DO24(A, B) \
838 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
839 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
840 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
841 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
842
843 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
844 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
845 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
846 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
847
848 #undef DO24
849 #undef MY_BENCHMARK_WITH_ARG
850 */
851
852 ////////////////////////////////////////////////////////////////////////
853 //
854 // Implementation routines. Sad that there are so many,
855 // but all the interfaces are slightly different.
856
857 // Runs implementation to search for regexp in text, iters times.
858 // Expect_match says whether the regexp should be found.
859 // Anchored says whether to run an anchored search.
860
SearchDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)861 void SearchDFA(benchmark::State& state, const char* regexp,
862 const StringPiece& text, Prog::Anchor anchor,
863 bool expect_match) {
864 for (auto _ : state) {
865 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
866 CHECK(re);
867 Prog* prog = re->CompileToProg(0);
868 CHECK(prog);
869 bool failed = false;
870 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
871 NULL, &failed, NULL),
872 expect_match);
873 CHECK(!failed);
874 delete prog;
875 re->Decref();
876 }
877 }
878
SearchNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)879 void SearchNFA(benchmark::State& state, const char* regexp,
880 const StringPiece& text, Prog::Anchor anchor,
881 bool expect_match) {
882 for (auto _ : state) {
883 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
884 CHECK(re);
885 Prog* prog = re->CompileToProg(0);
886 CHECK(prog);
887 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
888 NULL, 0),
889 expect_match);
890 delete prog;
891 re->Decref();
892 }
893 }
894
SearchOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)895 void SearchOnePass(benchmark::State& state, const char* regexp,
896 const StringPiece& text, Prog::Anchor anchor,
897 bool expect_match) {
898 for (auto _ : state) {
899 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
900 CHECK(re);
901 Prog* prog = re->CompileToProg(0);
902 CHECK(prog);
903 CHECK(prog->IsOnePass());
904 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
905 expect_match);
906 delete prog;
907 re->Decref();
908 }
909 }
910
SearchBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)911 void SearchBitState(benchmark::State& state, const char* regexp,
912 const StringPiece& text, Prog::Anchor anchor,
913 bool expect_match) {
914 for (auto _ : state) {
915 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
916 CHECK(re);
917 Prog* prog = re->CompileToProg(0);
918 CHECK(prog);
919 CHECK(prog->CanBitState());
920 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
921 expect_match);
922 delete prog;
923 re->Decref();
924 }
925 }
926
SearchPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)927 void SearchPCRE(benchmark::State& state, const char* regexp,
928 const StringPiece& text, Prog::Anchor anchor,
929 bool expect_match) {
930 for (auto _ : state) {
931 PCRE re(regexp, PCRE::UTF8);
932 CHECK_EQ(re.error(), "");
933 if (anchor == Prog::kAnchored)
934 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
935 else
936 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
937 }
938 }
939
SearchRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)940 void SearchRE2(benchmark::State& state, const char* regexp,
941 const StringPiece& text, Prog::Anchor anchor,
942 bool expect_match) {
943 for (auto _ : state) {
944 RE2 re(regexp);
945 CHECK_EQ(re.error(), "");
946 if (anchor == Prog::kAnchored)
947 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
948 else
949 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
950 }
951 }
952
953 // SearchCachedXXX is like SearchXXX but only does the
954 // regexp parsing and compiling once. This lets us measure
955 // search time without the per-regexp overhead.
956
GetCachedProg(const char * regexp)957 Prog* GetCachedProg(const char* regexp) {
958 static auto& mutex = *new Mutex;
959 MutexLock lock(&mutex);
960 static auto& cache = *new std::unordered_map<std::string, Prog*>;
961 Prog* prog = cache[regexp];
962 if (prog == NULL) {
963 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
964 CHECK(re);
965 prog = re->CompileToProg(int64_t{1}<<31); // mostly for the DFA
966 CHECK(prog);
967 cache[regexp] = prog;
968 re->Decref();
969 // We must call this here - while we have exclusive access.
970 prog->IsOnePass();
971 }
972 return prog;
973 }
974
GetCachedPCRE(const char * regexp)975 PCRE* GetCachedPCRE(const char* regexp) {
976 static auto& mutex = *new Mutex;
977 MutexLock lock(&mutex);
978 static auto& cache = *new std::unordered_map<std::string, PCRE*>;
979 PCRE* re = cache[regexp];
980 if (re == NULL) {
981 re = new PCRE(regexp, PCRE::UTF8);
982 CHECK_EQ(re->error(), "");
983 cache[regexp] = re;
984 }
985 return re;
986 }
987
GetCachedRE2(const char * regexp)988 RE2* GetCachedRE2(const char* regexp) {
989 static auto& mutex = *new Mutex;
990 MutexLock lock(&mutex);
991 static auto& cache = *new std::unordered_map<std::string, RE2*>;
992 RE2* re = cache[regexp];
993 if (re == NULL) {
994 re = new RE2(regexp);
995 CHECK_EQ(re->error(), "");
996 cache[regexp] = re;
997 }
998 return re;
999 }
1000
SearchCachedDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1001 void SearchCachedDFA(benchmark::State& state, const char* regexp,
1002 const StringPiece& text, Prog::Anchor anchor,
1003 bool expect_match) {
1004 Prog* prog = GetCachedProg(regexp);
1005 for (auto _ : state) {
1006 bool failed = false;
1007 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
1008 NULL, &failed, NULL),
1009 expect_match);
1010 CHECK(!failed);
1011 }
1012 }
1013
SearchCachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1014 void SearchCachedNFA(benchmark::State& state, const char* regexp,
1015 const StringPiece& text, Prog::Anchor anchor,
1016 bool expect_match) {
1017 Prog* prog = GetCachedProg(regexp);
1018 for (auto _ : state) {
1019 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
1020 NULL, 0),
1021 expect_match);
1022 }
1023 }
1024
SearchCachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1025 void SearchCachedOnePass(benchmark::State& state, const char* regexp,
1026 const StringPiece& text, Prog::Anchor anchor,
1027 bool expect_match) {
1028 Prog* prog = GetCachedProg(regexp);
1029 CHECK(prog->IsOnePass());
1030 for (auto _ : state) {
1031 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1032 expect_match);
1033 }
1034 }
1035
SearchCachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1036 void SearchCachedBitState(benchmark::State& state, const char* regexp,
1037 const StringPiece& text, Prog::Anchor anchor,
1038 bool expect_match) {
1039 Prog* prog = GetCachedProg(regexp);
1040 CHECK(prog->CanBitState());
1041 for (auto _ : state) {
1042 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1043 expect_match);
1044 }
1045 }
1046
SearchCachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1047 void SearchCachedPCRE(benchmark::State& state, const char* regexp,
1048 const StringPiece& text, Prog::Anchor anchor,
1049 bool expect_match) {
1050 PCRE& re = *GetCachedPCRE(regexp);
1051 for (auto _ : state) {
1052 if (anchor == Prog::kAnchored)
1053 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1054 else
1055 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1056 }
1057 }
1058
SearchCachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1059 void SearchCachedRE2(benchmark::State& state, const char* regexp,
1060 const StringPiece& text, Prog::Anchor anchor,
1061 bool expect_match) {
1062 RE2& re = *GetCachedRE2(regexp);
1063 for (auto _ : state) {
1064 if (anchor == Prog::kAnchored)
1065 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1066 else
1067 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1068 }
1069 }
1070
1071 // Runs implementation to full match regexp against text,
1072 // extracting three submatches. Expects match always.
1073
Parse3NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1074 void Parse3NFA(benchmark::State& state, const char* regexp,
1075 const StringPiece& text) {
1076 for (auto _ : state) {
1077 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1078 CHECK(re);
1079 Prog* prog = re->CompileToProg(0);
1080 CHECK(prog);
1081 StringPiece sp[4]; // 4 because sp[0] is whole match.
1082 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1083 Prog::kFullMatch, sp, 4));
1084 delete prog;
1085 re->Decref();
1086 }
1087 }
1088
Parse3OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1089 void Parse3OnePass(benchmark::State& state, const char* regexp,
1090 const StringPiece& text) {
1091 for (auto _ : state) {
1092 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1093 CHECK(re);
1094 Prog* prog = re->CompileToProg(0);
1095 CHECK(prog);
1096 CHECK(prog->IsOnePass());
1097 StringPiece sp[4]; // 4 because sp[0] is whole match.
1098 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1099 delete prog;
1100 re->Decref();
1101 }
1102 }
1103
Parse3BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1104 void Parse3BitState(benchmark::State& state, const char* regexp,
1105 const StringPiece& text) {
1106 for (auto _ : state) {
1107 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1108 CHECK(re);
1109 Prog* prog = re->CompileToProg(0);
1110 CHECK(prog);
1111 CHECK(prog->CanBitState());
1112 StringPiece sp[4]; // 4 because sp[0] is whole match.
1113 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1114 delete prog;
1115 re->Decref();
1116 }
1117 }
1118
Parse3Backtrack(benchmark::State & state,const char * regexp,const StringPiece & text)1119 void Parse3Backtrack(benchmark::State& state, const char* regexp,
1120 const StringPiece& text) {
1121 for (auto _ : state) {
1122 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1123 CHECK(re);
1124 Prog* prog = re->CompileToProg(0);
1125 CHECK(prog);
1126 StringPiece sp[4]; // 4 because sp[0] is whole match.
1127 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1128 delete prog;
1129 re->Decref();
1130 }
1131 }
1132
Parse3PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1133 void Parse3PCRE(benchmark::State& state, const char* regexp,
1134 const StringPiece& text) {
1135 for (auto _ : state) {
1136 PCRE re(regexp, PCRE::UTF8);
1137 CHECK_EQ(re.error(), "");
1138 StringPiece sp1, sp2, sp3;
1139 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1140 }
1141 }
1142
Parse3RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1143 void Parse3RE2(benchmark::State& state, const char* regexp,
1144 const StringPiece& text) {
1145 for (auto _ : state) {
1146 RE2 re(regexp);
1147 CHECK_EQ(re.error(), "");
1148 StringPiece sp1, sp2, sp3;
1149 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1150 }
1151 }
1152
Parse3CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1153 void Parse3CachedNFA(benchmark::State& state, const char* regexp,
1154 const StringPiece& text) {
1155 Prog* prog = GetCachedProg(regexp);
1156 StringPiece sp[4]; // 4 because sp[0] is whole match.
1157 for (auto _ : state) {
1158 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1159 Prog::kFullMatch, sp, 4));
1160 }
1161 }
1162
Parse3CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1163 void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
1164 const StringPiece& text) {
1165 Prog* prog = GetCachedProg(regexp);
1166 CHECK(prog->IsOnePass());
1167 StringPiece sp[4]; // 4 because sp[0] is whole match.
1168 for (auto _ : state) {
1169 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1170 }
1171 }
1172
Parse3CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1173 void Parse3CachedBitState(benchmark::State& state, const char* regexp,
1174 const StringPiece& text) {
1175 Prog* prog = GetCachedProg(regexp);
1176 CHECK(prog->CanBitState());
1177 StringPiece sp[4]; // 4 because sp[0] is whole match.
1178 for (auto _ : state) {
1179 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1180 }
1181 }
1182
Parse3CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1183 void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
1184 const StringPiece& text) {
1185 Prog* prog = GetCachedProg(regexp);
1186 StringPiece sp[4]; // 4 because sp[0] is whole match.
1187 for (auto _ : state) {
1188 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1189 }
1190 }
1191
Parse3CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1192 void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
1193 const StringPiece& text) {
1194 PCRE& re = *GetCachedPCRE(regexp);
1195 StringPiece sp1, sp2, sp3;
1196 for (auto _ : state) {
1197 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1198 }
1199 }
1200
Parse3CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1201 void Parse3CachedRE2(benchmark::State& state, const char* regexp,
1202 const StringPiece& text) {
1203 RE2& re = *GetCachedRE2(regexp);
1204 StringPiece sp1, sp2, sp3;
1205 for (auto _ : state) {
1206 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1207 }
1208 }
1209
1210 // Runs implementation to full match regexp against text,
1211 // extracting three submatches. Expects match always.
1212
Parse1NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1213 void Parse1NFA(benchmark::State& state, const char* regexp,
1214 const StringPiece& text) {
1215 for (auto _ : state) {
1216 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1217 CHECK(re);
1218 Prog* prog = re->CompileToProg(0);
1219 CHECK(prog);
1220 StringPiece sp[2]; // 2 because sp[0] is whole match.
1221 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1222 Prog::kFullMatch, sp, 2));
1223 delete prog;
1224 re->Decref();
1225 }
1226 }
1227
Parse1OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1228 void Parse1OnePass(benchmark::State& state, const char* regexp,
1229 const StringPiece& text) {
1230 for (auto _ : state) {
1231 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1232 CHECK(re);
1233 Prog* prog = re->CompileToProg(0);
1234 CHECK(prog);
1235 CHECK(prog->IsOnePass());
1236 StringPiece sp[2]; // 2 because sp[0] is whole match.
1237 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1238 delete prog;
1239 re->Decref();
1240 }
1241 }
1242
Parse1BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1243 void Parse1BitState(benchmark::State& state, const char* regexp,
1244 const StringPiece& text) {
1245 for (auto _ : state) {
1246 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1247 CHECK(re);
1248 Prog* prog = re->CompileToProg(0);
1249 CHECK(prog);
1250 CHECK(prog->CanBitState());
1251 StringPiece sp[2]; // 2 because sp[0] is whole match.
1252 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1253 delete prog;
1254 re->Decref();
1255 }
1256 }
1257
Parse1PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1258 void Parse1PCRE(benchmark::State& state, const char* regexp,
1259 const StringPiece& text) {
1260 for (auto _ : state) {
1261 PCRE re(regexp, PCRE::UTF8);
1262 CHECK_EQ(re.error(), "");
1263 StringPiece sp1;
1264 CHECK(PCRE::FullMatch(text, re, &sp1));
1265 }
1266 }
1267
Parse1RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1268 void Parse1RE2(benchmark::State& state, const char* regexp,
1269 const StringPiece& text) {
1270 for (auto _ : state) {
1271 RE2 re(regexp);
1272 CHECK_EQ(re.error(), "");
1273 StringPiece sp1;
1274 CHECK(RE2::FullMatch(text, re, &sp1));
1275 }
1276 }
1277
Parse1CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1278 void Parse1CachedNFA(benchmark::State& state, const char* regexp,
1279 const StringPiece& text) {
1280 Prog* prog = GetCachedProg(regexp);
1281 StringPiece sp[2]; // 2 because sp[0] is whole match.
1282 for (auto _ : state) {
1283 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1284 Prog::kFullMatch, sp, 2));
1285 }
1286 }
1287
Parse1CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1288 void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
1289 const StringPiece& text) {
1290 Prog* prog = GetCachedProg(regexp);
1291 CHECK(prog->IsOnePass());
1292 StringPiece sp[2]; // 2 because sp[0] is whole match.
1293 for (auto _ : state) {
1294 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1295 }
1296 }
1297
Parse1CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1298 void Parse1CachedBitState(benchmark::State& state, const char* regexp,
1299 const StringPiece& text) {
1300 Prog* prog = GetCachedProg(regexp);
1301 CHECK(prog->CanBitState());
1302 StringPiece sp[2]; // 2 because sp[0] is whole match.
1303 for (auto _ : state) {
1304 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1305 }
1306 }
1307
Parse1CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1308 void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
1309 const StringPiece& text) {
1310 Prog* prog = GetCachedProg(regexp);
1311 StringPiece sp[2]; // 2 because sp[0] is whole match.
1312 for (auto _ : state) {
1313 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1314 }
1315 }
1316
Parse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1317 void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
1318 const StringPiece& text) {
1319 PCRE& re = *GetCachedPCRE(regexp);
1320 StringPiece sp1;
1321 for (auto _ : state) {
1322 CHECK(PCRE::FullMatch(text, re, &sp1));
1323 }
1324 }
1325
Parse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1326 void Parse1CachedRE2(benchmark::State& state, const char* regexp,
1327 const StringPiece& text) {
1328 RE2& re = *GetCachedRE2(regexp);
1329 StringPiece sp1;
1330 for (auto _ : state) {
1331 CHECK(RE2::FullMatch(text, re, &sp1));
1332 }
1333 }
1334
SearchParse2CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1335 void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
1336 const StringPiece& text) {
1337 PCRE& re = *GetCachedPCRE(regexp);
1338 for (auto _ : state) {
1339 StringPiece sp1, sp2;
1340 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1341 }
1342 }
1343
SearchParse2CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1344 void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
1345 const StringPiece& text) {
1346 RE2& re = *GetCachedRE2(regexp);
1347 for (auto _ : state) {
1348 StringPiece sp1, sp2;
1349 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1350 }
1351 }
1352
SearchParse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1353 void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
1354 const StringPiece& text) {
1355 PCRE& re = *GetCachedPCRE(regexp);
1356 for (auto _ : state) {
1357 StringPiece sp1;
1358 CHECK(PCRE::PartialMatch(text, re, &sp1));
1359 }
1360 }
1361
SearchParse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1362 void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
1363 const StringPiece& text) {
1364 RE2& re = *GetCachedRE2(regexp);
1365 for (auto _ : state) {
1366 StringPiece sp1;
1367 CHECK(RE2::PartialMatch(text, re, &sp1));
1368 }
1369 }
1370
EmptyPartialMatchPCRE(benchmark::State & state)1371 void EmptyPartialMatchPCRE(benchmark::State& state) {
1372 PCRE re("");
1373 for (auto _ : state) {
1374 PCRE::PartialMatch("", re);
1375 }
1376 }
1377
EmptyPartialMatchRE2(benchmark::State & state)1378 void EmptyPartialMatchRE2(benchmark::State& state) {
1379 RE2 re("");
1380 for (auto _ : state) {
1381 RE2::PartialMatch("", re);
1382 }
1383 }
1384 #ifdef USEPCRE
1385 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1386 #endif
1387 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1388
SimplePartialMatchPCRE(benchmark::State & state)1389 void SimplePartialMatchPCRE(benchmark::State& state) {
1390 PCRE re("abcdefg");
1391 for (auto _ : state) {
1392 PCRE::PartialMatch("abcdefg", re);
1393 }
1394 }
1395
SimplePartialMatchRE2(benchmark::State & state)1396 void SimplePartialMatchRE2(benchmark::State& state) {
1397 RE2 re("abcdefg");
1398 for (auto _ : state) {
1399 RE2::PartialMatch("abcdefg", re);
1400 }
1401 }
1402 #ifdef USEPCRE
1403 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1404 #endif
1405 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1406
1407 static std::string http_text =
1408 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1409 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1410
HTTPPartialMatchPCRE(benchmark::State & state)1411 void HTTPPartialMatchPCRE(benchmark::State& state) {
1412 StringPiece a;
1413 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1414 for (auto _ : state) {
1415 PCRE::PartialMatch(http_text, re, &a);
1416 }
1417 }
1418
HTTPPartialMatchRE2(benchmark::State & state)1419 void HTTPPartialMatchRE2(benchmark::State& state) {
1420 StringPiece a;
1421 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1422 for (auto _ : state) {
1423 RE2::PartialMatch(http_text, re, &a);
1424 }
1425 }
1426
1427 #ifdef USEPCRE
1428 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1429 #endif
1430 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1431
1432 static std::string smallhttp_text =
1433 "GET /abc HTTP/1.1";
1434
SmallHTTPPartialMatchPCRE(benchmark::State & state)1435 void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
1436 StringPiece a;
1437 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1438 for (auto _ : state) {
1439 PCRE::PartialMatch(smallhttp_text, re, &a);
1440 }
1441 }
1442
SmallHTTPPartialMatchRE2(benchmark::State & state)1443 void SmallHTTPPartialMatchRE2(benchmark::State& state) {
1444 StringPiece a;
1445 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1446 for (auto _ : state) {
1447 RE2::PartialMatch(smallhttp_text, re, &a);
1448 }
1449 }
1450
1451 #ifdef USEPCRE
1452 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1453 #endif
1454 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1455
DotMatchPCRE(benchmark::State & state)1456 void DotMatchPCRE(benchmark::State& state) {
1457 StringPiece a;
1458 PCRE re("(?-s)^(.+)");
1459 for (auto _ : state) {
1460 PCRE::PartialMatch(http_text, re, &a);
1461 }
1462 }
1463
DotMatchRE2(benchmark::State & state)1464 void DotMatchRE2(benchmark::State& state) {
1465 StringPiece a;
1466 RE2 re("(?-s)^(.+)");
1467 for (auto _ : state) {
1468 RE2::PartialMatch(http_text, re, &a);
1469 }
1470 }
1471
1472 #ifdef USEPCRE
1473 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1474 #endif
1475 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1476
ASCIIMatchPCRE(benchmark::State & state)1477 void ASCIIMatchPCRE(benchmark::State& state) {
1478 StringPiece a;
1479 PCRE re("(?-s)^([ -~]+)");
1480 for (auto _ : state) {
1481 PCRE::PartialMatch(http_text, re, &a);
1482 }
1483 }
1484
ASCIIMatchRE2(benchmark::State & state)1485 void ASCIIMatchRE2(benchmark::State& state) {
1486 StringPiece a;
1487 RE2 re("(?-s)^([ -~]+)");
1488 for (auto _ : state) {
1489 RE2::PartialMatch(http_text, re, &a);
1490 }
1491 }
1492
1493 #ifdef USEPCRE
1494 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1495 #endif
1496 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1497
FullMatchPCRE(benchmark::State & state,const char * regexp)1498 void FullMatchPCRE(benchmark::State& state, const char *regexp) {
1499 std::string s = RandomText(state.range(0));
1500 s += "ABCDEFGHIJ";
1501 PCRE re(regexp);
1502 for (auto _ : state) {
1503 CHECK(PCRE::FullMatch(s, re));
1504 }
1505 state.SetBytesProcessed(state.iterations() * state.range(0));
1506 }
1507
FullMatchRE2(benchmark::State & state,const char * regexp)1508 void FullMatchRE2(benchmark::State& state, const char *regexp) {
1509 std::string s = RandomText(state.range(0));
1510 s += "ABCDEFGHIJ";
1511 RE2 re(regexp, RE2::Latin1);
1512 for (auto _ : state) {
1513 CHECK(RE2::FullMatch(s, re));
1514 }
1515 state.SetBytesProcessed(state.iterations() * state.range(0));
1516 }
1517
FullMatch_DotStar_CachedPCRE(benchmark::State & state)1518 void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
FullMatch_DotStar_CachedRE2(benchmark::State & state)1519 void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); }
1520
FullMatch_DotStarDollar_CachedPCRE(benchmark::State & state)1521 void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(benchmark::State & state)1522 void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$"); }
1523
FullMatch_DotStarCapture_CachedPCRE(benchmark::State & state)1524 void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(benchmark::State & state)1525 void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))"); }
1526
1527 #ifdef USEPCRE
1528 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1529 #endif
1530 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
1531
1532 #ifdef USEPCRE
1533 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1534 #endif
1535 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
1536
1537 #ifdef USEPCRE
1538 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1539 #endif
1540 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
1541
PossibleMatchRangeCommon(benchmark::State & state,const char * regexp)1542 void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
1543 RE2 re(regexp);
1544 std::string min;
1545 std::string max;
1546 const int kMaxLen = 16;
1547 for (auto _ : state) {
1548 CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1549 }
1550 }
1551
PossibleMatchRange_Trivial(benchmark::State & state)1552 void PossibleMatchRange_Trivial(benchmark::State& state) {
1553 PossibleMatchRangeCommon(state, ".*");
1554 }
PossibleMatchRange_Complex(benchmark::State & state)1555 void PossibleMatchRange_Complex(benchmark::State& state) {
1556 PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*");
1557 }
PossibleMatchRange_Prefix(benchmark::State & state)1558 void PossibleMatchRange_Prefix(benchmark::State& state) {
1559 PossibleMatchRangeCommon(state, "^some_random_prefix.*");
1560 }
PossibleMatchRange_NoProg(benchmark::State & state)1561 void PossibleMatchRange_NoProg(benchmark::State& state) {
1562 PossibleMatchRangeCommon(state, "^some_random_string$");
1563 }
1564
1565 BENCHMARK(PossibleMatchRange_Trivial);
1566 BENCHMARK(PossibleMatchRange_Complex);
1567 BENCHMARK(PossibleMatchRange_Prefix);
1568 BENCHMARK(PossibleMatchRange_NoProg);
1569
1570 } // namespace re2
1571