1 // Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Benchmarks for regular expression implementations.
6 
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string>
11 #include <thread>
12 #include <unordered_map>
13 #include <utility>
14 
15 #include "util/benchmark.h"
16 #include "util/test.h"
17 #include "util/flags.h"
18 #include "util/logging.h"
19 #include "util/malloc_counter.h"
20 #include "util/strutil.h"
21 #include "re2/prog.h"
22 #include "re2/re2.h"
23 #include "re2/regexp.h"
24 #include "util/mutex.h"
25 #include "util/pcre.h"
26 
27 namespace re2 {
28 void Test();
29 void MemoryUsage();
30 }  // namespace re2
31 
32 typedef testing::MallocCounter MallocCounter;
33 
34 namespace re2 {
35 
Test()36 void Test() {
37   Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
38   CHECK(re);
39   Prog* prog = re->CompileToProg(0);
40   CHECK(prog);
41   CHECK(prog->IsOnePass());
42   CHECK(prog->CanBitState());
43   const char* text = "650-253-0001";
44   StringPiece sp[4];
45   CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
46   CHECK_EQ(sp[0], "650-253-0001");
47   CHECK_EQ(sp[1], "650");
48   CHECK_EQ(sp[2], "253");
49   CHECK_EQ(sp[3], "0001");
50   delete prog;
51   re->Decref();
52   LOG(INFO) << "test passed\n";
53 }
54 
MemoryUsage()55 void MemoryUsage() {
56   const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
57   const char* text = "650-253-0001";
58   {
59     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
60     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
61     CHECK(re);
62     // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
63     // because LOG(INFO) might do a big allocation before they get evaluated.
64     fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
65             mc.HeapGrowth(), mc.PeakHeapGrowth());
66     mc.Reset();
67 
68     Prog* prog = re->CompileToProg(0);
69     CHECK(prog);
70     CHECK(prog->IsOnePass());
71     CHECK(prog->CanBitState());
72     fprintf(stderr, "Prog:   %7lld bytes (peak=%lld)\n",
73             mc.HeapGrowth(), mc.PeakHeapGrowth());
74     mc.Reset();
75 
76     StringPiece sp[4];
77     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
78     fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
79             mc.HeapGrowth(), mc.PeakHeapGrowth());
80     delete prog;
81     re->Decref();
82   }
83 
84   {
85     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
86 
87     PCRE re(regexp, PCRE::UTF8);
88     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n",
89             mc.HeapGrowth(), mc.PeakHeapGrowth());
90     PCRE::FullMatch(text, re);
91     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n",
92             mc.HeapGrowth(), mc.PeakHeapGrowth());
93   }
94 
95   {
96     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
97 
98     PCRE* re = new PCRE(regexp, PCRE::UTF8);
99     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n",
100             mc.HeapGrowth(), mc.PeakHeapGrowth());
101     PCRE::FullMatch(text, *re);
102     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n",
103             mc.HeapGrowth(), mc.PeakHeapGrowth());
104     delete re;
105   }
106 
107   {
108     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
109 
110     RE2 re(regexp);
111     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n",
112             mc.HeapGrowth(), mc.PeakHeapGrowth());
113     RE2::FullMatch(text, re);
114     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n",
115             mc.HeapGrowth(), mc.PeakHeapGrowth());
116   }
117 
118   fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
119           sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
120 }
121 
NumCPUs()122 int NumCPUs() {
123   return static_cast<int>(std::thread::hardware_concurrency());
124 }
125 
126 // Regular expression implementation wrappers.
127 // Defined at bottom of file, but they are repetitive
128 // and not interesting.
129 
130 typedef void SearchImpl(benchmark::State& state, const char* regexp,
131                         const StringPiece& text, Prog::Anchor anchor,
132                         bool expect_match);
133 
134 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
135     SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass,
136     SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
137 
138 typedef void ParseImpl(benchmark::State& state, const char* regexp,
139                        const StringPiece& text);
140 
141 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
142     Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
143     Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack;
144 
145 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2,
146     Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
147     Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack;
148 
149 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
150 
151 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
152 
153 // Benchmark: failed search for regexp in random text.
154 
155 // Generate random text that won't contain the search string,
156 // to test worst-case search behavior.
RandomText(int64_t nbytes)157 std::string RandomText(int64_t nbytes) {
158   static const std::string* const text = []() {
159     std::string* text = new std::string;
160     srand(1);
161     text->resize(16<<20);
162     for (int64_t i = 0; i < 16<<20; i++) {
163       // Generate a one-byte rune that isn't a control character (e.g. '\n').
164       // Clipping to 0x20 introduces some bias, but we don't need uniformity.
165       int byte = rand() & 0x7F;
166       if (byte < 0x20)
167         byte = 0x20;
168       (*text)[i] = byte;
169     }
170     return text;
171   }();
172   CHECK_LE(nbytes, 16<<20);
173   return text->substr(0, nbytes);
174 }
175 
176 // Makes text of size nbytes, then calls run to search
177 // the text for regexp iters times.
Search(benchmark::State & state,const char * regexp,SearchImpl * search)178 void Search(benchmark::State& state, const char* regexp, SearchImpl* search) {
179   std::string s = RandomText(state.range(0));
180   search(state, regexp, s, Prog::kUnanchored, false);
181   state.SetBytesProcessed(state.iterations() * state.range(0));
182 }
183 
184 // These two are easy because they start with an A,
185 // giving the search loop something to memchr for.
186 #define EASY0      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
187 #define EASY1      "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
188 
189 // This is a little harder, since it starts with a character class
190 // and thus can't be memchr'ed.  Could look for ABC and work backward,
191 // but no one does that.
192 #define MEDIUM     "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
193 
194 // This is a fair amount harder, because of the leading [ -~]*.
195 // A bad backtracking implementation will take O(text^2) time to
196 // figure out there's no match.
197 #define HARD       "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
198 
199 // This has quite a high degree of fanout.
200 // NFA execution will be particularly slow.
201 #define FANOUT     "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
202 
203 // This stresses engines that are trying to track parentheses.
204 #define PARENS     "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
205                    "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
206 
Search_Easy0_CachedDFA(benchmark::State & state)207 void Search_Easy0_CachedDFA(benchmark::State& state)     { Search(state, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(benchmark::State & state)208 void Search_Easy0_CachedNFA(benchmark::State& state)     { Search(state, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(benchmark::State & state)209 void Search_Easy0_CachedPCRE(benchmark::State& state)    { Search(state, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(benchmark::State & state)210 void Search_Easy0_CachedRE2(benchmark::State& state)     { Search(state, EASY0, SearchCachedRE2); }
211 
212 BENCHMARK_RANGE(Search_Easy0_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
213 BENCHMARK_RANGE(Search_Easy0_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
214 #ifdef USEPCRE
215 BENCHMARK_RANGE(Search_Easy0_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
216 #endif
217 BENCHMARK_RANGE(Search_Easy0_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
218 
Search_Easy1_CachedDFA(benchmark::State & state)219 void Search_Easy1_CachedDFA(benchmark::State& state)     { Search(state, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(benchmark::State & state)220 void Search_Easy1_CachedNFA(benchmark::State& state)     { Search(state, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(benchmark::State & state)221 void Search_Easy1_CachedPCRE(benchmark::State& state)    { Search(state, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(benchmark::State & state)222 void Search_Easy1_CachedRE2(benchmark::State& state)     { Search(state, EASY1, SearchCachedRE2); }
223 
224 BENCHMARK_RANGE(Search_Easy1_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
225 BENCHMARK_RANGE(Search_Easy1_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
226 #ifdef USEPCRE
227 BENCHMARK_RANGE(Search_Easy1_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
228 #endif
229 BENCHMARK_RANGE(Search_Easy1_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
230 
Search_Medium_CachedDFA(benchmark::State & state)231 void Search_Medium_CachedDFA(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(benchmark::State & state)232 void Search_Medium_CachedNFA(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(benchmark::State & state)233 void Search_Medium_CachedPCRE(benchmark::State& state)    { Search(state, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(benchmark::State & state)234 void Search_Medium_CachedRE2(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedRE2); }
235 
236 BENCHMARK_RANGE(Search_Medium_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
237 BENCHMARK_RANGE(Search_Medium_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
238 #ifdef USEPCRE
239 BENCHMARK_RANGE(Search_Medium_CachedPCRE,    8, 256<<10)->ThreadRange(1, NumCPUs());
240 #endif
241 BENCHMARK_RANGE(Search_Medium_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
242 
Search_Hard_CachedDFA(benchmark::State & state)243 void Search_Hard_CachedDFA(benchmark::State& state)     { Search(state, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(benchmark::State & state)244 void Search_Hard_CachedNFA(benchmark::State& state)     { Search(state, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(benchmark::State & state)245 void Search_Hard_CachedPCRE(benchmark::State& state)    { Search(state, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(benchmark::State & state)246 void Search_Hard_CachedRE2(benchmark::State& state)     { Search(state, HARD, SearchCachedRE2); }
247 
248 BENCHMARK_RANGE(Search_Hard_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
249 BENCHMARK_RANGE(Search_Hard_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
250 #ifdef USEPCRE
251 BENCHMARK_RANGE(Search_Hard_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
252 #endif
253 BENCHMARK_RANGE(Search_Hard_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
254 
Search_Fanout_CachedDFA(benchmark::State & state)255 void Search_Fanout_CachedDFA(benchmark::State& state)     { Search(state, FANOUT, SearchCachedDFA); }
Search_Fanout_CachedNFA(benchmark::State & state)256 void Search_Fanout_CachedNFA(benchmark::State& state)     { Search(state, FANOUT, SearchCachedNFA); }
Search_Fanout_CachedPCRE(benchmark::State & state)257 void Search_Fanout_CachedPCRE(benchmark::State& state)    { Search(state, FANOUT, SearchCachedPCRE); }
Search_Fanout_CachedRE2(benchmark::State & state)258 void Search_Fanout_CachedRE2(benchmark::State& state)     { Search(state, FANOUT, SearchCachedRE2); }
259 
260 BENCHMARK_RANGE(Search_Fanout_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
261 BENCHMARK_RANGE(Search_Fanout_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
262 #ifdef USEPCRE
263 BENCHMARK_RANGE(Search_Fanout_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
264 #endif
265 BENCHMARK_RANGE(Search_Fanout_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
266 
Search_Parens_CachedDFA(benchmark::State & state)267 void Search_Parens_CachedDFA(benchmark::State& state)     { Search(state, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(benchmark::State & state)268 void Search_Parens_CachedNFA(benchmark::State& state)     { Search(state, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(benchmark::State & state)269 void Search_Parens_CachedPCRE(benchmark::State& state)    { Search(state, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(benchmark::State & state)270 void Search_Parens_CachedRE2(benchmark::State& state)     { Search(state, PARENS, SearchCachedRE2); }
271 
272 BENCHMARK_RANGE(Search_Parens_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
273 BENCHMARK_RANGE(Search_Parens_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
274 #ifdef USEPCRE
275 BENCHMARK_RANGE(Search_Parens_CachedPCRE,    8, 8)->ThreadRange(1, NumCPUs());
276 #endif
277 BENCHMARK_RANGE(Search_Parens_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
278 
SearchBigFixed(benchmark::State & state,SearchImpl * search)279 void SearchBigFixed(benchmark::State& state, SearchImpl* search) {
280   std::string s;
281   s.append(state.range(0)/2, 'x');
282   std::string regexp = "^" + s + ".*$";
283   std::string t = RandomText(state.range(0)/2);
284   s += t;
285   search(state, regexp.c_str(), s, Prog::kUnanchored, true);
286   state.SetBytesProcessed(state.iterations() * state.range(0));
287 }
288 
Search_BigFixed_CachedDFA(benchmark::State & state)289 void Search_BigFixed_CachedDFA(benchmark::State& state)     { SearchBigFixed(state, SearchCachedDFA); }
Search_BigFixed_CachedNFA(benchmark::State & state)290 void Search_BigFixed_CachedNFA(benchmark::State& state)     { SearchBigFixed(state, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(benchmark::State & state)291 void Search_BigFixed_CachedPCRE(benchmark::State& state)    { SearchBigFixed(state, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(benchmark::State & state)292 void Search_BigFixed_CachedRE2(benchmark::State& state)     { SearchBigFixed(state, SearchCachedRE2); }
293 
294 BENCHMARK_RANGE(Search_BigFixed_CachedDFA,     8, 1<<20)->ThreadRange(1, NumCPUs());
295 BENCHMARK_RANGE(Search_BigFixed_CachedNFA,     8, 32<<10)->ThreadRange(1, NumCPUs());
296 #ifdef USEPCRE
297 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE,    8, 32<<10)->ThreadRange(1, NumCPUs());
298 #endif
299 BENCHMARK_RANGE(Search_BigFixed_CachedRE2,     8, 1<<20)->ThreadRange(1, NumCPUs());
300 
301 // Benchmark: FindAndConsume
302 
FindAndConsume(benchmark::State & state)303 void FindAndConsume(benchmark::State& state) {
304   std::string s = RandomText(state.range(0));
305   s.append("Hello World");
306   RE2 re("((Hello World))");
307   for (auto _ : state) {
308     StringPiece t = s;
309     StringPiece u;
310     CHECK(RE2::FindAndConsume(&t, re, &u));
311     CHECK_EQ(u, "Hello World");
312   }
313   state.SetBytesProcessed(state.iterations() * state.range(0));
314 }
315 
316 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
317 
318 // Benchmark: successful anchored search.
319 
SearchSuccess(benchmark::State & state,const char * regexp,SearchImpl * search)320 void SearchSuccess(benchmark::State& state, const char* regexp,
321                    SearchImpl* search) {
322   std::string s = RandomText(state.range(0));
323   search(state, regexp, s, Prog::kAnchored, true);
324   state.SetBytesProcessed(state.iterations() * state.range(0));
325 }
326 
327 // Unambiguous search (RE2 can use OnePass).
328 
Search_Success_DFA(benchmark::State & state)329 void Search_Success_DFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchDFA); }
Search_Success_NFA(benchmark::State & state)330 void Search_Success_NFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchNFA); }
Search_Success_PCRE(benchmark::State & state)331 void Search_Success_PCRE(benchmark::State& state)    { SearchSuccess(state, ".*$", SearchPCRE); }
Search_Success_RE2(benchmark::State & state)332 void Search_Success_RE2(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchRE2); }
Search_Success_OnePass(benchmark::State & state)333 void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchOnePass); }
334 
335 BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
336 BENCHMARK_RANGE(Search_Success_NFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
337 #ifdef USEPCRE
338 BENCHMARK_RANGE(Search_Success_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
339 #endif
340 BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
341 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
342 
Search_Success_CachedDFA(benchmark::State & state)343 void Search_Success_CachedDFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedDFA); }
Search_Success_CachedNFA(benchmark::State & state)344 void Search_Success_CachedNFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedNFA); }
Search_Success_CachedPCRE(benchmark::State & state)345 void Search_Success_CachedPCRE(benchmark::State& state)    { SearchSuccess(state, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(benchmark::State & state)346 void Search_Success_CachedRE2(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedRE2); }
Search_Success_CachedOnePass(benchmark::State & state)347 void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedOnePass); }
348 
349 BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
350 BENCHMARK_RANGE(Search_Success_CachedNFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
351 #ifdef USEPCRE
352 BENCHMARK_RANGE(Search_Success_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
353 #endif
354 BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
355 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
356 
357 // Ambiguous search (RE2 cannot use OnePass).
358 // Used to be ".*.$", but that is coalesced to ".+$" these days.
359 
Search_Success1_DFA(benchmark::State & state)360 void Search_Success1_DFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchDFA); }
Search_Success1_NFA(benchmark::State & state)361 void Search_Success1_NFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchNFA); }
Search_Success1_PCRE(benchmark::State & state)362 void Search_Success1_PCRE(benchmark::State& state)     { SearchSuccess(state, ".*\\C$", SearchPCRE); }
Search_Success1_RE2(benchmark::State & state)363 void Search_Success1_RE2(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchRE2); }
Search_Success1_BitState(benchmark::State & state)364 void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchBitState); }
365 
366 BENCHMARK_RANGE(Search_Success1_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
367 BENCHMARK_RANGE(Search_Success1_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
368 #ifdef USEPCRE
369 BENCHMARK_RANGE(Search_Success1_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
370 #endif
371 BENCHMARK_RANGE(Search_Success1_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
372 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
373 
Search_Success1_CachedDFA(benchmark::State & state)374 void Search_Success1_CachedDFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedDFA); }
Search_Success1_CachedNFA(benchmark::State & state)375 void Search_Success1_CachedNFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedNFA); }
Search_Success1_CachedPCRE(benchmark::State & state)376 void Search_Success1_CachedPCRE(benchmark::State& state)     { SearchSuccess(state, ".*\\C$", SearchCachedPCRE); }
Search_Success1_CachedRE2(benchmark::State & state)377 void Search_Success1_CachedRE2(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedRE2); }
Search_Success1_CachedBitState(benchmark::State & state)378 void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedBitState); }
379 
380 BENCHMARK_RANGE(Search_Success1_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
381 BENCHMARK_RANGE(Search_Success1_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
382 #ifdef USEPCRE
383 BENCHMARK_RANGE(Search_Success1_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
384 #endif
385 BENCHMARK_RANGE(Search_Success1_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
386 BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
387 
388 // Benchmark: AltMatch optimisation (just to verify that it works)
389 // Note that OnePass doesn't implement it!
390 
SearchAltMatch(benchmark::State & state,SearchImpl * search)391 void SearchAltMatch(benchmark::State& state, SearchImpl* search) {
392   std::string s = RandomText(state.range(0));
393   search(state, "\\C*", s, Prog::kAnchored, true);
394   state.SetBytesProcessed(state.iterations() * state.range(0));
395 }
396 
Search_AltMatch_DFA(benchmark::State & state)397 void Search_AltMatch_DFA(benchmark::State& state)      { SearchAltMatch(state, SearchDFA); }
Search_AltMatch_NFA(benchmark::State & state)398 void Search_AltMatch_NFA(benchmark::State& state)      { SearchAltMatch(state, SearchNFA); }
Search_AltMatch_OnePass(benchmark::State & state)399 void Search_AltMatch_OnePass(benchmark::State& state)  { SearchAltMatch(state, SearchOnePass); }
Search_AltMatch_BitState(benchmark::State & state)400 void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); }
Search_AltMatch_PCRE(benchmark::State & state)401 void Search_AltMatch_PCRE(benchmark::State& state)     { SearchAltMatch(state, SearchPCRE); }
Search_AltMatch_RE2(benchmark::State & state)402 void Search_AltMatch_RE2(benchmark::State& state)      { SearchAltMatch(state, SearchRE2); }
403 
404 BENCHMARK_RANGE(Search_AltMatch_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
405 BENCHMARK_RANGE(Search_AltMatch_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
406 BENCHMARK_RANGE(Search_AltMatch_OnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
407 BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
408 #ifdef USEPCRE
409 BENCHMARK_RANGE(Search_AltMatch_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
410 #endif
411 BENCHMARK_RANGE(Search_AltMatch_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
412 
Search_AltMatch_CachedDFA(benchmark::State & state)413 void Search_AltMatch_CachedDFA(benchmark::State& state)      { SearchAltMatch(state, SearchCachedDFA); }
Search_AltMatch_CachedNFA(benchmark::State & state)414 void Search_AltMatch_CachedNFA(benchmark::State& state)      { SearchAltMatch(state, SearchCachedNFA); }
Search_AltMatch_CachedOnePass(benchmark::State & state)415 void Search_AltMatch_CachedOnePass(benchmark::State& state)  { SearchAltMatch(state, SearchCachedOnePass); }
Search_AltMatch_CachedBitState(benchmark::State & state)416 void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); }
Search_AltMatch_CachedPCRE(benchmark::State & state)417 void Search_AltMatch_CachedPCRE(benchmark::State& state)     { SearchAltMatch(state, SearchCachedPCRE); }
Search_AltMatch_CachedRE2(benchmark::State & state)418 void Search_AltMatch_CachedRE2(benchmark::State& state)      { SearchAltMatch(state, SearchCachedRE2); }
419 
420 BENCHMARK_RANGE(Search_AltMatch_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
421 BENCHMARK_RANGE(Search_AltMatch_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
422 BENCHMARK_RANGE(Search_AltMatch_CachedOnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
423 BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
424 #ifdef USEPCRE
425 BENCHMARK_RANGE(Search_AltMatch_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
426 #endif
427 BENCHMARK_RANGE(Search_AltMatch_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
428 
429 // Benchmark: use regexp to find phone number.
430 
SearchDigits(benchmark::State & state,SearchImpl * search)431 void SearchDigits(benchmark::State& state, SearchImpl* search) {
432   StringPiece s("650-253-0001");
433   search(state, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
434   state.SetItemsProcessed(state.iterations());
435 }
436 
Search_Digits_DFA(benchmark::State & state)437 void Search_Digits_DFA(benchmark::State& state)         { SearchDigits(state, SearchDFA); }
Search_Digits_NFA(benchmark::State & state)438 void Search_Digits_NFA(benchmark::State& state)         { SearchDigits(state, SearchNFA); }
Search_Digits_OnePass(benchmark::State & state)439 void Search_Digits_OnePass(benchmark::State& state)     { SearchDigits(state, SearchOnePass); }
Search_Digits_PCRE(benchmark::State & state)440 void Search_Digits_PCRE(benchmark::State& state)        { SearchDigits(state, SearchPCRE); }
Search_Digits_RE2(benchmark::State & state)441 void Search_Digits_RE2(benchmark::State& state)         { SearchDigits(state, SearchRE2); }
Search_Digits_BitState(benchmark::State & state)442 void Search_Digits_BitState(benchmark::State& state)    { SearchDigits(state, SearchBitState); }
443 
444 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
445 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
446 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
447 #ifdef USEPCRE
448 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
449 #endif
450 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
451 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
452 
453 // Benchmark: use regexp to parse digit fields in phone number.
454 
Parse3Digits(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))455 void Parse3Digits(benchmark::State& state,
456                   void (*parse3)(benchmark::State&, const char*,
457                                  const StringPiece&)) {
458   parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
459   state.SetItemsProcessed(state.iterations());
460 }
461 
Parse_Digits_NFA(benchmark::State & state)462 void Parse_Digits_NFA(benchmark::State& state)         { Parse3Digits(state, Parse3NFA); }
Parse_Digits_OnePass(benchmark::State & state)463 void Parse_Digits_OnePass(benchmark::State& state)     { Parse3Digits(state, Parse3OnePass); }
Parse_Digits_PCRE(benchmark::State & state)464 void Parse_Digits_PCRE(benchmark::State& state)        { Parse3Digits(state, Parse3PCRE); }
Parse_Digits_RE2(benchmark::State & state)465 void Parse_Digits_RE2(benchmark::State& state)         { Parse3Digits(state, Parse3RE2); }
Parse_Digits_Backtrack(benchmark::State & state)466 void Parse_Digits_Backtrack(benchmark::State& state)   { Parse3Digits(state, Parse3Backtrack); }
Parse_Digits_BitState(benchmark::State & state)467 void Parse_Digits_BitState(benchmark::State& state)    { Parse3Digits(state, Parse3BitState); }
468 
469 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
470 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
471 #ifdef USEPCRE
472 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
473 #endif
474 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
475 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
476 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
477 
Parse_CachedDigits_NFA(benchmark::State & state)478 void Parse_CachedDigits_NFA(benchmark::State& state)         { Parse3Digits(state, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(benchmark::State & state)479 void Parse_CachedDigits_OnePass(benchmark::State& state)     { Parse3Digits(state, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(benchmark::State & state)480 void Parse_CachedDigits_PCRE(benchmark::State& state)        { Parse3Digits(state, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(benchmark::State & state)481 void Parse_CachedDigits_RE2(benchmark::State& state)         { Parse3Digits(state, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(benchmark::State & state)482 void Parse_CachedDigits_Backtrack(benchmark::State& state)   { Parse3Digits(state, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(benchmark::State & state)483 void Parse_CachedDigits_BitState(benchmark::State& state)    { Parse3Digits(state, Parse3CachedBitState); }
484 
485 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
486 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
487 #ifdef USEPCRE
488 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
489 #endif
490 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
491 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
492 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
493 
Parse3DigitDs(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))494 void Parse3DigitDs(benchmark::State& state,
495                    void (*parse3)(benchmark::State&, const char*,
496                                   const StringPiece&)) {
497   parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
498   state.SetItemsProcessed(state.iterations());
499 }
500 
Parse_DigitDs_NFA(benchmark::State & state)501 void Parse_DigitDs_NFA(benchmark::State& state)         { Parse3DigitDs(state, Parse3NFA); }
Parse_DigitDs_OnePass(benchmark::State & state)502 void Parse_DigitDs_OnePass(benchmark::State& state)     { Parse3DigitDs(state, Parse3OnePass); }
Parse_DigitDs_PCRE(benchmark::State & state)503 void Parse_DigitDs_PCRE(benchmark::State& state)        { Parse3DigitDs(state, Parse3PCRE); }
Parse_DigitDs_RE2(benchmark::State & state)504 void Parse_DigitDs_RE2(benchmark::State& state)         { Parse3DigitDs(state, Parse3RE2); }
Parse_DigitDs_Backtrack(benchmark::State & state)505 void Parse_DigitDs_Backtrack(benchmark::State& state)   { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(benchmark::State & state)506 void Parse_DigitDs_BitState(benchmark::State& state)    { Parse3DigitDs(state, Parse3CachedBitState); }
507 
508 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
509 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
510 #ifdef USEPCRE
511 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
512 #endif
513 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
514 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
515 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
516 
Parse_CachedDigitDs_NFA(benchmark::State & state)517 void Parse_CachedDigitDs_NFA(benchmark::State& state)         { Parse3DigitDs(state, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(benchmark::State & state)518 void Parse_CachedDigitDs_OnePass(benchmark::State& state)     { Parse3DigitDs(state, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(benchmark::State & state)519 void Parse_CachedDigitDs_PCRE(benchmark::State& state)        { Parse3DigitDs(state, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(benchmark::State & state)520 void Parse_CachedDigitDs_RE2(benchmark::State& state)         { Parse3DigitDs(state, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(benchmark::State & state)521 void Parse_CachedDigitDs_Backtrack(benchmark::State& state)   { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(benchmark::State & state)522 void Parse_CachedDigitDs_BitState(benchmark::State& state)    { Parse3DigitDs(state, Parse3CachedBitState); }
523 
524 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
525 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
526 #ifdef USEPCRE
527 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
528 #endif
529 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
530 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
531 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
532 
533 // Benchmark: splitting off leading number field.
534 
Parse1Split(benchmark::State & state,void (* parse1)(benchmark::State &,const char *,const StringPiece &))535 void Parse1Split(benchmark::State& state,
536                  void (*parse1)(benchmark::State&, const char*,
537                                 const StringPiece&)) {
538   parse1(state, "[0-9]+-(.*)", "650-253-0001");
539   state.SetItemsProcessed(state.iterations());
540 }
541 
Parse_Split_NFA(benchmark::State & state)542 void Parse_Split_NFA(benchmark::State& state)         { Parse1Split(state, Parse1NFA); }
Parse_Split_OnePass(benchmark::State & state)543 void Parse_Split_OnePass(benchmark::State& state)     { Parse1Split(state, Parse1OnePass); }
Parse_Split_PCRE(benchmark::State & state)544 void Parse_Split_PCRE(benchmark::State& state)        { Parse1Split(state, Parse1PCRE); }
Parse_Split_RE2(benchmark::State & state)545 void Parse_Split_RE2(benchmark::State& state)         { Parse1Split(state, Parse1RE2); }
Parse_Split_BitState(benchmark::State & state)546 void Parse_Split_BitState(benchmark::State& state)    { Parse1Split(state, Parse1BitState); }
547 
548 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
549 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
550 #ifdef USEPCRE
551 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
552 #endif
553 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
554 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
555 
Parse_CachedSplit_NFA(benchmark::State & state)556 void Parse_CachedSplit_NFA(benchmark::State& state)         { Parse1Split(state, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(benchmark::State & state)557 void Parse_CachedSplit_OnePass(benchmark::State& state)     { Parse1Split(state, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(benchmark::State & state)558 void Parse_CachedSplit_PCRE(benchmark::State& state)        { Parse1Split(state, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(benchmark::State & state)559 void Parse_CachedSplit_RE2(benchmark::State& state)         { Parse1Split(state, Parse1CachedRE2); }
Parse_CachedSplit_BitState(benchmark::State & state)560 void Parse_CachedSplit_BitState(benchmark::State& state)    { Parse1Split(state, Parse1CachedBitState); }
561 
562 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
563 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
564 #ifdef USEPCRE
565 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
566 #endif
567 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
568 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
569 
570 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
571 
Parse1SplitHard(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))572 void Parse1SplitHard(benchmark::State& state,
573                      void (*run)(benchmark::State&, const char*,
574                                  const StringPiece&)) {
575   run(state, "[0-9]+.(.*)", "650-253-0001");
576   state.SetItemsProcessed(state.iterations());
577 }
578 
Parse_SplitHard_NFA(benchmark::State & state)579 void Parse_SplitHard_NFA(benchmark::State& state)         { Parse1SplitHard(state, Parse1NFA); }
Parse_SplitHard_PCRE(benchmark::State & state)580 void Parse_SplitHard_PCRE(benchmark::State& state)        { Parse1SplitHard(state, Parse1PCRE); }
Parse_SplitHard_RE2(benchmark::State & state)581 void Parse_SplitHard_RE2(benchmark::State& state)         { Parse1SplitHard(state, Parse1RE2); }
Parse_SplitHard_BitState(benchmark::State & state)582 void Parse_SplitHard_BitState(benchmark::State& state)    { Parse1SplitHard(state, Parse1BitState); }
583 
584 #ifdef USEPCRE
585 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
586 #endif
587 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
588 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
589 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
590 
Parse_CachedSplitHard_NFA(benchmark::State & state)591 void Parse_CachedSplitHard_NFA(benchmark::State& state)       { Parse1SplitHard(state, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(benchmark::State & state)592 void Parse_CachedSplitHard_PCRE(benchmark::State& state)      { Parse1SplitHard(state, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(benchmark::State & state)593 void Parse_CachedSplitHard_RE2(benchmark::State& state)       { Parse1SplitHard(state, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(benchmark::State & state)594 void Parse_CachedSplitHard_BitState(benchmark::State& state)  { Parse1SplitHard(state, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(benchmark::State & state)595 void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); }
596 
597 #ifdef USEPCRE
598 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
599 #endif
600 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
601 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
602 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
603 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
604 
605 // Benchmark: Parse1SplitHard, big text, small match.
606 
Parse1SplitBig1(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))607 void Parse1SplitBig1(benchmark::State& state,
608                      void (*run)(benchmark::State&, const char*,
609                                  const StringPiece&)) {
610   std::string s;
611   s.append(100000, 'x');
612   s.append("650-253-0001");
613   run(state, "[0-9]+.(.*)", s);
614   state.SetItemsProcessed(state.iterations());
615 }
616 
Parse_CachedSplitBig1_PCRE(benchmark::State & state)617 void Parse_CachedSplitBig1_PCRE(benchmark::State& state)      { Parse1SplitBig1(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(benchmark::State & state)618 void Parse_CachedSplitBig1_RE2(benchmark::State& state)       { Parse1SplitBig1(state, SearchParse1CachedRE2); }
619 
620 #ifdef USEPCRE
621 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
622 #endif
623 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
624 
625 // Benchmark: Parse1SplitHard, big text, big match.
626 
Parse1SplitBig2(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))627 void Parse1SplitBig2(benchmark::State& state,
628                      void (*run)(benchmark::State&, const char*,
629                                  const StringPiece&)) {
630   std::string s;
631   s.append("650-253-");
632   s.append(100000, '0');
633   run(state, "[0-9]+.(.*)", s);
634   state.SetItemsProcessed(state.iterations());
635 }
636 
Parse_CachedSplitBig2_PCRE(benchmark::State & state)637 void Parse_CachedSplitBig2_PCRE(benchmark::State& state)      { Parse1SplitBig2(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(benchmark::State & state)638 void Parse_CachedSplitBig2_RE2(benchmark::State& state)       { Parse1SplitBig2(state, SearchParse1CachedRE2); }
639 
640 #ifdef USEPCRE
641 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
642 #endif
643 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
644 
645 // Benchmark: measure time required to parse (but not execute)
646 // a simple regular expression.
647 
ParseRegexp(benchmark::State & state,const std::string & regexp)648 void ParseRegexp(benchmark::State& state, const std::string& regexp) {
649   for (auto _ : state) {
650     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
651     CHECK(re);
652     re->Decref();
653   }
654 }
655 
SimplifyRegexp(benchmark::State & state,const std::string & regexp)656 void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
657   for (auto _ : state) {
658     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
659     CHECK(re);
660     Regexp* sre = re->Simplify();
661     CHECK(sre);
662     sre->Decref();
663     re->Decref();
664   }
665 }
666 
NullWalkRegexp(benchmark::State & state,const std::string & regexp)667 void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
668   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
669   CHECK(re);
670   for (auto _ : state) {
671     re->NullWalk();
672   }
673   re->Decref();
674 }
675 
SimplifyCompileRegexp(benchmark::State & state,const std::string & regexp)676 void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
677   for (auto _ : state) {
678     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
679     CHECK(re);
680     Regexp* sre = re->Simplify();
681     CHECK(sre);
682     Prog* prog = sre->CompileToProg(0);
683     CHECK(prog);
684     delete prog;
685     sre->Decref();
686     re->Decref();
687   }
688 }
689 
CompileRegexp(benchmark::State & state,const std::string & regexp)690 void CompileRegexp(benchmark::State& state, const std::string& regexp) {
691   for (auto _ : state) {
692     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
693     CHECK(re);
694     Prog* prog = re->CompileToProg(0);
695     CHECK(prog);
696     delete prog;
697     re->Decref();
698   }
699 }
700 
CompileToProg(benchmark::State & state,const std::string & regexp)701 void CompileToProg(benchmark::State& state, const std::string& regexp) {
702   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
703   CHECK(re);
704   for (auto _ : state) {
705     Prog* prog = re->CompileToProg(0);
706     CHECK(prog);
707     delete prog;
708   }
709   re->Decref();
710 }
711 
CompileByteMap(benchmark::State & state,const std::string & regexp)712 void CompileByteMap(benchmark::State& state, const std::string& regexp) {
713   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
714   CHECK(re);
715   Prog* prog = re->CompileToProg(0);
716   CHECK(prog);
717   for (auto _ : state) {
718     prog->ComputeByteMap();
719   }
720   delete prog;
721   re->Decref();
722 }
723 
CompilePCRE(benchmark::State & state,const std::string & regexp)724 void CompilePCRE(benchmark::State& state, const std::string& regexp) {
725   for (auto _ : state) {
726     PCRE re(regexp, PCRE::UTF8);
727     CHECK_EQ(re.error(), "");
728   }
729 }
730 
CompileRE2(benchmark::State & state,const std::string & regexp)731 void CompileRE2(benchmark::State& state, const std::string& regexp) {
732   for (auto _ : state) {
733     RE2 re(regexp);
734     CHECK_EQ(re.error(), "");
735   }
736 }
737 
RunBuild(benchmark::State & state,const std::string & regexp,void (* run)(benchmark::State &,const std::string &))738 void RunBuild(benchmark::State& state, const std::string& regexp,
739               void (*run)(benchmark::State&, const std::string&)) {
740   run(state, regexp);
741   state.SetItemsProcessed(state.iterations());
742 }
743 
744 }  // namespace re2
745 
746 DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
747             "regexp for compile benchmarks");
748 
749 namespace re2 {
750 
BM_PCRE_Compile(benchmark::State & state)751 void BM_PCRE_Compile(benchmark::State& state)             { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
BM_Regexp_Parse(benchmark::State & state)752 void BM_Regexp_Parse(benchmark::State& state)             { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
BM_Regexp_Simplify(benchmark::State & state)753 void BM_Regexp_Simplify(benchmark::State& state)          { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
BM_CompileToProg(benchmark::State & state)754 void BM_CompileToProg(benchmark::State& state)            { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
BM_CompileByteMap(benchmark::State & state)755 void BM_CompileByteMap(benchmark::State& state)           { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
BM_Regexp_Compile(benchmark::State & state)756 void BM_Regexp_Compile(benchmark::State& state)           { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
BM_Regexp_SimplifyCompile(benchmark::State & state)757 void BM_Regexp_SimplifyCompile(benchmark::State& state)   { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
BM_Regexp_NullWalk(benchmark::State & state)758 void BM_Regexp_NullWalk(benchmark::State& state)          { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
BM_RE2_Compile(benchmark::State & state)759 void BM_RE2_Compile(benchmark::State& state)              { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
760 
761 #ifdef USEPCRE
762 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
763 #endif
764 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
765 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
766 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
767 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
768 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
769 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
770 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
771 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
772 
773 // Makes text of size nbytes, then calls run to search
774 // the text for regexp iters times.
SearchPhone(benchmark::State & state,ParseImpl * search)775 void SearchPhone(benchmark::State& state, ParseImpl* search) {
776   std::string s = RandomText(state.range(0));
777   s.append("(650) 253-0001");
778   search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
779   state.SetBytesProcessed(state.iterations() * state.range(0));
780 }
781 
SearchPhone_CachedPCRE(benchmark::State & state)782 void SearchPhone_CachedPCRE(benchmark::State& state) {
783   SearchPhone(state, SearchParse2CachedPCRE);
784 }
785 
SearchPhone_CachedRE2(benchmark::State & state)786 void SearchPhone_CachedRE2(benchmark::State& state) {
787   SearchPhone(state, SearchParse2CachedRE2);
788 }
789 
790 #ifdef USEPCRE
791 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
792 #endif
793 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
794 
795 /*
796 TODO(rsc): Make this work again.
797 void CacheFill(int iters, int n, SearchImpl *srch) {
798   std::string s = DeBruijnString(n+1);
799   std::string t;
800   for (int i = n+1; i < 20; i++) {
801     t = s + s;
802     using std::swap;
803     swap(s, t);
804   }
805   srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
806        Prog::kUnanchored, true);
807   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
808 }
809 
810 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
811 void CacheFillRE2(int i, int n)  { CacheFill(i, n, SearchCachedRE2); }
812 void CacheFillNFA(int i, int n)  { CacheFill(i, n, SearchCachedNFA); }
813 void CacheFillDFA(int i, int n)  { CacheFill(i, n, SearchCachedDFA); }
814 
815 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
816 // for the static BenchmarkRegisterer, which makes it unusable inside
817 // a macro like DO24 below.  MY_BENCHMARK_WITH_ARG uses the argument a
818 // to make the identifiers distinct (only possible when 'a' is a simple
819 // expression like 2, not like 1+1).
820 #define MY_BENCHMARK_WITH_ARG(n, a) \
821   bool __benchmark_ ## n ## a =     \
822     (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
823 
824 #define DO24(A, B) \
825   A(B, 1);    A(B, 2);    A(B, 3);    A(B, 4);    A(B, 5);    A(B, 6);  \
826   A(B, 7);    A(B, 8);    A(B, 9);    A(B, 10);   A(B, 11);   A(B, 12); \
827   A(B, 13);   A(B, 14);   A(B, 15);   A(B, 16);   A(B, 17);   A(B, 18); \
828   A(B, 19);   A(B, 20);   A(B, 21);   A(B, 22);   A(B, 23);   A(B, 24);
829 
830 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
831 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
832 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
833 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
834 
835 #undef DO24
836 #undef MY_BENCHMARK_WITH_ARG
837 */
838 
839 ////////////////////////////////////////////////////////////////////////
840 //
841 // Implementation routines.  Sad that there are so many,
842 // but all the interfaces are slightly different.
843 
844 // Runs implementation to search for regexp in text, iters times.
845 // Expect_match says whether the regexp should be found.
846 // Anchored says whether to run an anchored search.
847 
SearchDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)848 void SearchDFA(benchmark::State& state, const char* regexp,
849                const StringPiece& text, Prog::Anchor anchor,
850                bool expect_match) {
851   for (auto _ : state) {
852     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
853     CHECK(re);
854     Prog* prog = re->CompileToProg(0);
855     CHECK(prog);
856     bool failed = false;
857     CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
858                              NULL, &failed, NULL),
859              expect_match);
860     CHECK(!failed);
861     delete prog;
862     re->Decref();
863   }
864 }
865 
SearchNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)866 void SearchNFA(benchmark::State& state, const char* regexp,
867                const StringPiece& text, Prog::Anchor anchor,
868                bool expect_match) {
869   for (auto _ : state) {
870     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
871     CHECK(re);
872     Prog* prog = re->CompileToProg(0);
873     CHECK(prog);
874     CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
875                              NULL, 0),
876              expect_match);
877     delete prog;
878     re->Decref();
879   }
880 }
881 
SearchOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)882 void SearchOnePass(benchmark::State& state, const char* regexp,
883                    const StringPiece& text, Prog::Anchor anchor,
884                    bool expect_match) {
885   for (auto _ : state) {
886     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
887     CHECK(re);
888     Prog* prog = re->CompileToProg(0);
889     CHECK(prog);
890     CHECK(prog->IsOnePass());
891     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
892              expect_match);
893     delete prog;
894     re->Decref();
895   }
896 }
897 
SearchBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)898 void SearchBitState(benchmark::State& state, const char* regexp,
899                     const StringPiece& text, Prog::Anchor anchor,
900                     bool expect_match) {
901   for (auto _ : state) {
902     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
903     CHECK(re);
904     Prog* prog = re->CompileToProg(0);
905     CHECK(prog);
906     CHECK(prog->CanBitState());
907     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
908              expect_match);
909     delete prog;
910     re->Decref();
911   }
912 }
913 
SearchPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)914 void SearchPCRE(benchmark::State& state, const char* regexp,
915                 const StringPiece& text, Prog::Anchor anchor,
916                 bool expect_match) {
917   for (auto _ : state) {
918     PCRE re(regexp, PCRE::UTF8);
919     CHECK_EQ(re.error(), "");
920     if (anchor == Prog::kAnchored)
921       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
922     else
923       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
924   }
925 }
926 
SearchRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)927 void SearchRE2(benchmark::State& state, const char* regexp,
928                const StringPiece& text, Prog::Anchor anchor,
929                bool expect_match) {
930   for (auto _ : state) {
931     RE2 re(regexp);
932     CHECK_EQ(re.error(), "");
933     if (anchor == Prog::kAnchored)
934       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
935     else
936       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
937   }
938 }
939 
940 // SearchCachedXXX is like SearchXXX but only does the
941 // regexp parsing and compiling once.  This lets us measure
942 // search time without the per-regexp overhead.
943 
GetCachedProg(const char * regexp)944 Prog* GetCachedProg(const char* regexp) {
945   static auto& mutex = *new Mutex;
946   MutexLock lock(&mutex);
947   static auto& cache = *new std::unordered_map<std::string, Prog*>;
948   Prog* prog = cache[regexp];
949   if (prog == NULL) {
950     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
951     CHECK(re);
952     prog = re->CompileToProg(int64_t{1}<<31);  // mostly for the DFA
953     CHECK(prog);
954     cache[regexp] = prog;
955     re->Decref();
956   }
957   return prog;
958 }
959 
GetCachedPCRE(const char * regexp)960 PCRE* GetCachedPCRE(const char* regexp) {
961   static auto& mutex = *new Mutex;
962   MutexLock lock(&mutex);
963   static auto& cache = *new std::unordered_map<std::string, PCRE*>;
964   PCRE* re = cache[regexp];
965   if (re == NULL) {
966     re = new PCRE(regexp, PCRE::UTF8);
967     CHECK_EQ(re->error(), "");
968     cache[regexp] = re;
969   }
970   return re;
971 }
972 
GetCachedRE2(const char * regexp)973 RE2* GetCachedRE2(const char* regexp) {
974   static auto& mutex = *new Mutex;
975   MutexLock lock(&mutex);
976   static auto& cache = *new std::unordered_map<std::string, RE2*>;
977   RE2* re = cache[regexp];
978   if (re == NULL) {
979     re = new RE2(regexp);
980     CHECK_EQ(re->error(), "");
981     cache[regexp] = re;
982   }
983   return re;
984 }
985 
SearchCachedDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)986 void SearchCachedDFA(benchmark::State& state, const char* regexp,
987                      const StringPiece& text, Prog::Anchor anchor,
988                      bool expect_match) {
989   Prog* prog = GetCachedProg(regexp);
990   for (auto _ : state) {
991     bool failed = false;
992     CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
993                              NULL, &failed, NULL),
994              expect_match);
995     CHECK(!failed);
996   }
997 }
998 
SearchCachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)999 void SearchCachedNFA(benchmark::State& state, const char* regexp,
1000                      const StringPiece& text, Prog::Anchor anchor,
1001                      bool expect_match) {
1002   Prog* prog = GetCachedProg(regexp);
1003   for (auto _ : state) {
1004     CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
1005                              NULL, 0),
1006              expect_match);
1007   }
1008 }
1009 
SearchCachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1010 void SearchCachedOnePass(benchmark::State& state, const char* regexp,
1011                          const StringPiece& text, Prog::Anchor anchor,
1012                          bool expect_match) {
1013   Prog* prog = GetCachedProg(regexp);
1014   CHECK(prog->IsOnePass());
1015   for (auto _ : state) {
1016     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1017              expect_match);
1018   }
1019 }
1020 
SearchCachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1021 void SearchCachedBitState(benchmark::State& state, const char* regexp,
1022                           const StringPiece& text, Prog::Anchor anchor,
1023                           bool expect_match) {
1024   Prog* prog = GetCachedProg(regexp);
1025   CHECK(prog->CanBitState());
1026   for (auto _ : state) {
1027     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1028              expect_match);
1029   }
1030 }
1031 
SearchCachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1032 void SearchCachedPCRE(benchmark::State& state, const char* regexp,
1033                       const StringPiece& text, Prog::Anchor anchor,
1034                       bool expect_match) {
1035   PCRE& re = *GetCachedPCRE(regexp);
1036   for (auto _ : state) {
1037     if (anchor == Prog::kAnchored)
1038       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1039     else
1040       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1041   }
1042 }
1043 
SearchCachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1044 void SearchCachedRE2(benchmark::State& state, const char* regexp,
1045                      const StringPiece& text, Prog::Anchor anchor,
1046                      bool expect_match) {
1047   RE2& re = *GetCachedRE2(regexp);
1048   for (auto _ : state) {
1049     if (anchor == Prog::kAnchored)
1050       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1051     else
1052       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1053   }
1054 }
1055 
1056 // Runs implementation to full match regexp against text,
1057 // extracting three submatches.  Expects match always.
1058 
Parse3NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1059 void Parse3NFA(benchmark::State& state, const char* regexp,
1060                const StringPiece& text) {
1061   for (auto _ : state) {
1062     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1063     CHECK(re);
1064     Prog* prog = re->CompileToProg(0);
1065     CHECK(prog);
1066     StringPiece sp[4];  // 4 because sp[0] is whole match.
1067     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1068                           Prog::kFullMatch, sp, 4));
1069     delete prog;
1070     re->Decref();
1071   }
1072 }
1073 
Parse3OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1074 void Parse3OnePass(benchmark::State& state, const char* regexp,
1075                    const StringPiece& text) {
1076   for (auto _ : state) {
1077     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1078     CHECK(re);
1079     Prog* prog = re->CompileToProg(0);
1080     CHECK(prog);
1081     CHECK(prog->IsOnePass());
1082     StringPiece sp[4];  // 4 because sp[0] is whole match.
1083     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1084     delete prog;
1085     re->Decref();
1086   }
1087 }
1088 
Parse3BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1089 void Parse3BitState(benchmark::State& state, const char* regexp,
1090                     const StringPiece& text) {
1091   for (auto _ : state) {
1092     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1093     CHECK(re);
1094     Prog* prog = re->CompileToProg(0);
1095     CHECK(prog);
1096     CHECK(prog->CanBitState());
1097     StringPiece sp[4];  // 4 because sp[0] is whole match.
1098     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1099     delete prog;
1100     re->Decref();
1101   }
1102 }
1103 
Parse3Backtrack(benchmark::State & state,const char * regexp,const StringPiece & text)1104 void Parse3Backtrack(benchmark::State& state, const char* regexp,
1105                      const StringPiece& text) {
1106   for (auto _ : state) {
1107     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1108     CHECK(re);
1109     Prog* prog = re->CompileToProg(0);
1110     CHECK(prog);
1111     StringPiece sp[4];  // 4 because sp[0] is whole match.
1112     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1113     delete prog;
1114     re->Decref();
1115   }
1116 }
1117 
Parse3PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1118 void Parse3PCRE(benchmark::State& state, const char* regexp,
1119                 const StringPiece& text) {
1120   for (auto _ : state) {
1121     PCRE re(regexp, PCRE::UTF8);
1122     CHECK_EQ(re.error(), "");
1123     StringPiece sp1, sp2, sp3;
1124     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1125   }
1126 }
1127 
Parse3RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1128 void Parse3RE2(benchmark::State& state, const char* regexp,
1129                const StringPiece& text) {
1130   for (auto _ : state) {
1131     RE2 re(regexp);
1132     CHECK_EQ(re.error(), "");
1133     StringPiece sp1, sp2, sp3;
1134     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1135   }
1136 }
1137 
Parse3CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1138 void Parse3CachedNFA(benchmark::State& state, const char* regexp,
1139                      const StringPiece& text) {
1140   Prog* prog = GetCachedProg(regexp);
1141   StringPiece sp[4];  // 4 because sp[0] is whole match.
1142   for (auto _ : state) {
1143     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1144                           Prog::kFullMatch, sp, 4));
1145   }
1146 }
1147 
Parse3CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1148 void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
1149                          const StringPiece& text) {
1150   Prog* prog = GetCachedProg(regexp);
1151   CHECK(prog->IsOnePass());
1152   StringPiece sp[4];  // 4 because sp[0] is whole match.
1153   for (auto _ : state) {
1154     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1155   }
1156 }
1157 
Parse3CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1158 void Parse3CachedBitState(benchmark::State& state, const char* regexp,
1159                           const StringPiece& text) {
1160   Prog* prog = GetCachedProg(regexp);
1161   CHECK(prog->CanBitState());
1162   StringPiece sp[4];  // 4 because sp[0] is whole match.
1163   for (auto _ : state) {
1164     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1165   }
1166 }
1167 
Parse3CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1168 void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
1169                            const StringPiece& text) {
1170   Prog* prog = GetCachedProg(regexp);
1171   StringPiece sp[4];  // 4 because sp[0] is whole match.
1172   for (auto _ : state) {
1173     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1174   }
1175 }
1176 
Parse3CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1177 void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
1178                       const StringPiece& text) {
1179   PCRE& re = *GetCachedPCRE(regexp);
1180   StringPiece sp1, sp2, sp3;
1181   for (auto _ : state) {
1182     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1183   }
1184 }
1185 
Parse3CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1186 void Parse3CachedRE2(benchmark::State& state, const char* regexp,
1187                      const StringPiece& text) {
1188   RE2& re = *GetCachedRE2(regexp);
1189   StringPiece sp1, sp2, sp3;
1190   for (auto _ : state) {
1191     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1192   }
1193 }
1194 
1195 // Runs implementation to full match regexp against text,
1196 // extracting three submatches.  Expects match always.
1197 
Parse1NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1198 void Parse1NFA(benchmark::State& state, const char* regexp,
1199                const StringPiece& text) {
1200   for (auto _ : state) {
1201     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1202     CHECK(re);
1203     Prog* prog = re->CompileToProg(0);
1204     CHECK(prog);
1205     StringPiece sp[2];  // 2 because sp[0] is whole match.
1206     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1207                           Prog::kFullMatch, sp, 2));
1208     delete prog;
1209     re->Decref();
1210   }
1211 }
1212 
Parse1OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1213 void Parse1OnePass(benchmark::State& state, const char* regexp,
1214                    const StringPiece& text) {
1215   for (auto _ : state) {
1216     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1217     CHECK(re);
1218     Prog* prog = re->CompileToProg(0);
1219     CHECK(prog);
1220     CHECK(prog->IsOnePass());
1221     StringPiece sp[2];  // 2 because sp[0] is whole match.
1222     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1223     delete prog;
1224     re->Decref();
1225   }
1226 }
1227 
Parse1BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1228 void Parse1BitState(benchmark::State& state, const char* regexp,
1229                     const StringPiece& text) {
1230   for (auto _ : state) {
1231     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1232     CHECK(re);
1233     Prog* prog = re->CompileToProg(0);
1234     CHECK(prog);
1235     CHECK(prog->CanBitState());
1236     StringPiece sp[2];  // 2 because sp[0] is whole match.
1237     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1238     delete prog;
1239     re->Decref();
1240   }
1241 }
1242 
Parse1PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1243 void Parse1PCRE(benchmark::State& state, const char* regexp,
1244                 const StringPiece& text) {
1245   for (auto _ : state) {
1246     PCRE re(regexp, PCRE::UTF8);
1247     CHECK_EQ(re.error(), "");
1248     StringPiece sp1;
1249     CHECK(PCRE::FullMatch(text, re, &sp1));
1250   }
1251 }
1252 
Parse1RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1253 void Parse1RE2(benchmark::State& state, const char* regexp,
1254                const StringPiece& text) {
1255   for (auto _ : state) {
1256     RE2 re(regexp);
1257     CHECK_EQ(re.error(), "");
1258     StringPiece sp1;
1259     CHECK(RE2::FullMatch(text, re, &sp1));
1260   }
1261 }
1262 
Parse1CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1263 void Parse1CachedNFA(benchmark::State& state, const char* regexp,
1264                      const StringPiece& text) {
1265   Prog* prog = GetCachedProg(regexp);
1266   StringPiece sp[2];  // 2 because sp[0] is whole match.
1267   for (auto _ : state) {
1268     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1269                           Prog::kFullMatch, sp, 2));
1270   }
1271 }
1272 
Parse1CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1273 void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
1274                          const StringPiece& text) {
1275   Prog* prog = GetCachedProg(regexp);
1276   CHECK(prog->IsOnePass());
1277   StringPiece sp[2];  // 2 because sp[0] is whole match.
1278   for (auto _ : state) {
1279     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1280   }
1281 }
1282 
Parse1CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1283 void Parse1CachedBitState(benchmark::State& state, const char* regexp,
1284                           const StringPiece& text) {
1285   Prog* prog = GetCachedProg(regexp);
1286   CHECK(prog->CanBitState());
1287   StringPiece sp[2];  // 2 because sp[0] is whole match.
1288   for (auto _ : state) {
1289     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1290   }
1291 }
1292 
Parse1CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1293 void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
1294                            const StringPiece& text) {
1295   Prog* prog = GetCachedProg(regexp);
1296   StringPiece sp[2];  // 2 because sp[0] is whole match.
1297   for (auto _ : state) {
1298     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1299   }
1300 }
1301 
Parse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1302 void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
1303                       const StringPiece& text) {
1304   PCRE& re = *GetCachedPCRE(regexp);
1305   StringPiece sp1;
1306   for (auto _ : state) {
1307     CHECK(PCRE::FullMatch(text, re, &sp1));
1308   }
1309 }
1310 
Parse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1311 void Parse1CachedRE2(benchmark::State& state, const char* regexp,
1312                      const StringPiece& text) {
1313   RE2& re = *GetCachedRE2(regexp);
1314   StringPiece sp1;
1315   for (auto _ : state) {
1316     CHECK(RE2::FullMatch(text, re, &sp1));
1317   }
1318 }
1319 
SearchParse2CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1320 void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
1321                             const StringPiece& text) {
1322   PCRE& re = *GetCachedPCRE(regexp);
1323   for (auto _ : state) {
1324     StringPiece sp1, sp2;
1325     CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1326   }
1327 }
1328 
SearchParse2CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1329 void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
1330                            const StringPiece& text) {
1331   RE2& re = *GetCachedRE2(regexp);
1332   for (auto _ : state) {
1333     StringPiece sp1, sp2;
1334     CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1335   }
1336 }
1337 
SearchParse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1338 void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
1339                             const StringPiece& text) {
1340   PCRE& re = *GetCachedPCRE(regexp);
1341   for (auto _ : state) {
1342     StringPiece sp1;
1343     CHECK(PCRE::PartialMatch(text, re, &sp1));
1344   }
1345 }
1346 
SearchParse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1347 void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
1348                            const StringPiece& text) {
1349   RE2& re = *GetCachedRE2(regexp);
1350   for (auto _ : state) {
1351     StringPiece sp1;
1352     CHECK(RE2::PartialMatch(text, re, &sp1));
1353   }
1354 }
1355 
EmptyPartialMatchPCRE(benchmark::State & state)1356 void EmptyPartialMatchPCRE(benchmark::State& state) {
1357   PCRE re("");
1358   for (auto _ : state) {
1359     PCRE::PartialMatch("", re);
1360   }
1361 }
1362 
EmptyPartialMatchRE2(benchmark::State & state)1363 void EmptyPartialMatchRE2(benchmark::State& state) {
1364   RE2 re("");
1365   for (auto _ : state) {
1366     RE2::PartialMatch("", re);
1367   }
1368 }
1369 #ifdef USEPCRE
1370 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1371 #endif
1372 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1373 
SimplePartialMatchPCRE(benchmark::State & state)1374 void SimplePartialMatchPCRE(benchmark::State& state) {
1375   PCRE re("abcdefg");
1376   for (auto _ : state) {
1377     PCRE::PartialMatch("abcdefg", re);
1378   }
1379 }
1380 
SimplePartialMatchRE2(benchmark::State & state)1381 void SimplePartialMatchRE2(benchmark::State& state) {
1382   RE2 re("abcdefg");
1383   for (auto _ : state) {
1384     RE2::PartialMatch("abcdefg", re);
1385   }
1386 }
1387 #ifdef USEPCRE
1388 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1389 #endif
1390 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1391 
1392 static std::string http_text =
1393   "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1394   "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1395 
HTTPPartialMatchPCRE(benchmark::State & state)1396 void HTTPPartialMatchPCRE(benchmark::State& state) {
1397   StringPiece a;
1398   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1399   for (auto _ : state) {
1400     PCRE::PartialMatch(http_text, re, &a);
1401   }
1402 }
1403 
HTTPPartialMatchRE2(benchmark::State & state)1404 void HTTPPartialMatchRE2(benchmark::State& state) {
1405   StringPiece a;
1406   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1407   for (auto _ : state) {
1408     RE2::PartialMatch(http_text, re, &a);
1409   }
1410 }
1411 
1412 #ifdef USEPCRE
1413 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1414 #endif
1415 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1416 
1417 static std::string smallhttp_text =
1418   "GET /abc HTTP/1.1";
1419 
SmallHTTPPartialMatchPCRE(benchmark::State & state)1420 void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
1421   StringPiece a;
1422   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1423   for (auto _ : state) {
1424     PCRE::PartialMatch(smallhttp_text, re, &a);
1425   }
1426 }
1427 
SmallHTTPPartialMatchRE2(benchmark::State & state)1428 void SmallHTTPPartialMatchRE2(benchmark::State& state) {
1429   StringPiece a;
1430   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1431   for (auto _ : state) {
1432     RE2::PartialMatch(smallhttp_text, re, &a);
1433   }
1434 }
1435 
1436 #ifdef USEPCRE
1437 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1438 #endif
1439 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1440 
DotMatchPCRE(benchmark::State & state)1441 void DotMatchPCRE(benchmark::State& state) {
1442   StringPiece a;
1443   PCRE re("(?-s)^(.+)");
1444   for (auto _ : state) {
1445     PCRE::PartialMatch(http_text, re, &a);
1446   }
1447 }
1448 
DotMatchRE2(benchmark::State & state)1449 void DotMatchRE2(benchmark::State& state) {
1450   StringPiece a;
1451   RE2 re("(?-s)^(.+)");
1452   for (auto _ : state) {
1453     RE2::PartialMatch(http_text, re, &a);
1454   }
1455 }
1456 
1457 #ifdef USEPCRE
1458 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1459 #endif
1460 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1461 
ASCIIMatchPCRE(benchmark::State & state)1462 void ASCIIMatchPCRE(benchmark::State& state) {
1463   StringPiece a;
1464   PCRE re("(?-s)^([ -~]+)");
1465   for (auto _ : state) {
1466     PCRE::PartialMatch(http_text, re, &a);
1467   }
1468 }
1469 
ASCIIMatchRE2(benchmark::State & state)1470 void ASCIIMatchRE2(benchmark::State& state) {
1471   StringPiece a;
1472   RE2 re("(?-s)^([ -~]+)");
1473   for (auto _ : state) {
1474     RE2::PartialMatch(http_text, re, &a);
1475   }
1476 }
1477 
1478 #ifdef USEPCRE
1479 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1480 #endif
1481 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1482 
FullMatchPCRE(benchmark::State & state,const char * regexp)1483 void FullMatchPCRE(benchmark::State& state, const char *regexp) {
1484   std::string s = RandomText(state.range(0));
1485   s += "ABCDEFGHIJ";
1486   PCRE re(regexp);
1487   for (auto _ : state) {
1488     CHECK(PCRE::FullMatch(s, re));
1489   }
1490   state.SetBytesProcessed(state.iterations() * state.range(0));
1491 }
1492 
FullMatchRE2(benchmark::State & state,const char * regexp)1493 void FullMatchRE2(benchmark::State& state, const char *regexp) {
1494   std::string s = RandomText(state.range(0));
1495   s += "ABCDEFGHIJ";
1496   RE2 re(regexp, RE2::Latin1);
1497   for (auto _ : state) {
1498     CHECK(RE2::FullMatch(s, re));
1499   }
1500   state.SetBytesProcessed(state.iterations() * state.range(0));
1501 }
1502 
FullMatch_DotStar_CachedPCRE(benchmark::State & state)1503 void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
FullMatch_DotStar_CachedRE2(benchmark::State & state)1504 void FullMatch_DotStar_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s).*"); }
1505 
FullMatch_DotStarDollar_CachedPCRE(benchmark::State & state)1506 void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(benchmark::State & state)1507 void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s).*$"); }
1508 
FullMatch_DotStarCapture_CachedPCRE(benchmark::State & state)1509 void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(benchmark::State & state)1510 void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s)((.*)()()($))"); }
1511 
1512 #ifdef USEPCRE
1513 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1514 #endif
1515 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2,  8, 2<<20);
1516 
1517 #ifdef USEPCRE
1518 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1519 #endif
1520 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2,  8, 2<<20);
1521 
1522 #ifdef USEPCRE
1523 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1524 #endif
1525 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2,  8, 2<<20);
1526 
PossibleMatchRangeCommon(benchmark::State & state,const char * regexp)1527 void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
1528   RE2 re(regexp);
1529   std::string min;
1530   std::string max;
1531   const int kMaxLen = 16;
1532   for (auto _ : state) {
1533     CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1534   }
1535 }
1536 
PossibleMatchRange_Trivial(benchmark::State & state)1537 void PossibleMatchRange_Trivial(benchmark::State& state) {
1538   PossibleMatchRangeCommon(state, ".*");
1539 }
PossibleMatchRange_Complex(benchmark::State & state)1540 void PossibleMatchRange_Complex(benchmark::State& state) {
1541   PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*");
1542 }
PossibleMatchRange_Prefix(benchmark::State & state)1543 void PossibleMatchRange_Prefix(benchmark::State& state) {
1544   PossibleMatchRangeCommon(state, "^some_random_prefix.*");
1545 }
PossibleMatchRange_NoProg(benchmark::State & state)1546 void PossibleMatchRange_NoProg(benchmark::State& state) {
1547   PossibleMatchRangeCommon(state, "^some_random_string$");
1548 }
1549 
1550 BENCHMARK(PossibleMatchRange_Trivial);
1551 BENCHMARK(PossibleMatchRange_Complex);
1552 BENCHMARK(PossibleMatchRange_Prefix);
1553 BENCHMARK(PossibleMatchRange_NoProg);
1554 
1555 }  // namespace re2
1556