1 // Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Benchmarks for regular expression implementations.
6 
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string>
11 #include <thread>
12 #include <utility>
13 
14 #include "util/benchmark.h"
15 #include "util/test.h"
16 #include "util/flags.h"
17 #include "util/logging.h"
18 #include "util/malloc_counter.h"
19 #include "util/strutil.h"
20 #include "re2/prog.h"
21 #include "re2/re2.h"
22 #include "re2/regexp.h"
23 #include "util/pcre.h"
24 
25 namespace re2 {
26 void Test();
27 void MemoryUsage();
28 }  // namespace re2
29 
30 typedef testing::MallocCounter MallocCounter;
31 
32 namespace re2 {
33 
Test()34 void Test() {
35   Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
36   CHECK(re);
37   Prog* prog = re->CompileToProg(0);
38   CHECK(prog);
39   CHECK(prog->IsOnePass());
40   CHECK(prog->CanBitState());
41   const char* text = "650-253-0001";
42   StringPiece sp[4];
43   CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
44   CHECK_EQ(sp[0], "650-253-0001");
45   CHECK_EQ(sp[1], "650");
46   CHECK_EQ(sp[2], "253");
47   CHECK_EQ(sp[3], "0001");
48   delete prog;
49   re->Decref();
50   LOG(INFO) << "test passed\n";
51 }
52 
MemoryUsage()53 void MemoryUsage() {
54   const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
55   const char* text = "650-253-0001";
56   {
57     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
58     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
59     CHECK(re);
60     // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
61     // because LOG(INFO) might do a big allocation before they get evaluated.
62     fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
63             mc.HeapGrowth(), mc.PeakHeapGrowth());
64     mc.Reset();
65 
66     Prog* prog = re->CompileToProg(0);
67     CHECK(prog);
68     CHECK(prog->IsOnePass());
69     CHECK(prog->CanBitState());
70     fprintf(stderr, "Prog:   %7lld bytes (peak=%lld)\n",
71             mc.HeapGrowth(), mc.PeakHeapGrowth());
72     mc.Reset();
73 
74     StringPiece sp[4];
75     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
76     fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
77             mc.HeapGrowth(), mc.PeakHeapGrowth());
78     delete prog;
79     re->Decref();
80   }
81 
82   {
83     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
84 
85     PCRE re(regexp, PCRE::UTF8);
86     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n",
87             mc.HeapGrowth(), mc.PeakHeapGrowth());
88     PCRE::FullMatch(text, re);
89     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n",
90             mc.HeapGrowth(), mc.PeakHeapGrowth());
91   }
92 
93   {
94     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
95 
96     PCRE* re = new PCRE(regexp, PCRE::UTF8);
97     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n",
98             mc.HeapGrowth(), mc.PeakHeapGrowth());
99     PCRE::FullMatch(text, *re);
100     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n",
101             mc.HeapGrowth(), mc.PeakHeapGrowth());
102     delete re;
103   }
104 
105   {
106     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
107 
108     RE2 re(regexp);
109     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n",
110             mc.HeapGrowth(), mc.PeakHeapGrowth());
111     RE2::FullMatch(text, re);
112     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n",
113             mc.HeapGrowth(), mc.PeakHeapGrowth());
114   }
115 
116   fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
117           sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
118 }
119 
NumCPUs()120 int NumCPUs() {
121   return static_cast<int>(std::thread::hardware_concurrency());
122 }
123 
124 // Regular expression implementation wrappers.
125 // Defined at bottom of file, but they are repetitive
126 // and not interesting.
127 
128 typedef void SearchImpl(benchmark::State& state, const char* regexp,
129                         const StringPiece& text, Prog::Anchor anchor,
130                         bool expect_match);
131 
132 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
133     SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass,
134     SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
135 
136 typedef void ParseImpl(benchmark::State& state, const char* regexp,
137                        const StringPiece& text);
138 
139 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
140     Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
141     Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack;
142 
143 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2,
144     Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
145     Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack;
146 
147 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
148 
149 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
150 
151 // Benchmark: failed search for regexp in random text.
152 
153 // Generate random text that won't contain the search string,
154 // to test worst-case search behavior.
MakeText(std::string * text,int64_t nbytes)155 void MakeText(std::string* text, int64_t nbytes) {
156   srand(1);
157   text->resize(nbytes);
158   for (int64_t i = 0; i < nbytes; i++) {
159     // Generate a one-byte rune that isn't a control character (e.g. '\n').
160     // Clipping to 0x20 introduces some bias, but we don't need uniformity.
161     int byte = rand() & 0x7F;
162     if (byte < 0x20)
163       byte = 0x20;
164     (*text)[i] = byte;
165   }
166 }
167 
168 // Makes text of size nbytes, then calls run to search
169 // the text for regexp iters times.
Search(benchmark::State & state,const char * regexp,SearchImpl * search)170 void Search(benchmark::State& state, const char* regexp, SearchImpl* search) {
171   std::string s;
172   MakeText(&s, state.range(0));
173   search(state, regexp, s, Prog::kUnanchored, false);
174   state.SetBytesProcessed(state.iterations() * state.range(0));
175 }
176 
177 // These two are easy because they start with an A,
178 // giving the search loop something to memchr for.
179 #define EASY0      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
180 #define EASY1      "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
181 
182 // This is a little harder, since it starts with a character class
183 // and thus can't be memchr'ed.  Could look for ABC and work backward,
184 // but no one does that.
185 #define MEDIUM     "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
186 
187 // This is a fair amount harder, because of the leading [ -~]*.
188 // A bad backtracking implementation will take O(text^2) time to
189 // figure out there's no match.
190 #define HARD       "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
191 
192 // This has quite a high degree of fanout.
193 // NFA execution will be particularly slow.
194 #define FANOUT     "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
195 
196 // This stresses engines that are trying to track parentheses.
197 #define PARENS     "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
198                    "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
199 
Search_Easy0_CachedDFA(benchmark::State & state)200 void Search_Easy0_CachedDFA(benchmark::State& state)     { Search(state, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(benchmark::State & state)201 void Search_Easy0_CachedNFA(benchmark::State& state)     { Search(state, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(benchmark::State & state)202 void Search_Easy0_CachedPCRE(benchmark::State& state)    { Search(state, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(benchmark::State & state)203 void Search_Easy0_CachedRE2(benchmark::State& state)     { Search(state, EASY0, SearchCachedRE2); }
204 
205 BENCHMARK_RANGE(Search_Easy0_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
206 BENCHMARK_RANGE(Search_Easy0_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
207 #ifdef USEPCRE
208 BENCHMARK_RANGE(Search_Easy0_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
209 #endif
210 BENCHMARK_RANGE(Search_Easy0_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
211 
Search_Easy1_CachedDFA(benchmark::State & state)212 void Search_Easy1_CachedDFA(benchmark::State& state)     { Search(state, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(benchmark::State & state)213 void Search_Easy1_CachedNFA(benchmark::State& state)     { Search(state, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(benchmark::State & state)214 void Search_Easy1_CachedPCRE(benchmark::State& state)    { Search(state, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(benchmark::State & state)215 void Search_Easy1_CachedRE2(benchmark::State& state)     { Search(state, EASY1, SearchCachedRE2); }
216 
217 BENCHMARK_RANGE(Search_Easy1_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
218 BENCHMARK_RANGE(Search_Easy1_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
219 #ifdef USEPCRE
220 BENCHMARK_RANGE(Search_Easy1_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
221 #endif
222 BENCHMARK_RANGE(Search_Easy1_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
223 
Search_Medium_CachedDFA(benchmark::State & state)224 void Search_Medium_CachedDFA(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(benchmark::State & state)225 void Search_Medium_CachedNFA(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(benchmark::State & state)226 void Search_Medium_CachedPCRE(benchmark::State& state)    { Search(state, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(benchmark::State & state)227 void Search_Medium_CachedRE2(benchmark::State& state)     { Search(state, MEDIUM, SearchCachedRE2); }
228 
229 BENCHMARK_RANGE(Search_Medium_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
230 BENCHMARK_RANGE(Search_Medium_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
231 #ifdef USEPCRE
232 BENCHMARK_RANGE(Search_Medium_CachedPCRE,    8, 256<<10)->ThreadRange(1, NumCPUs());
233 #endif
234 BENCHMARK_RANGE(Search_Medium_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
235 
Search_Hard_CachedDFA(benchmark::State & state)236 void Search_Hard_CachedDFA(benchmark::State& state)     { Search(state, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(benchmark::State & state)237 void Search_Hard_CachedNFA(benchmark::State& state)     { Search(state, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(benchmark::State & state)238 void Search_Hard_CachedPCRE(benchmark::State& state)    { Search(state, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(benchmark::State & state)239 void Search_Hard_CachedRE2(benchmark::State& state)     { Search(state, HARD, SearchCachedRE2); }
240 
241 BENCHMARK_RANGE(Search_Hard_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
242 BENCHMARK_RANGE(Search_Hard_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
243 #ifdef USEPCRE
244 BENCHMARK_RANGE(Search_Hard_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
245 #endif
246 BENCHMARK_RANGE(Search_Hard_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
247 
Search_Fanout_CachedDFA(benchmark::State & state)248 void Search_Fanout_CachedDFA(benchmark::State& state)     { Search(state, FANOUT, SearchCachedDFA); }
Search_Fanout_CachedNFA(benchmark::State & state)249 void Search_Fanout_CachedNFA(benchmark::State& state)     { Search(state, FANOUT, SearchCachedNFA); }
Search_Fanout_CachedPCRE(benchmark::State & state)250 void Search_Fanout_CachedPCRE(benchmark::State& state)    { Search(state, FANOUT, SearchCachedPCRE); }
Search_Fanout_CachedRE2(benchmark::State & state)251 void Search_Fanout_CachedRE2(benchmark::State& state)     { Search(state, FANOUT, SearchCachedRE2); }
252 
253 BENCHMARK_RANGE(Search_Fanout_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
254 BENCHMARK_RANGE(Search_Fanout_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
255 #ifdef USEPCRE
256 BENCHMARK_RANGE(Search_Fanout_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
257 #endif
258 BENCHMARK_RANGE(Search_Fanout_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
259 
Search_Parens_CachedDFA(benchmark::State & state)260 void Search_Parens_CachedDFA(benchmark::State& state)     { Search(state, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(benchmark::State & state)261 void Search_Parens_CachedNFA(benchmark::State& state)     { Search(state, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(benchmark::State & state)262 void Search_Parens_CachedPCRE(benchmark::State& state)    { Search(state, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(benchmark::State & state)263 void Search_Parens_CachedRE2(benchmark::State& state)     { Search(state, PARENS, SearchCachedRE2); }
264 
265 BENCHMARK_RANGE(Search_Parens_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
266 BENCHMARK_RANGE(Search_Parens_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
267 #ifdef USEPCRE
268 BENCHMARK_RANGE(Search_Parens_CachedPCRE,    8, 8)->ThreadRange(1, NumCPUs());
269 #endif
270 BENCHMARK_RANGE(Search_Parens_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
271 
SearchBigFixed(benchmark::State & state,SearchImpl * search)272 void SearchBigFixed(benchmark::State& state, SearchImpl* search) {
273   std::string s;
274   s.append(state.range(0)/2, 'x');
275   std::string regexp = "^" + s + ".*$";
276   std::string t;
277   MakeText(&t, state.range(0)/2);
278   s += t;
279   search(state, regexp.c_str(), s, Prog::kUnanchored, true);
280   state.SetBytesProcessed(state.iterations() * state.range(0));
281 }
282 
Search_BigFixed_CachedDFA(benchmark::State & state)283 void Search_BigFixed_CachedDFA(benchmark::State& state)     { SearchBigFixed(state, SearchCachedDFA); }
Search_BigFixed_CachedNFA(benchmark::State & state)284 void Search_BigFixed_CachedNFA(benchmark::State& state)     { SearchBigFixed(state, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(benchmark::State & state)285 void Search_BigFixed_CachedPCRE(benchmark::State& state)    { SearchBigFixed(state, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(benchmark::State & state)286 void Search_BigFixed_CachedRE2(benchmark::State& state)     { SearchBigFixed(state, SearchCachedRE2); }
287 
288 BENCHMARK_RANGE(Search_BigFixed_CachedDFA,     8, 1<<20)->ThreadRange(1, NumCPUs());
289 BENCHMARK_RANGE(Search_BigFixed_CachedNFA,     8, 32<<10)->ThreadRange(1, NumCPUs());
290 #ifdef USEPCRE
291 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE,    8, 32<<10)->ThreadRange(1, NumCPUs());
292 #endif
293 BENCHMARK_RANGE(Search_BigFixed_CachedRE2,     8, 1<<20)->ThreadRange(1, NumCPUs());
294 
295 // Benchmark: FindAndConsume
296 
FindAndConsume(benchmark::State & state)297 void FindAndConsume(benchmark::State& state) {
298   std::string s;
299   MakeText(&s, state.range(0));
300   s.append("Hello World");
301   RE2 re("((Hello World))");
302   for (auto _ : state) {
303     StringPiece t = s;
304     StringPiece u;
305     CHECK(RE2::FindAndConsume(&t, re, &u));
306     CHECK_EQ(u, "Hello World");
307   }
308   state.SetBytesProcessed(state.iterations() * state.range(0));
309 }
310 
311 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
312 
313 // Benchmark: successful anchored search.
314 
SearchSuccess(benchmark::State & state,const char * regexp,SearchImpl * search)315 void SearchSuccess(benchmark::State& state, const char* regexp,
316                    SearchImpl* search) {
317   std::string s;
318   MakeText(&s, state.range(0));
319   search(state, regexp, s, Prog::kAnchored, true);
320   state.SetBytesProcessed(state.iterations() * state.range(0));
321 }
322 
323 // Unambiguous search (RE2 can use OnePass).
324 
Search_Success_DFA(benchmark::State & state)325 void Search_Success_DFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchDFA); }
Search_Success_NFA(benchmark::State & state)326 void Search_Success_NFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchNFA); }
Search_Success_PCRE(benchmark::State & state)327 void Search_Success_PCRE(benchmark::State& state)    { SearchSuccess(state, ".*$", SearchPCRE); }
Search_Success_RE2(benchmark::State & state)328 void Search_Success_RE2(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchRE2); }
Search_Success_OnePass(benchmark::State & state)329 void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchOnePass); }
330 
331 BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
332 BENCHMARK_RANGE(Search_Success_NFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
333 #ifdef USEPCRE
334 BENCHMARK_RANGE(Search_Success_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
335 #endif
336 BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
337 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
338 
Search_Success_CachedDFA(benchmark::State & state)339 void Search_Success_CachedDFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedDFA); }
Search_Success_CachedNFA(benchmark::State & state)340 void Search_Success_CachedNFA(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedNFA); }
Search_Success_CachedPCRE(benchmark::State & state)341 void Search_Success_CachedPCRE(benchmark::State& state)    { SearchSuccess(state, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(benchmark::State & state)342 void Search_Success_CachedRE2(benchmark::State& state)     { SearchSuccess(state, ".*$", SearchCachedRE2); }
Search_Success_CachedOnePass(benchmark::State & state)343 void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedOnePass); }
344 
345 BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
346 BENCHMARK_RANGE(Search_Success_CachedNFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
347 #ifdef USEPCRE
348 BENCHMARK_RANGE(Search_Success_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
349 #endif
350 BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
351 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
352 
353 // Ambiguous search (RE2 cannot use OnePass).
354 // Used to be ".*.$", but that is coalesced to ".+$" these days.
355 
Search_Success1_DFA(benchmark::State & state)356 void Search_Success1_DFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchDFA); }
Search_Success1_NFA(benchmark::State & state)357 void Search_Success1_NFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchNFA); }
Search_Success1_PCRE(benchmark::State & state)358 void Search_Success1_PCRE(benchmark::State& state)     { SearchSuccess(state, ".*\\C$", SearchPCRE); }
Search_Success1_RE2(benchmark::State & state)359 void Search_Success1_RE2(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchRE2); }
Search_Success1_BitState(benchmark::State & state)360 void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchBitState); }
361 
362 BENCHMARK_RANGE(Search_Success1_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
363 BENCHMARK_RANGE(Search_Success1_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
364 #ifdef USEPCRE
365 BENCHMARK_RANGE(Search_Success1_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
366 #endif
367 BENCHMARK_RANGE(Search_Success1_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
368 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
369 
Search_Success1_CachedDFA(benchmark::State & state)370 void Search_Success1_CachedDFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedDFA); }
Search_Success1_CachedNFA(benchmark::State & state)371 void Search_Success1_CachedNFA(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedNFA); }
Search_Success1_CachedPCRE(benchmark::State & state)372 void Search_Success1_CachedPCRE(benchmark::State& state)     { SearchSuccess(state, ".*\\C$", SearchCachedPCRE); }
Search_Success1_CachedRE2(benchmark::State & state)373 void Search_Success1_CachedRE2(benchmark::State& state)      { SearchSuccess(state, ".*\\C$", SearchCachedRE2); }
Search_Success1_CachedBitState(benchmark::State & state)374 void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedBitState); }
375 
376 BENCHMARK_RANGE(Search_Success1_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
377 BENCHMARK_RANGE(Search_Success1_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
378 #ifdef USEPCRE
379 BENCHMARK_RANGE(Search_Success1_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
380 #endif
381 BENCHMARK_RANGE(Search_Success1_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
382 BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
383 
384 // Benchmark: AltMatch optimisation (just to verify that it works)
385 // Note that OnePass doesn't implement it!
386 
SearchAltMatch(benchmark::State & state,SearchImpl * search)387 void SearchAltMatch(benchmark::State& state, SearchImpl* search) {
388   std::string s;
389   MakeText(&s, state.range(0));
390   search(state, "\\C*", s, Prog::kAnchored, true);
391   state.SetBytesProcessed(state.iterations() * state.range(0));
392 }
393 
Search_AltMatch_DFA(benchmark::State & state)394 void Search_AltMatch_DFA(benchmark::State& state)      { SearchAltMatch(state, SearchDFA); }
Search_AltMatch_NFA(benchmark::State & state)395 void Search_AltMatch_NFA(benchmark::State& state)      { SearchAltMatch(state, SearchNFA); }
Search_AltMatch_OnePass(benchmark::State & state)396 void Search_AltMatch_OnePass(benchmark::State& state)  { SearchAltMatch(state, SearchOnePass); }
Search_AltMatch_BitState(benchmark::State & state)397 void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); }
Search_AltMatch_PCRE(benchmark::State & state)398 void Search_AltMatch_PCRE(benchmark::State& state)     { SearchAltMatch(state, SearchPCRE); }
Search_AltMatch_RE2(benchmark::State & state)399 void Search_AltMatch_RE2(benchmark::State& state)      { SearchAltMatch(state, SearchRE2); }
400 
401 BENCHMARK_RANGE(Search_AltMatch_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
402 BENCHMARK_RANGE(Search_AltMatch_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
403 BENCHMARK_RANGE(Search_AltMatch_OnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
404 BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
405 #ifdef USEPCRE
406 BENCHMARK_RANGE(Search_AltMatch_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
407 #endif
408 BENCHMARK_RANGE(Search_AltMatch_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
409 
Search_AltMatch_CachedDFA(benchmark::State & state)410 void Search_AltMatch_CachedDFA(benchmark::State& state)      { SearchAltMatch(state, SearchCachedDFA); }
Search_AltMatch_CachedNFA(benchmark::State & state)411 void Search_AltMatch_CachedNFA(benchmark::State& state)      { SearchAltMatch(state, SearchCachedNFA); }
Search_AltMatch_CachedOnePass(benchmark::State & state)412 void Search_AltMatch_CachedOnePass(benchmark::State& state)  { SearchAltMatch(state, SearchCachedOnePass); }
Search_AltMatch_CachedBitState(benchmark::State & state)413 void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); }
Search_AltMatch_CachedPCRE(benchmark::State & state)414 void Search_AltMatch_CachedPCRE(benchmark::State& state)     { SearchAltMatch(state, SearchCachedPCRE); }
Search_AltMatch_CachedRE2(benchmark::State & state)415 void Search_AltMatch_CachedRE2(benchmark::State& state)      { SearchAltMatch(state, SearchCachedRE2); }
416 
417 BENCHMARK_RANGE(Search_AltMatch_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
418 BENCHMARK_RANGE(Search_AltMatch_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
419 BENCHMARK_RANGE(Search_AltMatch_CachedOnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
420 BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
421 #ifdef USEPCRE
422 BENCHMARK_RANGE(Search_AltMatch_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
423 #endif
424 BENCHMARK_RANGE(Search_AltMatch_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
425 
426 // Benchmark: use regexp to find phone number.
427 
SearchDigits(benchmark::State & state,SearchImpl * search)428 void SearchDigits(benchmark::State& state, SearchImpl* search) {
429   StringPiece s("650-253-0001");
430   search(state, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
431   state.SetItemsProcessed(state.iterations());
432 }
433 
Search_Digits_DFA(benchmark::State & state)434 void Search_Digits_DFA(benchmark::State& state)         { SearchDigits(state, SearchDFA); }
Search_Digits_NFA(benchmark::State & state)435 void Search_Digits_NFA(benchmark::State& state)         { SearchDigits(state, SearchNFA); }
Search_Digits_OnePass(benchmark::State & state)436 void Search_Digits_OnePass(benchmark::State& state)     { SearchDigits(state, SearchOnePass); }
Search_Digits_PCRE(benchmark::State & state)437 void Search_Digits_PCRE(benchmark::State& state)        { SearchDigits(state, SearchPCRE); }
Search_Digits_RE2(benchmark::State & state)438 void Search_Digits_RE2(benchmark::State& state)         { SearchDigits(state, SearchRE2); }
Search_Digits_BitState(benchmark::State & state)439 void Search_Digits_BitState(benchmark::State& state)    { SearchDigits(state, SearchBitState); }
440 
441 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
442 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
443 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
444 #ifdef USEPCRE
445 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
446 #endif
447 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
448 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
449 
450 // Benchmark: use regexp to parse digit fields in phone number.
451 
Parse3Digits(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))452 void Parse3Digits(benchmark::State& state,
453                   void (*parse3)(benchmark::State&, const char*,
454                                  const StringPiece&)) {
455   parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
456   state.SetItemsProcessed(state.iterations());
457 }
458 
Parse_Digits_NFA(benchmark::State & state)459 void Parse_Digits_NFA(benchmark::State& state)         { Parse3Digits(state, Parse3NFA); }
Parse_Digits_OnePass(benchmark::State & state)460 void Parse_Digits_OnePass(benchmark::State& state)     { Parse3Digits(state, Parse3OnePass); }
Parse_Digits_PCRE(benchmark::State & state)461 void Parse_Digits_PCRE(benchmark::State& state)        { Parse3Digits(state, Parse3PCRE); }
Parse_Digits_RE2(benchmark::State & state)462 void Parse_Digits_RE2(benchmark::State& state)         { Parse3Digits(state, Parse3RE2); }
Parse_Digits_Backtrack(benchmark::State & state)463 void Parse_Digits_Backtrack(benchmark::State& state)   { Parse3Digits(state, Parse3Backtrack); }
Parse_Digits_BitState(benchmark::State & state)464 void Parse_Digits_BitState(benchmark::State& state)    { Parse3Digits(state, Parse3BitState); }
465 
466 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
467 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
468 #ifdef USEPCRE
469 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
470 #endif
471 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
472 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
473 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
474 
Parse_CachedDigits_NFA(benchmark::State & state)475 void Parse_CachedDigits_NFA(benchmark::State& state)         { Parse3Digits(state, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(benchmark::State & state)476 void Parse_CachedDigits_OnePass(benchmark::State& state)     { Parse3Digits(state, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(benchmark::State & state)477 void Parse_CachedDigits_PCRE(benchmark::State& state)        { Parse3Digits(state, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(benchmark::State & state)478 void Parse_CachedDigits_RE2(benchmark::State& state)         { Parse3Digits(state, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(benchmark::State & state)479 void Parse_CachedDigits_Backtrack(benchmark::State& state)   { Parse3Digits(state, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(benchmark::State & state)480 void Parse_CachedDigits_BitState(benchmark::State& state)    { Parse3Digits(state, Parse3CachedBitState); }
481 
482 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
483 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
484 #ifdef USEPCRE
485 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
486 #endif
487 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
488 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
489 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
490 
Parse3DigitDs(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))491 void Parse3DigitDs(benchmark::State& state,
492                    void (*parse3)(benchmark::State&, const char*,
493                                   const StringPiece&)) {
494   parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
495   state.SetItemsProcessed(state.iterations());
496 }
497 
Parse_DigitDs_NFA(benchmark::State & state)498 void Parse_DigitDs_NFA(benchmark::State& state)         { Parse3DigitDs(state, Parse3NFA); }
Parse_DigitDs_OnePass(benchmark::State & state)499 void Parse_DigitDs_OnePass(benchmark::State& state)     { Parse3DigitDs(state, Parse3OnePass); }
Parse_DigitDs_PCRE(benchmark::State & state)500 void Parse_DigitDs_PCRE(benchmark::State& state)        { Parse3DigitDs(state, Parse3PCRE); }
Parse_DigitDs_RE2(benchmark::State & state)501 void Parse_DigitDs_RE2(benchmark::State& state)         { Parse3DigitDs(state, Parse3RE2); }
Parse_DigitDs_Backtrack(benchmark::State & state)502 void Parse_DigitDs_Backtrack(benchmark::State& state)   { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(benchmark::State & state)503 void Parse_DigitDs_BitState(benchmark::State& state)    { Parse3DigitDs(state, Parse3CachedBitState); }
504 
505 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
506 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
507 #ifdef USEPCRE
508 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
509 #endif
510 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
511 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
512 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
513 
Parse_CachedDigitDs_NFA(benchmark::State & state)514 void Parse_CachedDigitDs_NFA(benchmark::State& state)         { Parse3DigitDs(state, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(benchmark::State & state)515 void Parse_CachedDigitDs_OnePass(benchmark::State& state)     { Parse3DigitDs(state, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(benchmark::State & state)516 void Parse_CachedDigitDs_PCRE(benchmark::State& state)        { Parse3DigitDs(state, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(benchmark::State & state)517 void Parse_CachedDigitDs_RE2(benchmark::State& state)         { Parse3DigitDs(state, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(benchmark::State & state)518 void Parse_CachedDigitDs_Backtrack(benchmark::State& state)   { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(benchmark::State & state)519 void Parse_CachedDigitDs_BitState(benchmark::State& state)    { Parse3DigitDs(state, Parse3CachedBitState); }
520 
521 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
522 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
523 #ifdef USEPCRE
524 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
525 #endif
526 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
527 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
528 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
529 
530 // Benchmark: splitting off leading number field.
531 
Parse1Split(benchmark::State & state,void (* parse1)(benchmark::State &,const char *,const StringPiece &))532 void Parse1Split(benchmark::State& state,
533                  void (*parse1)(benchmark::State&, const char*,
534                                 const StringPiece&)) {
535   parse1(state, "[0-9]+-(.*)", "650-253-0001");
536   state.SetItemsProcessed(state.iterations());
537 }
538 
Parse_Split_NFA(benchmark::State & state)539 void Parse_Split_NFA(benchmark::State& state)         { Parse1Split(state, Parse1NFA); }
Parse_Split_OnePass(benchmark::State & state)540 void Parse_Split_OnePass(benchmark::State& state)     { Parse1Split(state, Parse1OnePass); }
Parse_Split_PCRE(benchmark::State & state)541 void Parse_Split_PCRE(benchmark::State& state)        { Parse1Split(state, Parse1PCRE); }
Parse_Split_RE2(benchmark::State & state)542 void Parse_Split_RE2(benchmark::State& state)         { Parse1Split(state, Parse1RE2); }
Parse_Split_BitState(benchmark::State & state)543 void Parse_Split_BitState(benchmark::State& state)    { Parse1Split(state, Parse1BitState); }
544 
545 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
546 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
547 #ifdef USEPCRE
548 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
549 #endif
550 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
551 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
552 
Parse_CachedSplit_NFA(benchmark::State & state)553 void Parse_CachedSplit_NFA(benchmark::State& state)         { Parse1Split(state, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(benchmark::State & state)554 void Parse_CachedSplit_OnePass(benchmark::State& state)     { Parse1Split(state, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(benchmark::State & state)555 void Parse_CachedSplit_PCRE(benchmark::State& state)        { Parse1Split(state, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(benchmark::State & state)556 void Parse_CachedSplit_RE2(benchmark::State& state)         { Parse1Split(state, Parse1CachedRE2); }
Parse_CachedSplit_BitState(benchmark::State & state)557 void Parse_CachedSplit_BitState(benchmark::State& state)    { Parse1Split(state, Parse1CachedBitState); }
558 
559 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
560 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
561 #ifdef USEPCRE
562 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
563 #endif
564 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
565 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
566 
567 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
568 
Parse1SplitHard(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))569 void Parse1SplitHard(benchmark::State& state,
570                      void (*run)(benchmark::State&, const char*,
571                                  const StringPiece&)) {
572   run(state, "[0-9]+.(.*)", "650-253-0001");
573   state.SetItemsProcessed(state.iterations());
574 }
575 
Parse_SplitHard_NFA(benchmark::State & state)576 void Parse_SplitHard_NFA(benchmark::State& state)         { Parse1SplitHard(state, Parse1NFA); }
Parse_SplitHard_PCRE(benchmark::State & state)577 void Parse_SplitHard_PCRE(benchmark::State& state)        { Parse1SplitHard(state, Parse1PCRE); }
Parse_SplitHard_RE2(benchmark::State & state)578 void Parse_SplitHard_RE2(benchmark::State& state)         { Parse1SplitHard(state, Parse1RE2); }
Parse_SplitHard_BitState(benchmark::State & state)579 void Parse_SplitHard_BitState(benchmark::State& state)    { Parse1SplitHard(state, Parse1BitState); }
580 
581 #ifdef USEPCRE
582 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
583 #endif
584 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
585 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
586 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
587 
Parse_CachedSplitHard_NFA(benchmark::State & state)588 void Parse_CachedSplitHard_NFA(benchmark::State& state)       { Parse1SplitHard(state, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(benchmark::State & state)589 void Parse_CachedSplitHard_PCRE(benchmark::State& state)      { Parse1SplitHard(state, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(benchmark::State & state)590 void Parse_CachedSplitHard_RE2(benchmark::State& state)       { Parse1SplitHard(state, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(benchmark::State & state)591 void Parse_CachedSplitHard_BitState(benchmark::State& state)  { Parse1SplitHard(state, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(benchmark::State & state)592 void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); }
593 
594 #ifdef USEPCRE
595 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
596 #endif
597 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
598 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
599 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
600 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
601 
602 // Benchmark: Parse1SplitHard, big text, small match.
603 
Parse1SplitBig1(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))604 void Parse1SplitBig1(benchmark::State& state,
605                      void (*run)(benchmark::State&, const char*,
606                                  const StringPiece&)) {
607   std::string s;
608   s.append(100000, 'x');
609   s.append("650-253-0001");
610   run(state, "[0-9]+.(.*)", s);
611   state.SetItemsProcessed(state.iterations());
612 }
613 
Parse_CachedSplitBig1_PCRE(benchmark::State & state)614 void Parse_CachedSplitBig1_PCRE(benchmark::State& state)      { Parse1SplitBig1(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(benchmark::State & state)615 void Parse_CachedSplitBig1_RE2(benchmark::State& state)       { Parse1SplitBig1(state, SearchParse1CachedRE2); }
616 
617 #ifdef USEPCRE
618 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
619 #endif
620 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
621 
622 // Benchmark: Parse1SplitHard, big text, big match.
623 
Parse1SplitBig2(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))624 void Parse1SplitBig2(benchmark::State& state,
625                      void (*run)(benchmark::State&, const char*,
626                                  const StringPiece&)) {
627   std::string s;
628   s.append("650-253-");
629   s.append(100000, '0');
630   run(state, "[0-9]+.(.*)", s);
631   state.SetItemsProcessed(state.iterations());
632 }
633 
Parse_CachedSplitBig2_PCRE(benchmark::State & state)634 void Parse_CachedSplitBig2_PCRE(benchmark::State& state)      { Parse1SplitBig2(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(benchmark::State & state)635 void Parse_CachedSplitBig2_RE2(benchmark::State& state)       { Parse1SplitBig2(state, SearchParse1CachedRE2); }
636 
637 #ifdef USEPCRE
638 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
639 #endif
640 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
641 
642 // Benchmark: measure time required to parse (but not execute)
643 // a simple regular expression.
644 
ParseRegexp(benchmark::State & state,const std::string & regexp)645 void ParseRegexp(benchmark::State& state, const std::string& regexp) {
646   for (auto _ : state) {
647     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
648     CHECK(re);
649     re->Decref();
650   }
651 }
652 
SimplifyRegexp(benchmark::State & state,const std::string & regexp)653 void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
654   for (auto _ : state) {
655     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
656     CHECK(re);
657     Regexp* sre = re->Simplify();
658     CHECK(sre);
659     sre->Decref();
660     re->Decref();
661   }
662 }
663 
NullWalkRegexp(benchmark::State & state,const std::string & regexp)664 void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
665   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
666   CHECK(re);
667   for (auto _ : state) {
668     re->NullWalk();
669   }
670   re->Decref();
671 }
672 
SimplifyCompileRegexp(benchmark::State & state,const std::string & regexp)673 void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
674   for (auto _ : state) {
675     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
676     CHECK(re);
677     Regexp* sre = re->Simplify();
678     CHECK(sre);
679     Prog* prog = sre->CompileToProg(0);
680     CHECK(prog);
681     delete prog;
682     sre->Decref();
683     re->Decref();
684   }
685 }
686 
CompileRegexp(benchmark::State & state,const std::string & regexp)687 void CompileRegexp(benchmark::State& state, const std::string& regexp) {
688   for (auto _ : state) {
689     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
690     CHECK(re);
691     Prog* prog = re->CompileToProg(0);
692     CHECK(prog);
693     delete prog;
694     re->Decref();
695   }
696 }
697 
CompileToProg(benchmark::State & state,const std::string & regexp)698 void CompileToProg(benchmark::State& state, const std::string& regexp) {
699   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
700   CHECK(re);
701   for (auto _ : state) {
702     Prog* prog = re->CompileToProg(0);
703     CHECK(prog);
704     delete prog;
705   }
706   re->Decref();
707 }
708 
CompileByteMap(benchmark::State & state,const std::string & regexp)709 void CompileByteMap(benchmark::State& state, const std::string& regexp) {
710   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
711   CHECK(re);
712   Prog* prog = re->CompileToProg(0);
713   CHECK(prog);
714   for (auto _ : state) {
715     prog->ComputeByteMap();
716   }
717   delete prog;
718   re->Decref();
719 }
720 
CompilePCRE(benchmark::State & state,const std::string & regexp)721 void CompilePCRE(benchmark::State& state, const std::string& regexp) {
722   for (auto _ : state) {
723     PCRE re(regexp, PCRE::UTF8);
724     CHECK_EQ(re.error(), "");
725   }
726 }
727 
CompileRE2(benchmark::State & state,const std::string & regexp)728 void CompileRE2(benchmark::State& state, const std::string& regexp) {
729   for (auto _ : state) {
730     RE2 re(regexp);
731     CHECK_EQ(re.error(), "");
732   }
733 }
734 
RunBuild(benchmark::State & state,const std::string & regexp,void (* run)(benchmark::State &,const std::string &))735 void RunBuild(benchmark::State& state, const std::string& regexp,
736               void (*run)(benchmark::State&, const std::string&)) {
737   run(state, regexp);
738   state.SetItemsProcessed(state.iterations());
739 }
740 
741 }  // namespace re2
742 
743 DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
744             "regexp for compile benchmarks");
745 
746 namespace re2 {
747 
BM_PCRE_Compile(benchmark::State & state)748 void BM_PCRE_Compile(benchmark::State& state)             { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
BM_Regexp_Parse(benchmark::State & state)749 void BM_Regexp_Parse(benchmark::State& state)             { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
BM_Regexp_Simplify(benchmark::State & state)750 void BM_Regexp_Simplify(benchmark::State& state)          { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
BM_CompileToProg(benchmark::State & state)751 void BM_CompileToProg(benchmark::State& state)            { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
BM_CompileByteMap(benchmark::State & state)752 void BM_CompileByteMap(benchmark::State& state)           { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
BM_Regexp_Compile(benchmark::State & state)753 void BM_Regexp_Compile(benchmark::State& state)           { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
BM_Regexp_SimplifyCompile(benchmark::State & state)754 void BM_Regexp_SimplifyCompile(benchmark::State& state)   { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
BM_Regexp_NullWalk(benchmark::State & state)755 void BM_Regexp_NullWalk(benchmark::State& state)          { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
BM_RE2_Compile(benchmark::State & state)756 void BM_RE2_Compile(benchmark::State& state)              { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
757 
758 #ifdef USEPCRE
759 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
760 #endif
761 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
762 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
763 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
764 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
765 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
766 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
767 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
768 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
769 
770 // Makes text of size nbytes, then calls run to search
771 // the text for regexp iters times.
SearchPhone(benchmark::State & state,ParseImpl * search)772 void SearchPhone(benchmark::State& state, ParseImpl* search) {
773   std::string s;
774   MakeText(&s, state.range(0));
775   s.append("(650) 253-0001");
776   search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
777   state.SetBytesProcessed(state.iterations() * state.range(0));
778 }
779 
SearchPhone_CachedPCRE(benchmark::State & state)780 void SearchPhone_CachedPCRE(benchmark::State& state) {
781   SearchPhone(state, SearchParse2CachedPCRE);
782 }
783 
SearchPhone_CachedRE2(benchmark::State & state)784 void SearchPhone_CachedRE2(benchmark::State& state) {
785   SearchPhone(state, SearchParse2CachedRE2);
786 }
787 
788 #ifdef USEPCRE
789 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
790 #endif
791 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
792 
793 /*
794 TODO(rsc): Make this work again.
795 void CacheFill(int iters, int n, SearchImpl *srch) {
796   std::string s = DeBruijnString(n+1);
797   std::string t;
798   for (int i = n+1; i < 20; i++) {
799     t = s + s;
800     using std::swap;
801     swap(s, t);
802   }
803   srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
804        Prog::kUnanchored, true);
805   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
806 }
807 
808 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
809 void CacheFillRE2(int i, int n)  { CacheFill(i, n, SearchCachedRE2); }
810 void CacheFillNFA(int i, int n)  { CacheFill(i, n, SearchCachedNFA); }
811 void CacheFillDFA(int i, int n)  { CacheFill(i, n, SearchCachedDFA); }
812 
813 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
814 // for the static BenchmarkRegisterer, which makes it unusable inside
815 // a macro like DO24 below.  MY_BENCHMARK_WITH_ARG uses the argument a
816 // to make the identifiers distinct (only possible when 'a' is a simple
817 // expression like 2, not like 1+1).
818 #define MY_BENCHMARK_WITH_ARG(n, a) \
819   bool __benchmark_ ## n ## a =     \
820     (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
821 
822 #define DO24(A, B) \
823   A(B, 1);    A(B, 2);    A(B, 3);    A(B, 4);    A(B, 5);    A(B, 6);  \
824   A(B, 7);    A(B, 8);    A(B, 9);    A(B, 10);   A(B, 11);   A(B, 12); \
825   A(B, 13);   A(B, 14);   A(B, 15);   A(B, 16);   A(B, 17);   A(B, 18); \
826   A(B, 19);   A(B, 20);   A(B, 21);   A(B, 22);   A(B, 23);   A(B, 24);
827 
828 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
829 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
830 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
831 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
832 
833 #undef DO24
834 #undef MY_BENCHMARK_WITH_ARG
835 */
836 
837 ////////////////////////////////////////////////////////////////////////
838 //
839 // Implementation routines.  Sad that there are so many,
840 // but all the interfaces are slightly different.
841 
842 // Runs implementation to search for regexp in text, iters times.
843 // Expect_match says whether the regexp should be found.
844 // Anchored says whether to run an anchored search.
845 
SearchDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)846 void SearchDFA(benchmark::State& state, const char* regexp,
847                const StringPiece& text, Prog::Anchor anchor,
848                bool expect_match) {
849   for (auto _ : state) {
850     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
851     CHECK(re);
852     Prog* prog = re->CompileToProg(0);
853     CHECK(prog);
854     bool failed = false;
855     CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
856                              NULL, &failed, NULL),
857              expect_match);
858     CHECK(!failed);
859     delete prog;
860     re->Decref();
861   }
862 }
863 
SearchNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)864 void SearchNFA(benchmark::State& state, const char* regexp,
865                const StringPiece& text, Prog::Anchor anchor,
866                bool expect_match) {
867   for (auto _ : state) {
868     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
869     CHECK(re);
870     Prog* prog = re->CompileToProg(0);
871     CHECK(prog);
872     CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
873                              NULL, 0),
874              expect_match);
875     delete prog;
876     re->Decref();
877   }
878 }
879 
SearchOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)880 void SearchOnePass(benchmark::State& state, const char* regexp,
881                    const StringPiece& text, Prog::Anchor anchor,
882                    bool expect_match) {
883   for (auto _ : state) {
884     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
885     CHECK(re);
886     Prog* prog = re->CompileToProg(0);
887     CHECK(prog);
888     CHECK(prog->IsOnePass());
889     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
890              expect_match);
891     delete prog;
892     re->Decref();
893   }
894 }
895 
SearchBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)896 void SearchBitState(benchmark::State& state, const char* regexp,
897                     const StringPiece& text, Prog::Anchor anchor,
898                     bool expect_match) {
899   for (auto _ : state) {
900     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
901     CHECK(re);
902     Prog* prog = re->CompileToProg(0);
903     CHECK(prog);
904     CHECK(prog->CanBitState());
905     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
906              expect_match);
907     delete prog;
908     re->Decref();
909   }
910 }
911 
SearchPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)912 void SearchPCRE(benchmark::State& state, const char* regexp,
913                 const StringPiece& text, Prog::Anchor anchor,
914                 bool expect_match) {
915   for (auto _ : state) {
916     PCRE re(regexp, PCRE::UTF8);
917     CHECK_EQ(re.error(), "");
918     if (anchor == Prog::kAnchored)
919       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
920     else
921       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
922   }
923 }
924 
SearchRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)925 void SearchRE2(benchmark::State& state, const char* regexp,
926                const StringPiece& text, Prog::Anchor anchor,
927                bool expect_match) {
928   for (auto _ : state) {
929     RE2 re(regexp);
930     CHECK_EQ(re.error(), "");
931     if (anchor == Prog::kAnchored)
932       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
933     else
934       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
935   }
936 }
937 
938 // SearchCachedXXX is like SearchXXX but only does the
939 // regexp parsing and compiling once.  This lets us measure
940 // search time without the per-regexp overhead.
941 
SearchCachedDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)942 void SearchCachedDFA(benchmark::State& state, const char* regexp,
943                      const StringPiece& text, Prog::Anchor anchor,
944                      bool expect_match) {
945   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
946   CHECK(re);
947   Prog* prog = re->CompileToProg(int64_t{1}<<31);
948   CHECK(prog);
949   for (auto _ : state) {
950     bool failed = false;
951     CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
952                              NULL, &failed, NULL),
953              expect_match);
954     CHECK(!failed);
955   }
956   delete prog;
957   re->Decref();
958 }
959 
SearchCachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)960 void SearchCachedNFA(benchmark::State& state, const char* regexp,
961                      const StringPiece& text, Prog::Anchor anchor,
962                      bool expect_match) {
963   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
964   CHECK(re);
965   Prog* prog = re->CompileToProg(0);
966   CHECK(prog);
967   for (auto _ : state) {
968     CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
969                              NULL, 0),
970              expect_match);
971   }
972   delete prog;
973   re->Decref();
974 }
975 
SearchCachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)976 void SearchCachedOnePass(benchmark::State& state, const char* regexp,
977                          const StringPiece& text, Prog::Anchor anchor,
978                          bool expect_match) {
979   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
980   CHECK(re);
981   Prog* prog = re->CompileToProg(0);
982   CHECK(prog);
983   CHECK(prog->IsOnePass());
984   for (auto _ : state) {
985     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
986              expect_match);
987   }
988   delete prog;
989   re->Decref();
990 }
991 
SearchCachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)992 void SearchCachedBitState(benchmark::State& state, const char* regexp,
993                           const StringPiece& text, Prog::Anchor anchor,
994                           bool expect_match) {
995   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
996   CHECK(re);
997   Prog* prog = re->CompileToProg(0);
998   CHECK(prog);
999   CHECK(prog->CanBitState());
1000   for (auto _ : state) {
1001     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1002              expect_match);
1003   }
1004   delete prog;
1005   re->Decref();
1006 }
1007 
SearchCachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1008 void SearchCachedPCRE(benchmark::State& state, const char* regexp,
1009                       const StringPiece& text, Prog::Anchor anchor,
1010                       bool expect_match) {
1011   PCRE re(regexp, PCRE::UTF8);
1012   CHECK_EQ(re.error(), "");
1013   for (auto _ : state) {
1014     if (anchor == Prog::kAnchored)
1015       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1016     else
1017       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1018   }
1019 }
1020 
SearchCachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1021 void SearchCachedRE2(benchmark::State& state, const char* regexp,
1022                      const StringPiece& text, Prog::Anchor anchor,
1023                      bool expect_match) {
1024   RE2 re(regexp);
1025   CHECK_EQ(re.error(), "");
1026   for (auto _ : state) {
1027     if (anchor == Prog::kAnchored)
1028       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1029     else
1030       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1031   }
1032 }
1033 
1034 // Runs implementation to full match regexp against text,
1035 // extracting three submatches.  Expects match always.
1036 
Parse3NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1037 void Parse3NFA(benchmark::State& state, const char* regexp,
1038                const StringPiece& text) {
1039   for (auto _ : state) {
1040     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1041     CHECK(re);
1042     Prog* prog = re->CompileToProg(0);
1043     CHECK(prog);
1044     StringPiece sp[4];  // 4 because sp[0] is whole match.
1045     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1046                           Prog::kFullMatch, sp, 4));
1047     delete prog;
1048     re->Decref();
1049   }
1050 }
1051 
Parse3OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1052 void Parse3OnePass(benchmark::State& state, const char* regexp,
1053                    const StringPiece& text) {
1054   for (auto _ : state) {
1055     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1056     CHECK(re);
1057     Prog* prog = re->CompileToProg(0);
1058     CHECK(prog);
1059     CHECK(prog->IsOnePass());
1060     StringPiece sp[4];  // 4 because sp[0] is whole match.
1061     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1062     delete prog;
1063     re->Decref();
1064   }
1065 }
1066 
Parse3BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1067 void Parse3BitState(benchmark::State& state, const char* regexp,
1068                     const StringPiece& text) {
1069   for (auto _ : state) {
1070     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1071     CHECK(re);
1072     Prog* prog = re->CompileToProg(0);
1073     CHECK(prog);
1074     CHECK(prog->CanBitState());
1075     StringPiece sp[4];  // 4 because sp[0] is whole match.
1076     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1077     delete prog;
1078     re->Decref();
1079   }
1080 }
1081 
Parse3Backtrack(benchmark::State & state,const char * regexp,const StringPiece & text)1082 void Parse3Backtrack(benchmark::State& state, const char* regexp,
1083                      const StringPiece& text) {
1084   for (auto _ : state) {
1085     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1086     CHECK(re);
1087     Prog* prog = re->CompileToProg(0);
1088     CHECK(prog);
1089     StringPiece sp[4];  // 4 because sp[0] is whole match.
1090     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1091     delete prog;
1092     re->Decref();
1093   }
1094 }
1095 
Parse3PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1096 void Parse3PCRE(benchmark::State& state, const char* regexp,
1097                 const StringPiece& text) {
1098   for (auto _ : state) {
1099     PCRE re(regexp, PCRE::UTF8);
1100     CHECK_EQ(re.error(), "");
1101     StringPiece sp1, sp2, sp3;
1102     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1103   }
1104 }
1105 
Parse3RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1106 void Parse3RE2(benchmark::State& state, const char* regexp,
1107                const StringPiece& text) {
1108   for (auto _ : state) {
1109     RE2 re(regexp);
1110     CHECK_EQ(re.error(), "");
1111     StringPiece sp1, sp2, sp3;
1112     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1113   }
1114 }
1115 
Parse3CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1116 void Parse3CachedNFA(benchmark::State& state, const char* regexp,
1117                      const StringPiece& text) {
1118   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1119   CHECK(re);
1120   Prog* prog = re->CompileToProg(0);
1121   CHECK(prog);
1122   StringPiece sp[4];  // 4 because sp[0] is whole match.
1123   for (auto _ : state) {
1124     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1125                           Prog::kFullMatch, sp, 4));
1126   }
1127   delete prog;
1128   re->Decref();
1129 }
1130 
Parse3CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1131 void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
1132                          const StringPiece& text) {
1133   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1134   CHECK(re);
1135   Prog* prog = re->CompileToProg(0);
1136   CHECK(prog);
1137   CHECK(prog->IsOnePass());
1138   StringPiece sp[4];  // 4 because sp[0] is whole match.
1139   for (auto _ : state) {
1140     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1141   }
1142   delete prog;
1143   re->Decref();
1144 }
1145 
Parse3CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1146 void Parse3CachedBitState(benchmark::State& state, const char* regexp,
1147                           const StringPiece& text) {
1148   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1149   CHECK(re);
1150   Prog* prog = re->CompileToProg(0);
1151   CHECK(prog);
1152   CHECK(prog->CanBitState());
1153   StringPiece sp[4];  // 4 because sp[0] is whole match.
1154   for (auto _ : state) {
1155     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1156   }
1157   delete prog;
1158   re->Decref();
1159 }
1160 
Parse3CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1161 void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
1162                            const StringPiece& text) {
1163   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1164   CHECK(re);
1165   Prog* prog = re->CompileToProg(0);
1166   CHECK(prog);
1167   StringPiece sp[4];  // 4 because sp[0] is whole match.
1168   for (auto _ : state) {
1169     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1170   }
1171   delete prog;
1172   re->Decref();
1173 }
1174 
Parse3CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1175 void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
1176                       const StringPiece& text) {
1177   PCRE re(regexp, PCRE::UTF8);
1178   CHECK_EQ(re.error(), "");
1179   StringPiece sp1, sp2, sp3;
1180   for (auto _ : state) {
1181     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1182   }
1183 }
1184 
Parse3CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1185 void Parse3CachedRE2(benchmark::State& state, const char* regexp,
1186                      const StringPiece& text) {
1187   RE2 re(regexp);
1188   CHECK_EQ(re.error(), "");
1189   StringPiece sp1, sp2, sp3;
1190   for (auto _ : state) {
1191     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1192   }
1193 }
1194 
1195 // Runs implementation to full match regexp against text,
1196 // extracting three submatches.  Expects match always.
1197 
Parse1NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1198 void Parse1NFA(benchmark::State& state, const char* regexp,
1199                const StringPiece& text) {
1200   for (auto _ : state) {
1201     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1202     CHECK(re);
1203     Prog* prog = re->CompileToProg(0);
1204     CHECK(prog);
1205     StringPiece sp[2];  // 2 because sp[0] is whole match.
1206     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1207                           Prog::kFullMatch, sp, 2));
1208     delete prog;
1209     re->Decref();
1210   }
1211 }
1212 
Parse1OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1213 void Parse1OnePass(benchmark::State& state, const char* regexp,
1214                    const StringPiece& text) {
1215   for (auto _ : state) {
1216     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1217     CHECK(re);
1218     Prog* prog = re->CompileToProg(0);
1219     CHECK(prog);
1220     CHECK(prog->IsOnePass());
1221     StringPiece sp[2];  // 2 because sp[0] is whole match.
1222     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1223     delete prog;
1224     re->Decref();
1225   }
1226 }
1227 
Parse1BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1228 void Parse1BitState(benchmark::State& state, const char* regexp,
1229                     const StringPiece& text) {
1230   for (auto _ : state) {
1231     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1232     CHECK(re);
1233     Prog* prog = re->CompileToProg(0);
1234     CHECK(prog);
1235     CHECK(prog->CanBitState());
1236     StringPiece sp[2];  // 2 because sp[0] is whole match.
1237     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1238     delete prog;
1239     re->Decref();
1240   }
1241 }
1242 
Parse1PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1243 void Parse1PCRE(benchmark::State& state, const char* regexp,
1244                 const StringPiece& text) {
1245   for (auto _ : state) {
1246     PCRE re(regexp, PCRE::UTF8);
1247     CHECK_EQ(re.error(), "");
1248     StringPiece sp1;
1249     CHECK(PCRE::FullMatch(text, re, &sp1));
1250   }
1251 }
1252 
Parse1RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1253 void Parse1RE2(benchmark::State& state, const char* regexp,
1254                const StringPiece& text) {
1255   for (auto _ : state) {
1256     RE2 re(regexp);
1257     CHECK_EQ(re.error(), "");
1258     StringPiece sp1;
1259     CHECK(RE2::FullMatch(text, re, &sp1));
1260   }
1261 }
1262 
Parse1CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1263 void Parse1CachedNFA(benchmark::State& state, const char* regexp,
1264                      const StringPiece& text) {
1265   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1266   CHECK(re);
1267   Prog* prog = re->CompileToProg(0);
1268   CHECK(prog);
1269   StringPiece sp[2];  // 2 because sp[0] is whole match.
1270   for (auto _ : state) {
1271     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1272                           Prog::kFullMatch, sp, 2));
1273   }
1274   delete prog;
1275   re->Decref();
1276 }
1277 
Parse1CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1278 void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
1279                          const StringPiece& text) {
1280   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1281   CHECK(re);
1282   Prog* prog = re->CompileToProg(0);
1283   CHECK(prog);
1284   CHECK(prog->IsOnePass());
1285   StringPiece sp[2];  // 2 because sp[0] is whole match.
1286   for (auto _ : state) {
1287     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1288   }
1289   delete prog;
1290   re->Decref();
1291 }
1292 
Parse1CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1293 void Parse1CachedBitState(benchmark::State& state, const char* regexp,
1294                           const StringPiece& text) {
1295   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1296   CHECK(re);
1297   Prog* prog = re->CompileToProg(0);
1298   CHECK(prog);
1299   CHECK(prog->CanBitState());
1300   StringPiece sp[2];  // 2 because sp[0] is whole match.
1301   for (auto _ : state) {
1302     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1303   }
1304   delete prog;
1305   re->Decref();
1306 }
1307 
Parse1CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1308 void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
1309                            const StringPiece& text) {
1310   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1311   CHECK(re);
1312   Prog* prog = re->CompileToProg(0);
1313   CHECK(prog);
1314   StringPiece sp[2];  // 2 because sp[0] is whole match.
1315   for (auto _ : state) {
1316     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1317   }
1318   delete prog;
1319   re->Decref();
1320 }
1321 
Parse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1322 void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
1323                       const StringPiece& text) {
1324   PCRE re(regexp, PCRE::UTF8);
1325   CHECK_EQ(re.error(), "");
1326   StringPiece sp1;
1327   for (auto _ : state) {
1328     CHECK(PCRE::FullMatch(text, re, &sp1));
1329   }
1330 }
1331 
Parse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1332 void Parse1CachedRE2(benchmark::State& state, const char* regexp,
1333                      const StringPiece& text) {
1334   RE2 re(regexp);
1335   CHECK_EQ(re.error(), "");
1336   StringPiece sp1;
1337   for (auto _ : state) {
1338     CHECK(RE2::FullMatch(text, re, &sp1));
1339   }
1340 }
1341 
SearchParse2CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1342 void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
1343                             const StringPiece& text) {
1344   PCRE re(regexp, PCRE::UTF8);
1345   CHECK_EQ(re.error(), "");
1346   for (auto _ : state) {
1347     StringPiece sp1, sp2;
1348     CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1349   }
1350 }
1351 
SearchParse2CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1352 void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
1353                            const StringPiece& text) {
1354   RE2 re(regexp);
1355   CHECK_EQ(re.error(), "");
1356   for (auto _ : state) {
1357     StringPiece sp1, sp2;
1358     CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1359   }
1360 }
1361 
SearchParse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1362 void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
1363                             const StringPiece& text) {
1364   PCRE re(regexp, PCRE::UTF8);
1365   CHECK_EQ(re.error(), "");
1366   for (auto _ : state) {
1367     StringPiece sp1;
1368     CHECK(PCRE::PartialMatch(text, re, &sp1));
1369   }
1370 }
1371 
SearchParse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1372 void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
1373                            const StringPiece& text) {
1374   RE2 re(regexp);
1375   CHECK_EQ(re.error(), "");
1376   for (auto _ : state) {
1377     StringPiece sp1;
1378     CHECK(RE2::PartialMatch(text, re, &sp1));
1379   }
1380 }
1381 
EmptyPartialMatchPCRE(benchmark::State & state)1382 void EmptyPartialMatchPCRE(benchmark::State& state) {
1383   PCRE re("");
1384   for (auto _ : state) {
1385     PCRE::PartialMatch("", re);
1386   }
1387 }
1388 
EmptyPartialMatchRE2(benchmark::State & state)1389 void EmptyPartialMatchRE2(benchmark::State& state) {
1390   RE2 re("");
1391   for (auto _ : state) {
1392     RE2::PartialMatch("", re);
1393   }
1394 }
1395 #ifdef USEPCRE
1396 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1397 #endif
1398 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1399 
SimplePartialMatchPCRE(benchmark::State & state)1400 void SimplePartialMatchPCRE(benchmark::State& state) {
1401   PCRE re("abcdefg");
1402   for (auto _ : state) {
1403     PCRE::PartialMatch("abcdefg", re);
1404   }
1405 }
1406 
SimplePartialMatchRE2(benchmark::State & state)1407 void SimplePartialMatchRE2(benchmark::State& state) {
1408   RE2 re("abcdefg");
1409   for (auto _ : state) {
1410     RE2::PartialMatch("abcdefg", re);
1411   }
1412 }
1413 #ifdef USEPCRE
1414 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1415 #endif
1416 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1417 
1418 static std::string http_text =
1419   "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1420   "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1421 
HTTPPartialMatchPCRE(benchmark::State & state)1422 void HTTPPartialMatchPCRE(benchmark::State& state) {
1423   StringPiece a;
1424   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1425   for (auto _ : state) {
1426     PCRE::PartialMatch(http_text, re, &a);
1427   }
1428 }
1429 
HTTPPartialMatchRE2(benchmark::State & state)1430 void HTTPPartialMatchRE2(benchmark::State& state) {
1431   StringPiece a;
1432   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1433   for (auto _ : state) {
1434     RE2::PartialMatch(http_text, re, &a);
1435   }
1436 }
1437 
1438 #ifdef USEPCRE
1439 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1440 #endif
1441 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1442 
1443 static std::string smallhttp_text =
1444   "GET /abc HTTP/1.1";
1445 
SmallHTTPPartialMatchPCRE(benchmark::State & state)1446 void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
1447   StringPiece a;
1448   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1449   for (auto _ : state) {
1450     PCRE::PartialMatch(smallhttp_text, re, &a);
1451   }
1452 }
1453 
SmallHTTPPartialMatchRE2(benchmark::State & state)1454 void SmallHTTPPartialMatchRE2(benchmark::State& state) {
1455   StringPiece a;
1456   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1457   for (auto _ : state) {
1458     RE2::PartialMatch(smallhttp_text, re, &a);
1459   }
1460 }
1461 
1462 #ifdef USEPCRE
1463 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1464 #endif
1465 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1466 
DotMatchPCRE(benchmark::State & state)1467 void DotMatchPCRE(benchmark::State& state) {
1468   StringPiece a;
1469   PCRE re("(?-s)^(.+)");
1470   for (auto _ : state) {
1471     PCRE::PartialMatch(http_text, re, &a);
1472   }
1473 }
1474 
DotMatchRE2(benchmark::State & state)1475 void DotMatchRE2(benchmark::State& state) {
1476   StringPiece a;
1477   RE2 re("(?-s)^(.+)");
1478   for (auto _ : state) {
1479     RE2::PartialMatch(http_text, re, &a);
1480   }
1481 }
1482 
1483 #ifdef USEPCRE
1484 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1485 #endif
1486 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1487 
ASCIIMatchPCRE(benchmark::State & state)1488 void ASCIIMatchPCRE(benchmark::State& state) {
1489   StringPiece a;
1490   PCRE re("(?-s)^([ -~]+)");
1491   for (auto _ : state) {
1492     PCRE::PartialMatch(http_text, re, &a);
1493   }
1494 }
1495 
ASCIIMatchRE2(benchmark::State & state)1496 void ASCIIMatchRE2(benchmark::State& state) {
1497   StringPiece a;
1498   RE2 re("(?-s)^([ -~]+)");
1499   for (auto _ : state) {
1500     RE2::PartialMatch(http_text, re, &a);
1501   }
1502 }
1503 
1504 #ifdef USEPCRE
1505 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1506 #endif
1507 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1508 
FullMatchPCRE(benchmark::State & state,const char * regexp)1509 void FullMatchPCRE(benchmark::State& state, const char *regexp) {
1510   std::string s;
1511   MakeText(&s, state.range(0));
1512   s += "ABCDEFGHIJ";
1513   PCRE re(regexp);
1514   for (auto _ : state) {
1515     CHECK(PCRE::FullMatch(s, re));
1516   }
1517   state.SetBytesProcessed(state.iterations() * state.range(0));
1518 }
1519 
FullMatchRE2(benchmark::State & state,const char * regexp)1520 void FullMatchRE2(benchmark::State& state, const char *regexp) {
1521   std::string s;
1522   MakeText(&s, state.range(0));
1523   s += "ABCDEFGHIJ";
1524   RE2 re(regexp, RE2::Latin1);
1525   for (auto _ : state) {
1526     CHECK(RE2::FullMatch(s, re));
1527   }
1528   state.SetBytesProcessed(state.iterations() * state.range(0));
1529 }
1530 
FullMatch_DotStar_CachedPCRE(benchmark::State & state)1531 void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
FullMatch_DotStar_CachedRE2(benchmark::State & state)1532 void FullMatch_DotStar_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s).*"); }
1533 
FullMatch_DotStarDollar_CachedPCRE(benchmark::State & state)1534 void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(benchmark::State & state)1535 void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s).*$"); }
1536 
FullMatch_DotStarCapture_CachedPCRE(benchmark::State & state)1537 void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(benchmark::State & state)1538 void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state)  { FullMatchRE2(state, "(?s)((.*)()()($))"); }
1539 
1540 #ifdef USEPCRE
1541 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1542 #endif
1543 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2,  8, 2<<20);
1544 
1545 #ifdef USEPCRE
1546 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1547 #endif
1548 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2,  8, 2<<20);
1549 
1550 #ifdef USEPCRE
1551 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1552 #endif
1553 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2,  8, 2<<20);
1554 
PossibleMatchRangeCommon(benchmark::State & state,const char * regexp)1555 void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
1556   RE2 re(regexp);
1557   std::string min;
1558   std::string max;
1559   const int kMaxLen = 16;
1560   for (auto _ : state) {
1561     CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1562   }
1563 }
1564 
PossibleMatchRange_Trivial(benchmark::State & state)1565 void PossibleMatchRange_Trivial(benchmark::State& state) {
1566   PossibleMatchRangeCommon(state, ".*");
1567 }
PossibleMatchRange_Complex(benchmark::State & state)1568 void PossibleMatchRange_Complex(benchmark::State& state) {
1569   PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*");
1570 }
PossibleMatchRange_Prefix(benchmark::State & state)1571 void PossibleMatchRange_Prefix(benchmark::State& state) {
1572   PossibleMatchRangeCommon(state, "^some_random_prefix.*");
1573 }
PossibleMatchRange_NoProg(benchmark::State & state)1574 void PossibleMatchRange_NoProg(benchmark::State& state) {
1575   PossibleMatchRangeCommon(state, "^some_random_string$");
1576 }
1577 
1578 BENCHMARK(PossibleMatchRange_Trivial);
1579 BENCHMARK(PossibleMatchRange_Complex);
1580 BENCHMARK(PossibleMatchRange_Prefix);
1581 BENCHMARK(PossibleMatchRange_NoProg);
1582 
1583 }  // namespace re2
1584