1 // Copyright 2007 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Compiled regular expression representation.
6 // Tested by compile_test.cc
7
8 #include "re2/prog.h"
9
10 #if defined(__AVX2__)
11 #include <immintrin.h>
12 #ifdef _MSC_VER
13 #include <intrin.h>
14 #endif
15 #endif
16 #include <stdint.h>
17 #include <string.h>
18 #include <algorithm>
19 #include <memory>
20 #include <utility>
21
22 #include "util/util.h"
23 #include "util/logging.h"
24 #include "util/strutil.h"
25 #include "re2/bitmap256.h"
26 #include "re2/stringpiece.h"
27
28 namespace re2 {
29
30 // Constructors per Inst opcode
31
InitAlt(uint32_t out,uint32_t out1)32 void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) {
33 DCHECK_EQ(out_opcode_, 0);
34 set_out_opcode(out, kInstAlt);
35 out1_ = out1;
36 }
37
InitByteRange(int lo,int hi,int foldcase,uint32_t out)38 void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
39 DCHECK_EQ(out_opcode_, 0);
40 set_out_opcode(out, kInstByteRange);
41 lo_ = lo & 0xFF;
42 hi_ = hi & 0xFF;
43 hint_foldcase_ = foldcase&1;
44 }
45
InitCapture(int cap,uint32_t out)46 void Prog::Inst::InitCapture(int cap, uint32_t out) {
47 DCHECK_EQ(out_opcode_, 0);
48 set_out_opcode(out, kInstCapture);
49 cap_ = cap;
50 }
51
InitEmptyWidth(EmptyOp empty,uint32_t out)52 void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) {
53 DCHECK_EQ(out_opcode_, 0);
54 set_out_opcode(out, kInstEmptyWidth);
55 empty_ = empty;
56 }
57
InitMatch(int32_t id)58 void Prog::Inst::InitMatch(int32_t id) {
59 DCHECK_EQ(out_opcode_, 0);
60 set_opcode(kInstMatch);
61 match_id_ = id;
62 }
63
InitNop(uint32_t out)64 void Prog::Inst::InitNop(uint32_t out) {
65 DCHECK_EQ(out_opcode_, 0);
66 set_opcode(kInstNop);
67 }
68
InitFail()69 void Prog::Inst::InitFail() {
70 DCHECK_EQ(out_opcode_, 0);
71 set_opcode(kInstFail);
72 }
73
Dump()74 std::string Prog::Inst::Dump() {
75 switch (opcode()) {
76 default:
77 return StringPrintf("opcode %d", static_cast<int>(opcode()));
78
79 case kInstAlt:
80 return StringPrintf("alt -> %d | %d", out(), out1_);
81
82 case kInstAltMatch:
83 return StringPrintf("altmatch -> %d | %d", out(), out1_);
84
85 case kInstByteRange:
86 return StringPrintf("byte%s [%02x-%02x] %d -> %d",
87 foldcase() ? "/i" : "",
88 lo_, hi_, hint(), out());
89
90 case kInstCapture:
91 return StringPrintf("capture %d -> %d", cap_, out());
92
93 case kInstEmptyWidth:
94 return StringPrintf("emptywidth %#x -> %d",
95 static_cast<int>(empty_), out());
96
97 case kInstMatch:
98 return StringPrintf("match! %d", match_id());
99
100 case kInstNop:
101 return StringPrintf("nop -> %d", out());
102
103 case kInstFail:
104 return StringPrintf("fail");
105 }
106 }
107
Prog()108 Prog::Prog()
109 : anchor_start_(false),
110 anchor_end_(false),
111 reversed_(false),
112 did_flatten_(false),
113 did_onepass_(false),
114 start_(0),
115 start_unanchored_(0),
116 size_(0),
117 bytemap_range_(0),
118 prefix_foldcase_(false),
119 prefix_size_(0),
120 list_count_(0),
121 dfa_mem_(0),
122 dfa_first_(NULL),
123 dfa_longest_(NULL) {
124 }
125
~Prog()126 Prog::~Prog() {
127 DeleteDFA(dfa_longest_);
128 DeleteDFA(dfa_first_);
129 if (prefix_foldcase_)
130 delete[] prefix_dfa_;
131 }
132
133 typedef SparseSet Workq;
134
AddToQueue(Workq * q,int id)135 static inline void AddToQueue(Workq* q, int id) {
136 if (id != 0)
137 q->insert(id);
138 }
139
ProgToString(Prog * prog,Workq * q)140 static std::string ProgToString(Prog* prog, Workq* q) {
141 std::string s;
142 for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
143 int id = *i;
144 Prog::Inst* ip = prog->inst(id);
145 s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
146 AddToQueue(q, ip->out());
147 if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
148 AddToQueue(q, ip->out1());
149 }
150 return s;
151 }
152
FlattenedProgToString(Prog * prog,int start)153 static std::string FlattenedProgToString(Prog* prog, int start) {
154 std::string s;
155 for (int id = start; id < prog->size(); id++) {
156 Prog::Inst* ip = prog->inst(id);
157 if (ip->last())
158 s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
159 else
160 s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str());
161 }
162 return s;
163 }
164
Dump()165 std::string Prog::Dump() {
166 if (did_flatten_)
167 return FlattenedProgToString(this, start_);
168
169 Workq q(size_);
170 AddToQueue(&q, start_);
171 return ProgToString(this, &q);
172 }
173
DumpUnanchored()174 std::string Prog::DumpUnanchored() {
175 if (did_flatten_)
176 return FlattenedProgToString(this, start_unanchored_);
177
178 Workq q(size_);
179 AddToQueue(&q, start_unanchored_);
180 return ProgToString(this, &q);
181 }
182
DumpByteMap()183 std::string Prog::DumpByteMap() {
184 std::string map;
185 for (int c = 0; c < 256; c++) {
186 int b = bytemap_[c];
187 int lo = c;
188 while (c < 256-1 && bytemap_[c+1] == b)
189 c++;
190 int hi = c;
191 map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b);
192 }
193 return map;
194 }
195
196 // Is ip a guaranteed match at end of text, perhaps after some capturing?
IsMatch(Prog * prog,Prog::Inst * ip)197 static bool IsMatch(Prog* prog, Prog::Inst* ip) {
198 for (;;) {
199 switch (ip->opcode()) {
200 default:
201 LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
202 return false;
203
204 case kInstAlt:
205 case kInstAltMatch:
206 case kInstByteRange:
207 case kInstFail:
208 case kInstEmptyWidth:
209 return false;
210
211 case kInstCapture:
212 case kInstNop:
213 ip = prog->inst(ip->out());
214 break;
215
216 case kInstMatch:
217 return true;
218 }
219 }
220 }
221
222 // Peep-hole optimizer.
Optimize()223 void Prog::Optimize() {
224 Workq q(size_);
225
226 // Eliminate nops. Most are taken out during compilation
227 // but a few are hard to avoid.
228 q.clear();
229 AddToQueue(&q, start_);
230 for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
231 int id = *i;
232
233 Inst* ip = inst(id);
234 int j = ip->out();
235 Inst* jp;
236 while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
237 j = jp->out();
238 }
239 ip->set_out(j);
240 AddToQueue(&q, ip->out());
241
242 if (ip->opcode() == kInstAlt) {
243 j = ip->out1();
244 while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
245 j = jp->out();
246 }
247 ip->out1_ = j;
248 AddToQueue(&q, ip->out1());
249 }
250 }
251
252 // Insert kInstAltMatch instructions
253 // Look for
254 // ip: Alt -> j | k
255 // j: ByteRange [00-FF] -> ip
256 // k: Match
257 // or the reverse (the above is the greedy one).
258 // Rewrite Alt to AltMatch.
259 q.clear();
260 AddToQueue(&q, start_);
261 for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
262 int id = *i;
263 Inst* ip = inst(id);
264 AddToQueue(&q, ip->out());
265 if (ip->opcode() == kInstAlt)
266 AddToQueue(&q, ip->out1());
267
268 if (ip->opcode() == kInstAlt) {
269 Inst* j = inst(ip->out());
270 Inst* k = inst(ip->out1());
271 if (j->opcode() == kInstByteRange && j->out() == id &&
272 j->lo() == 0x00 && j->hi() == 0xFF &&
273 IsMatch(this, k)) {
274 ip->set_opcode(kInstAltMatch);
275 continue;
276 }
277 if (IsMatch(this, j) &&
278 k->opcode() == kInstByteRange && k->out() == id &&
279 k->lo() == 0x00 && k->hi() == 0xFF) {
280 ip->set_opcode(kInstAltMatch);
281 }
282 }
283 }
284 }
285
EmptyFlags(const StringPiece & text,const char * p)286 uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
287 int flags = 0;
288
289 // ^ and \A
290 if (p == text.data())
291 flags |= kEmptyBeginText | kEmptyBeginLine;
292 else if (p[-1] == '\n')
293 flags |= kEmptyBeginLine;
294
295 // $ and \z
296 if (p == text.data() + text.size())
297 flags |= kEmptyEndText | kEmptyEndLine;
298 else if (p < text.data() + text.size() && p[0] == '\n')
299 flags |= kEmptyEndLine;
300
301 // \b and \B
302 if (p == text.data() && p == text.data() + text.size()) {
303 // no word boundary here
304 } else if (p == text.data()) {
305 if (IsWordChar(p[0]))
306 flags |= kEmptyWordBoundary;
307 } else if (p == text.data() + text.size()) {
308 if (IsWordChar(p[-1]))
309 flags |= kEmptyWordBoundary;
310 } else {
311 if (IsWordChar(p[-1]) != IsWordChar(p[0]))
312 flags |= kEmptyWordBoundary;
313 }
314 if (!(flags & kEmptyWordBoundary))
315 flags |= kEmptyNonWordBoundary;
316
317 return flags;
318 }
319
320 // ByteMapBuilder implements a coloring algorithm.
321 //
322 // The first phase is a series of "mark and merge" batches: we mark one or more
323 // [lo-hi] ranges, then merge them into our internal state. Batching is not for
324 // performance; rather, it means that the ranges are treated indistinguishably.
325 //
326 // Internally, the ranges are represented using a bitmap that stores the splits
327 // and a vector that stores the colors; both of them are indexed by the ranges'
328 // last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at
329 // hi (if not already split), then recolor each range in between. The color map
330 // (i.e. from the old color to the new color) is maintained for the lifetime of
331 // the batch and so underpins this somewhat obscure approach to set operations.
332 //
333 // The second phase builds the bytemap from our internal state: we recolor each
334 // range, then store the new color (which is now the byte class) in each of the
335 // corresponding array elements. Finally, we output the number of byte classes.
336 class ByteMapBuilder {
337 public:
ByteMapBuilder()338 ByteMapBuilder() {
339 // Initial state: the [0-255] range has color 256.
340 // This will avoid problems during the second phase,
341 // in which we assign byte classes numbered from 0.
342 splits_.Set(255);
343 colors_[255] = 256;
344 nextcolor_ = 257;
345 }
346
347 void Mark(int lo, int hi);
348 void Merge();
349 void Build(uint8_t* bytemap, int* bytemap_range);
350
351 private:
352 int Recolor(int oldcolor);
353
354 Bitmap256 splits_;
355 int colors_[256];
356 int nextcolor_;
357 std::vector<std::pair<int, int>> colormap_;
358 std::vector<std::pair<int, int>> ranges_;
359
360 ByteMapBuilder(const ByteMapBuilder&) = delete;
361 ByteMapBuilder& operator=(const ByteMapBuilder&) = delete;
362 };
363
Mark(int lo,int hi)364 void ByteMapBuilder::Mark(int lo, int hi) {
365 DCHECK_GE(lo, 0);
366 DCHECK_GE(hi, 0);
367 DCHECK_LE(lo, 255);
368 DCHECK_LE(hi, 255);
369 DCHECK_LE(lo, hi);
370
371 // Ignore any [0-255] ranges. They cause us to recolor every range, which
372 // has no effect on the eventual result and is therefore a waste of time.
373 if (lo == 0 && hi == 255)
374 return;
375
376 ranges_.emplace_back(lo, hi);
377 }
378
Merge()379 void ByteMapBuilder::Merge() {
380 for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin();
381 it != ranges_.end();
382 ++it) {
383 int lo = it->first-1;
384 int hi = it->second;
385
386 if (0 <= lo && !splits_.Test(lo)) {
387 splits_.Set(lo);
388 int next = splits_.FindNextSetBit(lo+1);
389 colors_[lo] = colors_[next];
390 }
391 if (!splits_.Test(hi)) {
392 splits_.Set(hi);
393 int next = splits_.FindNextSetBit(hi+1);
394 colors_[hi] = colors_[next];
395 }
396
397 int c = lo+1;
398 while (c < 256) {
399 int next = splits_.FindNextSetBit(c);
400 colors_[next] = Recolor(colors_[next]);
401 if (next == hi)
402 break;
403 c = next+1;
404 }
405 }
406 colormap_.clear();
407 ranges_.clear();
408 }
409
Build(uint8_t * bytemap,int * bytemap_range)410 void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) {
411 // Assign byte classes numbered from 0.
412 nextcolor_ = 0;
413
414 int c = 0;
415 while (c < 256) {
416 int next = splits_.FindNextSetBit(c);
417 uint8_t b = static_cast<uint8_t>(Recolor(colors_[next]));
418 while (c <= next) {
419 bytemap[c] = b;
420 c++;
421 }
422 }
423
424 *bytemap_range = nextcolor_;
425 }
426
Recolor(int oldcolor)427 int ByteMapBuilder::Recolor(int oldcolor) {
428 // Yes, this is a linear search. There can be at most 256
429 // colors and there will typically be far fewer than that.
430 // Also, we need to consider keys *and* values in order to
431 // avoid recoloring a given range more than once per batch.
432 std::vector<std::pair<int, int>>::const_iterator it =
433 std::find_if(colormap_.begin(), colormap_.end(),
434 [=](const std::pair<int, int>& kv) -> bool {
435 return kv.first == oldcolor || kv.second == oldcolor;
436 });
437 if (it != colormap_.end())
438 return it->second;
439 int newcolor = nextcolor_;
440 nextcolor_++;
441 colormap_.emplace_back(oldcolor, newcolor);
442 return newcolor;
443 }
444
ComputeByteMap()445 void Prog::ComputeByteMap() {
446 // Fill in bytemap with byte classes for the program.
447 // Ranges of bytes that are treated indistinguishably
448 // will be mapped to a single byte class.
449 ByteMapBuilder builder;
450
451 // Don't repeat the work for ^ and $.
452 bool marked_line_boundaries = false;
453 // Don't repeat the work for \b and \B.
454 bool marked_word_boundaries = false;
455
456 for (int id = 0; id < size(); id++) {
457 Inst* ip = inst(id);
458 if (ip->opcode() == kInstByteRange) {
459 int lo = ip->lo();
460 int hi = ip->hi();
461 builder.Mark(lo, hi);
462 if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
463 int foldlo = lo;
464 int foldhi = hi;
465 if (foldlo < 'a')
466 foldlo = 'a';
467 if (foldhi > 'z')
468 foldhi = 'z';
469 if (foldlo <= foldhi) {
470 foldlo += 'A' - 'a';
471 foldhi += 'A' - 'a';
472 builder.Mark(foldlo, foldhi);
473 }
474 }
475 // If this Inst is not the last Inst in its list AND the next Inst is
476 // also a ByteRange AND the Insts have the same out, defer the merge.
477 if (!ip->last() &&
478 inst(id+1)->opcode() == kInstByteRange &&
479 ip->out() == inst(id+1)->out())
480 continue;
481 builder.Merge();
482 } else if (ip->opcode() == kInstEmptyWidth) {
483 if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
484 !marked_line_boundaries) {
485 builder.Mark('\n', '\n');
486 builder.Merge();
487 marked_line_boundaries = true;
488 }
489 if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) &&
490 !marked_word_boundaries) {
491 // We require two batches here: the first for ranges that are word
492 // characters, the second for ranges that are not word characters.
493 for (bool isword : {true, false}) {
494 int j;
495 for (int i = 0; i < 256; i = j) {
496 for (j = i + 1; j < 256 &&
497 Prog::IsWordChar(static_cast<uint8_t>(i)) ==
498 Prog::IsWordChar(static_cast<uint8_t>(j));
499 j++)
500 ;
501 if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword)
502 builder.Mark(i, j - 1);
503 }
504 builder.Merge();
505 }
506 marked_word_boundaries = true;
507 }
508 }
509 }
510
511 builder.Build(bytemap_, &bytemap_range_);
512
513 if (0) { // For debugging, use trivial bytemap.
514 LOG(ERROR) << "Using trivial bytemap.";
515 for (int i = 0; i < 256; i++)
516 bytemap_[i] = static_cast<uint8_t>(i);
517 bytemap_range_ = 256;
518 }
519 }
520
521 // Prog::Flatten() implements a graph rewriting algorithm.
522 //
523 // The overall process is similar to epsilon removal, but retains some epsilon
524 // transitions: those from Capture and EmptyWidth instructions; and those from
525 // nullable subexpressions. (The latter avoids quadratic blowup in transitions
526 // in the worst case.) It might be best thought of as Alt instruction elision.
527 //
528 // In conceptual terms, it divides the Prog into "trees" of instructions, then
529 // traverses the "trees" in order to produce "lists" of instructions. A "tree"
530 // is one or more instructions that grow from one "root" instruction to one or
531 // more "leaf" instructions; if a "tree" has exactly one instruction, then the
532 // "root" is also the "leaf". In most cases, a "root" is the successor of some
533 // "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction)
534 // and is considered a "successor root". A "leaf" can be a ByteRange, Capture,
535 // EmptyWidth or Match instruction. However, this is insufficient for handling
536 // nested nullable subexpressions correctly, so in some cases, a "root" is the
537 // dominator of the instructions reachable from some "successor root" (i.e. it
538 // has an unreachable predecessor) and is considered a "dominator root". Since
539 // only Alt instructions can be "dominator roots" (other instructions would be
540 // "leaves"), only Alt instructions are required to be marked as predecessors.
541 //
542 // Dividing the Prog into "trees" comprises two passes: marking the "successor
543 // roots" and the predecessors; and marking the "dominator roots". Sorting the
544 // "successor roots" by their bytecode offsets enables iteration in order from
545 // greatest to least during the second pass; by working backwards in this case
546 // and flooding the graph no further than "leaves" and already marked "roots",
547 // it becomes possible to mark "dominator roots" without doing excessive work.
548 //
549 // Traversing the "trees" is just iterating over the "roots" in order of their
550 // marking and flooding the graph no further than "leaves" and "roots". When a
551 // "leaf" is reached, the instruction is copied with its successor remapped to
552 // its "root" number. When a "root" is reached, a Nop instruction is generated
553 // with its successor remapped similarly. As each "list" is produced, its last
554 // instruction is marked as such. After all of the "lists" have been produced,
555 // a pass over their instructions remaps their successors to bytecode offsets.
Flatten()556 void Prog::Flatten() {
557 if (did_flatten_)
558 return;
559 did_flatten_ = true;
560
561 // Scratch structures. It's important that these are reused by functions
562 // that we call in loops because they would thrash the heap otherwise.
563 SparseSet reachable(size());
564 std::vector<int> stk;
565 stk.reserve(size());
566
567 // First pass: Marks "successor roots" and predecessors.
568 // Builds the mapping from inst-ids to root-ids.
569 SparseArray<int> rootmap(size());
570 SparseArray<int> predmap(size());
571 std::vector<std::vector<int>> predvec;
572 MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk);
573
574 // Second pass: Marks "dominator roots".
575 SparseArray<int> sorted(rootmap);
576 std::sort(sorted.begin(), sorted.end(), sorted.less);
577 for (SparseArray<int>::const_iterator i = sorted.end() - 1;
578 i != sorted.begin();
579 --i) {
580 if (i->index() != start_unanchored() && i->index() != start())
581 MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk);
582 }
583
584 // Third pass: Emits "lists". Remaps outs to root-ids.
585 // Builds the mapping from root-ids to flat-ids.
586 std::vector<int> flatmap(rootmap.size());
587 std::vector<Inst> flat;
588 flat.reserve(size());
589 for (SparseArray<int>::const_iterator i = rootmap.begin();
590 i != rootmap.end();
591 ++i) {
592 flatmap[i->value()] = static_cast<int>(flat.size());
593 EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
594 flat.back().set_last();
595 // We have the bounds of the "list", so this is the
596 // most convenient point at which to compute hints.
597 ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
598 }
599
600 list_count_ = static_cast<int>(flatmap.size());
601 for (int i = 0; i < kNumInst; i++)
602 inst_count_[i] = 0;
603
604 // Fourth pass: Remaps outs to flat-ids.
605 // Counts instructions by opcode.
606 for (int id = 0; id < static_cast<int>(flat.size()); id++) {
607 Inst* ip = &flat[id];
608 if (ip->opcode() != kInstAltMatch) // handled in EmitList()
609 ip->set_out(flatmap[ip->out()]);
610 inst_count_[ip->opcode()]++;
611 }
612
613 int total = 0;
614 for (int i = 0; i < kNumInst; i++)
615 total += inst_count_[i];
616 DCHECK_EQ(total, static_cast<int>(flat.size()));
617
618 // Remap start_unanchored and start.
619 if (start_unanchored() == 0) {
620 DCHECK_EQ(start(), 0);
621 } else if (start_unanchored() == start()) {
622 set_start_unanchored(flatmap[1]);
623 set_start(flatmap[1]);
624 } else {
625 set_start_unanchored(flatmap[1]);
626 set_start(flatmap[2]);
627 }
628
629 // Finally, replace the old instructions with the new instructions.
630 size_ = static_cast<int>(flat.size());
631 inst_ = PODArray<Inst>(size_);
632 memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
633
634 // Populate the list heads for BitState.
635 // 512 instructions limits the memory footprint to 1KiB.
636 if (size_ <= 512) {
637 list_heads_ = PODArray<uint16_t>(size_);
638 // 0xFF makes it more obvious if we try to look up a non-head.
639 memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]);
640 for (int i = 0; i < list_count_; ++i)
641 list_heads_[flatmap[i]] = i;
642 }
643 }
644
MarkSuccessors(SparseArray<int> * rootmap,SparseArray<int> * predmap,std::vector<std::vector<int>> * predvec,SparseSet * reachable,std::vector<int> * stk)645 void Prog::MarkSuccessors(SparseArray<int>* rootmap,
646 SparseArray<int>* predmap,
647 std::vector<std::vector<int>>* predvec,
648 SparseSet* reachable, std::vector<int>* stk) {
649 // Mark the kInstFail instruction.
650 rootmap->set_new(0, rootmap->size());
651
652 // Mark the start_unanchored and start instructions.
653 if (!rootmap->has_index(start_unanchored()))
654 rootmap->set_new(start_unanchored(), rootmap->size());
655 if (!rootmap->has_index(start()))
656 rootmap->set_new(start(), rootmap->size());
657
658 reachable->clear();
659 stk->clear();
660 stk->push_back(start_unanchored());
661 while (!stk->empty()) {
662 int id = stk->back();
663 stk->pop_back();
664 Loop:
665 if (reachable->contains(id))
666 continue;
667 reachable->insert_new(id);
668
669 Inst* ip = inst(id);
670 switch (ip->opcode()) {
671 default:
672 LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
673 break;
674
675 case kInstAltMatch:
676 case kInstAlt:
677 // Mark this instruction as a predecessor of each out.
678 for (int out : {ip->out(), ip->out1()}) {
679 if (!predmap->has_index(out)) {
680 predmap->set_new(out, static_cast<int>(predvec->size()));
681 predvec->emplace_back();
682 }
683 (*predvec)[predmap->get_existing(out)].emplace_back(id);
684 }
685 stk->push_back(ip->out1());
686 id = ip->out();
687 goto Loop;
688
689 case kInstByteRange:
690 case kInstCapture:
691 case kInstEmptyWidth:
692 // Mark the out of this instruction as a "root".
693 if (!rootmap->has_index(ip->out()))
694 rootmap->set_new(ip->out(), rootmap->size());
695 id = ip->out();
696 goto Loop;
697
698 case kInstNop:
699 id = ip->out();
700 goto Loop;
701
702 case kInstMatch:
703 case kInstFail:
704 break;
705 }
706 }
707 }
708
MarkDominator(int root,SparseArray<int> * rootmap,SparseArray<int> * predmap,std::vector<std::vector<int>> * predvec,SparseSet * reachable,std::vector<int> * stk)709 void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
710 SparseArray<int>* predmap,
711 std::vector<std::vector<int>>* predvec,
712 SparseSet* reachable, std::vector<int>* stk) {
713 reachable->clear();
714 stk->clear();
715 stk->push_back(root);
716 while (!stk->empty()) {
717 int id = stk->back();
718 stk->pop_back();
719 Loop:
720 if (reachable->contains(id))
721 continue;
722 reachable->insert_new(id);
723
724 if (id != root && rootmap->has_index(id)) {
725 // We reached another "tree" via epsilon transition.
726 continue;
727 }
728
729 Inst* ip = inst(id);
730 switch (ip->opcode()) {
731 default:
732 LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
733 break;
734
735 case kInstAltMatch:
736 case kInstAlt:
737 stk->push_back(ip->out1());
738 id = ip->out();
739 goto Loop;
740
741 case kInstByteRange:
742 case kInstCapture:
743 case kInstEmptyWidth:
744 break;
745
746 case kInstNop:
747 id = ip->out();
748 goto Loop;
749
750 case kInstMatch:
751 case kInstFail:
752 break;
753 }
754 }
755
756 for (SparseSet::const_iterator i = reachable->begin();
757 i != reachable->end();
758 ++i) {
759 int id = *i;
760 if (predmap->has_index(id)) {
761 for (int pred : (*predvec)[predmap->get_existing(id)]) {
762 if (!reachable->contains(pred)) {
763 // id has a predecessor that cannot be reached from root!
764 // Therefore, id must be a "root" too - mark it as such.
765 if (!rootmap->has_index(id))
766 rootmap->set_new(id, rootmap->size());
767 }
768 }
769 }
770 }
771 }
772
EmitList(int root,SparseArray<int> * rootmap,std::vector<Inst> * flat,SparseSet * reachable,std::vector<int> * stk)773 void Prog::EmitList(int root, SparseArray<int>* rootmap,
774 std::vector<Inst>* flat,
775 SparseSet* reachable, std::vector<int>* stk) {
776 reachable->clear();
777 stk->clear();
778 stk->push_back(root);
779 while (!stk->empty()) {
780 int id = stk->back();
781 stk->pop_back();
782 Loop:
783 if (reachable->contains(id))
784 continue;
785 reachable->insert_new(id);
786
787 if (id != root && rootmap->has_index(id)) {
788 // We reached another "tree" via epsilon transition. Emit a kInstNop
789 // instruction so that the Prog does not become quadratically larger.
790 flat->emplace_back();
791 flat->back().set_opcode(kInstNop);
792 flat->back().set_out(rootmap->get_existing(id));
793 continue;
794 }
795
796 Inst* ip = inst(id);
797 switch (ip->opcode()) {
798 default:
799 LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
800 break;
801
802 case kInstAltMatch:
803 flat->emplace_back();
804 flat->back().set_opcode(kInstAltMatch);
805 flat->back().set_out(static_cast<int>(flat->size()));
806 flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
807 FALLTHROUGH_INTENDED;
808
809 case kInstAlt:
810 stk->push_back(ip->out1());
811 id = ip->out();
812 goto Loop;
813
814 case kInstByteRange:
815 case kInstCapture:
816 case kInstEmptyWidth:
817 flat->emplace_back();
818 memmove(&flat->back(), ip, sizeof *ip);
819 flat->back().set_out(rootmap->get_existing(ip->out()));
820 break;
821
822 case kInstNop:
823 id = ip->out();
824 goto Loop;
825
826 case kInstMatch:
827 case kInstFail:
828 flat->emplace_back();
829 memmove(&flat->back(), ip, sizeof *ip);
830 break;
831 }
832 }
833 }
834
835 // For each ByteRange instruction in [begin, end), computes a hint to execution
836 // engines: the delta to the next instruction (in flat) worth exploring iff the
837 // current instruction matched.
838 //
839 // Implements a coloring algorithm related to ByteMapBuilder, but in this case,
840 // colors are instructions and recoloring ranges precisely identifies conflicts
841 // between instructions. Iterating backwards over [begin, end) is guaranteed to
842 // identify the nearest conflict (if any) with only linear complexity.
ComputeHints(std::vector<Inst> * flat,int begin,int end)843 void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
844 Bitmap256 splits;
845 int colors[256];
846
847 bool dirty = false;
848 for (int id = end; id >= begin; --id) {
849 if (id == end ||
850 (*flat)[id].opcode() != kInstByteRange) {
851 if (dirty) {
852 dirty = false;
853 splits.Clear();
854 }
855 splits.Set(255);
856 colors[255] = id;
857 // At this point, the [0-255] range is colored with id.
858 // Thus, hints cannot point beyond id; and if id == end,
859 // hints that would have pointed to id will be 0 instead.
860 continue;
861 }
862 dirty = true;
863
864 // We recolor the [lo-hi] range with id. Note that first ratchets backwards
865 // from end to the nearest conflict (if any) during recoloring.
866 int first = end;
867 auto Recolor = [&](int lo, int hi) {
868 // Like ByteMapBuilder, we split at lo-1 and at hi.
869 --lo;
870
871 if (0 <= lo && !splits.Test(lo)) {
872 splits.Set(lo);
873 int next = splits.FindNextSetBit(lo+1);
874 colors[lo] = colors[next];
875 }
876 if (!splits.Test(hi)) {
877 splits.Set(hi);
878 int next = splits.FindNextSetBit(hi+1);
879 colors[hi] = colors[next];
880 }
881
882 int c = lo+1;
883 while (c < 256) {
884 int next = splits.FindNextSetBit(c);
885 // Ratchet backwards...
886 first = std::min(first, colors[next]);
887 // Recolor with id - because it's the new nearest conflict!
888 colors[next] = id;
889 if (next == hi)
890 break;
891 c = next+1;
892 }
893 };
894
895 Inst* ip = &(*flat)[id];
896 int lo = ip->lo();
897 int hi = ip->hi();
898 Recolor(lo, hi);
899 if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
900 int foldlo = lo;
901 int foldhi = hi;
902 if (foldlo < 'a')
903 foldlo = 'a';
904 if (foldhi > 'z')
905 foldhi = 'z';
906 if (foldlo <= foldhi) {
907 foldlo += 'A' - 'a';
908 foldhi += 'A' - 'a';
909 Recolor(foldlo, foldhi);
910 }
911 }
912
913 if (first != end) {
914 uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767));
915 ip->hint_foldcase_ |= hint<<1;
916 }
917 }
918 }
919
920 // The final state will always be this, which frees up a register for the hot
921 // loop and thus avoids the spilling that can occur when building with Clang.
922 static const size_t kShiftDFAFinal = 9;
923
924 // This function takes the prefix as std::string (i.e. not const std::string&
925 // as normal) because it's going to clobber it, so a temporary is convenient.
BuildShiftDFA(std::string prefix)926 static uint64_t* BuildShiftDFA(std::string prefix) {
927 // This constant is for convenience now and also for correctness later when
928 // we clobber the prefix, but still need to know how long it was initially.
929 const size_t size = prefix.size();
930
931 // Construct the NFA.
932 // The table is indexed by input byte; each element is a bitfield of states
933 // reachable by the input byte. Given a bitfield of the current states, the
934 // bitfield of states reachable from those is - for this specific purpose -
935 // always ((ncurr << 1) | 1). Intersecting the reachability bitfields gives
936 // the bitfield of the next states reached by stepping over the input byte.
937 // Credits for this technique: the Hyperscan paper by Geoff Langdale et al.
938 uint16_t nfa[256]{};
939 for (size_t i = 0; i < size; ++i) {
940 uint8_t b = prefix[i];
941 nfa[b] |= 1 << (i+1);
942 }
943 // This is the `\C*?` for unanchored search.
944 for (int b = 0; b < 256; ++b)
945 nfa[b] |= 1;
946
947 // This maps from DFA state to NFA states; the reverse mapping is used when
948 // recording transitions and gets implemented with plain old linear search.
949 // The "Shift DFA" technique limits this to ten states when using uint64_t;
950 // to allow for the initial state, we use at most nine bytes of the prefix.
951 // That same limit is also why uint16_t is sufficient for the NFA bitfield.
952 uint16_t states[kShiftDFAFinal+1]{};
953 states[0] = 1;
954 for (size_t dcurr = 0; dcurr < size; ++dcurr) {
955 uint8_t b = prefix[dcurr];
956 uint16_t ncurr = states[dcurr];
957 uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
958 size_t dnext = dcurr+1;
959 if (dnext == size)
960 dnext = kShiftDFAFinal;
961 states[dnext] = nnext;
962 }
963
964 // Sort and unique the bytes of the prefix to avoid repeating work while we
965 // record transitions. This clobbers the prefix, but it's no longer needed.
966 std::sort(prefix.begin(), prefix.end());
967 prefix.erase(std::unique(prefix.begin(), prefix.end()), prefix.end());
968
969 // Construct the DFA.
970 // The table is indexed by input byte; each element is effectively a packed
971 // array of uint6_t; each array value will be multiplied by six in order to
972 // avoid having to do so later in the hot loop as well as masking/shifting.
973 // Credits for this technique: "Shift-based DFAs" on GitHub by Per Vognsen.
974 uint64_t* dfa = new uint64_t[256]{};
975 // Record a transition from each state for each of the bytes of the prefix.
976 // Note that all other input bytes go back to the initial state by default.
977 for (size_t dcurr = 0; dcurr < size; ++dcurr) {
978 for (uint8_t b : prefix) {
979 uint16_t ncurr = states[dcurr];
980 uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
981 size_t dnext = 0;
982 while (states[dnext] != nnext)
983 ++dnext;
984 dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
985 // Convert ASCII letters to uppercase and record the extra transitions.
986 // Note that ASCII letters are guaranteed to be lowercase at this point
987 // because that's how the parser normalises them. #FunFact: 'k' and 's'
988 // match U+212A and U+017F, respectively, so they won't occur here when
989 // using UTF-8 encoding because the parser will emit character classes.
990 if ('a' <= b && b <= 'z') {
991 b -= 'a' - 'A';
992 dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
993 }
994 }
995 }
996 // This lets the final state "saturate", which will matter for performance:
997 // in the hot loop, we check for a match only at the end of each iteration,
998 // so we must keep signalling the match until we get around to checking it.
999 for (int b = 0; b < 256; ++b)
1000 dfa[b] |= static_cast<uint64_t>(kShiftDFAFinal * 6) << (kShiftDFAFinal * 6);
1001
1002 return dfa;
1003 }
1004
ConfigurePrefixAccel(const std::string & prefix,bool prefix_foldcase)1005 void Prog::ConfigurePrefixAccel(const std::string& prefix,
1006 bool prefix_foldcase) {
1007 prefix_foldcase_ = prefix_foldcase;
1008 prefix_size_ = prefix.size();
1009 if (prefix_foldcase_) {
1010 // Use PrefixAccel_ShiftDFA().
1011 // ... and no more than nine bytes of the prefix. (See above for details.)
1012 prefix_size_ = std::min(prefix_size_, kShiftDFAFinal);
1013 prefix_dfa_ = BuildShiftDFA(prefix.substr(0, prefix_size_));
1014 } else if (prefix_size_ != 1) {
1015 // Use PrefixAccel_FrontAndBack().
1016 prefix_front_ = prefix.front();
1017 prefix_back_ = prefix.back();
1018 } else {
1019 // Use memchr(3).
1020 prefix_front_ = prefix.front();
1021 }
1022 }
1023
PrefixAccel_ShiftDFA(const void * data,size_t size)1024 const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) {
1025 if (size < prefix_size_)
1026 return NULL;
1027
1028 uint64_t curr = 0;
1029
1030 // At the time of writing, rough benchmarks on a Broadwell machine showed
1031 // that this unroll factor (i.e. eight) achieves a speedup factor of two.
1032 if (size >= 8) {
1033 const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
1034 const uint8_t* endp = p + (size&~7);
1035 do {
1036 uint8_t b0 = p[0];
1037 uint8_t b1 = p[1];
1038 uint8_t b2 = p[2];
1039 uint8_t b3 = p[3];
1040 uint8_t b4 = p[4];
1041 uint8_t b5 = p[5];
1042 uint8_t b6 = p[6];
1043 uint8_t b7 = p[7];
1044
1045 uint64_t next0 = prefix_dfa_[b0];
1046 uint64_t next1 = prefix_dfa_[b1];
1047 uint64_t next2 = prefix_dfa_[b2];
1048 uint64_t next3 = prefix_dfa_[b3];
1049 uint64_t next4 = prefix_dfa_[b4];
1050 uint64_t next5 = prefix_dfa_[b5];
1051 uint64_t next6 = prefix_dfa_[b6];
1052 uint64_t next7 = prefix_dfa_[b7];
1053
1054 uint64_t curr0 = next0 >> (curr & 63);
1055 uint64_t curr1 = next1 >> (curr0 & 63);
1056 uint64_t curr2 = next2 >> (curr1 & 63);
1057 uint64_t curr3 = next3 >> (curr2 & 63);
1058 uint64_t curr4 = next4 >> (curr3 & 63);
1059 uint64_t curr5 = next5 >> (curr4 & 63);
1060 uint64_t curr6 = next6 >> (curr5 & 63);
1061 uint64_t curr7 = next7 >> (curr6 & 63);
1062
1063 if ((curr7 & 63) == kShiftDFAFinal * 6) {
1064 // At the time of writing, using the same masking subexpressions from
1065 // the preceding lines caused Clang to clutter the hot loop computing
1066 // them - even though they aren't actually needed for shifting! Hence
1067 // these rewritten conditions, which achieve a speedup factor of two.
1068 if (((curr7-curr0) & 63) == 0) return p+1-prefix_size_;
1069 if (((curr7-curr1) & 63) == 0) return p+2-prefix_size_;
1070 if (((curr7-curr2) & 63) == 0) return p+3-prefix_size_;
1071 if (((curr7-curr3) & 63) == 0) return p+4-prefix_size_;
1072 if (((curr7-curr4) & 63) == 0) return p+5-prefix_size_;
1073 if (((curr7-curr5) & 63) == 0) return p+6-prefix_size_;
1074 if (((curr7-curr6) & 63) == 0) return p+7-prefix_size_;
1075 if (((curr7-curr7) & 63) == 0) return p+8-prefix_size_;
1076 }
1077
1078 curr = curr7;
1079 p += 8;
1080 } while (p != endp);
1081 data = p;
1082 size = size&7;
1083 }
1084
1085 const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
1086 const uint8_t* endp = p + size;
1087 while (p != endp) {
1088 uint8_t b = *p++;
1089 uint64_t next = prefix_dfa_[b];
1090 curr = next >> (curr & 63);
1091 if ((curr & 63) == kShiftDFAFinal * 6)
1092 return p-prefix_size_;
1093 }
1094 return NULL;
1095 }
1096
1097 #if defined(__AVX2__)
1098 // Finds the least significant non-zero bit in n.
FindLSBSet(uint32_t n)1099 static int FindLSBSet(uint32_t n) {
1100 DCHECK_NE(n, 0);
1101 #if defined(__GNUC__)
1102 return __builtin_ctz(n);
1103 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
1104 unsigned long c;
1105 _BitScanForward(&c, n);
1106 return static_cast<int>(c);
1107 #else
1108 int c = 31;
1109 for (int shift = 1 << 4; shift != 0; shift >>= 1) {
1110 uint32_t word = n << shift;
1111 if (word != 0) {
1112 n = word;
1113 c -= shift;
1114 }
1115 }
1116 return c;
1117 #endif
1118 }
1119 #endif
1120
PrefixAccel_FrontAndBack(const void * data,size_t size)1121 const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
1122 DCHECK_GE(prefix_size_, 2);
1123 if (size < prefix_size_)
1124 return NULL;
1125 // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
1126 // This also means that probing for prefix_back_ doesn't go out of bounds.
1127 size -= prefix_size_-1;
1128
1129 #if defined(__AVX2__)
1130 // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
1131 if (size >= sizeof(__m256i)) {
1132 const __m256i* fp = reinterpret_cast<const __m256i*>(
1133 reinterpret_cast<const char*>(data));
1134 const __m256i* bp = reinterpret_cast<const __m256i*>(
1135 reinterpret_cast<const char*>(data) + prefix_size_-1);
1136 const __m256i* endfp = fp + size/sizeof(__m256i);
1137 const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
1138 const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
1139 do {
1140 const __m256i f_loadu = _mm256_loadu_si256(fp++);
1141 const __m256i b_loadu = _mm256_loadu_si256(bp++);
1142 const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
1143 const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
1144 const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
1145 if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
1146 const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
1147 const int fb_movemask = _mm256_movemask_epi8(fb_and);
1148 const int fb_ctz = FindLSBSet(fb_movemask);
1149 return reinterpret_cast<const char*>(fp-1) + fb_ctz;
1150 }
1151 } while (fp != endfp);
1152 data = fp;
1153 size = size%sizeof(__m256i);
1154 }
1155 #endif
1156
1157 const char* p0 = reinterpret_cast<const char*>(data);
1158 for (const char* p = p0;; p++) {
1159 DCHECK_GE(size, static_cast<size_t>(p-p0));
1160 p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
1161 if (p == NULL || p[prefix_size_-1] == prefix_back_)
1162 return p;
1163 }
1164 }
1165
1166 } // namespace re2
1167