1 // Copyright 2007 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Compiled regular expression representation.
6 // Tested by compile_test.cc
7 
8 #include "re2/prog.h"
9 
10 #if defined(__AVX2__)
11 #include <immintrin.h>
12 #ifdef _MSC_VER
13 #include <intrin.h>
14 #endif
15 #endif
16 #include <stdint.h>
17 #include <string.h>
18 #include <algorithm>
19 #include <memory>
20 #include <utility>
21 
22 #include "util/util.h"
23 #include "util/logging.h"
24 #include "util/strutil.h"
25 #include "re2/bitmap256.h"
26 #include "re2/stringpiece.h"
27 
28 namespace re2 {
29 
30 // Constructors per Inst opcode
31 
InitAlt(uint32_t out,uint32_t out1)32 void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) {
33   DCHECK_EQ(out_opcode_, 0);
34   set_out_opcode(out, kInstAlt);
35   out1_ = out1;
36 }
37 
InitByteRange(int lo,int hi,int foldcase,uint32_t out)38 void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
39   DCHECK_EQ(out_opcode_, 0);
40   set_out_opcode(out, kInstByteRange);
41   lo_ = lo & 0xFF;
42   hi_ = hi & 0xFF;
43   hint_foldcase_ = foldcase&1;
44 }
45 
InitCapture(int cap,uint32_t out)46 void Prog::Inst::InitCapture(int cap, uint32_t out) {
47   DCHECK_EQ(out_opcode_, 0);
48   set_out_opcode(out, kInstCapture);
49   cap_ = cap;
50 }
51 
InitEmptyWidth(EmptyOp empty,uint32_t out)52 void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) {
53   DCHECK_EQ(out_opcode_, 0);
54   set_out_opcode(out, kInstEmptyWidth);
55   empty_ = empty;
56 }
57 
InitMatch(int32_t id)58 void Prog::Inst::InitMatch(int32_t id) {
59   DCHECK_EQ(out_opcode_, 0);
60   set_opcode(kInstMatch);
61   match_id_ = id;
62 }
63 
InitNop(uint32_t out)64 void Prog::Inst::InitNop(uint32_t out) {
65   DCHECK_EQ(out_opcode_, 0);
66   set_opcode(kInstNop);
67 }
68 
InitFail()69 void Prog::Inst::InitFail() {
70   DCHECK_EQ(out_opcode_, 0);
71   set_opcode(kInstFail);
72 }
73 
Dump()74 std::string Prog::Inst::Dump() {
75   switch (opcode()) {
76     default:
77       return StringPrintf("opcode %d", static_cast<int>(opcode()));
78 
79     case kInstAlt:
80       return StringPrintf("alt -> %d | %d", out(), out1_);
81 
82     case kInstAltMatch:
83       return StringPrintf("altmatch -> %d | %d", out(), out1_);
84 
85     case kInstByteRange:
86       return StringPrintf("byte%s [%02x-%02x] %d -> %d",
87                           foldcase() ? "/i" : "",
88                           lo_, hi_, hint(), out());
89 
90     case kInstCapture:
91       return StringPrintf("capture %d -> %d", cap_, out());
92 
93     case kInstEmptyWidth:
94       return StringPrintf("emptywidth %#x -> %d",
95                           static_cast<int>(empty_), out());
96 
97     case kInstMatch:
98       return StringPrintf("match! %d", match_id());
99 
100     case kInstNop:
101       return StringPrintf("nop -> %d", out());
102 
103     case kInstFail:
104       return StringPrintf("fail");
105   }
106 }
107 
Prog()108 Prog::Prog()
109   : anchor_start_(false),
110     anchor_end_(false),
111     reversed_(false),
112     did_flatten_(false),
113     did_onepass_(false),
114     start_(0),
115     start_unanchored_(0),
116     size_(0),
117     bytemap_range_(0),
118     prefix_size_(0),
119     prefix_front_(-1),
120     prefix_back_(-1),
121     list_count_(0),
122     dfa_mem_(0),
123     dfa_first_(NULL),
124     dfa_longest_(NULL) {
125 }
126 
~Prog()127 Prog::~Prog() {
128   DeleteDFA(dfa_longest_);
129   DeleteDFA(dfa_first_);
130 }
131 
132 typedef SparseSet Workq;
133 
AddToQueue(Workq * q,int id)134 static inline void AddToQueue(Workq* q, int id) {
135   if (id != 0)
136     q->insert(id);
137 }
138 
ProgToString(Prog * prog,Workq * q)139 static std::string ProgToString(Prog* prog, Workq* q) {
140   std::string s;
141   for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
142     int id = *i;
143     Prog::Inst* ip = prog->inst(id);
144     s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
145     AddToQueue(q, ip->out());
146     if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
147       AddToQueue(q, ip->out1());
148   }
149   return s;
150 }
151 
FlattenedProgToString(Prog * prog,int start)152 static std::string FlattenedProgToString(Prog* prog, int start) {
153   std::string s;
154   for (int id = start; id < prog->size(); id++) {
155     Prog::Inst* ip = prog->inst(id);
156     if (ip->last())
157       s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
158     else
159       s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str());
160   }
161   return s;
162 }
163 
Dump()164 std::string Prog::Dump() {
165   if (did_flatten_)
166     return FlattenedProgToString(this, start_);
167 
168   Workq q(size_);
169   AddToQueue(&q, start_);
170   return ProgToString(this, &q);
171 }
172 
DumpUnanchored()173 std::string Prog::DumpUnanchored() {
174   if (did_flatten_)
175     return FlattenedProgToString(this, start_unanchored_);
176 
177   Workq q(size_);
178   AddToQueue(&q, start_unanchored_);
179   return ProgToString(this, &q);
180 }
181 
DumpByteMap()182 std::string Prog::DumpByteMap() {
183   std::string map;
184   for (int c = 0; c < 256; c++) {
185     int b = bytemap_[c];
186     int lo = c;
187     while (c < 256-1 && bytemap_[c+1] == b)
188       c++;
189     int hi = c;
190     map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b);
191   }
192   return map;
193 }
194 
195 // Is ip a guaranteed match at end of text, perhaps after some capturing?
IsMatch(Prog * prog,Prog::Inst * ip)196 static bool IsMatch(Prog* prog, Prog::Inst* ip) {
197   for (;;) {
198     switch (ip->opcode()) {
199       default:
200         LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
201         return false;
202 
203       case kInstAlt:
204       case kInstAltMatch:
205       case kInstByteRange:
206       case kInstFail:
207       case kInstEmptyWidth:
208         return false;
209 
210       case kInstCapture:
211       case kInstNop:
212         ip = prog->inst(ip->out());
213         break;
214 
215       case kInstMatch:
216         return true;
217     }
218   }
219 }
220 
221 // Peep-hole optimizer.
Optimize()222 void Prog::Optimize() {
223   Workq q(size_);
224 
225   // Eliminate nops.  Most are taken out during compilation
226   // but a few are hard to avoid.
227   q.clear();
228   AddToQueue(&q, start_);
229   for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
230     int id = *i;
231 
232     Inst* ip = inst(id);
233     int j = ip->out();
234     Inst* jp;
235     while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
236       j = jp->out();
237     }
238     ip->set_out(j);
239     AddToQueue(&q, ip->out());
240 
241     if (ip->opcode() == kInstAlt) {
242       j = ip->out1();
243       while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
244         j = jp->out();
245       }
246       ip->out1_ = j;
247       AddToQueue(&q, ip->out1());
248     }
249   }
250 
251   // Insert kInstAltMatch instructions
252   // Look for
253   //   ip: Alt -> j | k
254   //	  j: ByteRange [00-FF] -> ip
255   //    k: Match
256   // or the reverse (the above is the greedy one).
257   // Rewrite Alt to AltMatch.
258   q.clear();
259   AddToQueue(&q, start_);
260   for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
261     int id = *i;
262     Inst* ip = inst(id);
263     AddToQueue(&q, ip->out());
264     if (ip->opcode() == kInstAlt)
265       AddToQueue(&q, ip->out1());
266 
267     if (ip->opcode() == kInstAlt) {
268       Inst* j = inst(ip->out());
269       Inst* k = inst(ip->out1());
270       if (j->opcode() == kInstByteRange && j->out() == id &&
271           j->lo() == 0x00 && j->hi() == 0xFF &&
272           IsMatch(this, k)) {
273         ip->set_opcode(kInstAltMatch);
274         continue;
275       }
276       if (IsMatch(this, j) &&
277           k->opcode() == kInstByteRange && k->out() == id &&
278           k->lo() == 0x00 && k->hi() == 0xFF) {
279         ip->set_opcode(kInstAltMatch);
280       }
281     }
282   }
283 }
284 
EmptyFlags(const StringPiece & text,const char * p)285 uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
286   int flags = 0;
287 
288   // ^ and \A
289   if (p == text.data())
290     flags |= kEmptyBeginText | kEmptyBeginLine;
291   else if (p[-1] == '\n')
292     flags |= kEmptyBeginLine;
293 
294   // $ and \z
295   if (p == text.data() + text.size())
296     flags |= kEmptyEndText | kEmptyEndLine;
297   else if (p < text.data() + text.size() && p[0] == '\n')
298     flags |= kEmptyEndLine;
299 
300   // \b and \B
301   if (p == text.data() && p == text.data() + text.size()) {
302     // no word boundary here
303   } else if (p == text.data()) {
304     if (IsWordChar(p[0]))
305       flags |= kEmptyWordBoundary;
306   } else if (p == text.data() + text.size()) {
307     if (IsWordChar(p[-1]))
308       flags |= kEmptyWordBoundary;
309   } else {
310     if (IsWordChar(p[-1]) != IsWordChar(p[0]))
311       flags |= kEmptyWordBoundary;
312   }
313   if (!(flags & kEmptyWordBoundary))
314     flags |= kEmptyNonWordBoundary;
315 
316   return flags;
317 }
318 
319 // ByteMapBuilder implements a coloring algorithm.
320 //
321 // The first phase is a series of "mark and merge" batches: we mark one or more
322 // [lo-hi] ranges, then merge them into our internal state. Batching is not for
323 // performance; rather, it means that the ranges are treated indistinguishably.
324 //
325 // Internally, the ranges are represented using a bitmap that stores the splits
326 // and a vector that stores the colors; both of them are indexed by the ranges'
327 // last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at
328 // hi (if not already split), then recolor each range in between. The color map
329 // (i.e. from the old color to the new color) is maintained for the lifetime of
330 // the batch and so underpins this somewhat obscure approach to set operations.
331 //
332 // The second phase builds the bytemap from our internal state: we recolor each
333 // range, then store the new color (which is now the byte class) in each of the
334 // corresponding array elements. Finally, we output the number of byte classes.
335 class ByteMapBuilder {
336  public:
ByteMapBuilder()337   ByteMapBuilder() {
338     // Initial state: the [0-255] range has color 256.
339     // This will avoid problems during the second phase,
340     // in which we assign byte classes numbered from 0.
341     splits_.Set(255);
342     colors_[255] = 256;
343     nextcolor_ = 257;
344   }
345 
346   void Mark(int lo, int hi);
347   void Merge();
348   void Build(uint8_t* bytemap, int* bytemap_range);
349 
350  private:
351   int Recolor(int oldcolor);
352 
353   Bitmap256 splits_;
354   int colors_[256];
355   int nextcolor_;
356   std::vector<std::pair<int, int>> colormap_;
357   std::vector<std::pair<int, int>> ranges_;
358 
359   ByteMapBuilder(const ByteMapBuilder&) = delete;
360   ByteMapBuilder& operator=(const ByteMapBuilder&) = delete;
361 };
362 
Mark(int lo,int hi)363 void ByteMapBuilder::Mark(int lo, int hi) {
364   DCHECK_GE(lo, 0);
365   DCHECK_GE(hi, 0);
366   DCHECK_LE(lo, 255);
367   DCHECK_LE(hi, 255);
368   DCHECK_LE(lo, hi);
369 
370   // Ignore any [0-255] ranges. They cause us to recolor every range, which
371   // has no effect on the eventual result and is therefore a waste of time.
372   if (lo == 0 && hi == 255)
373     return;
374 
375   ranges_.emplace_back(lo, hi);
376 }
377 
Merge()378 void ByteMapBuilder::Merge() {
379   for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin();
380        it != ranges_.end();
381        ++it) {
382     int lo = it->first-1;
383     int hi = it->second;
384 
385     if (0 <= lo && !splits_.Test(lo)) {
386       splits_.Set(lo);
387       int next = splits_.FindNextSetBit(lo+1);
388       colors_[lo] = colors_[next];
389     }
390     if (!splits_.Test(hi)) {
391       splits_.Set(hi);
392       int next = splits_.FindNextSetBit(hi+1);
393       colors_[hi] = colors_[next];
394     }
395 
396     int c = lo+1;
397     while (c < 256) {
398       int next = splits_.FindNextSetBit(c);
399       colors_[next] = Recolor(colors_[next]);
400       if (next == hi)
401         break;
402       c = next+1;
403     }
404   }
405   colormap_.clear();
406   ranges_.clear();
407 }
408 
Build(uint8_t * bytemap,int * bytemap_range)409 void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) {
410   // Assign byte classes numbered from 0.
411   nextcolor_ = 0;
412 
413   int c = 0;
414   while (c < 256) {
415     int next = splits_.FindNextSetBit(c);
416     uint8_t b = static_cast<uint8_t>(Recolor(colors_[next]));
417     while (c <= next) {
418       bytemap[c] = b;
419       c++;
420     }
421   }
422 
423   *bytemap_range = nextcolor_;
424 }
425 
Recolor(int oldcolor)426 int ByteMapBuilder::Recolor(int oldcolor) {
427   // Yes, this is a linear search. There can be at most 256
428   // colors and there will typically be far fewer than that.
429   // Also, we need to consider keys *and* values in order to
430   // avoid recoloring a given range more than once per batch.
431   std::vector<std::pair<int, int>>::const_iterator it =
432       std::find_if(colormap_.begin(), colormap_.end(),
433                    [=](const std::pair<int, int>& kv) -> bool {
434                      return kv.first == oldcolor || kv.second == oldcolor;
435                    });
436   if (it != colormap_.end())
437     return it->second;
438   int newcolor = nextcolor_;
439   nextcolor_++;
440   colormap_.emplace_back(oldcolor, newcolor);
441   return newcolor;
442 }
443 
ComputeByteMap()444 void Prog::ComputeByteMap() {
445   // Fill in bytemap with byte classes for the program.
446   // Ranges of bytes that are treated indistinguishably
447   // will be mapped to a single byte class.
448   ByteMapBuilder builder;
449 
450   // Don't repeat the work for ^ and $.
451   bool marked_line_boundaries = false;
452   // Don't repeat the work for \b and \B.
453   bool marked_word_boundaries = false;
454 
455   for (int id = 0; id < size(); id++) {
456     Inst* ip = inst(id);
457     if (ip->opcode() == kInstByteRange) {
458       int lo = ip->lo();
459       int hi = ip->hi();
460       builder.Mark(lo, hi);
461       if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
462         int foldlo = lo;
463         int foldhi = hi;
464         if (foldlo < 'a')
465           foldlo = 'a';
466         if (foldhi > 'z')
467           foldhi = 'z';
468         if (foldlo <= foldhi) {
469           foldlo += 'A' - 'a';
470           foldhi += 'A' - 'a';
471           builder.Mark(foldlo, foldhi);
472         }
473       }
474       // If this Inst is not the last Inst in its list AND the next Inst is
475       // also a ByteRange AND the Insts have the same out, defer the merge.
476       if (!ip->last() &&
477           inst(id+1)->opcode() == kInstByteRange &&
478           ip->out() == inst(id+1)->out())
479         continue;
480       builder.Merge();
481     } else if (ip->opcode() == kInstEmptyWidth) {
482       if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
483           !marked_line_boundaries) {
484         builder.Mark('\n', '\n');
485         builder.Merge();
486         marked_line_boundaries = true;
487       }
488       if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) &&
489           !marked_word_boundaries) {
490         // We require two batches here: the first for ranges that are word
491         // characters, the second for ranges that are not word characters.
492         for (bool isword : {true, false}) {
493           int j;
494           for (int i = 0; i < 256; i = j) {
495             for (j = i + 1; j < 256 &&
496                             Prog::IsWordChar(static_cast<uint8_t>(i)) ==
497                                 Prog::IsWordChar(static_cast<uint8_t>(j));
498                  j++)
499               ;
500             if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword)
501               builder.Mark(i, j - 1);
502           }
503           builder.Merge();
504         }
505         marked_word_boundaries = true;
506       }
507     }
508   }
509 
510   builder.Build(bytemap_, &bytemap_range_);
511 
512   if (0) {  // For debugging, use trivial bytemap.
513     LOG(ERROR) << "Using trivial bytemap.";
514     for (int i = 0; i < 256; i++)
515       bytemap_[i] = static_cast<uint8_t>(i);
516     bytemap_range_ = 256;
517   }
518 }
519 
520 // Prog::Flatten() implements a graph rewriting algorithm.
521 //
522 // The overall process is similar to epsilon removal, but retains some epsilon
523 // transitions: those from Capture and EmptyWidth instructions; and those from
524 // nullable subexpressions. (The latter avoids quadratic blowup in transitions
525 // in the worst case.) It might be best thought of as Alt instruction elision.
526 //
527 // In conceptual terms, it divides the Prog into "trees" of instructions, then
528 // traverses the "trees" in order to produce "lists" of instructions. A "tree"
529 // is one or more instructions that grow from one "root" instruction to one or
530 // more "leaf" instructions; if a "tree" has exactly one instruction, then the
531 // "root" is also the "leaf". In most cases, a "root" is the successor of some
532 // "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction)
533 // and is considered a "successor root". A "leaf" can be a ByteRange, Capture,
534 // EmptyWidth or Match instruction. However, this is insufficient for handling
535 // nested nullable subexpressions correctly, so in some cases, a "root" is the
536 // dominator of the instructions reachable from some "successor root" (i.e. it
537 // has an unreachable predecessor) and is considered a "dominator root". Since
538 // only Alt instructions can be "dominator roots" (other instructions would be
539 // "leaves"), only Alt instructions are required to be marked as predecessors.
540 //
541 // Dividing the Prog into "trees" comprises two passes: marking the "successor
542 // roots" and the predecessors; and marking the "dominator roots". Sorting the
543 // "successor roots" by their bytecode offsets enables iteration in order from
544 // greatest to least during the second pass; by working backwards in this case
545 // and flooding the graph no further than "leaves" and already marked "roots",
546 // it becomes possible to mark "dominator roots" without doing excessive work.
547 //
548 // Traversing the "trees" is just iterating over the "roots" in order of their
549 // marking and flooding the graph no further than "leaves" and "roots". When a
550 // "leaf" is reached, the instruction is copied with its successor remapped to
551 // its "root" number. When a "root" is reached, a Nop instruction is generated
552 // with its successor remapped similarly. As each "list" is produced, its last
553 // instruction is marked as such. After all of the "lists" have been produced,
554 // a pass over their instructions remaps their successors to bytecode offsets.
Flatten()555 void Prog::Flatten() {
556   if (did_flatten_)
557     return;
558   did_flatten_ = true;
559 
560   // Scratch structures. It's important that these are reused by functions
561   // that we call in loops because they would thrash the heap otherwise.
562   SparseSet reachable(size());
563   std::vector<int> stk;
564   stk.reserve(size());
565 
566   // First pass: Marks "successor roots" and predecessors.
567   // Builds the mapping from inst-ids to root-ids.
568   SparseArray<int> rootmap(size());
569   SparseArray<int> predmap(size());
570   std::vector<std::vector<int>> predvec;
571   MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk);
572 
573   // Second pass: Marks "dominator roots".
574   SparseArray<int> sorted(rootmap);
575   std::sort(sorted.begin(), sorted.end(), sorted.less);
576   for (SparseArray<int>::const_iterator i = sorted.end() - 1;
577        i != sorted.begin();
578        --i) {
579     if (i->index() != start_unanchored() && i->index() != start())
580       MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk);
581   }
582 
583   // Third pass: Emits "lists". Remaps outs to root-ids.
584   // Builds the mapping from root-ids to flat-ids.
585   std::vector<int> flatmap(rootmap.size());
586   std::vector<Inst> flat;
587   flat.reserve(size());
588   for (SparseArray<int>::const_iterator i = rootmap.begin();
589        i != rootmap.end();
590        ++i) {
591     flatmap[i->value()] = static_cast<int>(flat.size());
592     EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
593     flat.back().set_last();
594     // We have the bounds of the "list", so this is the
595     // most convenient point at which to compute hints.
596     ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
597   }
598 
599   list_count_ = static_cast<int>(flatmap.size());
600   for (int i = 0; i < kNumInst; i++)
601     inst_count_[i] = 0;
602 
603   // Fourth pass: Remaps outs to flat-ids.
604   // Counts instructions by opcode.
605   for (int id = 0; id < static_cast<int>(flat.size()); id++) {
606     Inst* ip = &flat[id];
607     if (ip->opcode() != kInstAltMatch)  // handled in EmitList()
608       ip->set_out(flatmap[ip->out()]);
609     inst_count_[ip->opcode()]++;
610   }
611 
612   int total = 0;
613   for (int i = 0; i < kNumInst; i++)
614     total += inst_count_[i];
615   DCHECK_EQ(total, static_cast<int>(flat.size()));
616 
617   // Remap start_unanchored and start.
618   if (start_unanchored() == 0) {
619     DCHECK_EQ(start(), 0);
620   } else if (start_unanchored() == start()) {
621     set_start_unanchored(flatmap[1]);
622     set_start(flatmap[1]);
623   } else {
624     set_start_unanchored(flatmap[1]);
625     set_start(flatmap[2]);
626   }
627 
628   // Finally, replace the old instructions with the new instructions.
629   size_ = static_cast<int>(flat.size());
630   inst_ = PODArray<Inst>(size_);
631   memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
632 
633   // Populate the list heads for BitState.
634   // 512 instructions limits the memory footprint to 1KiB.
635   if (size_ <= 512) {
636     list_heads_ = PODArray<uint16_t>(size_);
637     // 0xFF makes it more obvious if we try to look up a non-head.
638     memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]);
639     for (int i = 0; i < list_count_; ++i)
640       list_heads_[flatmap[i]] = i;
641   }
642 }
643 
MarkSuccessors(SparseArray<int> * rootmap,SparseArray<int> * predmap,std::vector<std::vector<int>> * predvec,SparseSet * reachable,std::vector<int> * stk)644 void Prog::MarkSuccessors(SparseArray<int>* rootmap,
645                           SparseArray<int>* predmap,
646                           std::vector<std::vector<int>>* predvec,
647                           SparseSet* reachable, std::vector<int>* stk) {
648   // Mark the kInstFail instruction.
649   rootmap->set_new(0, rootmap->size());
650 
651   // Mark the start_unanchored and start instructions.
652   if (!rootmap->has_index(start_unanchored()))
653     rootmap->set_new(start_unanchored(), rootmap->size());
654   if (!rootmap->has_index(start()))
655     rootmap->set_new(start(), rootmap->size());
656 
657   reachable->clear();
658   stk->clear();
659   stk->push_back(start_unanchored());
660   while (!stk->empty()) {
661     int id = stk->back();
662     stk->pop_back();
663   Loop:
664     if (reachable->contains(id))
665       continue;
666     reachable->insert_new(id);
667 
668     Inst* ip = inst(id);
669     switch (ip->opcode()) {
670       default:
671         LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
672         break;
673 
674       case kInstAltMatch:
675       case kInstAlt:
676         // Mark this instruction as a predecessor of each out.
677         for (int out : {ip->out(), ip->out1()}) {
678           if (!predmap->has_index(out)) {
679             predmap->set_new(out, static_cast<int>(predvec->size()));
680             predvec->emplace_back();
681           }
682           (*predvec)[predmap->get_existing(out)].emplace_back(id);
683         }
684         stk->push_back(ip->out1());
685         id = ip->out();
686         goto Loop;
687 
688       case kInstByteRange:
689       case kInstCapture:
690       case kInstEmptyWidth:
691         // Mark the out of this instruction as a "root".
692         if (!rootmap->has_index(ip->out()))
693           rootmap->set_new(ip->out(), rootmap->size());
694         id = ip->out();
695         goto Loop;
696 
697       case kInstNop:
698         id = ip->out();
699         goto Loop;
700 
701       case kInstMatch:
702       case kInstFail:
703         break;
704     }
705   }
706 }
707 
MarkDominator(int root,SparseArray<int> * rootmap,SparseArray<int> * predmap,std::vector<std::vector<int>> * predvec,SparseSet * reachable,std::vector<int> * stk)708 void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
709                          SparseArray<int>* predmap,
710                          std::vector<std::vector<int>>* predvec,
711                          SparseSet* reachable, std::vector<int>* stk) {
712   reachable->clear();
713   stk->clear();
714   stk->push_back(root);
715   while (!stk->empty()) {
716     int id = stk->back();
717     stk->pop_back();
718   Loop:
719     if (reachable->contains(id))
720       continue;
721     reachable->insert_new(id);
722 
723     if (id != root && rootmap->has_index(id)) {
724       // We reached another "tree" via epsilon transition.
725       continue;
726     }
727 
728     Inst* ip = inst(id);
729     switch (ip->opcode()) {
730       default:
731         LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
732         break;
733 
734       case kInstAltMatch:
735       case kInstAlt:
736         stk->push_back(ip->out1());
737         id = ip->out();
738         goto Loop;
739 
740       case kInstByteRange:
741       case kInstCapture:
742       case kInstEmptyWidth:
743         break;
744 
745       case kInstNop:
746         id = ip->out();
747         goto Loop;
748 
749       case kInstMatch:
750       case kInstFail:
751         break;
752     }
753   }
754 
755   for (SparseSet::const_iterator i = reachable->begin();
756        i != reachable->end();
757        ++i) {
758     int id = *i;
759     if (predmap->has_index(id)) {
760       for (int pred : (*predvec)[predmap->get_existing(id)]) {
761         if (!reachable->contains(pred)) {
762           // id has a predecessor that cannot be reached from root!
763           // Therefore, id must be a "root" too - mark it as such.
764           if (!rootmap->has_index(id))
765             rootmap->set_new(id, rootmap->size());
766         }
767       }
768     }
769   }
770 }
771 
EmitList(int root,SparseArray<int> * rootmap,std::vector<Inst> * flat,SparseSet * reachable,std::vector<int> * stk)772 void Prog::EmitList(int root, SparseArray<int>* rootmap,
773                     std::vector<Inst>* flat,
774                     SparseSet* reachable, std::vector<int>* stk) {
775   reachable->clear();
776   stk->clear();
777   stk->push_back(root);
778   while (!stk->empty()) {
779     int id = stk->back();
780     stk->pop_back();
781   Loop:
782     if (reachable->contains(id))
783       continue;
784     reachable->insert_new(id);
785 
786     if (id != root && rootmap->has_index(id)) {
787       // We reached another "tree" via epsilon transition. Emit a kInstNop
788       // instruction so that the Prog does not become quadratically larger.
789       flat->emplace_back();
790       flat->back().set_opcode(kInstNop);
791       flat->back().set_out(rootmap->get_existing(id));
792       continue;
793     }
794 
795     Inst* ip = inst(id);
796     switch (ip->opcode()) {
797       default:
798         LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
799         break;
800 
801       case kInstAltMatch:
802         flat->emplace_back();
803         flat->back().set_opcode(kInstAltMatch);
804         flat->back().set_out(static_cast<int>(flat->size()));
805         flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
806         FALLTHROUGH_INTENDED;
807 
808       case kInstAlt:
809         stk->push_back(ip->out1());
810         id = ip->out();
811         goto Loop;
812 
813       case kInstByteRange:
814       case kInstCapture:
815       case kInstEmptyWidth:
816         flat->emplace_back();
817         memmove(&flat->back(), ip, sizeof *ip);
818         flat->back().set_out(rootmap->get_existing(ip->out()));
819         break;
820 
821       case kInstNop:
822         id = ip->out();
823         goto Loop;
824 
825       case kInstMatch:
826       case kInstFail:
827         flat->emplace_back();
828         memmove(&flat->back(), ip, sizeof *ip);
829         break;
830     }
831   }
832 }
833 
834 // For each ByteRange instruction in [begin, end), computes a hint to execution
835 // engines: the delta to the next instruction (in flat) worth exploring iff the
836 // current instruction matched.
837 //
838 // Implements a coloring algorithm related to ByteMapBuilder, but in this case,
839 // colors are instructions and recoloring ranges precisely identifies conflicts
840 // between instructions. Iterating backwards over [begin, end) is guaranteed to
841 // identify the nearest conflict (if any) with only linear complexity.
ComputeHints(std::vector<Inst> * flat,int begin,int end)842 void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
843   Bitmap256 splits;
844   int colors[256];
845 
846   bool dirty = false;
847   for (int id = end; id >= begin; --id) {
848     if (id == end ||
849         (*flat)[id].opcode() != kInstByteRange) {
850       if (dirty) {
851         dirty = false;
852         splits.Clear();
853       }
854       splits.Set(255);
855       colors[255] = id;
856       // At this point, the [0-255] range is colored with id.
857       // Thus, hints cannot point beyond id; and if id == end,
858       // hints that would have pointed to id will be 0 instead.
859       continue;
860     }
861     dirty = true;
862 
863     // We recolor the [lo-hi] range with id. Note that first ratchets backwards
864     // from end to the nearest conflict (if any) during recoloring.
865     int first = end;
866     auto Recolor = [&](int lo, int hi) {
867       // Like ByteMapBuilder, we split at lo-1 and at hi.
868       --lo;
869 
870       if (0 <= lo && !splits.Test(lo)) {
871         splits.Set(lo);
872         int next = splits.FindNextSetBit(lo+1);
873         colors[lo] = colors[next];
874       }
875       if (!splits.Test(hi)) {
876         splits.Set(hi);
877         int next = splits.FindNextSetBit(hi+1);
878         colors[hi] = colors[next];
879       }
880 
881       int c = lo+1;
882       while (c < 256) {
883         int next = splits.FindNextSetBit(c);
884         // Ratchet backwards...
885         first = std::min(first, colors[next]);
886         // Recolor with id - because it's the new nearest conflict!
887         colors[next] = id;
888         if (next == hi)
889           break;
890         c = next+1;
891       }
892     };
893 
894     Inst* ip = &(*flat)[id];
895     int lo = ip->lo();
896     int hi = ip->hi();
897     Recolor(lo, hi);
898     if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
899       int foldlo = lo;
900       int foldhi = hi;
901       if (foldlo < 'a')
902         foldlo = 'a';
903       if (foldhi > 'z')
904         foldhi = 'z';
905       if (foldlo <= foldhi) {
906         foldlo += 'A' - 'a';
907         foldhi += 'A' - 'a';
908         Recolor(foldlo, foldhi);
909       }
910     }
911 
912     if (first != end) {
913       uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767));
914       ip->hint_foldcase_ |= hint<<1;
915     }
916   }
917 }
918 
919 #if defined(__AVX2__)
920 // Finds the least significant non-zero bit in n.
FindLSBSet(uint32_t n)921 static int FindLSBSet(uint32_t n) {
922   DCHECK_NE(n, 0);
923 #if defined(__GNUC__)
924   return __builtin_ctz(n);
925 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
926   unsigned long c;
927   _BitScanForward(&c, n);
928   return static_cast<int>(c);
929 #else
930   int c = 31;
931   for (int shift = 1 << 4; shift != 0; shift >>= 1) {
932     uint32_t word = n << shift;
933     if (word != 0) {
934       n = word;
935       c -= shift;
936     }
937   }
938   return c;
939 #endif
940 }
941 #endif
942 
PrefixAccel_FrontAndBack(const void * data,size_t size)943 const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
944   DCHECK_GE(prefix_size_, 2);
945   if (size < prefix_size_)
946     return NULL;
947   // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
948   // This also means that probing for prefix_back_ doesn't go out of bounds.
949   size -= prefix_size_-1;
950 
951 #if defined(__AVX2__)
952   // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
953   if (size >= sizeof(__m256i)) {
954     const __m256i* fp = reinterpret_cast<const __m256i*>(
955         reinterpret_cast<const char*>(data));
956     const __m256i* bp = reinterpret_cast<const __m256i*>(
957         reinterpret_cast<const char*>(data) + prefix_size_-1);
958     const __m256i* endfp = fp + size/sizeof(__m256i);
959     const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
960     const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
961     while (fp != endfp) {
962       const __m256i f_loadu = _mm256_loadu_si256(fp++);
963       const __m256i b_loadu = _mm256_loadu_si256(bp++);
964       const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
965       const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
966       const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
967       if (fb_testz == 0) {  // ZF: 1 means zero, 0 means non-zero.
968         const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
969         const int fb_movemask = _mm256_movemask_epi8(fb_and);
970         const int fb_ctz = FindLSBSet(fb_movemask);
971         return reinterpret_cast<const char*>(fp-1) + fb_ctz;
972       }
973     }
974     data = fp;
975     size = size%sizeof(__m256i);
976   }
977 #endif
978 
979   const char* p0 = reinterpret_cast<const char*>(data);
980   for (const char* p = p0;; p++) {
981     DCHECK_GE(size, static_cast<size_t>(p-p0));
982     p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
983     if (p == NULL || p[prefix_size_-1] == prefix_back_)
984       return p;
985   }
986 }
987 
988 }  // namespace re2
989