1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_REGEXP_REGEXP_AST_H_
6 #define V8_REGEXP_REGEXP_AST_H_
7 
8 #include "irregexp/RegExpShim.h"
9 
10 namespace v8 {
11 namespace internal {
12 
13 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
14   VISIT(Disjunction)                      \
15   VISIT(Alternative)                      \
16   VISIT(Assertion)                        \
17   VISIT(CharacterClass)                   \
18   VISIT(Atom)                             \
19   VISIT(Quantifier)                       \
20   VISIT(Capture)                          \
21   VISIT(Group)                            \
22   VISIT(Lookaround)                       \
23   VISIT(BackReference)                    \
24   VISIT(Empty)                            \
25   VISIT(Text)
26 
27 #define FORWARD_DECLARE(Name) class RegExp##Name;
28 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
29 #undef FORWARD_DECLARE
30 
31 class RegExpCompiler;
32 class RegExpNode;
33 class RegExpTree;
34 
35 class RegExpVisitor {
36  public:
37   virtual ~RegExpVisitor() = default;
38 #define MAKE_CASE(Name) \
39   virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
40   FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
41 #undef MAKE_CASE
42 };
43 
44 
45 // A simple closed interval.
46 class Interval {
47  public:
Interval()48   Interval() : from_(kNone), to_(kNone - 1) {}  // '- 1' for branchless size().
Interval(int from,int to)49   Interval(int from, int to) : from_(from), to_(to) {}
Union(Interval that)50   Interval Union(Interval that) {
51     if (that.from_ == kNone)
52       return *this;
53     else if (from_ == kNone)
54       return that;
55     else
56       return Interval(std::min(from_, that.from_), std::max(to_, that.to_));
57   }
58 
Contains(int value)59   bool Contains(int value) { return (from_ <= value) && (value <= to_); }
is_empty()60   bool is_empty() { return from_ == kNone; }
from()61   int from() const { return from_; }
to()62   int to() const { return to_; }
size()63   int size() const { return to_ - from_ + 1; }
64 
Empty()65   static Interval Empty() { return Interval(); }
66 
67   static constexpr int kNone = -1;
68 
69  private:
70   int from_;
71   int to_;
72 };
73 
74 // Represents code points (with values up to 0x10FFFF) in the range from from_
75 // to to_, both ends are inclusive.
76 class CharacterRange {
77  public:
CharacterRange()78   CharacterRange() : from_(0), to_(0) {}
79   // For compatibility with the CHECK_OK macro
CharacterRange(void * null)80   CharacterRange(void* null) { DCHECK_NULL(null); }  // NOLINT
81   V8_EXPORT_PRIVATE static void AddClassEscape(char type,
82                                                ZoneList<CharacterRange>* ranges,
83                                                Zone* zone);
84   // Add class escapes. Add case equivalent closure for \w and \W if necessary.
85   V8_EXPORT_PRIVATE static void AddClassEscape(
86       char type, ZoneList<CharacterRange>* ranges,
87       bool add_unicode_case_equivalents, Zone* zone);
88   static Vector<const int> GetWordBounds();
Singleton(uc32 value)89   static inline CharacterRange Singleton(uc32 value) {
90     return CharacterRange(value, value);
91   }
Range(uc32 from,uc32 to)92   static inline CharacterRange Range(uc32 from, uc32 to) {
93     DCHECK(0 <= from && to <= String::kMaxCodePoint);
94     DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to));
95     return CharacterRange(from, to);
96   }
Everything()97   static inline CharacterRange Everything() {
98     return CharacterRange(0, String::kMaxCodePoint);
99   }
List(Zone * zone,CharacterRange range)100   static inline ZoneList<CharacterRange>* List(Zone* zone,
101                                                CharacterRange range) {
102     ZoneList<CharacterRange>* list =
103         zone->New<ZoneList<CharacterRange>>(1, zone);
104     list->Add(range, zone);
105     return list;
106   }
Contains(uc32 i)107   bool Contains(uc32 i) { return from_ <= i && i <= to_; }
from()108   uc32 from() const { return from_; }
set_from(uc32 value)109   void set_from(uc32 value) { from_ = value; }
to()110   uc32 to() const { return to_; }
set_to(uc32 value)111   void set_to(uc32 value) { to_ = value; }
is_valid()112   bool is_valid() { return from_ <= to_; }
IsEverything(uc32 max)113   bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; }
IsSingleton()114   bool IsSingleton() { return (from_ == to_); }
115   V8_EXPORT_PRIVATE static void AddCaseEquivalents(
116       Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
117       bool is_one_byte);
118   // Whether a range list is in canonical form: Ranges ordered by from value,
119   // and ranges non-overlapping and non-adjacent.
120   V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges);
121   // Convert range list to canonical form. The characters covered by the ranges
122   // will still be the same, but no character is in more than one range, and
123   // adjacent ranges are merged. The resulting list may be shorter than the
124   // original, but cannot be longer.
125   static void Canonicalize(ZoneList<CharacterRange>* ranges);
126   // Negate the contents of a character range in canonical form.
127   static void Negate(ZoneList<CharacterRange>* src,
128                      ZoneList<CharacterRange>* dst, Zone* zone);
129   static const int kStartMarker = (1 << 24);
130   static const int kPayloadMask = (1 << 24) - 1;
131 
132  private:
CharacterRange(uc32 from,uc32 to)133   CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {}
134 
135   uc32 from_;
136   uc32 to_;
137 };
138 
139 class CharacterSet final {
140  public:
CharacterSet(uc16 standard_set_type)141   explicit CharacterSet(uc16 standard_set_type)
142       : ranges_(nullptr), standard_set_type_(standard_set_type) {}
CharacterSet(ZoneList<CharacterRange> * ranges)143   explicit CharacterSet(ZoneList<CharacterRange>* ranges)
144       : ranges_(ranges), standard_set_type_(0) {}
145   ZoneList<CharacterRange>* ranges(Zone* zone);
standard_set_type()146   uc16 standard_set_type() const { return standard_set_type_; }
set_standard_set_type(uc16 special_set_type)147   void set_standard_set_type(uc16 special_set_type) {
148     standard_set_type_ = special_set_type;
149   }
is_standard()150   bool is_standard() { return standard_set_type_ != 0; }
151   V8_EXPORT_PRIVATE void Canonicalize();
152 
153  private:
154   ZoneList<CharacterRange>* ranges_;
155   // If non-zero, the value represents a standard set (e.g., all whitespace
156   // characters) without having to expand the ranges.
157   uc16 standard_set_type_;
158 };
159 
160 class TextElement final {
161  public:
162   enum TextType { ATOM, CHAR_CLASS };
163 
164   static TextElement Atom(RegExpAtom* atom);
165   static TextElement CharClass(RegExpCharacterClass* char_class);
166 
cp_offset()167   int cp_offset() const { return cp_offset_; }
set_cp_offset(int cp_offset)168   void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
169   int length() const;
170 
text_type()171   TextType text_type() const { return text_type_; }
172 
tree()173   RegExpTree* tree() const { return tree_; }
174 
atom()175   RegExpAtom* atom() const {
176     DCHECK(text_type() == ATOM);
177     return reinterpret_cast<RegExpAtom*>(tree());
178   }
179 
char_class()180   RegExpCharacterClass* char_class() const {
181     DCHECK(text_type() == CHAR_CLASS);
182     return reinterpret_cast<RegExpCharacterClass*>(tree());
183   }
184 
185  private:
TextElement(TextType text_type,RegExpTree * tree)186   TextElement(TextType text_type, RegExpTree* tree)
187       : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
188 
189   int cp_offset_;
190   TextType text_type_;
191   RegExpTree* tree_;
192 };
193 
194 
195 class RegExpTree : public ZoneObject {
196  public:
197   static const int kInfinity = kMaxInt;
198   virtual ~RegExpTree() = default;
199   virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
200   virtual RegExpNode* ToNode(RegExpCompiler* compiler,
201                              RegExpNode* on_success) = 0;
IsTextElement()202   virtual bool IsTextElement() { return false; }
IsAnchoredAtStart()203   virtual bool IsAnchoredAtStart() { return false; }
IsAnchoredAtEnd()204   virtual bool IsAnchoredAtEnd() { return false; }
205   virtual int min_match() = 0;
206   virtual int max_match() = 0;
207   // Returns the interval of registers used for captures within this
208   // expression.
CaptureRegisters()209   virtual Interval CaptureRegisters() { return Interval::Empty(); }
210   virtual void AppendToText(RegExpText* text, Zone* zone);
211   V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, Zone* zone);
212 #define MAKE_ASTYPE(Name)           \
213   virtual RegExp##Name* As##Name(); \
214   virtual bool Is##Name();
215   FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
216 #undef MAKE_ASTYPE
217 };
218 
219 
220 class RegExpDisjunction final : public RegExpTree {
221  public:
222   explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
223   void* Accept(RegExpVisitor* visitor, void* data) override;
224   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
225   RegExpDisjunction* AsDisjunction() override;
226   Interval CaptureRegisters() override;
227   bool IsDisjunction() override;
228   bool IsAnchoredAtStart() override;
229   bool IsAnchoredAtEnd() override;
min_match()230   int min_match() override { return min_match_; }
max_match()231   int max_match() override { return max_match_; }
alternatives()232   ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
233 
234  private:
235   bool SortConsecutiveAtoms(RegExpCompiler* compiler);
236   void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
237   void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
238   ZoneList<RegExpTree*>* alternatives_;
239   int min_match_;
240   int max_match_;
241 };
242 
243 
244 class RegExpAlternative final : public RegExpTree {
245  public:
246   explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
247   void* Accept(RegExpVisitor* visitor, void* data) override;
248   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
249   RegExpAlternative* AsAlternative() override;
250   Interval CaptureRegisters() override;
251   bool IsAlternative() override;
252   bool IsAnchoredAtStart() override;
253   bool IsAnchoredAtEnd() override;
min_match()254   int min_match() override { return min_match_; }
max_match()255   int max_match() override { return max_match_; }
nodes()256   ZoneList<RegExpTree*>* nodes() { return nodes_; }
257 
258  private:
259   ZoneList<RegExpTree*>* nodes_;
260   int min_match_;
261   int max_match_;
262 };
263 
264 
265 class RegExpAssertion final : public RegExpTree {
266  public:
267   enum AssertionType {
268     START_OF_LINE = 0,
269     START_OF_INPUT = 1,
270     END_OF_LINE = 2,
271     END_OF_INPUT = 3,
272     BOUNDARY = 4,
273     NON_BOUNDARY = 5,
274     LAST_TYPE = NON_BOUNDARY,
275   };
RegExpAssertion(AssertionType type,JSRegExp::Flags flags)276   RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
277       : assertion_type_(type), flags_(flags) {}
278   void* Accept(RegExpVisitor* visitor, void* data) override;
279   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
280   RegExpAssertion* AsAssertion() override;
281   bool IsAssertion() override;
282   bool IsAnchoredAtStart() override;
283   bool IsAnchoredAtEnd() override;
min_match()284   int min_match() override { return 0; }
max_match()285   int max_match() override { return 0; }
assertion_type()286   AssertionType assertion_type() const { return assertion_type_; }
flags()287   JSRegExp::Flags flags() const { return flags_; }
288 
289  private:
290   const AssertionType assertion_type_;
291   const JSRegExp::Flags flags_;
292 };
293 
294 
295 class RegExpCharacterClass final : public RegExpTree {
296  public:
297   // NEGATED: The character class is negated and should match everything but
298   //     the specified ranges.
299   // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
300   //     surrogate and should not be unicode-desugared (crbug.com/641091).
301   enum Flag {
302     NEGATED = 1 << 0,
303     CONTAINS_SPLIT_SURROGATE = 1 << 1,
304   };
305   using CharacterClassFlags = base::Flags<Flag>;
306 
307   RegExpCharacterClass(
308       Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
309       CharacterClassFlags character_class_flags = CharacterClassFlags())
set_(ranges)310       : set_(ranges),
311         flags_(flags),
312         character_class_flags_(character_class_flags) {
313     // Convert the empty set of ranges to the negated Everything() range.
314     if (ranges->is_empty()) {
315       ranges->Add(CharacterRange::Everything(), zone);
316       character_class_flags_ ^= NEGATED;
317     }
318   }
RegExpCharacterClass(uc16 type,JSRegExp::Flags flags)319   RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
320       : set_(type),
321         flags_(flags),
322         character_class_flags_(CharacterClassFlags()) {}
323   void* Accept(RegExpVisitor* visitor, void* data) override;
324   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
325   RegExpCharacterClass* AsCharacterClass() override;
326   bool IsCharacterClass() override;
IsTextElement()327   bool IsTextElement() override { return true; }
min_match()328   int min_match() override { return 1; }
329   // The character class may match two code units for unicode regexps.
330   // TODO(yangguo): we should split this class for usage in TextElement, and
331   //                make max_match() dependent on the character class content.
max_match()332   int max_match() override { return 2; }
333   void AppendToText(RegExpText* text, Zone* zone) override;
character_set()334   CharacterSet character_set() { return set_; }
335   // TODO(lrn): Remove need for complex version if is_standard that
336   // recognizes a mangled standard set and just do { return set_.is_special(); }
337   bool is_standard(Zone* zone);
338   // Returns a value representing the standard character set if is_standard()
339   // returns true.
340   // Currently used values are:
341   // s : unicode whitespace
342   // S : unicode non-whitespace
343   // w : ASCII word character (digit, letter, underscore)
344   // W : non-ASCII word character
345   // d : ASCII digit
346   // D : non-ASCII digit
347   // . : non-newline
348   // * : All characters, for advancing unanchored regexp
standard_type()349   uc16 standard_type() const { return set_.standard_set_type(); }
ranges(Zone * zone)350   ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
is_negated()351   bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
flags()352   JSRegExp::Flags flags() const { return flags_; }
contains_split_surrogate()353   bool contains_split_surrogate() const {
354     return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
355   }
356 
357  private:
358   CharacterSet set_;
359   const JSRegExp::Flags flags_;
360   CharacterClassFlags character_class_flags_;
361 };
362 
363 
364 class RegExpAtom final : public RegExpTree {
365  public:
RegExpAtom(Vector<const uc16> data,JSRegExp::Flags flags)366   explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags)
367       : data_(data), flags_(flags) {}
368   void* Accept(RegExpVisitor* visitor, void* data) override;
369   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
370   RegExpAtom* AsAtom() override;
371   bool IsAtom() override;
IsTextElement()372   bool IsTextElement() override { return true; }
min_match()373   int min_match() override { return data_.length(); }
max_match()374   int max_match() override { return data_.length(); }
375   void AppendToText(RegExpText* text, Zone* zone) override;
data()376   Vector<const uc16> data() { return data_; }
length()377   int length() { return data_.length(); }
flags()378   JSRegExp::Flags flags() const { return flags_; }
ignore_case()379   bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
380 
381  private:
382   Vector<const uc16> data_;
383   const JSRegExp::Flags flags_;
384 };
385 
386 
387 class RegExpText final : public RegExpTree {
388  public:
RegExpText(Zone * zone)389   explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {}
390   void* Accept(RegExpVisitor* visitor, void* data) override;
391   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
392   RegExpText* AsText() override;
393   bool IsText() override;
IsTextElement()394   bool IsTextElement() override { return true; }
min_match()395   int min_match() override { return length_; }
max_match()396   int max_match() override { return length_; }
397   void AppendToText(RegExpText* text, Zone* zone) override;
AddElement(TextElement elm,Zone * zone)398   void AddElement(TextElement elm, Zone* zone) {
399     elements_.Add(elm, zone);
400     length_ += elm.length();
401   }
elements()402   ZoneList<TextElement>* elements() { return &elements_; }
403 
404  private:
405   ZoneList<TextElement> elements_;
406   int length_;
407 };
408 
409 
410 class RegExpQuantifier final : public RegExpTree {
411  public:
412   enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
RegExpQuantifier(int min,int max,QuantifierType type,RegExpTree * body)413   RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
414       : body_(body),
415         min_(min),
416         max_(max),
417         quantifier_type_(type) {
418     if (min > 0 && body->min_match() > kInfinity / min) {
419       min_match_ = kInfinity;
420     } else {
421       min_match_ = min * body->min_match();
422     }
423     if (max > 0 && body->max_match() > kInfinity / max) {
424       max_match_ = kInfinity;
425     } else {
426       max_match_ = max * body->max_match();
427     }
428   }
429   void* Accept(RegExpVisitor* visitor, void* data) override;
430   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
431   static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body,
432                             RegExpCompiler* compiler, RegExpNode* on_success,
433                             bool not_at_start = false);
434   RegExpQuantifier* AsQuantifier() override;
435   Interval CaptureRegisters() override;
436   bool IsQuantifier() override;
min_match()437   int min_match() override { return min_match_; }
max_match()438   int max_match() override { return max_match_; }
min()439   int min() const { return min_; }
max()440   int max() const { return max_; }
quantifier_type()441   QuantifierType quantifier_type() const { return quantifier_type_; }
is_possessive()442   bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
is_non_greedy()443   bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
is_greedy()444   bool is_greedy() const { return quantifier_type_ == GREEDY; }
body()445   RegExpTree* body() { return body_; }
446 
447  private:
448   RegExpTree* body_;
449   int min_;
450   int max_;
451   int min_match_;
452   int max_match_;
453   QuantifierType quantifier_type_;
454 };
455 
456 
457 class RegExpCapture final : public RegExpTree {
458  public:
RegExpCapture(int index)459   explicit RegExpCapture(int index)
460       : body_(nullptr),
461         index_(index),
462         min_match_(0),
463         max_match_(0),
464         name_(nullptr) {}
465   void* Accept(RegExpVisitor* visitor, void* data) override;
466   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
467   static RegExpNode* ToNode(RegExpTree* body, int index,
468                             RegExpCompiler* compiler, RegExpNode* on_success);
469   RegExpCapture* AsCapture() override;
470   bool IsAnchoredAtStart() override;
471   bool IsAnchoredAtEnd() override;
472   Interval CaptureRegisters() override;
473   bool IsCapture() override;
min_match()474   int min_match() override { return min_match_; }
max_match()475   int max_match() override { return max_match_; }
body()476   RegExpTree* body() { return body_; }
set_body(RegExpTree * body)477   void set_body(RegExpTree* body) {
478     body_ = body;
479     min_match_ = body->min_match();
480     max_match_ = body->max_match();
481   }
index()482   int index() const { return index_; }
name()483   const ZoneVector<uc16>* name() const { return name_; }
set_name(const ZoneVector<uc16> * name)484   void set_name(const ZoneVector<uc16>* name) { name_ = name; }
StartRegister(int index)485   static int StartRegister(int index) { return index * 2; }
EndRegister(int index)486   static int EndRegister(int index) { return index * 2 + 1; }
487 
488  private:
489   RegExpTree* body_;
490   int index_;
491   int min_match_;
492   int max_match_;
493   const ZoneVector<uc16>* name_;
494 };
495 
496 class RegExpGroup final : public RegExpTree {
497  public:
RegExpGroup(RegExpTree * body)498   explicit RegExpGroup(RegExpTree* body)
499       : body_(body),
500         min_match_(body->min_match()),
501         max_match_(body->max_match()) {}
502   void* Accept(RegExpVisitor* visitor, void* data) override;
ToNode(RegExpCompiler * compiler,RegExpNode * on_success)503   RegExpNode* ToNode(RegExpCompiler* compiler,
504                      RegExpNode* on_success) override {
505     return body_->ToNode(compiler, on_success);
506   }
507   RegExpGroup* AsGroup() override;
IsAnchoredAtStart()508   bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); }
IsAnchoredAtEnd()509   bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); }
510   bool IsGroup() override;
min_match()511   int min_match() override { return min_match_; }
max_match()512   int max_match() override { return max_match_; }
CaptureRegisters()513   Interval CaptureRegisters() override { return body_->CaptureRegisters(); }
body()514   RegExpTree* body() { return body_; }
515 
516  private:
517   RegExpTree* body_;
518   int min_match_;
519   int max_match_;
520 };
521 
522 class RegExpLookaround final : public RegExpTree {
523  public:
524   enum Type { LOOKAHEAD, LOOKBEHIND };
525 
RegExpLookaround(RegExpTree * body,bool is_positive,int capture_count,int capture_from,Type type)526   RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
527                    int capture_from, Type type)
528       : body_(body),
529         is_positive_(is_positive),
530         capture_count_(capture_count),
531         capture_from_(capture_from),
532         type_(type) {}
533 
534   void* Accept(RegExpVisitor* visitor, void* data) override;
535   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
536   RegExpLookaround* AsLookaround() override;
537   Interval CaptureRegisters() override;
538   bool IsLookaround() override;
539   bool IsAnchoredAtStart() override;
min_match()540   int min_match() override { return 0; }
max_match()541   int max_match() override { return 0; }
body()542   RegExpTree* body() { return body_; }
is_positive()543   bool is_positive() { return is_positive_; }
capture_count()544   int capture_count() { return capture_count_; }
capture_from()545   int capture_from() { return capture_from_; }
type()546   Type type() { return type_; }
547 
548   class Builder {
549    public:
550     Builder(bool is_positive, RegExpNode* on_success,
551             int stack_pointer_register, int position_register,
552             int capture_register_count = 0, int capture_register_start = 0);
on_match_success()553     RegExpNode* on_match_success() { return on_match_success_; }
554     RegExpNode* ForMatch(RegExpNode* match);
555 
556    private:
557     bool is_positive_;
558     RegExpNode* on_match_success_;
559     RegExpNode* on_success_;
560     int stack_pointer_register_;
561     int position_register_;
562   };
563 
564  private:
565   RegExpTree* body_;
566   bool is_positive_;
567   int capture_count_;
568   int capture_from_;
569   Type type_;
570 };
571 
572 
573 class RegExpBackReference final : public RegExpTree {
574  public:
RegExpBackReference(JSRegExp::Flags flags)575   explicit RegExpBackReference(JSRegExp::Flags flags)
576       : capture_(nullptr), name_(nullptr), flags_(flags) {}
RegExpBackReference(RegExpCapture * capture,JSRegExp::Flags flags)577   RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags)
578       : capture_(capture), name_(nullptr), flags_(flags) {}
579   void* Accept(RegExpVisitor* visitor, void* data) override;
580   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
581   RegExpBackReference* AsBackReference() override;
582   bool IsBackReference() override;
min_match()583   int min_match() override { return 0; }
584   // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
585   // recursion, we give up. Ignorance is bliss.
max_match()586   int max_match() override { return kInfinity; }
index()587   int index() { return capture_->index(); }
capture()588   RegExpCapture* capture() { return capture_; }
set_capture(RegExpCapture * capture)589   void set_capture(RegExpCapture* capture) { capture_ = capture; }
name()590   const ZoneVector<uc16>* name() const { return name_; }
set_name(const ZoneVector<uc16> * name)591   void set_name(const ZoneVector<uc16>* name) { name_ = name; }
592 
593  private:
594   RegExpCapture* capture_;
595   const ZoneVector<uc16>* name_;
596   const JSRegExp::Flags flags_;
597 };
598 
599 
600 class RegExpEmpty final : public RegExpTree {
601  public:
602   RegExpEmpty() = default;
603   void* Accept(RegExpVisitor* visitor, void* data) override;
604   RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
605   RegExpEmpty* AsEmpty() override;
606   bool IsEmpty() override;
min_match()607   int min_match() override { return 0; }
max_match()608   int max_match() override { return 0; }
609 };
610 
611 }  // namespace internal
612 }  // namespace v8
613 
614 #endif  // V8_REGEXP_REGEXP_AST_H_
615