1 // Copyright 2016 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_AST_H_ 6 #define V8_REGEXP_REGEXP_AST_H_ 7 8 #include "irregexp/RegExpShim.h" 9 10 namespace v8 { 11 namespace internal { 12 13 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ 14 VISIT(Disjunction) \ 15 VISIT(Alternative) \ 16 VISIT(Assertion) \ 17 VISIT(CharacterClass) \ 18 VISIT(Atom) \ 19 VISIT(Quantifier) \ 20 VISIT(Capture) \ 21 VISIT(Group) \ 22 VISIT(Lookaround) \ 23 VISIT(BackReference) \ 24 VISIT(Empty) \ 25 VISIT(Text) 26 27 #define FORWARD_DECLARE(Name) class RegExp##Name; 28 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) 29 #undef FORWARD_DECLARE 30 31 class RegExpCompiler; 32 class RegExpNode; 33 class RegExpTree; 34 35 class RegExpVisitor { 36 public: 37 virtual ~RegExpVisitor() = default; 38 #define MAKE_CASE(Name) \ 39 virtual void* Visit##Name(RegExp##Name*, void* data) = 0; 40 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 41 #undef MAKE_CASE 42 }; 43 44 45 // A simple closed interval. 46 class Interval { 47 public: Interval()48 Interval() : from_(kNone), to_(kNone - 1) {} // '- 1' for branchless size(). Interval(int from,int to)49 Interval(int from, int to) : from_(from), to_(to) {} Union(Interval that)50 Interval Union(Interval that) { 51 if (that.from_ == kNone) 52 return *this; 53 else if (from_ == kNone) 54 return that; 55 else 56 return Interval(std::min(from_, that.from_), std::max(to_, that.to_)); 57 } 58 Contains(int value)59 bool Contains(int value) { return (from_ <= value) && (value <= to_); } is_empty()60 bool is_empty() { return from_ == kNone; } from()61 int from() const { return from_; } to()62 int to() const { return to_; } size()63 int size() const { return to_ - from_ + 1; } 64 Empty()65 static Interval Empty() { return Interval(); } 66 67 static constexpr int kNone = -1; 68 69 private: 70 int from_; 71 int to_; 72 }; 73 74 // Represents code points (with values up to 0x10FFFF) in the range from from_ 75 // to to_, both ends are inclusive. 76 class CharacterRange { 77 public: CharacterRange()78 CharacterRange() : from_(0), to_(0) {} 79 // For compatibility with the CHECK_OK macro CharacterRange(void * null)80 CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT 81 V8_EXPORT_PRIVATE static void AddClassEscape(char type, 82 ZoneList<CharacterRange>* ranges, 83 Zone* zone); 84 // Add class escapes. Add case equivalent closure for \w and \W if necessary. 85 V8_EXPORT_PRIVATE static void AddClassEscape( 86 char type, ZoneList<CharacterRange>* ranges, 87 bool add_unicode_case_equivalents, Zone* zone); 88 static Vector<const int> GetWordBounds(); Singleton(uc32 value)89 static inline CharacterRange Singleton(uc32 value) { 90 return CharacterRange(value, value); 91 } Range(uc32 from,uc32 to)92 static inline CharacterRange Range(uc32 from, uc32 to) { 93 DCHECK(0 <= from && to <= String::kMaxCodePoint); 94 DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to)); 95 return CharacterRange(from, to); 96 } Everything()97 static inline CharacterRange Everything() { 98 return CharacterRange(0, String::kMaxCodePoint); 99 } List(Zone * zone,CharacterRange range)100 static inline ZoneList<CharacterRange>* List(Zone* zone, 101 CharacterRange range) { 102 ZoneList<CharacterRange>* list = 103 zone->New<ZoneList<CharacterRange>>(1, zone); 104 list->Add(range, zone); 105 return list; 106 } Contains(uc32 i)107 bool Contains(uc32 i) { return from_ <= i && i <= to_; } from()108 uc32 from() const { return from_; } set_from(uc32 value)109 void set_from(uc32 value) { from_ = value; } to()110 uc32 to() const { return to_; } set_to(uc32 value)111 void set_to(uc32 value) { to_ = value; } is_valid()112 bool is_valid() { return from_ <= to_; } IsEverything(uc32 max)113 bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; } IsSingleton()114 bool IsSingleton() { return (from_ == to_); } 115 V8_EXPORT_PRIVATE static void AddCaseEquivalents( 116 Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges, 117 bool is_one_byte); 118 // Whether a range list is in canonical form: Ranges ordered by from value, 119 // and ranges non-overlapping and non-adjacent. 120 V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges); 121 // Convert range list to canonical form. The characters covered by the ranges 122 // will still be the same, but no character is in more than one range, and 123 // adjacent ranges are merged. The resulting list may be shorter than the 124 // original, but cannot be longer. 125 static void Canonicalize(ZoneList<CharacterRange>* ranges); 126 // Negate the contents of a character range in canonical form. 127 static void Negate(ZoneList<CharacterRange>* src, 128 ZoneList<CharacterRange>* dst, Zone* zone); 129 static const int kStartMarker = (1 << 24); 130 static const int kPayloadMask = (1 << 24) - 1; 131 132 private: CharacterRange(uc32 from,uc32 to)133 CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {} 134 135 uc32 from_; 136 uc32 to_; 137 }; 138 139 class CharacterSet final { 140 public: CharacterSet(uc16 standard_set_type)141 explicit CharacterSet(uc16 standard_set_type) 142 : ranges_(nullptr), standard_set_type_(standard_set_type) {} CharacterSet(ZoneList<CharacterRange> * ranges)143 explicit CharacterSet(ZoneList<CharacterRange>* ranges) 144 : ranges_(ranges), standard_set_type_(0) {} 145 ZoneList<CharacterRange>* ranges(Zone* zone); standard_set_type()146 uc16 standard_set_type() const { return standard_set_type_; } set_standard_set_type(uc16 special_set_type)147 void set_standard_set_type(uc16 special_set_type) { 148 standard_set_type_ = special_set_type; 149 } is_standard()150 bool is_standard() { return standard_set_type_ != 0; } 151 V8_EXPORT_PRIVATE void Canonicalize(); 152 153 private: 154 ZoneList<CharacterRange>* ranges_; 155 // If non-zero, the value represents a standard set (e.g., all whitespace 156 // characters) without having to expand the ranges. 157 uc16 standard_set_type_; 158 }; 159 160 class TextElement final { 161 public: 162 enum TextType { ATOM, CHAR_CLASS }; 163 164 static TextElement Atom(RegExpAtom* atom); 165 static TextElement CharClass(RegExpCharacterClass* char_class); 166 cp_offset()167 int cp_offset() const { return cp_offset_; } set_cp_offset(int cp_offset)168 void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } 169 int length() const; 170 text_type()171 TextType text_type() const { return text_type_; } 172 tree()173 RegExpTree* tree() const { return tree_; } 174 atom()175 RegExpAtom* atom() const { 176 DCHECK(text_type() == ATOM); 177 return reinterpret_cast<RegExpAtom*>(tree()); 178 } 179 char_class()180 RegExpCharacterClass* char_class() const { 181 DCHECK(text_type() == CHAR_CLASS); 182 return reinterpret_cast<RegExpCharacterClass*>(tree()); 183 } 184 185 private: TextElement(TextType text_type,RegExpTree * tree)186 TextElement(TextType text_type, RegExpTree* tree) 187 : cp_offset_(-1), text_type_(text_type), tree_(tree) {} 188 189 int cp_offset_; 190 TextType text_type_; 191 RegExpTree* tree_; 192 }; 193 194 195 class RegExpTree : public ZoneObject { 196 public: 197 static const int kInfinity = kMaxInt; 198 virtual ~RegExpTree() = default; 199 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; 200 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 201 RegExpNode* on_success) = 0; IsTextElement()202 virtual bool IsTextElement() { return false; } IsAnchoredAtStart()203 virtual bool IsAnchoredAtStart() { return false; } IsAnchoredAtEnd()204 virtual bool IsAnchoredAtEnd() { return false; } 205 virtual int min_match() = 0; 206 virtual int max_match() = 0; 207 // Returns the interval of registers used for captures within this 208 // expression. CaptureRegisters()209 virtual Interval CaptureRegisters() { return Interval::Empty(); } 210 virtual void AppendToText(RegExpText* text, Zone* zone); 211 V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, Zone* zone); 212 #define MAKE_ASTYPE(Name) \ 213 virtual RegExp##Name* As##Name(); \ 214 virtual bool Is##Name(); 215 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) 216 #undef MAKE_ASTYPE 217 }; 218 219 220 class RegExpDisjunction final : public RegExpTree { 221 public: 222 explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives); 223 void* Accept(RegExpVisitor* visitor, void* data) override; 224 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 225 RegExpDisjunction* AsDisjunction() override; 226 Interval CaptureRegisters() override; 227 bool IsDisjunction() override; 228 bool IsAnchoredAtStart() override; 229 bool IsAnchoredAtEnd() override; min_match()230 int min_match() override { return min_match_; } max_match()231 int max_match() override { return max_match_; } alternatives()232 ZoneList<RegExpTree*>* alternatives() { return alternatives_; } 233 234 private: 235 bool SortConsecutiveAtoms(RegExpCompiler* compiler); 236 void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); 237 void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); 238 ZoneList<RegExpTree*>* alternatives_; 239 int min_match_; 240 int max_match_; 241 }; 242 243 244 class RegExpAlternative final : public RegExpTree { 245 public: 246 explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes); 247 void* Accept(RegExpVisitor* visitor, void* data) override; 248 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 249 RegExpAlternative* AsAlternative() override; 250 Interval CaptureRegisters() override; 251 bool IsAlternative() override; 252 bool IsAnchoredAtStart() override; 253 bool IsAnchoredAtEnd() override; min_match()254 int min_match() override { return min_match_; } max_match()255 int max_match() override { return max_match_; } nodes()256 ZoneList<RegExpTree*>* nodes() { return nodes_; } 257 258 private: 259 ZoneList<RegExpTree*>* nodes_; 260 int min_match_; 261 int max_match_; 262 }; 263 264 265 class RegExpAssertion final : public RegExpTree { 266 public: 267 enum AssertionType { 268 START_OF_LINE = 0, 269 START_OF_INPUT = 1, 270 END_OF_LINE = 2, 271 END_OF_INPUT = 3, 272 BOUNDARY = 4, 273 NON_BOUNDARY = 5, 274 LAST_TYPE = NON_BOUNDARY, 275 }; RegExpAssertion(AssertionType type,JSRegExp::Flags flags)276 RegExpAssertion(AssertionType type, JSRegExp::Flags flags) 277 : assertion_type_(type), flags_(flags) {} 278 void* Accept(RegExpVisitor* visitor, void* data) override; 279 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 280 RegExpAssertion* AsAssertion() override; 281 bool IsAssertion() override; 282 bool IsAnchoredAtStart() override; 283 bool IsAnchoredAtEnd() override; min_match()284 int min_match() override { return 0; } max_match()285 int max_match() override { return 0; } assertion_type()286 AssertionType assertion_type() const { return assertion_type_; } flags()287 JSRegExp::Flags flags() const { return flags_; } 288 289 private: 290 const AssertionType assertion_type_; 291 const JSRegExp::Flags flags_; 292 }; 293 294 295 class RegExpCharacterClass final : public RegExpTree { 296 public: 297 // NEGATED: The character class is negated and should match everything but 298 // the specified ranges. 299 // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split 300 // surrogate and should not be unicode-desugared (crbug.com/641091). 301 enum Flag { 302 NEGATED = 1 << 0, 303 CONTAINS_SPLIT_SURROGATE = 1 << 1, 304 }; 305 using CharacterClassFlags = base::Flags<Flag>; 306 307 RegExpCharacterClass( 308 Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags, 309 CharacterClassFlags character_class_flags = CharacterClassFlags()) set_(ranges)310 : set_(ranges), 311 flags_(flags), 312 character_class_flags_(character_class_flags) { 313 // Convert the empty set of ranges to the negated Everything() range. 314 if (ranges->is_empty()) { 315 ranges->Add(CharacterRange::Everything(), zone); 316 character_class_flags_ ^= NEGATED; 317 } 318 } RegExpCharacterClass(uc16 type,JSRegExp::Flags flags)319 RegExpCharacterClass(uc16 type, JSRegExp::Flags flags) 320 : set_(type), 321 flags_(flags), 322 character_class_flags_(CharacterClassFlags()) {} 323 void* Accept(RegExpVisitor* visitor, void* data) override; 324 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 325 RegExpCharacterClass* AsCharacterClass() override; 326 bool IsCharacterClass() override; IsTextElement()327 bool IsTextElement() override { return true; } min_match()328 int min_match() override { return 1; } 329 // The character class may match two code units for unicode regexps. 330 // TODO(yangguo): we should split this class for usage in TextElement, and 331 // make max_match() dependent on the character class content. max_match()332 int max_match() override { return 2; } 333 void AppendToText(RegExpText* text, Zone* zone) override; character_set()334 CharacterSet character_set() { return set_; } 335 // TODO(lrn): Remove need for complex version if is_standard that 336 // recognizes a mangled standard set and just do { return set_.is_special(); } 337 bool is_standard(Zone* zone); 338 // Returns a value representing the standard character set if is_standard() 339 // returns true. 340 // Currently used values are: 341 // s : unicode whitespace 342 // S : unicode non-whitespace 343 // w : ASCII word character (digit, letter, underscore) 344 // W : non-ASCII word character 345 // d : ASCII digit 346 // D : non-ASCII digit 347 // . : non-newline 348 // * : All characters, for advancing unanchored regexp standard_type()349 uc16 standard_type() const { return set_.standard_set_type(); } ranges(Zone * zone)350 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } is_negated()351 bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; } flags()352 JSRegExp::Flags flags() const { return flags_; } contains_split_surrogate()353 bool contains_split_surrogate() const { 354 return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0; 355 } 356 357 private: 358 CharacterSet set_; 359 const JSRegExp::Flags flags_; 360 CharacterClassFlags character_class_flags_; 361 }; 362 363 364 class RegExpAtom final : public RegExpTree { 365 public: RegExpAtom(Vector<const uc16> data,JSRegExp::Flags flags)366 explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags) 367 : data_(data), flags_(flags) {} 368 void* Accept(RegExpVisitor* visitor, void* data) override; 369 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 370 RegExpAtom* AsAtom() override; 371 bool IsAtom() override; IsTextElement()372 bool IsTextElement() override { return true; } min_match()373 int min_match() override { return data_.length(); } max_match()374 int max_match() override { return data_.length(); } 375 void AppendToText(RegExpText* text, Zone* zone) override; data()376 Vector<const uc16> data() { return data_; } length()377 int length() { return data_.length(); } flags()378 JSRegExp::Flags flags() const { return flags_; } ignore_case()379 bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; } 380 381 private: 382 Vector<const uc16> data_; 383 const JSRegExp::Flags flags_; 384 }; 385 386 387 class RegExpText final : public RegExpTree { 388 public: RegExpText(Zone * zone)389 explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {} 390 void* Accept(RegExpVisitor* visitor, void* data) override; 391 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 392 RegExpText* AsText() override; 393 bool IsText() override; IsTextElement()394 bool IsTextElement() override { return true; } min_match()395 int min_match() override { return length_; } max_match()396 int max_match() override { return length_; } 397 void AppendToText(RegExpText* text, Zone* zone) override; AddElement(TextElement elm,Zone * zone)398 void AddElement(TextElement elm, Zone* zone) { 399 elements_.Add(elm, zone); 400 length_ += elm.length(); 401 } elements()402 ZoneList<TextElement>* elements() { return &elements_; } 403 404 private: 405 ZoneList<TextElement> elements_; 406 int length_; 407 }; 408 409 410 class RegExpQuantifier final : public RegExpTree { 411 public: 412 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; RegExpQuantifier(int min,int max,QuantifierType type,RegExpTree * body)413 RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) 414 : body_(body), 415 min_(min), 416 max_(max), 417 quantifier_type_(type) { 418 if (min > 0 && body->min_match() > kInfinity / min) { 419 min_match_ = kInfinity; 420 } else { 421 min_match_ = min * body->min_match(); 422 } 423 if (max > 0 && body->max_match() > kInfinity / max) { 424 max_match_ = kInfinity; 425 } else { 426 max_match_ = max * body->max_match(); 427 } 428 } 429 void* Accept(RegExpVisitor* visitor, void* data) override; 430 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 431 static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body, 432 RegExpCompiler* compiler, RegExpNode* on_success, 433 bool not_at_start = false); 434 RegExpQuantifier* AsQuantifier() override; 435 Interval CaptureRegisters() override; 436 bool IsQuantifier() override; min_match()437 int min_match() override { return min_match_; } max_match()438 int max_match() override { return max_match_; } min()439 int min() const { return min_; } max()440 int max() const { return max_; } quantifier_type()441 QuantifierType quantifier_type() const { return quantifier_type_; } is_possessive()442 bool is_possessive() const { return quantifier_type_ == POSSESSIVE; } is_non_greedy()443 bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } is_greedy()444 bool is_greedy() const { return quantifier_type_ == GREEDY; } body()445 RegExpTree* body() { return body_; } 446 447 private: 448 RegExpTree* body_; 449 int min_; 450 int max_; 451 int min_match_; 452 int max_match_; 453 QuantifierType quantifier_type_; 454 }; 455 456 457 class RegExpCapture final : public RegExpTree { 458 public: RegExpCapture(int index)459 explicit RegExpCapture(int index) 460 : body_(nullptr), 461 index_(index), 462 min_match_(0), 463 max_match_(0), 464 name_(nullptr) {} 465 void* Accept(RegExpVisitor* visitor, void* data) override; 466 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 467 static RegExpNode* ToNode(RegExpTree* body, int index, 468 RegExpCompiler* compiler, RegExpNode* on_success); 469 RegExpCapture* AsCapture() override; 470 bool IsAnchoredAtStart() override; 471 bool IsAnchoredAtEnd() override; 472 Interval CaptureRegisters() override; 473 bool IsCapture() override; min_match()474 int min_match() override { return min_match_; } max_match()475 int max_match() override { return max_match_; } body()476 RegExpTree* body() { return body_; } set_body(RegExpTree * body)477 void set_body(RegExpTree* body) { 478 body_ = body; 479 min_match_ = body->min_match(); 480 max_match_ = body->max_match(); 481 } index()482 int index() const { return index_; } name()483 const ZoneVector<uc16>* name() const { return name_; } set_name(const ZoneVector<uc16> * name)484 void set_name(const ZoneVector<uc16>* name) { name_ = name; } StartRegister(int index)485 static int StartRegister(int index) { return index * 2; } EndRegister(int index)486 static int EndRegister(int index) { return index * 2 + 1; } 487 488 private: 489 RegExpTree* body_; 490 int index_; 491 int min_match_; 492 int max_match_; 493 const ZoneVector<uc16>* name_; 494 }; 495 496 class RegExpGroup final : public RegExpTree { 497 public: RegExpGroup(RegExpTree * body)498 explicit RegExpGroup(RegExpTree* body) 499 : body_(body), 500 min_match_(body->min_match()), 501 max_match_(body->max_match()) {} 502 void* Accept(RegExpVisitor* visitor, void* data) override; ToNode(RegExpCompiler * compiler,RegExpNode * on_success)503 RegExpNode* ToNode(RegExpCompiler* compiler, 504 RegExpNode* on_success) override { 505 return body_->ToNode(compiler, on_success); 506 } 507 RegExpGroup* AsGroup() override; IsAnchoredAtStart()508 bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); } IsAnchoredAtEnd()509 bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); } 510 bool IsGroup() override; min_match()511 int min_match() override { return min_match_; } max_match()512 int max_match() override { return max_match_; } CaptureRegisters()513 Interval CaptureRegisters() override { return body_->CaptureRegisters(); } body()514 RegExpTree* body() { return body_; } 515 516 private: 517 RegExpTree* body_; 518 int min_match_; 519 int max_match_; 520 }; 521 522 class RegExpLookaround final : public RegExpTree { 523 public: 524 enum Type { LOOKAHEAD, LOOKBEHIND }; 525 RegExpLookaround(RegExpTree * body,bool is_positive,int capture_count,int capture_from,Type type)526 RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count, 527 int capture_from, Type type) 528 : body_(body), 529 is_positive_(is_positive), 530 capture_count_(capture_count), 531 capture_from_(capture_from), 532 type_(type) {} 533 534 void* Accept(RegExpVisitor* visitor, void* data) override; 535 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 536 RegExpLookaround* AsLookaround() override; 537 Interval CaptureRegisters() override; 538 bool IsLookaround() override; 539 bool IsAnchoredAtStart() override; min_match()540 int min_match() override { return 0; } max_match()541 int max_match() override { return 0; } body()542 RegExpTree* body() { return body_; } is_positive()543 bool is_positive() { return is_positive_; } capture_count()544 int capture_count() { return capture_count_; } capture_from()545 int capture_from() { return capture_from_; } type()546 Type type() { return type_; } 547 548 class Builder { 549 public: 550 Builder(bool is_positive, RegExpNode* on_success, 551 int stack_pointer_register, int position_register, 552 int capture_register_count = 0, int capture_register_start = 0); on_match_success()553 RegExpNode* on_match_success() { return on_match_success_; } 554 RegExpNode* ForMatch(RegExpNode* match); 555 556 private: 557 bool is_positive_; 558 RegExpNode* on_match_success_; 559 RegExpNode* on_success_; 560 int stack_pointer_register_; 561 int position_register_; 562 }; 563 564 private: 565 RegExpTree* body_; 566 bool is_positive_; 567 int capture_count_; 568 int capture_from_; 569 Type type_; 570 }; 571 572 573 class RegExpBackReference final : public RegExpTree { 574 public: RegExpBackReference(JSRegExp::Flags flags)575 explicit RegExpBackReference(JSRegExp::Flags flags) 576 : capture_(nullptr), name_(nullptr), flags_(flags) {} RegExpBackReference(RegExpCapture * capture,JSRegExp::Flags flags)577 RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags) 578 : capture_(capture), name_(nullptr), flags_(flags) {} 579 void* Accept(RegExpVisitor* visitor, void* data) override; 580 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 581 RegExpBackReference* AsBackReference() override; 582 bool IsBackReference() override; min_match()583 int min_match() override { return 0; } 584 // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite 585 // recursion, we give up. Ignorance is bliss. max_match()586 int max_match() override { return kInfinity; } index()587 int index() { return capture_->index(); } capture()588 RegExpCapture* capture() { return capture_; } set_capture(RegExpCapture * capture)589 void set_capture(RegExpCapture* capture) { capture_ = capture; } name()590 const ZoneVector<uc16>* name() const { return name_; } set_name(const ZoneVector<uc16> * name)591 void set_name(const ZoneVector<uc16>* name) { name_ = name; } 592 593 private: 594 RegExpCapture* capture_; 595 const ZoneVector<uc16>* name_; 596 const JSRegExp::Flags flags_; 597 }; 598 599 600 class RegExpEmpty final : public RegExpTree { 601 public: 602 RegExpEmpty() = default; 603 void* Accept(RegExpVisitor* visitor, void* data) override; 604 RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; 605 RegExpEmpty* AsEmpty() override; 606 bool IsEmpty() override; min_match()607 int min_match() override { return 0; } max_match()608 int max_match() override { return 0; } 609 }; 610 611 } // namespace internal 612 } // namespace v8 613 614 #endif // V8_REGEXP_REGEXP_AST_H_ 615