1 // Copyright 2019 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_NODES_H_ 6 #define V8_REGEXP_REGEXP_NODES_H_ 7 8 #include "irregexp/imported/regexp-macro-assembler.h" 9 10 namespace v8 { 11 namespace internal { 12 13 class AlternativeGenerationList; 14 class BoyerMooreLookahead; 15 class GreedyLoopState; 16 class Label; 17 class NodeVisitor; 18 class QuickCheckDetails; 19 class RegExpCompiler; 20 class Trace; 21 struct PreloadState; 22 class ChoiceNode; 23 24 #define FOR_EACH_NODE_TYPE(VISIT) \ 25 VISIT(End) \ 26 VISIT(Action) \ 27 VISIT(Choice) \ 28 VISIT(LoopChoice) \ 29 VISIT(NegativeLookaroundChoice) \ 30 VISIT(BackReference) \ 31 VISIT(Assertion) \ 32 VISIT(Text) 33 34 struct NodeInfo final { NodeInfofinal35 NodeInfo() 36 : being_analyzed(false), 37 been_analyzed(false), 38 follows_word_interest(false), 39 follows_newline_interest(false), 40 follows_start_interest(false), 41 at_end(false), 42 visited(false), 43 replacement_calculated(false) {} 44 45 // Returns true if the interests and assumptions of this node 46 // matches the given one. Matchesfinal47 bool Matches(NodeInfo* that) { 48 return (at_end == that->at_end) && 49 (follows_word_interest == that->follows_word_interest) && 50 (follows_newline_interest == that->follows_newline_interest) && 51 (follows_start_interest == that->follows_start_interest); 52 } 53 54 // Updates the interests of this node given the interests of the 55 // node preceding it. AddFromPrecedingfinal56 void AddFromPreceding(NodeInfo* that) { 57 at_end |= that->at_end; 58 follows_word_interest |= that->follows_word_interest; 59 follows_newline_interest |= that->follows_newline_interest; 60 follows_start_interest |= that->follows_start_interest; 61 } 62 HasLookbehindfinal63 bool HasLookbehind() { 64 return follows_word_interest || follows_newline_interest || 65 follows_start_interest; 66 } 67 68 // Sets the interests of this node to include the interests of the 69 // following node. AddFromFollowingfinal70 void AddFromFollowing(NodeInfo* that) { 71 follows_word_interest |= that->follows_word_interest; 72 follows_newline_interest |= that->follows_newline_interest; 73 follows_start_interest |= that->follows_start_interest; 74 } 75 ResetCompilationStatefinal76 void ResetCompilationState() { 77 being_analyzed = false; 78 been_analyzed = false; 79 } 80 81 bool being_analyzed : 1; 82 bool been_analyzed : 1; 83 84 // These bits are set of this node has to know what the preceding 85 // character was. 86 bool follows_word_interest : 1; 87 bool follows_newline_interest : 1; 88 bool follows_start_interest : 1; 89 90 bool at_end : 1; 91 bool visited : 1; 92 bool replacement_calculated : 1; 93 }; 94 95 struct EatsAtLeastInfo final { EatsAtLeastInfofinal96 EatsAtLeastInfo() : EatsAtLeastInfo(0) {} EatsAtLeastInfofinal97 explicit EatsAtLeastInfo(uint8_t eats) 98 : eats_at_least_from_possibly_start(eats), 99 eats_at_least_from_not_start(eats) {} SetMinfinal100 void SetMin(const EatsAtLeastInfo& other) { 101 if (other.eats_at_least_from_possibly_start < 102 eats_at_least_from_possibly_start) { 103 eats_at_least_from_possibly_start = 104 other.eats_at_least_from_possibly_start; 105 } 106 if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) { 107 eats_at_least_from_not_start = other.eats_at_least_from_not_start; 108 } 109 } 110 IsZerofinal111 bool IsZero() const { 112 return eats_at_least_from_possibly_start == 0 && 113 eats_at_least_from_not_start == 0; 114 } 115 116 // Any successful match starting from the current node will consume at least 117 // this many characters. This does not necessarily mean that there is a 118 // possible match with exactly this many characters, but we generally try to 119 // get this number as high as possible to allow for early exit on failure. 120 uint8_t eats_at_least_from_possibly_start; 121 122 // Like eats_at_least_from_possibly_start, but with the additional assumption 123 // that start-of-string assertions (^) can't match. This value is greater than 124 // or equal to eats_at_least_from_possibly_start. 125 uint8_t eats_at_least_from_not_start; 126 }; 127 128 class RegExpNode : public ZoneObject { 129 public: RegExpNode(Zone * zone)130 explicit RegExpNode(Zone* zone) 131 : replacement_(nullptr), 132 on_work_list_(false), 133 trace_count_(0), 134 zone_(zone) { 135 bm_info_[0] = bm_info_[1] = nullptr; 136 } 137 virtual ~RegExpNode(); 138 virtual void Accept(NodeVisitor* visitor) = 0; 139 // Generates a goto to this node or actually generates the code at this point. 140 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; 141 // How many characters must this node consume at a minimum in order to 142 // succeed. The not_at_start argument is used to indicate that we know we are 143 // not at the start of the input. In this case anchored branches will always 144 // fail and can be ignored when determining how many characters are consumed 145 // on success. If this node has not been analyzed yet, EatsAtLeast returns 0. 146 int EatsAtLeast(bool not_at_start); 147 // Returns how many characters this node must consume in order to succeed, 148 // given that this is a LoopChoiceNode whose counter register is in a 149 // newly-initialized state at the current position in the generated code. For 150 // example, consider /a{6,8}/. Absent any extra information, the 151 // LoopChoiceNode for the repetition must report that it consumes at least 152 // zero characters, because it may have already looped several times. However, 153 // with a newly-initialized counter, it can report that it consumes at least 154 // six characters. 155 virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry(); 156 // Emits some quick code that checks whether the preloaded characters match. 157 // Falls through on certain failure, jumps to the label on possible success. 158 // If the node cannot make a quick check it does nothing and returns false. 159 bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace, 160 Trace* trace, bool preload_has_checked_bounds, 161 Label* on_possible_success, 162 QuickCheckDetails* details_return, 163 bool fall_through_on_failure, ChoiceNode* predecessor); 164 // For a given number of characters this returns a mask and a value. The 165 // next n characters are anded with the mask and compared with the value. 166 // A comparison failure indicates the node cannot match the next n characters. 167 // A comparison success indicates the node may match. 168 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 169 RegExpCompiler* compiler, 170 int characters_filled_in, 171 bool not_at_start) = 0; 172 // Fills in quick check details for this node, given that this is a 173 // LoopChoiceNode whose counter register is in a newly-initialized state at 174 // the current position in the generated code. For example, consider /a{6,8}/. 175 // Absent any extra information, the LoopChoiceNode for the repetition cannot 176 // generate any useful quick check because a match might be the (empty) 177 // continuation node. However, with a newly-initialized counter, it can 178 // generate a quick check for several 'a' characters at once. 179 virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, 180 RegExpCompiler* compiler, 181 int characters_filled_in, 182 bool not_at_start); 183 static const int kNodeIsTooComplexForGreedyLoops = kMinInt; GreedyLoopTextLength()184 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 185 // Only returns the successor for a text node of length 1 that matches any 186 // character and that has no guards on it. GetSuccessorOfOmnivorousTextNode(RegExpCompiler * compiler)187 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 188 RegExpCompiler* compiler) { 189 return nullptr; 190 } 191 192 // Collects information on the possible code units (mod 128) that can match if 193 // we look forward. This is used for a Boyer-Moore-like string searching 194 // implementation. TODO(erikcorry): This should share more code with 195 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit 196 // the number of nodes we are willing to look at in order to create this data. 197 static const int kRecursionBudget = 200; 198 bool KeepRecursing(RegExpCompiler* compiler); FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)199 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, 200 BoyerMooreLookahead* bm, bool not_at_start) { 201 UNREACHABLE(); 202 } 203 204 // If we know that the input is one-byte then there are some nodes that can 205 // never match. This method returns a node that can be substituted for 206 // itself, or nullptr if the node can never match. FilterOneByte(int depth)207 virtual RegExpNode* FilterOneByte(int depth) { return this; } 208 // Helper for FilterOneByte. replacement()209 RegExpNode* replacement() { 210 DCHECK(info()->replacement_calculated); 211 return replacement_; 212 } set_replacement(RegExpNode * replacement)213 RegExpNode* set_replacement(RegExpNode* replacement) { 214 info()->replacement_calculated = true; 215 replacement_ = replacement; 216 return replacement; // For convenience. 217 } 218 219 // We want to avoid recalculating the lookahead info, so we store it on the 220 // node. Only info that is for this node is stored. We can tell that the 221 // info is for this node when offset == 0, so the information is calculated 222 // relative to this node. SaveBMInfo(BoyerMooreLookahead * bm,bool not_at_start,int offset)223 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) { 224 if (offset == 0) set_bm_info(not_at_start, bm); 225 } 226 label()227 Label* label() { return &label_; } 228 // If non-generic code is generated for a node (i.e. the node is not at the 229 // start of the trace) then it cannot be reused. This variable sets a limit 230 // on how often we allow that to happen before we insist on starting a new 231 // trace and generating generic code for a node that can be reused by flushing 232 // the deferred actions in the current trace and generating a goto. 233 static const int kMaxCopiesCodeGenerated = 10; 234 on_work_list()235 bool on_work_list() { return on_work_list_; } set_on_work_list(bool value)236 void set_on_work_list(bool value) { on_work_list_ = value; } 237 info()238 NodeInfo* info() { return &info_; } eats_at_least_info()239 const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; } set_eats_at_least_info(const EatsAtLeastInfo & eats_at_least)240 void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) { 241 eats_at_least_ = eats_at_least; 242 } 243 244 // TODO(v8:10441): This is a hacky way to avoid exponential code size growth 245 // for very large choice nodes that can be generated by unicode property 246 // escapes. In order to avoid inlining (i.e. trace recursion), we pretend to 247 // have generated the maximum count of code copies already. 248 // We should instead fix this properly, e.g. by using the code size budget 249 // (flush_budget) or by generating property escape matches as calls to a C 250 // function. SetDoNotInline()251 void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; } 252 bm_info(bool not_at_start)253 BoyerMooreLookahead* bm_info(bool not_at_start) { 254 return bm_info_[not_at_start ? 1 : 0]; 255 } 256 zone()257 Zone* zone() const { return zone_; } 258 259 protected: 260 enum LimitResult { DONE, CONTINUE }; 261 RegExpNode* replacement_; 262 263 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); 264 set_bm_info(bool not_at_start,BoyerMooreLookahead * bm)265 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { 266 bm_info_[not_at_start ? 1 : 0] = bm; 267 } 268 269 private: 270 static const int kFirstCharBudget = 10; 271 Label label_; 272 bool on_work_list_; 273 NodeInfo info_; 274 275 // Saved values for EatsAtLeast results, to avoid recomputation. Filled in 276 // during analysis (valid if info_.been_analyzed is true). 277 EatsAtLeastInfo eats_at_least_; 278 279 // This variable keeps track of how many times code has been generated for 280 // this node (in different traces). We don't keep track of where the 281 // generated code is located unless the code is generated at the start of 282 // a trace, in which case it is generic and can be reused by flushing the 283 // deferred operations in the current trace and generating a goto. 284 int trace_count_; 285 BoyerMooreLookahead* bm_info_[2]; 286 287 Zone* zone_; 288 }; 289 290 class SeqRegExpNode : public RegExpNode { 291 public: SeqRegExpNode(RegExpNode * on_success)292 explicit SeqRegExpNode(RegExpNode* on_success) 293 : RegExpNode(on_success->zone()), on_success_(on_success) {} on_success()294 RegExpNode* on_success() { return on_success_; } set_on_success(RegExpNode * node)295 void set_on_success(RegExpNode* node) { on_success_ = node; } 296 RegExpNode* FilterOneByte(int depth) override; FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)297 void FillInBMInfo(Isolate* isolate, int offset, int budget, 298 BoyerMooreLookahead* bm, bool not_at_start) override { 299 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 300 if (offset == 0) set_bm_info(not_at_start, bm); 301 } 302 303 protected: 304 RegExpNode* FilterSuccessor(int depth); 305 306 private: 307 RegExpNode* on_success_; 308 }; 309 310 class ActionNode : public SeqRegExpNode { 311 public: 312 enum ActionType { 313 SET_REGISTER_FOR_LOOP, 314 INCREMENT_REGISTER, 315 STORE_POSITION, 316 BEGIN_POSITIVE_SUBMATCH, 317 BEGIN_NEGATIVE_SUBMATCH, 318 POSITIVE_SUBMATCH_SUCCESS, 319 EMPTY_MATCH_CHECK, 320 CLEAR_CAPTURES 321 }; 322 static ActionNode* SetRegisterForLoop(int reg, int val, 323 RegExpNode* on_success); 324 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); 325 static ActionNode* StorePosition(int reg, bool is_capture, 326 RegExpNode* on_success); 327 static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); 328 static ActionNode* BeginPositiveSubmatch(int stack_pointer_reg, 329 int position_reg, 330 RegExpNode* on_success); 331 static ActionNode* BeginNegativeSubmatch(int stack_pointer_reg, 332 int position_reg, 333 RegExpNode* on_success); 334 static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg, 335 int restore_reg, 336 int clear_capture_count, 337 int clear_capture_from, 338 RegExpNode* on_success); 339 static ActionNode* EmptyMatchCheck(int start_register, 340 int repetition_register, 341 int repetition_limit, 342 RegExpNode* on_success); 343 void Accept(NodeVisitor* visitor) override; 344 void Emit(RegExpCompiler* compiler, Trace* trace) override; 345 void GetQuickCheckDetails(QuickCheckDetails* details, 346 RegExpCompiler* compiler, int filled_in, 347 bool not_at_start) override; 348 void FillInBMInfo(Isolate* isolate, int offset, int budget, 349 BoyerMooreLookahead* bm, bool not_at_start) override; action_type()350 ActionType action_type() { return action_type_; } 351 // TODO(erikcorry): We should allow some action nodes in greedy loops. GreedyLoopTextLength()352 int GreedyLoopTextLength() override { 353 return kNodeIsTooComplexForGreedyLoops; 354 } 355 356 private: 357 union { 358 struct { 359 int reg; 360 int value; 361 } u_store_register; 362 struct { 363 int reg; 364 } u_increment_register; 365 struct { 366 int reg; 367 bool is_capture; 368 } u_position_register; 369 struct { 370 int stack_pointer_register; 371 int current_position_register; 372 int clear_register_count; 373 int clear_register_from; 374 } u_submatch; 375 struct { 376 int start_register; 377 int repetition_register; 378 int repetition_limit; 379 } u_empty_match_check; 380 struct { 381 int range_from; 382 int range_to; 383 } u_clear_captures; 384 } data_; ActionNode(ActionType action_type,RegExpNode * on_success)385 ActionNode(ActionType action_type, RegExpNode* on_success) 386 : SeqRegExpNode(on_success), action_type_(action_type) {} 387 ActionType action_type_; 388 friend class DotPrinterImpl; 389 friend Zone; 390 }; 391 392 class TextNode : public SeqRegExpNode { 393 public: TextNode(ZoneList<TextElement> * elms,bool read_backward,RegExpNode * on_success)394 TextNode(ZoneList<TextElement>* elms, bool read_backward, 395 RegExpNode* on_success) 396 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} TextNode(RegExpCharacterClass * that,bool read_backward,RegExpNode * on_success)397 TextNode(RegExpCharacterClass* that, bool read_backward, 398 RegExpNode* on_success) 399 : SeqRegExpNode(on_success), 400 elms_(zone()->New<ZoneList<TextElement>>(1, zone())), 401 read_backward_(read_backward) { 402 elms_->Add(TextElement::CharClass(that), zone()); 403 } 404 // Create TextNode for a single character class for the given ranges. 405 static TextNode* CreateForCharacterRanges(Zone* zone, 406 ZoneList<CharacterRange>* ranges, 407 bool read_backward, 408 RegExpNode* on_success, 409 JSRegExp::Flags flags); 410 // Create TextNode for a surrogate pair with a range given for the 411 // lead and the trail surrogate each. 412 static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead, 413 CharacterRange trail, 414 bool read_backward, 415 RegExpNode* on_success, 416 JSRegExp::Flags flags); 417 void Accept(NodeVisitor* visitor) override; 418 void Emit(RegExpCompiler* compiler, Trace* trace) override; 419 void GetQuickCheckDetails(QuickCheckDetails* details, 420 RegExpCompiler* compiler, int characters_filled_in, 421 bool not_at_start) override; elements()422 ZoneList<TextElement>* elements() { return elms_; } read_backward()423 bool read_backward() { return read_backward_; } 424 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); 425 int GreedyLoopTextLength() override; 426 RegExpNode* GetSuccessorOfOmnivorousTextNode( 427 RegExpCompiler* compiler) override; 428 void FillInBMInfo(Isolate* isolate, int offset, int budget, 429 BoyerMooreLookahead* bm, bool not_at_start) override; 430 void CalculateOffsets(); 431 RegExpNode* FilterOneByte(int depth) override; 432 int Length(); 433 434 private: 435 enum TextEmitPassType { 436 NON_LATIN1_MATCH, // Check for characters that can't match. 437 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 438 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 439 CASE_CHARACTER_MATCH, // Case-independent single character check. 440 CHARACTER_CLASS_MATCH // Character class. 441 }; 442 static bool SkipPass(TextEmitPassType pass, bool ignore_case); 443 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; 444 static const int kLastPass = CHARACTER_CLASS_MATCH; 445 void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, 446 bool preloaded, Trace* trace, bool first_element_checked, 447 int* checked_up_to); 448 ZoneList<TextElement>* elms_; 449 bool read_backward_; 450 }; 451 452 class AssertionNode : public SeqRegExpNode { 453 public: 454 enum AssertionType { 455 AT_END, 456 AT_START, 457 AT_BOUNDARY, 458 AT_NON_BOUNDARY, 459 AFTER_NEWLINE 460 }; AtEnd(RegExpNode * on_success)461 static AssertionNode* AtEnd(RegExpNode* on_success) { 462 return on_success->zone()->New<AssertionNode>(AT_END, on_success); 463 } AtStart(RegExpNode * on_success)464 static AssertionNode* AtStart(RegExpNode* on_success) { 465 return on_success->zone()->New<AssertionNode>(AT_START, on_success); 466 } AtBoundary(RegExpNode * on_success)467 static AssertionNode* AtBoundary(RegExpNode* on_success) { 468 return on_success->zone()->New<AssertionNode>(AT_BOUNDARY, on_success); 469 } AtNonBoundary(RegExpNode * on_success)470 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { 471 return on_success->zone()->New<AssertionNode>(AT_NON_BOUNDARY, on_success); 472 } AfterNewline(RegExpNode * on_success)473 static AssertionNode* AfterNewline(RegExpNode* on_success) { 474 return on_success->zone()->New<AssertionNode>(AFTER_NEWLINE, on_success); 475 } 476 void Accept(NodeVisitor* visitor) override; 477 void Emit(RegExpCompiler* compiler, Trace* trace) override; 478 void GetQuickCheckDetails(QuickCheckDetails* details, 479 RegExpCompiler* compiler, int filled_in, 480 bool not_at_start) override; 481 void FillInBMInfo(Isolate* isolate, int offset, int budget, 482 BoyerMooreLookahead* bm, bool not_at_start) override; assertion_type()483 AssertionType assertion_type() { return assertion_type_; } 484 485 private: 486 friend Zone; 487 488 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); 489 enum IfPrevious { kIsNonWord, kIsWord }; 490 void BacktrackIfPrevious(RegExpCompiler* compiler, Trace* trace, 491 IfPrevious backtrack_if_previous); AssertionNode(AssertionType t,RegExpNode * on_success)492 AssertionNode(AssertionType t, RegExpNode* on_success) 493 : SeqRegExpNode(on_success), assertion_type_(t) {} 494 AssertionType assertion_type_; 495 }; 496 497 class BackReferenceNode : public SeqRegExpNode { 498 public: BackReferenceNode(int start_reg,int end_reg,JSRegExp::Flags flags,bool read_backward,RegExpNode * on_success)499 BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags, 500 bool read_backward, RegExpNode* on_success) 501 : SeqRegExpNode(on_success), 502 start_reg_(start_reg), 503 end_reg_(end_reg), 504 flags_(flags), 505 read_backward_(read_backward) {} 506 void Accept(NodeVisitor* visitor) override; start_register()507 int start_register() { return start_reg_; } end_register()508 int end_register() { return end_reg_; } read_backward()509 bool read_backward() { return read_backward_; } 510 void Emit(RegExpCompiler* compiler, Trace* trace) override; GetQuickCheckDetails(QuickCheckDetails * details,RegExpCompiler * compiler,int characters_filled_in,bool not_at_start)511 void GetQuickCheckDetails(QuickCheckDetails* details, 512 RegExpCompiler* compiler, int characters_filled_in, 513 bool not_at_start) override { 514 return; 515 } 516 void FillInBMInfo(Isolate* isolate, int offset, int budget, 517 BoyerMooreLookahead* bm, bool not_at_start) override; 518 519 private: 520 int start_reg_; 521 int end_reg_; 522 JSRegExp::Flags flags_; 523 bool read_backward_; 524 }; 525 526 class EndNode : public RegExpNode { 527 public: 528 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; EndNode(Action action,Zone * zone)529 EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} 530 void Accept(NodeVisitor* visitor) override; 531 void Emit(RegExpCompiler* compiler, Trace* trace) override; GetQuickCheckDetails(QuickCheckDetails * details,RegExpCompiler * compiler,int characters_filled_in,bool not_at_start)532 void GetQuickCheckDetails(QuickCheckDetails* details, 533 RegExpCompiler* compiler, int characters_filled_in, 534 bool not_at_start) override { 535 // Returning 0 from EatsAtLeast should ensure we never get here. 536 UNREACHABLE(); 537 } FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)538 void FillInBMInfo(Isolate* isolate, int offset, int budget, 539 BoyerMooreLookahead* bm, bool not_at_start) override { 540 // Returning 0 from EatsAtLeast should ensure we never get here. 541 UNREACHABLE(); 542 } 543 544 private: 545 Action action_; 546 }; 547 548 class NegativeSubmatchSuccess : public EndNode { 549 public: NegativeSubmatchSuccess(int stack_pointer_reg,int position_reg,int clear_capture_count,int clear_capture_start,Zone * zone)550 NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg, 551 int clear_capture_count, int clear_capture_start, 552 Zone* zone) 553 : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), 554 stack_pointer_register_(stack_pointer_reg), 555 current_position_register_(position_reg), 556 clear_capture_count_(clear_capture_count), 557 clear_capture_start_(clear_capture_start) {} 558 void Emit(RegExpCompiler* compiler, Trace* trace) override; 559 560 private: 561 int stack_pointer_register_; 562 int current_position_register_; 563 int clear_capture_count_; 564 int clear_capture_start_; 565 }; 566 567 class Guard : public ZoneObject { 568 public: 569 enum Relation { LT, GEQ }; Guard(int reg,Relation op,int value)570 Guard(int reg, Relation op, int value) : reg_(reg), op_(op), value_(value) {} reg()571 int reg() { return reg_; } op()572 Relation op() { return op_; } value()573 int value() { return value_; } 574 575 private: 576 int reg_; 577 Relation op_; 578 int value_; 579 }; 580 581 class GuardedAlternative { 582 public: GuardedAlternative(RegExpNode * node)583 explicit GuardedAlternative(RegExpNode* node) 584 : node_(node), guards_(nullptr) {} 585 void AddGuard(Guard* guard, Zone* zone); node()586 RegExpNode* node() { return node_; } set_node(RegExpNode * node)587 void set_node(RegExpNode* node) { node_ = node; } guards()588 ZoneList<Guard*>* guards() { return guards_; } 589 590 private: 591 RegExpNode* node_; 592 ZoneList<Guard*>* guards_; 593 }; 594 595 class AlternativeGeneration; 596 597 class ChoiceNode : public RegExpNode { 598 public: ChoiceNode(int expected_size,Zone * zone)599 explicit ChoiceNode(int expected_size, Zone* zone) 600 : RegExpNode(zone), 601 alternatives_( 602 zone->New<ZoneList<GuardedAlternative>>(expected_size, zone)), 603 not_at_start_(false), 604 being_calculated_(false) {} 605 void Accept(NodeVisitor* visitor) override; AddAlternative(GuardedAlternative node)606 void AddAlternative(GuardedAlternative node) { 607 alternatives()->Add(node, zone()); 608 } alternatives()609 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } 610 void Emit(RegExpCompiler* compiler, Trace* trace) override; 611 void GetQuickCheckDetails(QuickCheckDetails* details, 612 RegExpCompiler* compiler, int characters_filled_in, 613 bool not_at_start) override; 614 void FillInBMInfo(Isolate* isolate, int offset, int budget, 615 BoyerMooreLookahead* bm, bool not_at_start) override; 616 being_calculated()617 bool being_calculated() { return being_calculated_; } not_at_start()618 bool not_at_start() { return not_at_start_; } set_not_at_start()619 void set_not_at_start() { not_at_start_ = true; } set_being_calculated(bool b)620 void set_being_calculated(bool b) { being_calculated_ = b; } try_to_emit_quick_check_for_alternative(bool is_first)621 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { 622 return true; 623 } 624 RegExpNode* FilterOneByte(int depth) override; read_backward()625 virtual bool read_backward() { return false; } 626 627 protected: 628 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); 629 ZoneList<GuardedAlternative>* alternatives_; 630 631 private: 632 template <typename...> 633 friend class Analysis; 634 635 void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard, 636 Trace* trace); 637 int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least); 638 void EmitOutOfLineContinuation(RegExpCompiler* compiler, Trace* trace, 639 GuardedAlternative alternative, 640 AlternativeGeneration* alt_gen, 641 int preload_characters, 642 bool next_expects_preload); 643 void SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace, 644 PreloadState* preloads); 645 void AssertGuardsMentionRegisters(Trace* trace); 646 int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace); 647 Trace* EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace, 648 AlternativeGenerationList* alt_gens, 649 PreloadState* preloads, 650 GreedyLoopState* greedy_loop_state, int text_length); 651 void EmitChoices(RegExpCompiler* compiler, 652 AlternativeGenerationList* alt_gens, int first_choice, 653 Trace* trace, PreloadState* preloads); 654 655 // If true, this node is never checked at the start of the input. 656 // Allows a new trace to start with at_start() set to false. 657 bool not_at_start_; 658 bool being_calculated_; 659 }; 660 661 class NegativeLookaroundChoiceNode : public ChoiceNode { 662 public: NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,GuardedAlternative then_do_this,Zone * zone)663 explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail, 664 GuardedAlternative then_do_this, 665 Zone* zone) 666 : ChoiceNode(2, zone) { 667 AddAlternative(this_must_fail); 668 AddAlternative(then_do_this); 669 } 670 void GetQuickCheckDetails(QuickCheckDetails* details, 671 RegExpCompiler* compiler, int characters_filled_in, 672 bool not_at_start) override; FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)673 void FillInBMInfo(Isolate* isolate, int offset, int budget, 674 BoyerMooreLookahead* bm, bool not_at_start) override { 675 continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm, 676 not_at_start); 677 if (offset == 0) set_bm_info(not_at_start, bm); 678 } 679 static constexpr int kLookaroundIndex = 0; 680 static constexpr int kContinueIndex = 1; lookaround_node()681 RegExpNode* lookaround_node() { 682 return alternatives()->at(kLookaroundIndex).node(); 683 } continue_node()684 RegExpNode* continue_node() { 685 return alternatives()->at(kContinueIndex).node(); 686 } 687 // For a negative lookahead we don't emit the quick check for the 688 // alternative that is expected to fail. This is because quick check code 689 // starts by loading enough characters for the alternative that takes fewest 690 // characters, but on a negative lookahead the negative branch did not take 691 // part in that calculation (EatsAtLeast) so the assumptions don't hold. try_to_emit_quick_check_for_alternative(bool is_first)692 bool try_to_emit_quick_check_for_alternative(bool is_first) override { 693 return !is_first; 694 } 695 void Accept(NodeVisitor* visitor) override; 696 RegExpNode* FilterOneByte(int depth) override; 697 }; 698 699 class LoopChoiceNode : public ChoiceNode { 700 public: LoopChoiceNode(bool body_can_be_zero_length,bool read_backward,int min_loop_iterations,Zone * zone)701 LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, 702 int min_loop_iterations, Zone* zone) 703 : ChoiceNode(2, zone), 704 loop_node_(nullptr), 705 continue_node_(nullptr), 706 body_can_be_zero_length_(body_can_be_zero_length), 707 read_backward_(read_backward), 708 traversed_loop_initialization_node_(false), 709 min_loop_iterations_(min_loop_iterations) {} 710 void AddLoopAlternative(GuardedAlternative alt); 711 void AddContinueAlternative(GuardedAlternative alt); 712 void Emit(RegExpCompiler* compiler, Trace* trace) override; 713 void GetQuickCheckDetails(QuickCheckDetails* details, 714 RegExpCompiler* compiler, int characters_filled_in, 715 bool not_at_start) override; 716 void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, 717 RegExpCompiler* compiler, 718 int characters_filled_in, 719 bool not_at_start) override; 720 void FillInBMInfo(Isolate* isolate, int offset, int budget, 721 BoyerMooreLookahead* bm, bool not_at_start) override; 722 EatsAtLeastInfo EatsAtLeastFromLoopEntry() override; loop_node()723 RegExpNode* loop_node() { return loop_node_; } continue_node()724 RegExpNode* continue_node() { return continue_node_; } body_can_be_zero_length()725 bool body_can_be_zero_length() { return body_can_be_zero_length_; } min_loop_iterations()726 int min_loop_iterations() const { return min_loop_iterations_; } read_backward()727 bool read_backward() override { return read_backward_; } 728 void Accept(NodeVisitor* visitor) override; 729 RegExpNode* FilterOneByte(int depth) override; 730 731 private: 732 // AddAlternative is made private for loop nodes because alternatives 733 // should not be added freely, we need to keep track of which node 734 // goes back to the node itself. AddAlternative(GuardedAlternative node)735 void AddAlternative(GuardedAlternative node) { 736 ChoiceNode::AddAlternative(node); 737 } 738 739 RegExpNode* loop_node_; 740 RegExpNode* continue_node_; 741 bool body_can_be_zero_length_; 742 bool read_backward_; 743 744 // Temporary marker set only while generating quick check details. Represents 745 // whether GetQuickCheckDetails traversed the initialization node for this 746 // loop's counter. If so, we may be able to generate stricter quick checks 747 // because we know the loop node must match at least min_loop_iterations_ 748 // times before the continuation node can match. 749 bool traversed_loop_initialization_node_; 750 751 // The minimum number of times the loop_node_ must match before the 752 // continue_node_ might be considered. This value can be temporarily decreased 753 // while generating quick check details, to represent the remaining iterations 754 // after the completed portion of the quick check details. 755 int min_loop_iterations_; 756 757 friend class IterationDecrementer; 758 friend class LoopInitializationMarker; 759 }; 760 761 class NodeVisitor { 762 public: 763 virtual ~NodeVisitor() = default; 764 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0; 765 FOR_EACH_NODE_TYPE(DECLARE_VISIT) 766 #undef DECLARE_VISIT 767 }; 768 769 } // namespace internal 770 } // namespace v8 771 772 #endif // V8_REGEXP_REGEXP_NODES_H_ 773