1 // Copyright 2019 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_NODES_H_ 6 #define V8_REGEXP_REGEXP_NODES_H_ 7 8 #include "new-regexp/regexp-macro-assembler.h" 9 10 namespace v8 { 11 namespace internal { 12 13 class AlternativeGenerationList; 14 class BoyerMooreLookahead; 15 class GreedyLoopState; 16 class Label; 17 class NodeVisitor; 18 class QuickCheckDetails; 19 class RegExpCompiler; 20 class Trace; 21 struct PreloadState; 22 class ChoiceNode; 23 24 #define FOR_EACH_NODE_TYPE(VISIT) \ 25 VISIT(End) \ 26 VISIT(Action) \ 27 VISIT(Choice) \ 28 VISIT(LoopChoice) \ 29 VISIT(NegativeLookaroundChoice) \ 30 VISIT(BackReference) \ 31 VISIT(Assertion) \ 32 VISIT(Text) 33 34 struct NodeInfo final { NodeInfofinal35 NodeInfo() 36 : being_analyzed(false), 37 been_analyzed(false), 38 follows_word_interest(false), 39 follows_newline_interest(false), 40 follows_start_interest(false), 41 at_end(false), 42 visited(false), 43 replacement_calculated(false) {} 44 45 // Returns true if the interests and assumptions of this node 46 // matches the given one. Matchesfinal47 bool Matches(NodeInfo* that) { 48 return (at_end == that->at_end) && 49 (follows_word_interest == that->follows_word_interest) && 50 (follows_newline_interest == that->follows_newline_interest) && 51 (follows_start_interest == that->follows_start_interest); 52 } 53 54 // Updates the interests of this node given the interests of the 55 // node preceding it. AddFromPrecedingfinal56 void AddFromPreceding(NodeInfo* that) { 57 at_end |= that->at_end; 58 follows_word_interest |= that->follows_word_interest; 59 follows_newline_interest |= that->follows_newline_interest; 60 follows_start_interest |= that->follows_start_interest; 61 } 62 HasLookbehindfinal63 bool HasLookbehind() { 64 return follows_word_interest || follows_newline_interest || 65 follows_start_interest; 66 } 67 68 // Sets the interests of this node to include the interests of the 69 // following node. AddFromFollowingfinal70 void AddFromFollowing(NodeInfo* that) { 71 follows_word_interest |= that->follows_word_interest; 72 follows_newline_interest |= that->follows_newline_interest; 73 follows_start_interest |= that->follows_start_interest; 74 } 75 ResetCompilationStatefinal76 void ResetCompilationState() { 77 being_analyzed = false; 78 been_analyzed = false; 79 } 80 81 bool being_analyzed : 1; 82 bool been_analyzed : 1; 83 84 // These bits are set of this node has to know what the preceding 85 // character was. 86 bool follows_word_interest : 1; 87 bool follows_newline_interest : 1; 88 bool follows_start_interest : 1; 89 90 bool at_end : 1; 91 bool visited : 1; 92 bool replacement_calculated : 1; 93 }; 94 95 struct EatsAtLeastInfo final { EatsAtLeastInfofinal96 EatsAtLeastInfo() : EatsAtLeastInfo(0) {} EatsAtLeastInfofinal97 explicit EatsAtLeastInfo(uint8_t eats) 98 : eats_at_least_from_possibly_start(eats), 99 eats_at_least_from_not_start(eats) {} SetMinfinal100 void SetMin(const EatsAtLeastInfo& other) { 101 if (other.eats_at_least_from_possibly_start < 102 eats_at_least_from_possibly_start) { 103 eats_at_least_from_possibly_start = 104 other.eats_at_least_from_possibly_start; 105 } 106 if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) { 107 eats_at_least_from_not_start = other.eats_at_least_from_not_start; 108 } 109 } 110 111 // Any successful match starting from the current node will consume at least 112 // this many characters. This does not necessarily mean that there is a 113 // possible match with exactly this many characters, but we generally try to 114 // get this number as high as possible to allow for early exit on failure. 115 uint8_t eats_at_least_from_possibly_start; 116 117 // Like eats_at_least_from_possibly_start, but with the additional assumption 118 // that start-of-string assertions (^) can't match. This value is greater than 119 // or equal to eats_at_least_from_possibly_start. 120 uint8_t eats_at_least_from_not_start; 121 }; 122 123 class RegExpNode : public ZoneObject { 124 public: RegExpNode(Zone * zone)125 explicit RegExpNode(Zone* zone) 126 : replacement_(nullptr), 127 on_work_list_(false), 128 trace_count_(0), 129 zone_(zone) { 130 bm_info_[0] = bm_info_[1] = nullptr; 131 } 132 virtual ~RegExpNode(); 133 virtual void Accept(NodeVisitor* visitor) = 0; 134 // Generates a goto to this node or actually generates the code at this point. 135 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; 136 // How many characters must this node consume at a minimum in order to 137 // succeed. The not_at_start argument is used to indicate that we know we are 138 // not at the start of the input. In this case anchored branches will always 139 // fail and can be ignored when determining how many characters are consumed 140 // on success. If this node has not been analyzed yet, EatsAtLeast returns 0. 141 int EatsAtLeast(bool not_at_start); 142 // Returns how many characters this node must consume in order to succeed, 143 // given that this is a LoopChoiceNode whose counter register is in a 144 // newly-initialized state at the current position in the generated code. For 145 // example, consider /a{6,8}/. Absent any extra information, the 146 // LoopChoiceNode for the repetition must report that it consumes at least 147 // zero characters, because it may have already looped several times. However, 148 // with a newly-initialized counter, it can report that it consumes at least 149 // six characters. 150 virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry(); 151 // Emits some quick code that checks whether the preloaded characters match. 152 // Falls through on certain failure, jumps to the label on possible success. 153 // If the node cannot make a quick check it does nothing and returns false. 154 bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace, 155 Trace* trace, bool preload_has_checked_bounds, 156 Label* on_possible_success, 157 QuickCheckDetails* details_return, 158 bool fall_through_on_failure, ChoiceNode* predecessor); 159 // For a given number of characters this returns a mask and a value. The 160 // next n characters are anded with the mask and compared with the value. 161 // A comparison failure indicates the node cannot match the next n characters. 162 // A comparison success indicates the node may match. 163 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 164 RegExpCompiler* compiler, 165 int characters_filled_in, 166 bool not_at_start) = 0; 167 // Fills in quick check details for this node, given that this is a 168 // LoopChoiceNode whose counter register is in a newly-initialized state at 169 // the current position in the generated code. For example, consider /a{6,8}/. 170 // Absent any extra information, the LoopChoiceNode for the repetition cannot 171 // generate any useful quick check because a match might be the (empty) 172 // continuation node. However, with a newly-initialized counter, it can 173 // generate a quick check for several 'a' characters at once. 174 virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, 175 RegExpCompiler* compiler, 176 int characters_filled_in, 177 bool not_at_start); 178 static const int kNodeIsTooComplexForGreedyLoops = kMinInt; GreedyLoopTextLength()179 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 180 // Only returns the successor for a text node of length 1 that matches any 181 // character and that has no guards on it. GetSuccessorOfOmnivorousTextNode(RegExpCompiler * compiler)182 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( 183 RegExpCompiler* compiler) { 184 return nullptr; 185 } 186 187 // Collects information on the possible code units (mod 128) that can match if 188 // we look forward. This is used for a Boyer-Moore-like string searching 189 // implementation. TODO(erikcorry): This should share more code with 190 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit 191 // the number of nodes we are willing to look at in order to create this data. 192 static const int kRecursionBudget = 200; 193 bool KeepRecursing(RegExpCompiler* compiler); FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)194 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, 195 BoyerMooreLookahead* bm, bool not_at_start) { 196 UNREACHABLE(); 197 } 198 199 // If we know that the input is one-byte then there are some nodes that can 200 // never match. This method returns a node that can be substituted for 201 // itself, or nullptr if the node can never match. FilterOneByte(int depth)202 virtual RegExpNode* FilterOneByte(int depth) { return this; } 203 // Helper for FilterOneByte. replacement()204 RegExpNode* replacement() { 205 DCHECK(info()->replacement_calculated); 206 return replacement_; 207 } set_replacement(RegExpNode * replacement)208 RegExpNode* set_replacement(RegExpNode* replacement) { 209 info()->replacement_calculated = true; 210 replacement_ = replacement; 211 return replacement; // For convenience. 212 } 213 214 // We want to avoid recalculating the lookahead info, so we store it on the 215 // node. Only info that is for this node is stored. We can tell that the 216 // info is for this node when offset == 0, so the information is calculated 217 // relative to this node. SaveBMInfo(BoyerMooreLookahead * bm,bool not_at_start,int offset)218 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) { 219 if (offset == 0) set_bm_info(not_at_start, bm); 220 } 221 label()222 Label* label() { return &label_; } 223 // If non-generic code is generated for a node (i.e. the node is not at the 224 // start of the trace) then it cannot be reused. This variable sets a limit 225 // on how often we allow that to happen before we insist on starting a new 226 // trace and generating generic code for a node that can be reused by flushing 227 // the deferred actions in the current trace and generating a goto. 228 static const int kMaxCopiesCodeGenerated = 10; 229 on_work_list()230 bool on_work_list() { return on_work_list_; } set_on_work_list(bool value)231 void set_on_work_list(bool value) { on_work_list_ = value; } 232 info()233 NodeInfo* info() { return &info_; } eats_at_least_info()234 const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; } set_eats_at_least_info(const EatsAtLeastInfo & eats_at_least)235 void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) { 236 eats_at_least_ = eats_at_least; 237 } 238 239 // TODO(v8:10441): This is a hacky way to avoid exponential code size growth 240 // for very large choice nodes that can be generated by unicode property 241 // escapes. In order to avoid inlining (i.e. trace recursion), we pretend to 242 // have generated the maximum count of code copies already. 243 // We should instead fix this properly, e.g. by using the code size budget 244 // (flush_budget) or by generating property escape matches as calls to a C 245 // function. SetDoNotInline()246 void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; } 247 bm_info(bool not_at_start)248 BoyerMooreLookahead* bm_info(bool not_at_start) { 249 return bm_info_[not_at_start ? 1 : 0]; 250 } 251 zone()252 Zone* zone() const { return zone_; } 253 254 protected: 255 enum LimitResult { DONE, CONTINUE }; 256 RegExpNode* replacement_; 257 258 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); 259 set_bm_info(bool not_at_start,BoyerMooreLookahead * bm)260 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) { 261 bm_info_[not_at_start ? 1 : 0] = bm; 262 } 263 264 private: 265 static const int kFirstCharBudget = 10; 266 Label label_; 267 bool on_work_list_; 268 NodeInfo info_; 269 270 // Saved values for EatsAtLeast results, to avoid recomputation. Filled in 271 // during analysis (valid if info_.been_analyzed is true). 272 EatsAtLeastInfo eats_at_least_; 273 274 // This variable keeps track of how many times code has been generated for 275 // this node (in different traces). We don't keep track of where the 276 // generated code is located unless the code is generated at the start of 277 // a trace, in which case it is generic and can be reused by flushing the 278 // deferred operations in the current trace and generating a goto. 279 int trace_count_; 280 BoyerMooreLookahead* bm_info_[2]; 281 282 Zone* zone_; 283 }; 284 285 class SeqRegExpNode : public RegExpNode { 286 public: SeqRegExpNode(RegExpNode * on_success)287 explicit SeqRegExpNode(RegExpNode* on_success) 288 : RegExpNode(on_success->zone()), on_success_(on_success) {} on_success()289 RegExpNode* on_success() { return on_success_; } set_on_success(RegExpNode * node)290 void set_on_success(RegExpNode* node) { on_success_ = node; } 291 RegExpNode* FilterOneByte(int depth) override; FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)292 void FillInBMInfo(Isolate* isolate, int offset, int budget, 293 BoyerMooreLookahead* bm, bool not_at_start) override { 294 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 295 if (offset == 0) set_bm_info(not_at_start, bm); 296 } 297 298 protected: 299 RegExpNode* FilterSuccessor(int depth); 300 301 private: 302 RegExpNode* on_success_; 303 }; 304 305 class ActionNode : public SeqRegExpNode { 306 public: 307 enum ActionType { 308 SET_REGISTER_FOR_LOOP, 309 INCREMENT_REGISTER, 310 STORE_POSITION, 311 BEGIN_SUBMATCH, 312 POSITIVE_SUBMATCH_SUCCESS, 313 EMPTY_MATCH_CHECK, 314 CLEAR_CAPTURES 315 }; 316 static ActionNode* SetRegisterForLoop(int reg, int val, 317 RegExpNode* on_success); 318 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); 319 static ActionNode* StorePosition(int reg, bool is_capture, 320 RegExpNode* on_success); 321 static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); 322 static ActionNode* BeginSubmatch(int stack_pointer_reg, int position_reg, 323 RegExpNode* on_success); 324 static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg, 325 int restore_reg, 326 int clear_capture_count, 327 int clear_capture_from, 328 RegExpNode* on_success); 329 static ActionNode* EmptyMatchCheck(int start_register, 330 int repetition_register, 331 int repetition_limit, 332 RegExpNode* on_success); 333 void Accept(NodeVisitor* visitor) override; 334 void Emit(RegExpCompiler* compiler, Trace* trace) override; 335 void GetQuickCheckDetails(QuickCheckDetails* details, 336 RegExpCompiler* compiler, int filled_in, 337 bool not_at_start) override; 338 void FillInBMInfo(Isolate* isolate, int offset, int budget, 339 BoyerMooreLookahead* bm, bool not_at_start) override; action_type()340 ActionType action_type() { return action_type_; } 341 // TODO(erikcorry): We should allow some action nodes in greedy loops. GreedyLoopTextLength()342 int GreedyLoopTextLength() override { 343 return kNodeIsTooComplexForGreedyLoops; 344 } 345 346 private: 347 union { 348 struct { 349 int reg; 350 int value; 351 } u_store_register; 352 struct { 353 int reg; 354 } u_increment_register; 355 struct { 356 int reg; 357 bool is_capture; 358 } u_position_register; 359 struct { 360 int stack_pointer_register; 361 int current_position_register; 362 int clear_register_count; 363 int clear_register_from; 364 } u_submatch; 365 struct { 366 int start_register; 367 int repetition_register; 368 int repetition_limit; 369 } u_empty_match_check; 370 struct { 371 int range_from; 372 int range_to; 373 } u_clear_captures; 374 } data_; ActionNode(ActionType action_type,RegExpNode * on_success)375 ActionNode(ActionType action_type, RegExpNode* on_success) 376 : SeqRegExpNode(on_success), action_type_(action_type) {} 377 ActionType action_type_; 378 friend class DotPrinterImpl; 379 }; 380 381 class TextNode : public SeqRegExpNode { 382 public: TextNode(ZoneList<TextElement> * elms,bool read_backward,RegExpNode * on_success)383 TextNode(ZoneList<TextElement>* elms, bool read_backward, 384 RegExpNode* on_success) 385 : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {} TextNode(RegExpCharacterClass * that,bool read_backward,RegExpNode * on_success)386 TextNode(RegExpCharacterClass* that, bool read_backward, 387 RegExpNode* on_success) 388 : SeqRegExpNode(on_success), 389 elms_(new (zone()) ZoneList<TextElement>(1, zone())), 390 read_backward_(read_backward) { 391 elms_->Add(TextElement::CharClass(that), zone()); 392 } 393 // Create TextNode for a single character class for the given ranges. 394 static TextNode* CreateForCharacterRanges(Zone* zone, 395 ZoneList<CharacterRange>* ranges, 396 bool read_backward, 397 RegExpNode* on_success, 398 JSRegExp::Flags flags); 399 // Create TextNode for a surrogate pair with a range given for the 400 // lead and the trail surrogate each. 401 static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead, 402 CharacterRange trail, 403 bool read_backward, 404 RegExpNode* on_success, 405 JSRegExp::Flags flags); 406 void Accept(NodeVisitor* visitor) override; 407 void Emit(RegExpCompiler* compiler, Trace* trace) override; 408 void GetQuickCheckDetails(QuickCheckDetails* details, 409 RegExpCompiler* compiler, int characters_filled_in, 410 bool not_at_start) override; elements()411 ZoneList<TextElement>* elements() { return elms_; } read_backward()412 bool read_backward() { return read_backward_; } 413 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); 414 int GreedyLoopTextLength() override; 415 RegExpNode* GetSuccessorOfOmnivorousTextNode( 416 RegExpCompiler* compiler) override; 417 void FillInBMInfo(Isolate* isolate, int offset, int budget, 418 BoyerMooreLookahead* bm, bool not_at_start) override; 419 void CalculateOffsets(); 420 RegExpNode* FilterOneByte(int depth) override; 421 int Length(); 422 423 private: 424 enum TextEmitPassType { 425 NON_LATIN1_MATCH, // Check for characters that can't match. 426 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 427 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 428 CASE_CHARACTER_MATCH, // Case-independent single character check. 429 CHARACTER_CLASS_MATCH // Character class. 430 }; 431 static bool SkipPass(TextEmitPassType pass, bool ignore_case); 432 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; 433 static const int kLastPass = CHARACTER_CLASS_MATCH; 434 void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, 435 bool preloaded, Trace* trace, bool first_element_checked, 436 int* checked_up_to); 437 ZoneList<TextElement>* elms_; 438 bool read_backward_; 439 }; 440 441 class AssertionNode : public SeqRegExpNode { 442 public: 443 enum AssertionType { 444 AT_END, 445 AT_START, 446 AT_BOUNDARY, 447 AT_NON_BOUNDARY, 448 AFTER_NEWLINE 449 }; AtEnd(RegExpNode * on_success)450 static AssertionNode* AtEnd(RegExpNode* on_success) { 451 return new (on_success->zone()) AssertionNode(AT_END, on_success); 452 } AtStart(RegExpNode * on_success)453 static AssertionNode* AtStart(RegExpNode* on_success) { 454 return new (on_success->zone()) AssertionNode(AT_START, on_success); 455 } AtBoundary(RegExpNode * on_success)456 static AssertionNode* AtBoundary(RegExpNode* on_success) { 457 return new (on_success->zone()) AssertionNode(AT_BOUNDARY, on_success); 458 } AtNonBoundary(RegExpNode * on_success)459 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { 460 return new (on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success); 461 } AfterNewline(RegExpNode * on_success)462 static AssertionNode* AfterNewline(RegExpNode* on_success) { 463 return new (on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); 464 } 465 void Accept(NodeVisitor* visitor) override; 466 void Emit(RegExpCompiler* compiler, Trace* trace) override; 467 void GetQuickCheckDetails(QuickCheckDetails* details, 468 RegExpCompiler* compiler, int filled_in, 469 bool not_at_start) override; 470 void FillInBMInfo(Isolate* isolate, int offset, int budget, 471 BoyerMooreLookahead* bm, bool not_at_start) override; assertion_type()472 AssertionType assertion_type() { return assertion_type_; } 473 474 private: 475 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); 476 enum IfPrevious { kIsNonWord, kIsWord }; 477 void BacktrackIfPrevious(RegExpCompiler* compiler, Trace* trace, 478 IfPrevious backtrack_if_previous); AssertionNode(AssertionType t,RegExpNode * on_success)479 AssertionNode(AssertionType t, RegExpNode* on_success) 480 : SeqRegExpNode(on_success), assertion_type_(t) {} 481 AssertionType assertion_type_; 482 }; 483 484 class BackReferenceNode : public SeqRegExpNode { 485 public: BackReferenceNode(int start_reg,int end_reg,JSRegExp::Flags flags,bool read_backward,RegExpNode * on_success)486 BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags, 487 bool read_backward, RegExpNode* on_success) 488 : SeqRegExpNode(on_success), 489 start_reg_(start_reg), 490 end_reg_(end_reg), 491 flags_(flags), 492 read_backward_(read_backward) {} 493 void Accept(NodeVisitor* visitor) override; start_register()494 int start_register() { return start_reg_; } end_register()495 int end_register() { return end_reg_; } read_backward()496 bool read_backward() { return read_backward_; } 497 void Emit(RegExpCompiler* compiler, Trace* trace) override; GetQuickCheckDetails(QuickCheckDetails * details,RegExpCompiler * compiler,int characters_filled_in,bool not_at_start)498 void GetQuickCheckDetails(QuickCheckDetails* details, 499 RegExpCompiler* compiler, int characters_filled_in, 500 bool not_at_start) override { 501 return; 502 } 503 void FillInBMInfo(Isolate* isolate, int offset, int budget, 504 BoyerMooreLookahead* bm, bool not_at_start) override; 505 506 private: 507 int start_reg_; 508 int end_reg_; 509 JSRegExp::Flags flags_; 510 bool read_backward_; 511 }; 512 513 class EndNode : public RegExpNode { 514 public: 515 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; EndNode(Action action,Zone * zone)516 EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {} 517 void Accept(NodeVisitor* visitor) override; 518 void Emit(RegExpCompiler* compiler, Trace* trace) override; GetQuickCheckDetails(QuickCheckDetails * details,RegExpCompiler * compiler,int characters_filled_in,bool not_at_start)519 void GetQuickCheckDetails(QuickCheckDetails* details, 520 RegExpCompiler* compiler, int characters_filled_in, 521 bool not_at_start) override { 522 // Returning 0 from EatsAtLeast should ensure we never get here. 523 UNREACHABLE(); 524 } FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)525 void FillInBMInfo(Isolate* isolate, int offset, int budget, 526 BoyerMooreLookahead* bm, bool not_at_start) override { 527 // Returning 0 from EatsAtLeast should ensure we never get here. 528 UNREACHABLE(); 529 } 530 531 private: 532 Action action_; 533 }; 534 535 class NegativeSubmatchSuccess : public EndNode { 536 public: NegativeSubmatchSuccess(int stack_pointer_reg,int position_reg,int clear_capture_count,int clear_capture_start,Zone * zone)537 NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg, 538 int clear_capture_count, int clear_capture_start, 539 Zone* zone) 540 : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), 541 stack_pointer_register_(stack_pointer_reg), 542 current_position_register_(position_reg), 543 clear_capture_count_(clear_capture_count), 544 clear_capture_start_(clear_capture_start) {} 545 void Emit(RegExpCompiler* compiler, Trace* trace) override; 546 547 private: 548 int stack_pointer_register_; 549 int current_position_register_; 550 int clear_capture_count_; 551 int clear_capture_start_; 552 }; 553 554 class Guard : public ZoneObject { 555 public: 556 enum Relation { LT, GEQ }; Guard(int reg,Relation op,int value)557 Guard(int reg, Relation op, int value) : reg_(reg), op_(op), value_(value) {} reg()558 int reg() { return reg_; } op()559 Relation op() { return op_; } value()560 int value() { return value_; } 561 562 private: 563 int reg_; 564 Relation op_; 565 int value_; 566 }; 567 568 class GuardedAlternative { 569 public: GuardedAlternative(RegExpNode * node)570 explicit GuardedAlternative(RegExpNode* node) 571 : node_(node), guards_(nullptr) {} 572 void AddGuard(Guard* guard, Zone* zone); node()573 RegExpNode* node() { return node_; } set_node(RegExpNode * node)574 void set_node(RegExpNode* node) { node_ = node; } guards()575 ZoneList<Guard*>* guards() { return guards_; } 576 577 private: 578 RegExpNode* node_; 579 ZoneList<Guard*>* guards_; 580 }; 581 582 class AlternativeGeneration; 583 584 class ChoiceNode : public RegExpNode { 585 public: ChoiceNode(int expected_size,Zone * zone)586 explicit ChoiceNode(int expected_size, Zone* zone) 587 : RegExpNode(zone), 588 alternatives_(new (zone) 589 ZoneList<GuardedAlternative>(expected_size, zone)), 590 not_at_start_(false), 591 being_calculated_(false) {} 592 void Accept(NodeVisitor* visitor) override; AddAlternative(GuardedAlternative node)593 void AddAlternative(GuardedAlternative node) { 594 alternatives()->Add(node, zone()); 595 } alternatives()596 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } 597 void Emit(RegExpCompiler* compiler, Trace* trace) override; 598 void GetQuickCheckDetails(QuickCheckDetails* details, 599 RegExpCompiler* compiler, int characters_filled_in, 600 bool not_at_start) override; 601 void FillInBMInfo(Isolate* isolate, int offset, int budget, 602 BoyerMooreLookahead* bm, bool not_at_start) override; 603 being_calculated()604 bool being_calculated() { return being_calculated_; } not_at_start()605 bool not_at_start() { return not_at_start_; } set_not_at_start()606 void set_not_at_start() { not_at_start_ = true; } set_being_calculated(bool b)607 void set_being_calculated(bool b) { being_calculated_ = b; } try_to_emit_quick_check_for_alternative(bool is_first)608 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { 609 return true; 610 } 611 RegExpNode* FilterOneByte(int depth) override; read_backward()612 virtual bool read_backward() { return false; } 613 614 protected: 615 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); 616 ZoneList<GuardedAlternative>* alternatives_; 617 618 private: 619 template <typename...> 620 friend class Analysis; 621 622 void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard, 623 Trace* trace); 624 int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least); 625 void EmitOutOfLineContinuation(RegExpCompiler* compiler, Trace* trace, 626 GuardedAlternative alternative, 627 AlternativeGeneration* alt_gen, 628 int preload_characters, 629 bool next_expects_preload); 630 void SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace, 631 PreloadState* preloads); 632 void AssertGuardsMentionRegisters(Trace* trace); 633 int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace); 634 Trace* EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace, 635 AlternativeGenerationList* alt_gens, 636 PreloadState* preloads, 637 GreedyLoopState* greedy_loop_state, int text_length); 638 void EmitChoices(RegExpCompiler* compiler, 639 AlternativeGenerationList* alt_gens, int first_choice, 640 Trace* trace, PreloadState* preloads); 641 642 // If true, this node is never checked at the start of the input. 643 // Allows a new trace to start with at_start() set to false. 644 bool not_at_start_; 645 bool being_calculated_; 646 }; 647 648 class NegativeLookaroundChoiceNode : public ChoiceNode { 649 public: NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,GuardedAlternative then_do_this,Zone * zone)650 explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail, 651 GuardedAlternative then_do_this, 652 Zone* zone) 653 : ChoiceNode(2, zone) { 654 AddAlternative(this_must_fail); 655 AddAlternative(then_do_this); 656 } 657 void GetQuickCheckDetails(QuickCheckDetails* details, 658 RegExpCompiler* compiler, int characters_filled_in, 659 bool not_at_start) override; FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)660 void FillInBMInfo(Isolate* isolate, int offset, int budget, 661 BoyerMooreLookahead* bm, bool not_at_start) override { 662 continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm, 663 not_at_start); 664 if (offset == 0) set_bm_info(not_at_start, bm); 665 } 666 static constexpr int kLookaroundIndex = 0; 667 static constexpr int kContinueIndex = 1; lookaround_node()668 RegExpNode* lookaround_node() { 669 return alternatives()->at(kLookaroundIndex).node(); 670 } continue_node()671 RegExpNode* continue_node() { 672 return alternatives()->at(kContinueIndex).node(); 673 } 674 // For a negative lookahead we don't emit the quick check for the 675 // alternative that is expected to fail. This is because quick check code 676 // starts by loading enough characters for the alternative that takes fewest 677 // characters, but on a negative lookahead the negative branch did not take 678 // part in that calculation (EatsAtLeast) so the assumptions don't hold. try_to_emit_quick_check_for_alternative(bool is_first)679 bool try_to_emit_quick_check_for_alternative(bool is_first) override { 680 return !is_first; 681 } 682 void Accept(NodeVisitor* visitor) override; 683 RegExpNode* FilterOneByte(int depth) override; 684 }; 685 686 class LoopChoiceNode : public ChoiceNode { 687 public: LoopChoiceNode(bool body_can_be_zero_length,bool read_backward,int min_loop_iterations,Zone * zone)688 LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, 689 int min_loop_iterations, Zone* zone) 690 : ChoiceNode(2, zone), 691 loop_node_(nullptr), 692 continue_node_(nullptr), 693 body_can_be_zero_length_(body_can_be_zero_length), 694 read_backward_(read_backward), 695 traversed_loop_initialization_node_(false), 696 min_loop_iterations_(min_loop_iterations) {} 697 void AddLoopAlternative(GuardedAlternative alt); 698 void AddContinueAlternative(GuardedAlternative alt); 699 void Emit(RegExpCompiler* compiler, Trace* trace) override; 700 void GetQuickCheckDetails(QuickCheckDetails* details, 701 RegExpCompiler* compiler, int characters_filled_in, 702 bool not_at_start) override; 703 void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details, 704 RegExpCompiler* compiler, 705 int characters_filled_in, 706 bool not_at_start) override; 707 void FillInBMInfo(Isolate* isolate, int offset, int budget, 708 BoyerMooreLookahead* bm, bool not_at_start) override; 709 EatsAtLeastInfo EatsAtLeastFromLoopEntry() override; loop_node()710 RegExpNode* loop_node() { return loop_node_; } continue_node()711 RegExpNode* continue_node() { return continue_node_; } body_can_be_zero_length()712 bool body_can_be_zero_length() { return body_can_be_zero_length_; } min_loop_iterations()713 int min_loop_iterations() const { return min_loop_iterations_; } read_backward()714 bool read_backward() override { return read_backward_; } 715 void Accept(NodeVisitor* visitor) override; 716 RegExpNode* FilterOneByte(int depth) override; 717 718 private: 719 // AddAlternative is made private for loop nodes because alternatives 720 // should not be added freely, we need to keep track of which node 721 // goes back to the node itself. AddAlternative(GuardedAlternative node)722 void AddAlternative(GuardedAlternative node) { 723 ChoiceNode::AddAlternative(node); 724 } 725 726 RegExpNode* loop_node_; 727 RegExpNode* continue_node_; 728 bool body_can_be_zero_length_; 729 bool read_backward_; 730 731 // Temporary marker set only while generating quick check details. Represents 732 // whether GetQuickCheckDetails traversed the initialization node for this 733 // loop's counter. If so, we may be able to generate stricter quick checks 734 // because we know the loop node must match at least min_loop_iterations_ 735 // times before the continuation node can match. 736 bool traversed_loop_initialization_node_; 737 738 // The minimum number of times the loop_node_ must match before the 739 // continue_node_ might be considered. This value can be temporarily decreased 740 // while generating quick check details, to represent the remaining iterations 741 // after the completed portion of the quick check details. 742 int min_loop_iterations_; 743 744 friend class IterationDecrementer; 745 friend class LoopInitializationMarker; 746 }; 747 748 class NodeVisitor { 749 public: 750 virtual ~NodeVisitor() = default; 751 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0; 752 FOR_EACH_NODE_TYPE(DECLARE_VISIT) 753 #undef DECLARE_VISIT 754 }; 755 756 } // namespace internal 757 } // namespace v8 758 759 #endif // V8_REGEXP_REGEXP_NODES_H_ 760