1 // Copyright 2019 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_REGEXP_REGEXP_NODES_H_
6 #define V8_REGEXP_REGEXP_NODES_H_
7 
8 #include "new-regexp/regexp-macro-assembler.h"
9 
10 namespace v8 {
11 namespace internal {
12 
13 class AlternativeGenerationList;
14 class BoyerMooreLookahead;
15 class GreedyLoopState;
16 class Label;
17 class NodeVisitor;
18 class QuickCheckDetails;
19 class RegExpCompiler;
20 class Trace;
21 struct PreloadState;
22 class ChoiceNode;
23 
24 #define FOR_EACH_NODE_TYPE(VISIT) \
25   VISIT(End)                      \
26   VISIT(Action)                   \
27   VISIT(Choice)                   \
28   VISIT(LoopChoice)               \
29   VISIT(NegativeLookaroundChoice) \
30   VISIT(BackReference)            \
31   VISIT(Assertion)                \
32   VISIT(Text)
33 
34 struct NodeInfo final {
NodeInfofinal35   NodeInfo()
36       : being_analyzed(false),
37         been_analyzed(false),
38         follows_word_interest(false),
39         follows_newline_interest(false),
40         follows_start_interest(false),
41         at_end(false),
42         visited(false),
43         replacement_calculated(false) {}
44 
45   // Returns true if the interests and assumptions of this node
46   // matches the given one.
Matchesfinal47   bool Matches(NodeInfo* that) {
48     return (at_end == that->at_end) &&
49            (follows_word_interest == that->follows_word_interest) &&
50            (follows_newline_interest == that->follows_newline_interest) &&
51            (follows_start_interest == that->follows_start_interest);
52   }
53 
54   // Updates the interests of this node given the interests of the
55   // node preceding it.
AddFromPrecedingfinal56   void AddFromPreceding(NodeInfo* that) {
57     at_end |= that->at_end;
58     follows_word_interest |= that->follows_word_interest;
59     follows_newline_interest |= that->follows_newline_interest;
60     follows_start_interest |= that->follows_start_interest;
61   }
62 
HasLookbehindfinal63   bool HasLookbehind() {
64     return follows_word_interest || follows_newline_interest ||
65            follows_start_interest;
66   }
67 
68   // Sets the interests of this node to include the interests of the
69   // following node.
AddFromFollowingfinal70   void AddFromFollowing(NodeInfo* that) {
71     follows_word_interest |= that->follows_word_interest;
72     follows_newline_interest |= that->follows_newline_interest;
73     follows_start_interest |= that->follows_start_interest;
74   }
75 
ResetCompilationStatefinal76   void ResetCompilationState() {
77     being_analyzed = false;
78     been_analyzed = false;
79   }
80 
81   bool being_analyzed : 1;
82   bool been_analyzed : 1;
83 
84   // These bits are set of this node has to know what the preceding
85   // character was.
86   bool follows_word_interest : 1;
87   bool follows_newline_interest : 1;
88   bool follows_start_interest : 1;
89 
90   bool at_end : 1;
91   bool visited : 1;
92   bool replacement_calculated : 1;
93 };
94 
95 struct EatsAtLeastInfo final {
EatsAtLeastInfofinal96   EatsAtLeastInfo() : EatsAtLeastInfo(0) {}
EatsAtLeastInfofinal97   explicit EatsAtLeastInfo(uint8_t eats)
98       : eats_at_least_from_possibly_start(eats),
99         eats_at_least_from_not_start(eats) {}
SetMinfinal100   void SetMin(const EatsAtLeastInfo& other) {
101     if (other.eats_at_least_from_possibly_start <
102         eats_at_least_from_possibly_start) {
103       eats_at_least_from_possibly_start =
104           other.eats_at_least_from_possibly_start;
105     }
106     if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) {
107       eats_at_least_from_not_start = other.eats_at_least_from_not_start;
108     }
109   }
110 
111   // Any successful match starting from the current node will consume at least
112   // this many characters. This does not necessarily mean that there is a
113   // possible match with exactly this many characters, but we generally try to
114   // get this number as high as possible to allow for early exit on failure.
115   uint8_t eats_at_least_from_possibly_start;
116 
117   // Like eats_at_least_from_possibly_start, but with the additional assumption
118   // that start-of-string assertions (^) can't match. This value is greater than
119   // or equal to eats_at_least_from_possibly_start.
120   uint8_t eats_at_least_from_not_start;
121 };
122 
123 class RegExpNode : public ZoneObject {
124  public:
RegExpNode(Zone * zone)125   explicit RegExpNode(Zone* zone)
126       : replacement_(nullptr),
127         on_work_list_(false),
128         trace_count_(0),
129         zone_(zone) {
130     bm_info_[0] = bm_info_[1] = nullptr;
131   }
132   virtual ~RegExpNode();
133   virtual void Accept(NodeVisitor* visitor) = 0;
134   // Generates a goto to this node or actually generates the code at this point.
135   virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
136   // How many characters must this node consume at a minimum in order to
137   // succeed.  The not_at_start argument is used to indicate that we know we are
138   // not at the start of the input.  In this case anchored branches will always
139   // fail and can be ignored when determining how many characters are consumed
140   // on success.  If this node has not been analyzed yet, EatsAtLeast returns 0.
141   int EatsAtLeast(bool not_at_start);
142   // Returns how many characters this node must consume in order to succeed,
143   // given that this is a LoopChoiceNode whose counter register is in a
144   // newly-initialized state at the current position in the generated code. For
145   // example, consider /a{6,8}/. Absent any extra information, the
146   // LoopChoiceNode for the repetition must report that it consumes at least
147   // zero characters, because it may have already looped several times. However,
148   // with a newly-initialized counter, it can report that it consumes at least
149   // six characters.
150   virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry();
151   // Emits some quick code that checks whether the preloaded characters match.
152   // Falls through on certain failure, jumps to the label on possible success.
153   // If the node cannot make a quick check it does nothing and returns false.
154   bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace,
155                       Trace* trace, bool preload_has_checked_bounds,
156                       Label* on_possible_success,
157                       QuickCheckDetails* details_return,
158                       bool fall_through_on_failure, ChoiceNode* predecessor);
159   // For a given number of characters this returns a mask and a value.  The
160   // next n characters are anded with the mask and compared with the value.
161   // A comparison failure indicates the node cannot match the next n characters.
162   // A comparison success indicates the node may match.
163   virtual void GetQuickCheckDetails(QuickCheckDetails* details,
164                                     RegExpCompiler* compiler,
165                                     int characters_filled_in,
166                                     bool not_at_start) = 0;
167   // Fills in quick check details for this node, given that this is a
168   // LoopChoiceNode whose counter register is in a newly-initialized state at
169   // the current position in the generated code. For example, consider /a{6,8}/.
170   // Absent any extra information, the LoopChoiceNode for the repetition cannot
171   // generate any useful quick check because a match might be the (empty)
172   // continuation node. However, with a newly-initialized counter, it can
173   // generate a quick check for several 'a' characters at once.
174   virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
175                                                  RegExpCompiler* compiler,
176                                                  int characters_filled_in,
177                                                  bool not_at_start);
178   static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
GreedyLoopTextLength()179   virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
180   // Only returns the successor for a text node of length 1 that matches any
181   // character and that has no guards on it.
GetSuccessorOfOmnivorousTextNode(RegExpCompiler * compiler)182   virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
183       RegExpCompiler* compiler) {
184     return nullptr;
185   }
186 
187   // Collects information on the possible code units (mod 128) that can match if
188   // we look forward.  This is used for a Boyer-Moore-like string searching
189   // implementation.  TODO(erikcorry):  This should share more code with
190   // EatsAtLeast, GetQuickCheckDetails.  The budget argument is used to limit
191   // the number of nodes we are willing to look at in order to create this data.
192   static const int kRecursionBudget = 200;
193   bool KeepRecursing(RegExpCompiler* compiler);
FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)194   virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
195                             BoyerMooreLookahead* bm, bool not_at_start) {
196     UNREACHABLE();
197   }
198 
199   // If we know that the input is one-byte then there are some nodes that can
200   // never match.  This method returns a node that can be substituted for
201   // itself, or nullptr if the node can never match.
FilterOneByte(int depth)202   virtual RegExpNode* FilterOneByte(int depth) { return this; }
203   // Helper for FilterOneByte.
replacement()204   RegExpNode* replacement() {
205     DCHECK(info()->replacement_calculated);
206     return replacement_;
207   }
set_replacement(RegExpNode * replacement)208   RegExpNode* set_replacement(RegExpNode* replacement) {
209     info()->replacement_calculated = true;
210     replacement_ = replacement;
211     return replacement;  // For convenience.
212   }
213 
214   // We want to avoid recalculating the lookahead info, so we store it on the
215   // node.  Only info that is for this node is stored.  We can tell that the
216   // info is for this node when offset == 0, so the information is calculated
217   // relative to this node.
SaveBMInfo(BoyerMooreLookahead * bm,bool not_at_start,int offset)218   void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
219     if (offset == 0) set_bm_info(not_at_start, bm);
220   }
221 
label()222   Label* label() { return &label_; }
223   // If non-generic code is generated for a node (i.e. the node is not at the
224   // start of the trace) then it cannot be reused.  This variable sets a limit
225   // on how often we allow that to happen before we insist on starting a new
226   // trace and generating generic code for a node that can be reused by flushing
227   // the deferred actions in the current trace and generating a goto.
228   static const int kMaxCopiesCodeGenerated = 10;
229 
on_work_list()230   bool on_work_list() { return on_work_list_; }
set_on_work_list(bool value)231   void set_on_work_list(bool value) { on_work_list_ = value; }
232 
info()233   NodeInfo* info() { return &info_; }
eats_at_least_info()234   const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; }
set_eats_at_least_info(const EatsAtLeastInfo & eats_at_least)235   void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) {
236     eats_at_least_ = eats_at_least;
237   }
238 
239   // TODO(v8:10441): This is a hacky way to avoid exponential code size growth
240   // for very large choice nodes that can be generated by unicode property
241   // escapes. In order to avoid inlining (i.e. trace recursion), we pretend to
242   // have generated the maximum count of code copies already.
243   // We should instead fix this properly, e.g. by using the code size budget
244   // (flush_budget) or by generating property escape matches as calls to a C
245   // function.
SetDoNotInline()246   void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; }
247 
bm_info(bool not_at_start)248   BoyerMooreLookahead* bm_info(bool not_at_start) {
249     return bm_info_[not_at_start ? 1 : 0];
250   }
251 
zone()252   Zone* zone() const { return zone_; }
253 
254  protected:
255   enum LimitResult { DONE, CONTINUE };
256   RegExpNode* replacement_;
257 
258   LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
259 
set_bm_info(bool not_at_start,BoyerMooreLookahead * bm)260   void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
261     bm_info_[not_at_start ? 1 : 0] = bm;
262   }
263 
264  private:
265   static const int kFirstCharBudget = 10;
266   Label label_;
267   bool on_work_list_;
268   NodeInfo info_;
269 
270   // Saved values for EatsAtLeast results, to avoid recomputation. Filled in
271   // during analysis (valid if info_.been_analyzed is true).
272   EatsAtLeastInfo eats_at_least_;
273 
274   // This variable keeps track of how many times code has been generated for
275   // this node (in different traces).  We don't keep track of where the
276   // generated code is located unless the code is generated at the start of
277   // a trace, in which case it is generic and can be reused by flushing the
278   // deferred operations in the current trace and generating a goto.
279   int trace_count_;
280   BoyerMooreLookahead* bm_info_[2];
281 
282   Zone* zone_;
283 };
284 
285 class SeqRegExpNode : public RegExpNode {
286  public:
SeqRegExpNode(RegExpNode * on_success)287   explicit SeqRegExpNode(RegExpNode* on_success)
288       : RegExpNode(on_success->zone()), on_success_(on_success) {}
on_success()289   RegExpNode* on_success() { return on_success_; }
set_on_success(RegExpNode * node)290   void set_on_success(RegExpNode* node) { on_success_ = node; }
291   RegExpNode* FilterOneByte(int depth) override;
FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)292   void FillInBMInfo(Isolate* isolate, int offset, int budget,
293                     BoyerMooreLookahead* bm, bool not_at_start) override {
294     on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
295     if (offset == 0) set_bm_info(not_at_start, bm);
296   }
297 
298  protected:
299   RegExpNode* FilterSuccessor(int depth);
300 
301  private:
302   RegExpNode* on_success_;
303 };
304 
305 class ActionNode : public SeqRegExpNode {
306  public:
307   enum ActionType {
308     SET_REGISTER_FOR_LOOP,
309     INCREMENT_REGISTER,
310     STORE_POSITION,
311     BEGIN_SUBMATCH,
312     POSITIVE_SUBMATCH_SUCCESS,
313     EMPTY_MATCH_CHECK,
314     CLEAR_CAPTURES
315   };
316   static ActionNode* SetRegisterForLoop(int reg, int val,
317                                         RegExpNode* on_success);
318   static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
319   static ActionNode* StorePosition(int reg, bool is_capture,
320                                    RegExpNode* on_success);
321   static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
322   static ActionNode* BeginSubmatch(int stack_pointer_reg, int position_reg,
323                                    RegExpNode* on_success);
324   static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
325                                              int restore_reg,
326                                              int clear_capture_count,
327                                              int clear_capture_from,
328                                              RegExpNode* on_success);
329   static ActionNode* EmptyMatchCheck(int start_register,
330                                      int repetition_register,
331                                      int repetition_limit,
332                                      RegExpNode* on_success);
333   void Accept(NodeVisitor* visitor) override;
334   void Emit(RegExpCompiler* compiler, Trace* trace) override;
335   void GetQuickCheckDetails(QuickCheckDetails* details,
336                             RegExpCompiler* compiler, int filled_in,
337                             bool not_at_start) override;
338   void FillInBMInfo(Isolate* isolate, int offset, int budget,
339                     BoyerMooreLookahead* bm, bool not_at_start) override;
action_type()340   ActionType action_type() { return action_type_; }
341   // TODO(erikcorry): We should allow some action nodes in greedy loops.
GreedyLoopTextLength()342   int GreedyLoopTextLength() override {
343     return kNodeIsTooComplexForGreedyLoops;
344   }
345 
346  private:
347   union {
348     struct {
349       int reg;
350       int value;
351     } u_store_register;
352     struct {
353       int reg;
354     } u_increment_register;
355     struct {
356       int reg;
357       bool is_capture;
358     } u_position_register;
359     struct {
360       int stack_pointer_register;
361       int current_position_register;
362       int clear_register_count;
363       int clear_register_from;
364     } u_submatch;
365     struct {
366       int start_register;
367       int repetition_register;
368       int repetition_limit;
369     } u_empty_match_check;
370     struct {
371       int range_from;
372       int range_to;
373     } u_clear_captures;
374   } data_;
ActionNode(ActionType action_type,RegExpNode * on_success)375   ActionNode(ActionType action_type, RegExpNode* on_success)
376       : SeqRegExpNode(on_success), action_type_(action_type) {}
377   ActionType action_type_;
378   friend class DotPrinterImpl;
379 };
380 
381 class TextNode : public SeqRegExpNode {
382  public:
TextNode(ZoneList<TextElement> * elms,bool read_backward,RegExpNode * on_success)383   TextNode(ZoneList<TextElement>* elms, bool read_backward,
384            RegExpNode* on_success)
385       : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
TextNode(RegExpCharacterClass * that,bool read_backward,RegExpNode * on_success)386   TextNode(RegExpCharacterClass* that, bool read_backward,
387            RegExpNode* on_success)
388       : SeqRegExpNode(on_success),
389         elms_(new (zone()) ZoneList<TextElement>(1, zone())),
390         read_backward_(read_backward) {
391     elms_->Add(TextElement::CharClass(that), zone());
392   }
393   // Create TextNode for a single character class for the given ranges.
394   static TextNode* CreateForCharacterRanges(Zone* zone,
395                                             ZoneList<CharacterRange>* ranges,
396                                             bool read_backward,
397                                             RegExpNode* on_success,
398                                             JSRegExp::Flags flags);
399   // Create TextNode for a surrogate pair with a range given for the
400   // lead and the trail surrogate each.
401   static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
402                                           CharacterRange trail,
403                                           bool read_backward,
404                                           RegExpNode* on_success,
405                                           JSRegExp::Flags flags);
406   void Accept(NodeVisitor* visitor) override;
407   void Emit(RegExpCompiler* compiler, Trace* trace) override;
408   void GetQuickCheckDetails(QuickCheckDetails* details,
409                             RegExpCompiler* compiler, int characters_filled_in,
410                             bool not_at_start) override;
elements()411   ZoneList<TextElement>* elements() { return elms_; }
read_backward()412   bool read_backward() { return read_backward_; }
413   void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
414   int GreedyLoopTextLength() override;
415   RegExpNode* GetSuccessorOfOmnivorousTextNode(
416       RegExpCompiler* compiler) override;
417   void FillInBMInfo(Isolate* isolate, int offset, int budget,
418                     BoyerMooreLookahead* bm, bool not_at_start) override;
419   void CalculateOffsets();
420   RegExpNode* FilterOneByte(int depth) override;
421   int Length();
422 
423  private:
424   enum TextEmitPassType {
425     NON_LATIN1_MATCH,            // Check for characters that can't match.
426     SIMPLE_CHARACTER_MATCH,      // Case-dependent single character check.
427     NON_LETTER_CHARACTER_MATCH,  // Check characters that have no case equivs.
428     CASE_CHARACTER_MATCH,        // Case-independent single character check.
429     CHARACTER_CLASS_MATCH        // Character class.
430   };
431   static bool SkipPass(TextEmitPassType pass, bool ignore_case);
432   static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
433   static const int kLastPass = CHARACTER_CLASS_MATCH;
434   void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
435                     bool preloaded, Trace* trace, bool first_element_checked,
436                     int* checked_up_to);
437   ZoneList<TextElement>* elms_;
438   bool read_backward_;
439 };
440 
441 class AssertionNode : public SeqRegExpNode {
442  public:
443   enum AssertionType {
444     AT_END,
445     AT_START,
446     AT_BOUNDARY,
447     AT_NON_BOUNDARY,
448     AFTER_NEWLINE
449   };
AtEnd(RegExpNode * on_success)450   static AssertionNode* AtEnd(RegExpNode* on_success) {
451     return new (on_success->zone()) AssertionNode(AT_END, on_success);
452   }
AtStart(RegExpNode * on_success)453   static AssertionNode* AtStart(RegExpNode* on_success) {
454     return new (on_success->zone()) AssertionNode(AT_START, on_success);
455   }
AtBoundary(RegExpNode * on_success)456   static AssertionNode* AtBoundary(RegExpNode* on_success) {
457     return new (on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
458   }
AtNonBoundary(RegExpNode * on_success)459   static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
460     return new (on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
461   }
AfterNewline(RegExpNode * on_success)462   static AssertionNode* AfterNewline(RegExpNode* on_success) {
463     return new (on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
464   }
465   void Accept(NodeVisitor* visitor) override;
466   void Emit(RegExpCompiler* compiler, Trace* trace) override;
467   void GetQuickCheckDetails(QuickCheckDetails* details,
468                             RegExpCompiler* compiler, int filled_in,
469                             bool not_at_start) override;
470   void FillInBMInfo(Isolate* isolate, int offset, int budget,
471                     BoyerMooreLookahead* bm, bool not_at_start) override;
assertion_type()472   AssertionType assertion_type() { return assertion_type_; }
473 
474  private:
475   void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
476   enum IfPrevious { kIsNonWord, kIsWord };
477   void BacktrackIfPrevious(RegExpCompiler* compiler, Trace* trace,
478                            IfPrevious backtrack_if_previous);
AssertionNode(AssertionType t,RegExpNode * on_success)479   AssertionNode(AssertionType t, RegExpNode* on_success)
480       : SeqRegExpNode(on_success), assertion_type_(t) {}
481   AssertionType assertion_type_;
482 };
483 
484 class BackReferenceNode : public SeqRegExpNode {
485  public:
BackReferenceNode(int start_reg,int end_reg,JSRegExp::Flags flags,bool read_backward,RegExpNode * on_success)486   BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags,
487                     bool read_backward, RegExpNode* on_success)
488       : SeqRegExpNode(on_success),
489         start_reg_(start_reg),
490         end_reg_(end_reg),
491         flags_(flags),
492         read_backward_(read_backward) {}
493   void Accept(NodeVisitor* visitor) override;
start_register()494   int start_register() { return start_reg_; }
end_register()495   int end_register() { return end_reg_; }
read_backward()496   bool read_backward() { return read_backward_; }
497   void Emit(RegExpCompiler* compiler, Trace* trace) override;
GetQuickCheckDetails(QuickCheckDetails * details,RegExpCompiler * compiler,int characters_filled_in,bool not_at_start)498   void GetQuickCheckDetails(QuickCheckDetails* details,
499                             RegExpCompiler* compiler, int characters_filled_in,
500                             bool not_at_start) override {
501     return;
502   }
503   void FillInBMInfo(Isolate* isolate, int offset, int budget,
504                     BoyerMooreLookahead* bm, bool not_at_start) override;
505 
506  private:
507   int start_reg_;
508   int end_reg_;
509   JSRegExp::Flags flags_;
510   bool read_backward_;
511 };
512 
513 class EndNode : public RegExpNode {
514  public:
515   enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
EndNode(Action action,Zone * zone)516   EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {}
517   void Accept(NodeVisitor* visitor) override;
518   void Emit(RegExpCompiler* compiler, Trace* trace) override;
GetQuickCheckDetails(QuickCheckDetails * details,RegExpCompiler * compiler,int characters_filled_in,bool not_at_start)519   void GetQuickCheckDetails(QuickCheckDetails* details,
520                             RegExpCompiler* compiler, int characters_filled_in,
521                             bool not_at_start) override {
522     // Returning 0 from EatsAtLeast should ensure we never get here.
523     UNREACHABLE();
524   }
FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)525   void FillInBMInfo(Isolate* isolate, int offset, int budget,
526                     BoyerMooreLookahead* bm, bool not_at_start) override {
527     // Returning 0 from EatsAtLeast should ensure we never get here.
528     UNREACHABLE();
529   }
530 
531  private:
532   Action action_;
533 };
534 
535 class NegativeSubmatchSuccess : public EndNode {
536  public:
NegativeSubmatchSuccess(int stack_pointer_reg,int position_reg,int clear_capture_count,int clear_capture_start,Zone * zone)537   NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg,
538                           int clear_capture_count, int clear_capture_start,
539                           Zone* zone)
540       : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone),
541         stack_pointer_register_(stack_pointer_reg),
542         current_position_register_(position_reg),
543         clear_capture_count_(clear_capture_count),
544         clear_capture_start_(clear_capture_start) {}
545   void Emit(RegExpCompiler* compiler, Trace* trace) override;
546 
547  private:
548   int stack_pointer_register_;
549   int current_position_register_;
550   int clear_capture_count_;
551   int clear_capture_start_;
552 };
553 
554 class Guard : public ZoneObject {
555  public:
556   enum Relation { LT, GEQ };
Guard(int reg,Relation op,int value)557   Guard(int reg, Relation op, int value) : reg_(reg), op_(op), value_(value) {}
reg()558   int reg() { return reg_; }
op()559   Relation op() { return op_; }
value()560   int value() { return value_; }
561 
562  private:
563   int reg_;
564   Relation op_;
565   int value_;
566 };
567 
568 class GuardedAlternative {
569  public:
GuardedAlternative(RegExpNode * node)570   explicit GuardedAlternative(RegExpNode* node)
571       : node_(node), guards_(nullptr) {}
572   void AddGuard(Guard* guard, Zone* zone);
node()573   RegExpNode* node() { return node_; }
set_node(RegExpNode * node)574   void set_node(RegExpNode* node) { node_ = node; }
guards()575   ZoneList<Guard*>* guards() { return guards_; }
576 
577  private:
578   RegExpNode* node_;
579   ZoneList<Guard*>* guards_;
580 };
581 
582 class AlternativeGeneration;
583 
584 class ChoiceNode : public RegExpNode {
585  public:
ChoiceNode(int expected_size,Zone * zone)586   explicit ChoiceNode(int expected_size, Zone* zone)
587       : RegExpNode(zone),
588         alternatives_(new (zone)
589                           ZoneList<GuardedAlternative>(expected_size, zone)),
590         not_at_start_(false),
591         being_calculated_(false) {}
592   void Accept(NodeVisitor* visitor) override;
AddAlternative(GuardedAlternative node)593   void AddAlternative(GuardedAlternative node) {
594     alternatives()->Add(node, zone());
595   }
alternatives()596   ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
597   void Emit(RegExpCompiler* compiler, Trace* trace) override;
598   void GetQuickCheckDetails(QuickCheckDetails* details,
599                             RegExpCompiler* compiler, int characters_filled_in,
600                             bool not_at_start) override;
601   void FillInBMInfo(Isolate* isolate, int offset, int budget,
602                     BoyerMooreLookahead* bm, bool not_at_start) override;
603 
being_calculated()604   bool being_calculated() { return being_calculated_; }
not_at_start()605   bool not_at_start() { return not_at_start_; }
set_not_at_start()606   void set_not_at_start() { not_at_start_ = true; }
set_being_calculated(bool b)607   void set_being_calculated(bool b) { being_calculated_ = b; }
try_to_emit_quick_check_for_alternative(bool is_first)608   virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
609     return true;
610   }
611   RegExpNode* FilterOneByte(int depth) override;
read_backward()612   virtual bool read_backward() { return false; }
613 
614  protected:
615   int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
616   ZoneList<GuardedAlternative>* alternatives_;
617 
618  private:
619   template <typename...>
620   friend class Analysis;
621 
622   void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard,
623                      Trace* trace);
624   int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
625   void EmitOutOfLineContinuation(RegExpCompiler* compiler, Trace* trace,
626                                  GuardedAlternative alternative,
627                                  AlternativeGeneration* alt_gen,
628                                  int preload_characters,
629                                  bool next_expects_preload);
630   void SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
631                     PreloadState* preloads);
632   void AssertGuardsMentionRegisters(Trace* trace);
633   int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace);
634   Trace* EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
635                         AlternativeGenerationList* alt_gens,
636                         PreloadState* preloads,
637                         GreedyLoopState* greedy_loop_state, int text_length);
638   void EmitChoices(RegExpCompiler* compiler,
639                    AlternativeGenerationList* alt_gens, int first_choice,
640                    Trace* trace, PreloadState* preloads);
641 
642   // If true, this node is never checked at the start of the input.
643   // Allows a new trace to start with at_start() set to false.
644   bool not_at_start_;
645   bool being_calculated_;
646 };
647 
648 class NegativeLookaroundChoiceNode : public ChoiceNode {
649  public:
NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,GuardedAlternative then_do_this,Zone * zone)650   explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,
651                                         GuardedAlternative then_do_this,
652                                         Zone* zone)
653       : ChoiceNode(2, zone) {
654     AddAlternative(this_must_fail);
655     AddAlternative(then_do_this);
656   }
657   void GetQuickCheckDetails(QuickCheckDetails* details,
658                             RegExpCompiler* compiler, int characters_filled_in,
659                             bool not_at_start) override;
FillInBMInfo(Isolate * isolate,int offset,int budget,BoyerMooreLookahead * bm,bool not_at_start)660   void FillInBMInfo(Isolate* isolate, int offset, int budget,
661                     BoyerMooreLookahead* bm, bool not_at_start) override {
662     continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm,
663                                   not_at_start);
664     if (offset == 0) set_bm_info(not_at_start, bm);
665   }
666   static constexpr int kLookaroundIndex = 0;
667   static constexpr int kContinueIndex = 1;
lookaround_node()668   RegExpNode* lookaround_node() {
669     return alternatives()->at(kLookaroundIndex).node();
670   }
continue_node()671   RegExpNode* continue_node() {
672     return alternatives()->at(kContinueIndex).node();
673   }
674   // For a negative lookahead we don't emit the quick check for the
675   // alternative that is expected to fail.  This is because quick check code
676   // starts by loading enough characters for the alternative that takes fewest
677   // characters, but on a negative lookahead the negative branch did not take
678   // part in that calculation (EatsAtLeast) so the assumptions don't hold.
try_to_emit_quick_check_for_alternative(bool is_first)679   bool try_to_emit_quick_check_for_alternative(bool is_first) override {
680     return !is_first;
681   }
682   void Accept(NodeVisitor* visitor) override;
683   RegExpNode* FilterOneByte(int depth) override;
684 };
685 
686 class LoopChoiceNode : public ChoiceNode {
687  public:
LoopChoiceNode(bool body_can_be_zero_length,bool read_backward,int min_loop_iterations,Zone * zone)688   LoopChoiceNode(bool body_can_be_zero_length, bool read_backward,
689                  int min_loop_iterations, Zone* zone)
690       : ChoiceNode(2, zone),
691         loop_node_(nullptr),
692         continue_node_(nullptr),
693         body_can_be_zero_length_(body_can_be_zero_length),
694         read_backward_(read_backward),
695         traversed_loop_initialization_node_(false),
696         min_loop_iterations_(min_loop_iterations) {}
697   void AddLoopAlternative(GuardedAlternative alt);
698   void AddContinueAlternative(GuardedAlternative alt);
699   void Emit(RegExpCompiler* compiler, Trace* trace) override;
700   void GetQuickCheckDetails(QuickCheckDetails* details,
701                             RegExpCompiler* compiler, int characters_filled_in,
702                             bool not_at_start) override;
703   void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
704                                          RegExpCompiler* compiler,
705                                          int characters_filled_in,
706                                          bool not_at_start) override;
707   void FillInBMInfo(Isolate* isolate, int offset, int budget,
708                     BoyerMooreLookahead* bm, bool not_at_start) override;
709   EatsAtLeastInfo EatsAtLeastFromLoopEntry() override;
loop_node()710   RegExpNode* loop_node() { return loop_node_; }
continue_node()711   RegExpNode* continue_node() { return continue_node_; }
body_can_be_zero_length()712   bool body_can_be_zero_length() { return body_can_be_zero_length_; }
min_loop_iterations()713   int min_loop_iterations() const { return min_loop_iterations_; }
read_backward()714   bool read_backward() override { return read_backward_; }
715   void Accept(NodeVisitor* visitor) override;
716   RegExpNode* FilterOneByte(int depth) override;
717 
718  private:
719   // AddAlternative is made private for loop nodes because alternatives
720   // should not be added freely, we need to keep track of which node
721   // goes back to the node itself.
AddAlternative(GuardedAlternative node)722   void AddAlternative(GuardedAlternative node) {
723     ChoiceNode::AddAlternative(node);
724   }
725 
726   RegExpNode* loop_node_;
727   RegExpNode* continue_node_;
728   bool body_can_be_zero_length_;
729   bool read_backward_;
730 
731   // Temporary marker set only while generating quick check details. Represents
732   // whether GetQuickCheckDetails traversed the initialization node for this
733   // loop's counter. If so, we may be able to generate stricter quick checks
734   // because we know the loop node must match at least min_loop_iterations_
735   // times before the continuation node can match.
736   bool traversed_loop_initialization_node_;
737 
738   // The minimum number of times the loop_node_ must match before the
739   // continue_node_ might be considered. This value can be temporarily decreased
740   // while generating quick check details, to represent the remaining iterations
741   // after the completed portion of the quick check details.
742   int min_loop_iterations_;
743 
744   friend class IterationDecrementer;
745   friend class LoopInitializationMarker;
746 };
747 
748 class NodeVisitor {
749  public:
750   virtual ~NodeVisitor() = default;
751 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0;
752   FOR_EACH_NODE_TYPE(DECLARE_VISIT)
753 #undef DECLARE_VISIT
754 };
755 
756 }  // namespace internal
757 }  // namespace v8
758 
759 #endif  // V8_REGEXP_REGEXP_NODES_H_
760