1 // Copyright 2008-2009 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/regexp/regexp-bytecode-generator.h"
6 
7 #include "src/ast/ast.h"
8 #include "src/objects/fixed-array-inl.h"
9 #include "src/regexp/regexp-bytecode-generator-inl.h"
10 #include "src/regexp/regexp-bytecode-peephole.h"
11 #include "src/regexp/regexp-bytecodes.h"
12 #include "src/regexp/regexp-macro-assembler.h"
13 
14 namespace v8 {
15 namespace internal {
16 
RegExpBytecodeGenerator(Isolate * isolate,Zone * zone)17 RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
18     : RegExpMacroAssembler(isolate, zone),
19       buffer_(kInitialBufferSize, zone),
20       pc_(0),
21       advance_current_end_(kInvalidPC),
22       jump_edges_(zone),
23       isolate_(isolate) {}
24 
~RegExpBytecodeGenerator()25 RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
26   if (backtrack_.is_linked()) backtrack_.Unuse();
27 }
28 
29 RegExpBytecodeGenerator::IrregexpImplementation
Implementation()30 RegExpBytecodeGenerator::Implementation() {
31   return kBytecodeImplementation;
32 }
33 
Bind(Label * l)34 void RegExpBytecodeGenerator::Bind(Label* l) {
35   advance_current_end_ = kInvalidPC;
36   DCHECK(!l->is_bound());
37   if (l->is_linked()) {
38     int pos = l->pos();
39     while (pos != 0) {
40       int fixup = pos;
41       pos = *reinterpret_cast<int32_t*>(buffer_.data() + fixup);
42       *reinterpret_cast<uint32_t*>(buffer_.data() + fixup) = pc_;
43       jump_edges_.emplace(fixup, pc_);
44     }
45   }
46   l->bind_to(pc_);
47 }
48 
EmitOrLink(Label * l)49 void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
50   if (l == nullptr) l = &backtrack_;
51   int pos = 0;
52   if (l->is_bound()) {
53     pos = l->pos();
54     jump_edges_.emplace(pc_, pos);
55   } else {
56     if (l->is_linked()) {
57       pos = l->pos();
58     }
59     l->link_to(pc_);
60   }
61   Emit32(pos);
62 }
63 
PopRegister(int register_index)64 void RegExpBytecodeGenerator::PopRegister(int register_index) {
65   DCHECK_LE(0, register_index);
66   DCHECK_GE(kMaxRegister, register_index);
67   Emit(BC_POP_REGISTER, register_index);
68 }
69 
PushRegister(int register_index,StackCheckFlag check_stack_limit)70 void RegExpBytecodeGenerator::PushRegister(int register_index,
71                                            StackCheckFlag check_stack_limit) {
72   DCHECK_LE(0, register_index);
73   DCHECK_GE(kMaxRegister, register_index);
74   Emit(BC_PUSH_REGISTER, register_index);
75 }
76 
WriteCurrentPositionToRegister(int register_index,int cp_offset)77 void RegExpBytecodeGenerator::WriteCurrentPositionToRegister(int register_index,
78                                                              int cp_offset) {
79   DCHECK_LE(0, register_index);
80   DCHECK_GE(kMaxRegister, register_index);
81   Emit(BC_SET_REGISTER_TO_CP, register_index);
82   Emit32(cp_offset);  // Current position offset.
83 }
84 
ClearRegisters(int reg_from,int reg_to)85 void RegExpBytecodeGenerator::ClearRegisters(int reg_from, int reg_to) {
86   DCHECK(reg_from <= reg_to);
87   for (int reg = reg_from; reg <= reg_to; reg++) {
88     SetRegister(reg, -1);
89   }
90 }
91 
ReadCurrentPositionFromRegister(int register_index)92 void RegExpBytecodeGenerator::ReadCurrentPositionFromRegister(
93     int register_index) {
94   DCHECK_LE(0, register_index);
95   DCHECK_GE(kMaxRegister, register_index);
96   Emit(BC_SET_CP_TO_REGISTER, register_index);
97 }
98 
WriteStackPointerToRegister(int register_index)99 void RegExpBytecodeGenerator::WriteStackPointerToRegister(int register_index) {
100   DCHECK_LE(0, register_index);
101   DCHECK_GE(kMaxRegister, register_index);
102   Emit(BC_SET_REGISTER_TO_SP, register_index);
103 }
104 
ReadStackPointerFromRegister(int register_index)105 void RegExpBytecodeGenerator::ReadStackPointerFromRegister(int register_index) {
106   DCHECK_LE(0, register_index);
107   DCHECK_GE(kMaxRegister, register_index);
108   Emit(BC_SET_SP_TO_REGISTER, register_index);
109 }
110 
SetCurrentPositionFromEnd(int by)111 void RegExpBytecodeGenerator::SetCurrentPositionFromEnd(int by) {
112   DCHECK(is_uint24(by));
113   Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
114 }
115 
SetRegister(int register_index,int to)116 void RegExpBytecodeGenerator::SetRegister(int register_index, int to) {
117   DCHECK_LE(0, register_index);
118   DCHECK_GE(kMaxRegister, register_index);
119   Emit(BC_SET_REGISTER, register_index);
120   Emit32(to);
121 }
122 
AdvanceRegister(int register_index,int by)123 void RegExpBytecodeGenerator::AdvanceRegister(int register_index, int by) {
124   DCHECK_LE(0, register_index);
125   DCHECK_GE(kMaxRegister, register_index);
126   Emit(BC_ADVANCE_REGISTER, register_index);
127   Emit32(by);
128 }
129 
PopCurrentPosition()130 void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); }
131 
PushCurrentPosition()132 void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); }
133 
Backtrack()134 void RegExpBytecodeGenerator::Backtrack() {
135   int error_code =
136       can_fallback() ? RegExp::RE_FALLBACK_TO_EXPERIMENTAL : RegExp::RE_FAILURE;
137   Emit(BC_POP_BT, error_code);
138 }
139 
GoTo(Label * l)140 void RegExpBytecodeGenerator::GoTo(Label* l) {
141   if (advance_current_end_ == pc_) {
142     // Combine advance current and goto.
143     pc_ = advance_current_start_;
144     Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
145     EmitOrLink(l);
146     advance_current_end_ = kInvalidPC;
147   } else {
148     // Regular goto.
149     Emit(BC_GOTO, 0);
150     EmitOrLink(l);
151   }
152 }
153 
PushBacktrack(Label * l)154 void RegExpBytecodeGenerator::PushBacktrack(Label* l) {
155   Emit(BC_PUSH_BT, 0);
156   EmitOrLink(l);
157 }
158 
Succeed()159 bool RegExpBytecodeGenerator::Succeed() {
160   Emit(BC_SUCCEED, 0);
161   return false;  // Restart matching for global regexp not supported.
162 }
163 
Fail()164 void RegExpBytecodeGenerator::Fail() { Emit(BC_FAIL, 0); }
165 
AdvanceCurrentPosition(int by)166 void RegExpBytecodeGenerator::AdvanceCurrentPosition(int by) {
167   // TODO(chromium:1166138): Turn back into DCHECKs once the underlying issue
168   // is fixed.
169   CHECK_LE(kMinCPOffset, by);
170   CHECK_GE(kMaxCPOffset, by);
171   advance_current_start_ = pc_;
172   advance_current_offset_ = by;
173   Emit(BC_ADVANCE_CP, by);
174   advance_current_end_ = pc_;
175 }
176 
CheckGreedyLoop(Label * on_tos_equals_current_position)177 void RegExpBytecodeGenerator::CheckGreedyLoop(
178     Label* on_tos_equals_current_position) {
179   Emit(BC_CHECK_GREEDY, 0);
180   EmitOrLink(on_tos_equals_current_position);
181 }
182 
LoadCurrentCharacterImpl(int cp_offset,Label * on_failure,bool check_bounds,int characters,int eats_at_least)183 void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
184                                                        Label* on_failure,
185                                                        bool check_bounds,
186                                                        int characters,
187                                                        int eats_at_least) {
188   DCHECK_GE(eats_at_least, characters);
189   if (eats_at_least > characters && check_bounds) {
190     DCHECK(is_int24(cp_offset + eats_at_least));
191     Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least);
192     EmitOrLink(on_failure);
193     check_bounds = false;  // Load below doesn't need to check.
194   }
195 
196   DCHECK_LE(kMinCPOffset, cp_offset);
197   DCHECK_GE(kMaxCPOffset, cp_offset);
198   int bytecode;
199   if (check_bounds) {
200     if (characters == 4) {
201       bytecode = BC_LOAD_4_CURRENT_CHARS;
202     } else if (characters == 2) {
203       bytecode = BC_LOAD_2_CURRENT_CHARS;
204     } else {
205       DCHECK_EQ(1, characters);
206       bytecode = BC_LOAD_CURRENT_CHAR;
207     }
208   } else {
209     if (characters == 4) {
210       bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
211     } else if (characters == 2) {
212       bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
213     } else {
214       DCHECK_EQ(1, characters);
215       bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
216     }
217   }
218   Emit(bytecode, cp_offset);
219   if (check_bounds) EmitOrLink(on_failure);
220 }
221 
CheckCharacterLT(base::uc16 limit,Label * on_less)222 void RegExpBytecodeGenerator::CheckCharacterLT(base::uc16 limit,
223                                                Label* on_less) {
224   Emit(BC_CHECK_LT, limit);
225   EmitOrLink(on_less);
226 }
227 
CheckCharacterGT(base::uc16 limit,Label * on_greater)228 void RegExpBytecodeGenerator::CheckCharacterGT(base::uc16 limit,
229                                                Label* on_greater) {
230   Emit(BC_CHECK_GT, limit);
231   EmitOrLink(on_greater);
232 }
233 
CheckCharacter(uint32_t c,Label * on_equal)234 void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) {
235   if (c > MAX_FIRST_ARG) {
236     Emit(BC_CHECK_4_CHARS, 0);
237     Emit32(c);
238   } else {
239     Emit(BC_CHECK_CHAR, c);
240   }
241   EmitOrLink(on_equal);
242 }
243 
CheckAtStart(int cp_offset,Label * on_at_start)244 void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) {
245   Emit(BC_CHECK_AT_START, cp_offset);
246   EmitOrLink(on_at_start);
247 }
248 
CheckNotAtStart(int cp_offset,Label * on_not_at_start)249 void RegExpBytecodeGenerator::CheckNotAtStart(int cp_offset,
250                                               Label* on_not_at_start) {
251   Emit(BC_CHECK_NOT_AT_START, cp_offset);
252   EmitOrLink(on_not_at_start);
253 }
254 
CheckNotCharacter(uint32_t c,Label * on_not_equal)255 void RegExpBytecodeGenerator::CheckNotCharacter(uint32_t c,
256                                                 Label* on_not_equal) {
257   if (c > MAX_FIRST_ARG) {
258     Emit(BC_CHECK_NOT_4_CHARS, 0);
259     Emit32(c);
260   } else {
261     Emit(BC_CHECK_NOT_CHAR, c);
262   }
263   EmitOrLink(on_not_equal);
264 }
265 
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)266 void RegExpBytecodeGenerator::CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
267                                                      Label* on_equal) {
268   if (c > MAX_FIRST_ARG) {
269     Emit(BC_AND_CHECK_4_CHARS, 0);
270     Emit32(c);
271   } else {
272     Emit(BC_AND_CHECK_CHAR, c);
273   }
274   Emit32(mask);
275   EmitOrLink(on_equal);
276 }
277 
CheckNotCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_not_equal)278 void RegExpBytecodeGenerator::CheckNotCharacterAfterAnd(uint32_t c,
279                                                         uint32_t mask,
280                                                         Label* on_not_equal) {
281   if (c > MAX_FIRST_ARG) {
282     Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
283     Emit32(c);
284   } else {
285     Emit(BC_AND_CHECK_NOT_CHAR, c);
286   }
287   Emit32(mask);
288   EmitOrLink(on_not_equal);
289 }
290 
CheckNotCharacterAfterMinusAnd(base::uc16 c,base::uc16 minus,base::uc16 mask,Label * on_not_equal)291 void RegExpBytecodeGenerator::CheckNotCharacterAfterMinusAnd(
292     base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
293   Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
294   Emit16(minus);
295   Emit16(mask);
296   EmitOrLink(on_not_equal);
297 }
298 
CheckCharacterInRange(base::uc16 from,base::uc16 to,Label * on_in_range)299 void RegExpBytecodeGenerator::CheckCharacterInRange(base::uc16 from,
300                                                     base::uc16 to,
301                                                     Label* on_in_range) {
302   Emit(BC_CHECK_CHAR_IN_RANGE, 0);
303   Emit16(from);
304   Emit16(to);
305   EmitOrLink(on_in_range);
306 }
307 
CheckCharacterNotInRange(base::uc16 from,base::uc16 to,Label * on_not_in_range)308 void RegExpBytecodeGenerator::CheckCharacterNotInRange(base::uc16 from,
309                                                        base::uc16 to,
310                                                        Label* on_not_in_range) {
311   Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
312   Emit16(from);
313   Emit16(to);
314   EmitOrLink(on_not_in_range);
315 }
316 
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)317 void RegExpBytecodeGenerator::CheckBitInTable(Handle<ByteArray> table,
318                                               Label* on_bit_set) {
319   Emit(BC_CHECK_BIT_IN_TABLE, 0);
320   EmitOrLink(on_bit_set);
321   for (int i = 0; i < kTableSize; i += kBitsPerByte) {
322     int byte = 0;
323     for (int j = 0; j < kBitsPerByte; j++) {
324       if (table->get(i + j) != 0) byte |= 1 << j;
325     }
326     Emit8(byte);
327   }
328 }
329 
CheckNotBackReference(int start_reg,bool read_backward,Label * on_not_equal)330 void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg,
331                                                     bool read_backward,
332                                                     Label* on_not_equal) {
333   DCHECK_LE(0, start_reg);
334   DCHECK_GE(kMaxRegister, start_reg);
335   Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
336        start_reg);
337   EmitOrLink(on_not_equal);
338 }
339 
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_not_equal)340 void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase(
341     int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
342   DCHECK_LE(0, start_reg);
343   DCHECK_GE(kMaxRegister, start_reg);
344   Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
345                                 : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
346                      : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
347                                 : BC_CHECK_NOT_BACK_REF_NO_CASE),
348        start_reg);
349   EmitOrLink(on_not_equal);
350 }
351 
IfRegisterLT(int register_index,int comparand,Label * on_less_than)352 void RegExpBytecodeGenerator::IfRegisterLT(int register_index, int comparand,
353                                            Label* on_less_than) {
354   DCHECK_LE(0, register_index);
355   DCHECK_GE(kMaxRegister, register_index);
356   Emit(BC_CHECK_REGISTER_LT, register_index);
357   Emit32(comparand);
358   EmitOrLink(on_less_than);
359 }
360 
IfRegisterGE(int register_index,int comparand,Label * on_greater_or_equal)361 void RegExpBytecodeGenerator::IfRegisterGE(int register_index, int comparand,
362                                            Label* on_greater_or_equal) {
363   DCHECK_LE(0, register_index);
364   DCHECK_GE(kMaxRegister, register_index);
365   Emit(BC_CHECK_REGISTER_GE, register_index);
366   Emit32(comparand);
367   EmitOrLink(on_greater_or_equal);
368 }
369 
IfRegisterEqPos(int register_index,Label * on_eq)370 void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
371                                               Label* on_eq) {
372   DCHECK_LE(0, register_index);
373   DCHECK_GE(kMaxRegister, register_index);
374   Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
375   EmitOrLink(on_eq);
376 }
377 
GetCode(Handle<String> source)378 Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
379   Bind(&backtrack_);
380   Backtrack();
381 
382   Handle<ByteArray> array;
383   if (FLAG_regexp_peephole_optimization) {
384     array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
385         isolate_, zone(), source, buffer_.data(), length(), jump_edges_);
386   } else {
387     array = isolate_->factory()->NewByteArray(length());
388     Copy(array->GetDataStartAddress());
389   }
390 
391   return array;
392 }
393 
length()394 int RegExpBytecodeGenerator::length() { return pc_; }
395 
Copy(byte * a)396 void RegExpBytecodeGenerator::Copy(byte* a) {
397   MemCopy(a, buffer_.data(), length());
398 }
399 
ExpandBuffer()400 void RegExpBytecodeGenerator::ExpandBuffer() {
401   // TODO(jgruber): The growth strategy could be smarter for large sizes.
402   // TODO(jgruber): It's not necessary to default-initialize new elements.
403   buffer_.resize(buffer_.size() * 2);
404 }
405 
406 }  // namespace internal
407 }  // namespace v8
408