1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99: */
3
4 // Copyright 2012 the V8 project authors. All rights reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 #include "irregexp/RegExpMacroAssembler.h"
32
33 #include "irregexp/RegExpBytecode.h"
34
35 using namespace js;
36 using namespace js::irregexp;
37
38 template <typename CharT>
39 int
CaseInsensitiveCompareStrings(const CharT * substring1,const CharT * substring2,size_t byteLength)40 irregexp::CaseInsensitiveCompareStrings(const CharT* substring1, const CharT* substring2,
41 size_t byteLength)
42 {
43 MOZ_ASSERT(byteLength % sizeof(CharT) == 0);
44 size_t length = byteLength / sizeof(CharT);
45
46 for (size_t i = 0; i < length; i++) {
47 char16_t c1 = substring1[i];
48 char16_t c2 = substring2[i];
49 if (c1 != c2) {
50 c1 = unicode::ToLowerCase(c1);
51 c2 = unicode::ToLowerCase(c2);
52 if (c1 != c2)
53 return 0;
54 }
55 }
56
57 return 1;
58 }
59
60 template int
61 irregexp::CaseInsensitiveCompareStrings(const Latin1Char* substring1, const Latin1Char* substring2,
62 size_t byteLength);
63
64 template int
65 irregexp::CaseInsensitiveCompareStrings(const char16_t* substring1, const char16_t* substring2,
66 size_t byteLength);
67
InterpretedRegExpMacroAssembler(LifoAlloc * alloc,RegExpShared * shared,size_t numSavedRegisters)68 InterpretedRegExpMacroAssembler::InterpretedRegExpMacroAssembler(LifoAlloc* alloc, RegExpShared* shared,
69 size_t numSavedRegisters)
70 : RegExpMacroAssembler(*alloc, shared, numSavedRegisters),
71 pc_(0),
72 advance_current_start_(0),
73 advance_current_offset_(0),
74 advance_current_end_(kInvalidPC),
75 buffer_(nullptr),
76 length_(0)
77 {
78 // The first int32 word is the number of registers.
79 Emit32(0);
80 }
81
~InterpretedRegExpMacroAssembler()82 InterpretedRegExpMacroAssembler::~InterpretedRegExpMacroAssembler()
83 {
84 js_free(buffer_);
85 }
86
87 RegExpCode
GenerateCode(JSContext * cx,bool match_only)88 InterpretedRegExpMacroAssembler::GenerateCode(JSContext* cx, bool match_only)
89 {
90 Bind(&backtrack_);
91 Emit(BC_POP_BT, 0);
92
93 // Update the number of registers.
94 *(int32_t*)buffer_ = num_registers_;
95
96 RegExpCode res;
97 res.byteCode = buffer_;
98 buffer_ = nullptr;
99 return res;
100 }
101
102 void
AdvanceCurrentPosition(int by)103 InterpretedRegExpMacroAssembler::AdvanceCurrentPosition(int by)
104 {
105 MOZ_ASSERT(by >= kMinCPOffset);
106 MOZ_ASSERT(by <= kMaxCPOffset);
107 advance_current_start_ = pc_;
108 advance_current_offset_ = by;
109 Emit(BC_ADVANCE_CP, by);
110 advance_current_end_ = pc_;
111 }
112
113 void
AdvanceRegister(int reg,int by)114 InterpretedRegExpMacroAssembler::AdvanceRegister(int reg, int by)
115 {
116 checkRegister(reg);
117 Emit(BC_ADVANCE_REGISTER, reg);
118 Emit32(by);
119 }
120
121 void
Backtrack()122 InterpretedRegExpMacroAssembler::Backtrack()
123 {
124 Emit(BC_POP_BT, 0);
125 }
126
127 void
Bind(jit::Label * label)128 InterpretedRegExpMacroAssembler::Bind(jit::Label* label)
129 {
130 advance_current_end_ = kInvalidPC;
131 MOZ_ASSERT(!label->bound());
132 if (label->used()) {
133 int pos = label->offset();
134 while (pos != jit::Label::INVALID_OFFSET) {
135 int fixup = pos;
136 pos = *reinterpret_cast<int32_t*>(buffer_ + fixup);
137 *reinterpret_cast<uint32_t*>(buffer_ + fixup) = pc_;
138 }
139 }
140 label->bind(pc_);
141 }
142
143 void
CheckAtStart(jit::Label * on_at_start)144 InterpretedRegExpMacroAssembler::CheckAtStart(jit::Label* on_at_start)
145 {
146 Emit(BC_CHECK_AT_START, 0);
147 EmitOrLink(on_at_start);
148 }
149
150 void
CheckCharacter(unsigned c,jit::Label * on_equal)151 InterpretedRegExpMacroAssembler::CheckCharacter(unsigned c, jit::Label* on_equal)
152 {
153 if (c > MAX_FIRST_ARG) {
154 Emit(BC_CHECK_4_CHARS, 0);
155 Emit32(c);
156 } else {
157 Emit(BC_CHECK_CHAR, c);
158 }
159 EmitOrLink(on_equal);
160 }
161
162 void
CheckCharacterAfterAnd(unsigned c,unsigned and_with,jit::Label * on_equal)163 InterpretedRegExpMacroAssembler::CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal)
164 {
165 if (c > MAX_FIRST_ARG) {
166 Emit(BC_AND_CHECK_4_CHARS, 0);
167 Emit32(c);
168 } else {
169 Emit(BC_AND_CHECK_CHAR, c);
170 }
171 Emit32(and_with);
172 EmitOrLink(on_equal);
173 }
174
175 void
CheckCharacterGT(char16_t limit,jit::Label * on_greater)176 InterpretedRegExpMacroAssembler::CheckCharacterGT(char16_t limit, jit::Label* on_greater)
177 {
178 Emit(BC_CHECK_GT, limit);
179 EmitOrLink(on_greater);
180 }
181
182 void
CheckCharacterLT(char16_t limit,jit::Label * on_less)183 InterpretedRegExpMacroAssembler::CheckCharacterLT(char16_t limit, jit::Label* on_less)
184 {
185 Emit(BC_CHECK_LT, limit);
186 EmitOrLink(on_less);
187 }
188
189 void
CheckGreedyLoop(jit::Label * on_tos_equals_current_position)190 InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_current_position)
191 {
192 Emit(BC_CHECK_GREEDY, 0);
193 EmitOrLink(on_tos_equals_current_position);
194 }
195
196 void
CheckNotAtStart(jit::Label * on_not_at_start)197 InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start)
198 {
199 Emit(BC_CHECK_NOT_AT_START, 0);
200 EmitOrLink(on_not_at_start);
201 }
202
203 void
CheckNotBackReference(int start_reg,jit::Label * on_no_match)204 InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match)
205 {
206 MOZ_ASSERT(start_reg >= 0);
207 MOZ_ASSERT(start_reg <= kMaxRegister);
208 Emit(BC_CHECK_NOT_BACK_REF, start_reg);
209 EmitOrLink(on_no_match);
210 }
211
212 void
CheckNotBackReferenceIgnoreCase(int start_reg,jit::Label * on_no_match)213 InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match)
214 {
215 MOZ_ASSERT(start_reg >= 0);
216 MOZ_ASSERT(start_reg <= kMaxRegister);
217 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
218 EmitOrLink(on_no_match);
219 }
220
221 void
CheckNotCharacter(unsigned c,jit::Label * on_not_equal)222 InterpretedRegExpMacroAssembler::CheckNotCharacter(unsigned c, jit::Label* on_not_equal)
223 {
224 if (c > MAX_FIRST_ARG) {
225 Emit(BC_CHECK_NOT_4_CHARS, 0);
226 Emit32(c);
227 } else {
228 Emit(BC_CHECK_NOT_CHAR, c);
229 }
230 EmitOrLink(on_not_equal);
231 }
232
233 void
CheckNotCharacterAfterAnd(unsigned c,unsigned and_with,jit::Label * on_not_equal)234 InterpretedRegExpMacroAssembler::CheckNotCharacterAfterAnd(unsigned c, unsigned and_with,
235 jit::Label* on_not_equal)
236 {
237 if (c > MAX_FIRST_ARG) {
238 Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
239 Emit32(c);
240 } else {
241 Emit(BC_AND_CHECK_NOT_CHAR, c);
242 }
243 Emit32(and_with);
244 EmitOrLink(on_not_equal);
245 }
246
247 void
CheckNotCharacterAfterMinusAnd(char16_t c,char16_t minus,char16_t and_with,jit::Label * on_not_equal)248 InterpretedRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
249 jit::Label* on_not_equal)
250 {
251 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
252 Emit16(minus);
253 Emit16(and_with);
254 EmitOrLink(on_not_equal);
255 }
256
257 void
CheckCharacterInRange(char16_t from,char16_t to,jit::Label * on_in_range)258 InterpretedRegExpMacroAssembler::CheckCharacterInRange(char16_t from, char16_t to,
259 jit::Label* on_in_range)
260 {
261 Emit(BC_CHECK_CHAR_IN_RANGE, 0);
262 Emit16(from);
263 Emit16(to);
264 EmitOrLink(on_in_range);
265 }
266
267 void
CheckCharacterNotInRange(char16_t from,char16_t to,jit::Label * on_not_in_range)268 InterpretedRegExpMacroAssembler::CheckCharacterNotInRange(char16_t from, char16_t to,
269 jit::Label* on_not_in_range)
270 {
271 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
272 Emit16(from);
273 Emit16(to);
274 EmitOrLink(on_not_in_range);
275 }
276
277 void
CheckBitInTable(uint8_t * table,jit::Label * on_bit_set)278 InterpretedRegExpMacroAssembler::CheckBitInTable(uint8_t* table, jit::Label* on_bit_set)
279 {
280 static const int kBitsPerByte = 8;
281
282 Emit(BC_CHECK_BIT_IN_TABLE, 0);
283 EmitOrLink(on_bit_set);
284 for (int i = 0; i < kTableSize; i += kBitsPerByte) {
285 int byte = 0;
286 for (int j = 0; j < kBitsPerByte; j++) {
287 if (table[i + j] != 0)
288 byte |= 1 << j;
289 }
290 Emit8(byte);
291 }
292 }
293
294 void
JumpOrBacktrack(jit::Label * to)295 InterpretedRegExpMacroAssembler::JumpOrBacktrack(jit::Label* to)
296 {
297 if (advance_current_end_ == pc_) {
298 // Combine advance current and goto.
299 pc_ = advance_current_start_;
300 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
301 EmitOrLink(to);
302 advance_current_end_ = kInvalidPC;
303 } else {
304 // Regular goto.
305 Emit(BC_GOTO, 0);
306 EmitOrLink(to);
307 }
308 }
309
310 void
Fail()311 InterpretedRegExpMacroAssembler::Fail()
312 {
313 Emit(BC_FAIL, 0);
314 }
315
316 void
IfRegisterGE(int reg,int comparand,jit::Label * if_ge)317 InterpretedRegExpMacroAssembler::IfRegisterGE(int reg, int comparand, jit::Label* if_ge)
318 {
319 checkRegister(reg);
320 Emit(BC_CHECK_REGISTER_GE, reg);
321 Emit32(comparand);
322 EmitOrLink(if_ge);
323 }
324
325 void
IfRegisterLT(int reg,int comparand,jit::Label * if_lt)326 InterpretedRegExpMacroAssembler::IfRegisterLT(int reg, int comparand, jit::Label* if_lt)
327 {
328 checkRegister(reg);
329 Emit(BC_CHECK_REGISTER_LT, reg);
330 Emit32(comparand);
331 EmitOrLink(if_lt);
332 }
333
334 void
IfRegisterEqPos(int reg,jit::Label * if_eq)335 InterpretedRegExpMacroAssembler::IfRegisterEqPos(int reg, jit::Label* if_eq)
336 {
337 checkRegister(reg);
338 Emit(BC_CHECK_REGISTER_EQ_POS, reg);
339 EmitOrLink(if_eq);
340 }
341
342 void
LoadCurrentCharacter(int cp_offset,jit::Label * on_end_of_input,bool check_bounds,int characters)343 InterpretedRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, jit::Label* on_end_of_input,
344 bool check_bounds, int characters)
345 {
346 MOZ_ASSERT(cp_offset >= kMinCPOffset);
347 MOZ_ASSERT(cp_offset <= kMaxCPOffset);
348 int bytecode;
349 if (check_bounds) {
350 if (characters == 4) {
351 bytecode = BC_LOAD_4_CURRENT_CHARS;
352 } else if (characters == 2) {
353 bytecode = BC_LOAD_2_CURRENT_CHARS;
354 } else {
355 MOZ_ASSERT(characters == 1);
356 bytecode = BC_LOAD_CURRENT_CHAR;
357 }
358 } else {
359 if (characters == 4) {
360 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
361 } else if (characters == 2) {
362 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
363 } else {
364 MOZ_ASSERT(characters == 1);
365 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
366 }
367 }
368 Emit(bytecode, cp_offset);
369 if (check_bounds)
370 EmitOrLink(on_end_of_input);
371 }
372
373 void
PopCurrentPosition()374 InterpretedRegExpMacroAssembler::PopCurrentPosition()
375 {
376 Emit(BC_POP_CP, 0);
377 }
378
379 void
PopRegister(int reg)380 InterpretedRegExpMacroAssembler::PopRegister(int reg)
381 {
382 checkRegister(reg);
383 Emit(BC_POP_REGISTER, reg);
384 }
385
386 void
PushCurrentPosition()387 InterpretedRegExpMacroAssembler::PushCurrentPosition()
388 {
389 Emit(BC_PUSH_CP, 0);
390 }
391
392 void
PushRegister(int reg,StackCheckFlag check_stack_limit)393 InterpretedRegExpMacroAssembler::PushRegister(int reg, StackCheckFlag check_stack_limit)
394 {
395 checkRegister(reg);
396 Emit(BC_PUSH_REGISTER, reg);
397 }
398
399 void
ReadCurrentPositionFromRegister(int reg)400 InterpretedRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg)
401 {
402 checkRegister(reg);
403 Emit(BC_SET_CP_TO_REGISTER, reg);
404 }
405
406 void
ReadBacktrackStackPointerFromRegister(int reg)407 InterpretedRegExpMacroAssembler::ReadBacktrackStackPointerFromRegister(int reg)
408 {
409 checkRegister(reg);
410 Emit(BC_SET_SP_TO_REGISTER, reg);
411 }
412
413 void
SetCurrentPositionFromEnd(int by)414 InterpretedRegExpMacroAssembler::SetCurrentPositionFromEnd(int by)
415 {
416 MOZ_ASSERT(by >= 0 && by < (1 << 24));
417 Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
418 }
419
420 void
SetRegister(int reg,int to)421 InterpretedRegExpMacroAssembler::SetRegister(int reg, int to)
422 {
423 checkRegister(reg);
424 Emit(BC_SET_REGISTER, reg);
425 Emit32(to);
426 }
427
428 bool
Succeed()429 InterpretedRegExpMacroAssembler::Succeed()
430 {
431 Emit(BC_SUCCEED, 0);
432
433 // Restart matching for global regexp not supported.
434 return false;
435 }
436
437 void
WriteCurrentPositionToRegister(int reg,int cp_offset)438 InterpretedRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg, int cp_offset)
439 {
440 checkRegister(reg);
441 Emit(BC_SET_REGISTER_TO_CP, reg);
442 Emit32(cp_offset); // Current position offset.
443 }
444
445 void
ClearRegisters(int reg_from,int reg_to)446 InterpretedRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to)
447 {
448 MOZ_ASSERT(reg_from <= reg_to);
449 for (int reg = reg_from; reg <= reg_to; reg++)
450 SetRegister(reg, -1);
451 }
452
453 void
WriteBacktrackStackPointerToRegister(int reg)454 InterpretedRegExpMacroAssembler::WriteBacktrackStackPointerToRegister(int reg)
455 {
456 checkRegister(reg);
457 Emit(BC_SET_REGISTER_TO_SP, reg);
458 }
459
460 void
PushBacktrack(jit::Label * label)461 InterpretedRegExpMacroAssembler::PushBacktrack(jit::Label* label)
462 {
463 Emit(BC_PUSH_BT, 0);
464 EmitOrLink(label);
465 }
466
467 void
BindBacktrack(jit::Label * label)468 InterpretedRegExpMacroAssembler::BindBacktrack(jit::Label* label)
469 {
470 Bind(label);
471 }
472
473 void
EmitOrLink(jit::Label * label)474 InterpretedRegExpMacroAssembler::EmitOrLink(jit::Label* label)
475 {
476 if (label == nullptr)
477 label = &backtrack_;
478 if (label->bound()) {
479 Emit32(label->offset());
480 } else {
481 int pos = label->use(pc_);
482 Emit32(pos);
483 }
484 }
485
486 void
Emit(uint32_t byte,uint32_t twenty_four_bits)487 InterpretedRegExpMacroAssembler::Emit(uint32_t byte, uint32_t twenty_four_bits)
488 {
489 uint32_t word = ((twenty_four_bits << BYTECODE_SHIFT) | byte);
490 Emit32(word);
491 }
492
493 void
Emit32(uint32_t word)494 InterpretedRegExpMacroAssembler::Emit32(uint32_t word)
495 {
496 MOZ_ASSERT(pc_ <= length_);
497 if (pc_ + 3 >= length_)
498 Expand();
499 *reinterpret_cast<uint32_t*>(buffer_ + pc_) = word;
500 pc_ += 4;
501 }
502
503 void
Emit16(uint32_t word)504 InterpretedRegExpMacroAssembler::Emit16(uint32_t word)
505 {
506 MOZ_ASSERT(pc_ <= length_);
507 if (pc_ + 1 >= length_)
508 Expand();
509 *reinterpret_cast<uint16_t*>(buffer_ + pc_) = word;
510 pc_ += 2;
511 }
512
513 void
Emit8(uint32_t word)514 InterpretedRegExpMacroAssembler::Emit8(uint32_t word)
515 {
516 MOZ_ASSERT(pc_ <= length_);
517 if (pc_ == length_)
518 Expand();
519 *reinterpret_cast<unsigned char*>(buffer_ + pc_) = word;
520 pc_ += 1;
521 }
522
523 void
Expand()524 InterpretedRegExpMacroAssembler::Expand()
525 {
526 AutoEnterOOMUnsafeRegion oomUnsafe;
527
528 int newLength = Max(100, length_ * 2);
529 if (newLength < length_ + 4)
530 oomUnsafe.crash("InterpretedRegExpMacroAssembler::Expand");
531
532 buffer_ = (uint8_t*) js_realloc(buffer_, newLength);
533 if (!buffer_)
534 oomUnsafe.crash("InterpretedRegExpMacroAssembler::Expand");
535 length_ = newLength;
536 }
537