1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80: */
3
4 // Copyright 2012 the V8 project authors. All rights reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 #include "irregexp/RegExpMacroAssembler.h"
32
33 #include "irregexp/RegExpBytecode.h"
34
35 using namespace js;
36 using namespace js::irregexp;
37
38 template <typename CharT>
39 int
CaseInsensitiveCompareStrings(const CharT * substring1,const CharT * substring2,size_t byteLength)40 irregexp::CaseInsensitiveCompareStrings(const CharT* substring1, const CharT* substring2,
41 size_t byteLength)
42 {
43 AutoUnsafeCallWithABI unsafe;
44
45 MOZ_ASSERT(byteLength % sizeof(CharT) == 0);
46 size_t length = byteLength / sizeof(CharT);
47
48 for (size_t i = 0; i < length; i++) {
49 char16_t c1 = substring1[i];
50 char16_t c2 = substring2[i];
51 if (c1 != c2) {
52 c1 = unicode::ToLowerCase(c1);
53 c2 = unicode::ToLowerCase(c2);
54 if (c1 != c2)
55 return 0;
56 }
57 }
58
59 return 1;
60 }
61
62 template int
63 irregexp::CaseInsensitiveCompareStrings(const Latin1Char* substring1, const Latin1Char* substring2,
64 size_t byteLength);
65
66 template int
67 irregexp::CaseInsensitiveCompareStrings(const char16_t* substring1, const char16_t* substring2,
68 size_t byteLength);
69
70 template <typename CharT>
71 int
CaseInsensitiveCompareUCStrings(const CharT * substring1,const CharT * substring2,size_t byteLength)72 irregexp::CaseInsensitiveCompareUCStrings(const CharT* substring1, const CharT* substring2,
73 size_t byteLength)
74 {
75 AutoUnsafeCallWithABI unsafe;
76
77 MOZ_ASSERT(byteLength % sizeof(CharT) == 0);
78 size_t length = byteLength / sizeof(CharT);
79
80 for (size_t i = 0; i < length; i++) {
81 char16_t c1 = substring1[i];
82 char16_t c2 = substring2[i];
83 if (c1 != c2) {
84 c1 = unicode::FoldCase(c1);
85 c2 = unicode::FoldCase(c2);
86 if (c1 != c2)
87 return 0;
88 }
89 }
90
91 return 1;
92 }
93
94 template int
95 irregexp::CaseInsensitiveCompareUCStrings(const Latin1Char* substring1,
96 const Latin1Char* substring2,
97 size_t byteLength);
98
99 template int
100 irregexp::CaseInsensitiveCompareUCStrings(const char16_t* substring1,
101 const char16_t* substring2,
102 size_t byteLength);
103
InterpretedRegExpMacroAssembler(JSContext * cx,LifoAlloc * alloc,size_t numSavedRegisters)104 InterpretedRegExpMacroAssembler::InterpretedRegExpMacroAssembler(JSContext* cx, LifoAlloc* alloc,
105 size_t numSavedRegisters)
106 : RegExpMacroAssembler(cx, *alloc, numSavedRegisters),
107 pc_(0),
108 advance_current_start_(0),
109 advance_current_offset_(0),
110 advance_current_end_(kInvalidPC),
111 buffer_(nullptr),
112 length_(0)
113 {
114 // The first int32 word is the size of the buffer.
115 Emit32(0);
116 // The second int32 word is the number of registers.
117 Emit32(0);
118 }
119
~InterpretedRegExpMacroAssembler()120 InterpretedRegExpMacroAssembler::~InterpretedRegExpMacroAssembler()
121 {
122 js_free(buffer_);
123 }
124
125 RegExpCode
GenerateCode(JSContext * cx,bool match_only)126 InterpretedRegExpMacroAssembler::GenerateCode(JSContext* cx, bool match_only)
127 {
128 Bind(&backtrack_);
129 Emit(BC_POP_BT, 0);
130
131 // Update the buffer length and number of registers.
132 MOZ_ASSERT(size_t(length_) > sizeof(RegExpByteCodeHeader));
133 auto header = reinterpret_cast<RegExpByteCodeHeader*>(buffer_);
134 header->length = length_;
135 header->numRegisters = num_registers_;
136
137 RegExpCode res;
138 res.byteCode = buffer_;
139 buffer_ = nullptr;
140 return res;
141 }
142
143 void
AdvanceCurrentPosition(int by)144 InterpretedRegExpMacroAssembler::AdvanceCurrentPosition(int by)
145 {
146 MOZ_ASSERT(by >= kMinCPOffset);
147 MOZ_ASSERT(by <= kMaxCPOffset);
148 advance_current_start_ = pc_;
149 advance_current_offset_ = by;
150 Emit(BC_ADVANCE_CP, by);
151 advance_current_end_ = pc_;
152 }
153
154 void
AdvanceRegister(int reg,int by)155 InterpretedRegExpMacroAssembler::AdvanceRegister(int reg, int by)
156 {
157 checkRegister(reg);
158 Emit(BC_ADVANCE_REGISTER, reg);
159 Emit32(by);
160 }
161
162 void
Backtrack()163 InterpretedRegExpMacroAssembler::Backtrack()
164 {
165 Emit(BC_POP_BT, 0);
166 }
167
168 static const int32_t INVALID_OFFSET = -1;
169
170 void
Bind(jit::Label * label)171 InterpretedRegExpMacroAssembler::Bind(jit::Label* label)
172 {
173 advance_current_end_ = kInvalidPC;
174 MOZ_ASSERT(!label->bound());
175 if (label->used()) {
176 int pos = label->offset();
177 MOZ_ASSERT(pos >= 0);
178 do {
179 int fixup = pos;
180 pos = *reinterpret_cast<int32_t*>(buffer_ + fixup);
181 *reinterpret_cast<uint32_t*>(buffer_ + fixup) = pc_;
182 } while (pos != INVALID_OFFSET);
183 }
184 label->bind(pc_);
185 }
186
187 void
CheckAtStart(jit::Label * on_at_start)188 InterpretedRegExpMacroAssembler::CheckAtStart(jit::Label* on_at_start)
189 {
190 Emit(BC_CHECK_AT_START, 0);
191 EmitOrLink(on_at_start);
192 }
193
194 void
CheckCharacter(unsigned c,jit::Label * on_equal)195 InterpretedRegExpMacroAssembler::CheckCharacter(unsigned c, jit::Label* on_equal)
196 {
197 if (c > MAX_FIRST_ARG) {
198 Emit(BC_CHECK_4_CHARS, 0);
199 Emit32(c);
200 } else {
201 Emit(BC_CHECK_CHAR, c);
202 }
203 EmitOrLink(on_equal);
204 }
205
206 void
CheckCharacterAfterAnd(unsigned c,unsigned and_with,jit::Label * on_equal)207 InterpretedRegExpMacroAssembler::CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal)
208 {
209 if (c > MAX_FIRST_ARG) {
210 Emit(BC_AND_CHECK_4_CHARS, 0);
211 Emit32(c);
212 } else {
213 Emit(BC_AND_CHECK_CHAR, c);
214 }
215 Emit32(and_with);
216 EmitOrLink(on_equal);
217 }
218
219 void
CheckCharacterGT(char16_t limit,jit::Label * on_greater)220 InterpretedRegExpMacroAssembler::CheckCharacterGT(char16_t limit, jit::Label* on_greater)
221 {
222 Emit(BC_CHECK_GT, limit);
223 EmitOrLink(on_greater);
224 }
225
226 void
CheckCharacterLT(char16_t limit,jit::Label * on_less)227 InterpretedRegExpMacroAssembler::CheckCharacterLT(char16_t limit, jit::Label* on_less)
228 {
229 Emit(BC_CHECK_LT, limit);
230 EmitOrLink(on_less);
231 }
232
233 void
CheckGreedyLoop(jit::Label * on_tos_equals_current_position)234 InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_current_position)
235 {
236 Emit(BC_CHECK_GREEDY, 0);
237 EmitOrLink(on_tos_equals_current_position);
238 }
239
240 void
CheckNotAtStart(jit::Label * on_not_at_start)241 InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start)
242 {
243 Emit(BC_CHECK_NOT_AT_START, 0);
244 EmitOrLink(on_not_at_start);
245 }
246
247 void
CheckNotBackReference(int start_reg,jit::Label * on_no_match)248 InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match)
249 {
250 MOZ_ASSERT(start_reg >= 0);
251 MOZ_ASSERT(start_reg <= kMaxRegister);
252 Emit(BC_CHECK_NOT_BACK_REF, start_reg);
253 EmitOrLink(on_no_match);
254 }
255
256 void
CheckNotBackReferenceIgnoreCase(int start_reg,jit::Label * on_no_match,bool unicode)257 InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg,
258 jit::Label* on_no_match,
259 bool unicode)
260 {
261 MOZ_ASSERT(start_reg >= 0);
262 MOZ_ASSERT(start_reg <= kMaxRegister);
263 if (unicode)
264 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg);
265 else
266 Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
267 EmitOrLink(on_no_match);
268 }
269
270 void
CheckNotCharacter(unsigned c,jit::Label * on_not_equal)271 InterpretedRegExpMacroAssembler::CheckNotCharacter(unsigned c, jit::Label* on_not_equal)
272 {
273 if (c > MAX_FIRST_ARG) {
274 Emit(BC_CHECK_NOT_4_CHARS, 0);
275 Emit32(c);
276 } else {
277 Emit(BC_CHECK_NOT_CHAR, c);
278 }
279 EmitOrLink(on_not_equal);
280 }
281
282 void
CheckNotCharacterAfterAnd(unsigned c,unsigned and_with,jit::Label * on_not_equal)283 InterpretedRegExpMacroAssembler::CheckNotCharacterAfterAnd(unsigned c, unsigned and_with,
284 jit::Label* on_not_equal)
285 {
286 if (c > MAX_FIRST_ARG) {
287 Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
288 Emit32(c);
289 } else {
290 Emit(BC_AND_CHECK_NOT_CHAR, c);
291 }
292 Emit32(and_with);
293 EmitOrLink(on_not_equal);
294 }
295
296 void
CheckNotCharacterAfterMinusAnd(char16_t c,char16_t minus,char16_t and_with,jit::Label * on_not_equal)297 InterpretedRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
298 jit::Label* on_not_equal)
299 {
300 Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
301 Emit16(minus);
302 Emit16(and_with);
303 EmitOrLink(on_not_equal);
304 }
305
306 void
CheckCharacterInRange(char16_t from,char16_t to,jit::Label * on_in_range)307 InterpretedRegExpMacroAssembler::CheckCharacterInRange(char16_t from, char16_t to,
308 jit::Label* on_in_range)
309 {
310 Emit(BC_CHECK_CHAR_IN_RANGE, 0);
311 Emit16(from);
312 Emit16(to);
313 EmitOrLink(on_in_range);
314 }
315
316 void
CheckCharacterNotInRange(char16_t from,char16_t to,jit::Label * on_not_in_range)317 InterpretedRegExpMacroAssembler::CheckCharacterNotInRange(char16_t from, char16_t to,
318 jit::Label* on_not_in_range)
319 {
320 Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
321 Emit16(from);
322 Emit16(to);
323 EmitOrLink(on_not_in_range);
324 }
325
326 void
CheckBitInTable(RegExpShared::JitCodeTable table,jit::Label * on_bit_set)327 InterpretedRegExpMacroAssembler::CheckBitInTable(RegExpShared::JitCodeTable table,
328 jit::Label* on_bit_set)
329 {
330 static const int kBitsPerByte = 8;
331
332 Emit(BC_CHECK_BIT_IN_TABLE, 0);
333 EmitOrLink(on_bit_set);
334 for (int i = 0; i < kTableSize; i += kBitsPerByte) {
335 int byte = 0;
336 for (int j = 0; j < kBitsPerByte; j++) {
337 if (table[i + j] != 0)
338 byte |= 1 << j;
339 }
340 Emit8(byte);
341 }
342 }
343
344 void
JumpOrBacktrack(jit::Label * to)345 InterpretedRegExpMacroAssembler::JumpOrBacktrack(jit::Label* to)
346 {
347 if (advance_current_end_ == pc_) {
348 // Combine advance current and goto.
349 pc_ = advance_current_start_;
350 Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
351 EmitOrLink(to);
352 advance_current_end_ = kInvalidPC;
353 } else {
354 // Regular goto.
355 Emit(BC_GOTO, 0);
356 EmitOrLink(to);
357 }
358 }
359
360 void
Fail()361 InterpretedRegExpMacroAssembler::Fail()
362 {
363 Emit(BC_FAIL, 0);
364 }
365
366 void
IfRegisterGE(int reg,int comparand,jit::Label * if_ge)367 InterpretedRegExpMacroAssembler::IfRegisterGE(int reg, int comparand, jit::Label* if_ge)
368 {
369 checkRegister(reg);
370 Emit(BC_CHECK_REGISTER_GE, reg);
371 Emit32(comparand);
372 EmitOrLink(if_ge);
373 }
374
375 void
IfRegisterLT(int reg,int comparand,jit::Label * if_lt)376 InterpretedRegExpMacroAssembler::IfRegisterLT(int reg, int comparand, jit::Label* if_lt)
377 {
378 checkRegister(reg);
379 Emit(BC_CHECK_REGISTER_LT, reg);
380 Emit32(comparand);
381 EmitOrLink(if_lt);
382 }
383
384 void
IfRegisterEqPos(int reg,jit::Label * if_eq)385 InterpretedRegExpMacroAssembler::IfRegisterEqPos(int reg, jit::Label* if_eq)
386 {
387 checkRegister(reg);
388 Emit(BC_CHECK_REGISTER_EQ_POS, reg);
389 EmitOrLink(if_eq);
390 }
391
392 void
LoadCurrentCharacter(int cp_offset,jit::Label * on_end_of_input,bool check_bounds,int characters)393 InterpretedRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, jit::Label* on_end_of_input,
394 bool check_bounds, int characters)
395 {
396 MOZ_ASSERT(cp_offset >= kMinCPOffset);
397 MOZ_ASSERT(cp_offset <= kMaxCPOffset);
398 int bytecode;
399 if (check_bounds) {
400 if (characters == 4) {
401 bytecode = BC_LOAD_4_CURRENT_CHARS;
402 } else if (characters == 2) {
403 bytecode = BC_LOAD_2_CURRENT_CHARS;
404 } else {
405 MOZ_ASSERT(characters == 1);
406 bytecode = BC_LOAD_CURRENT_CHAR;
407 }
408 } else {
409 if (characters == 4) {
410 bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
411 } else if (characters == 2) {
412 bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
413 } else {
414 MOZ_ASSERT(characters == 1);
415 bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
416 }
417 }
418 Emit(bytecode, cp_offset);
419 if (check_bounds)
420 EmitOrLink(on_end_of_input);
421 }
422
423 void
PopCurrentPosition()424 InterpretedRegExpMacroAssembler::PopCurrentPosition()
425 {
426 Emit(BC_POP_CP, 0);
427 }
428
429 void
PopRegister(int reg)430 InterpretedRegExpMacroAssembler::PopRegister(int reg)
431 {
432 checkRegister(reg);
433 Emit(BC_POP_REGISTER, reg);
434 }
435
436 void
PushCurrentPosition()437 InterpretedRegExpMacroAssembler::PushCurrentPosition()
438 {
439 Emit(BC_PUSH_CP, 0);
440 }
441
442 void
PushRegister(int reg,StackCheckFlag check_stack_limit)443 InterpretedRegExpMacroAssembler::PushRegister(int reg, StackCheckFlag check_stack_limit)
444 {
445 checkRegister(reg);
446 Emit(BC_PUSH_REGISTER, reg);
447 }
448
449 void
ReadCurrentPositionFromRegister(int reg)450 InterpretedRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg)
451 {
452 checkRegister(reg);
453 Emit(BC_SET_CP_TO_REGISTER, reg);
454 }
455
456 void
ReadBacktrackStackPointerFromRegister(int reg)457 InterpretedRegExpMacroAssembler::ReadBacktrackStackPointerFromRegister(int reg)
458 {
459 checkRegister(reg);
460 Emit(BC_SET_SP_TO_REGISTER, reg);
461 }
462
463 void
SetCurrentPositionFromEnd(int by)464 InterpretedRegExpMacroAssembler::SetCurrentPositionFromEnd(int by)
465 {
466 MOZ_ASSERT(by >= 0 && by < (1 << 24));
467 Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
468 }
469
470 void
SetRegister(int reg,int to)471 InterpretedRegExpMacroAssembler::SetRegister(int reg, int to)
472 {
473 checkRegister(reg);
474 Emit(BC_SET_REGISTER, reg);
475 Emit32(to);
476 }
477
478 bool
Succeed()479 InterpretedRegExpMacroAssembler::Succeed()
480 {
481 Emit(BC_SUCCEED, 0);
482
483 // Restart matching for global regexp not supported.
484 return false;
485 }
486
487 void
WriteCurrentPositionToRegister(int reg,int cp_offset)488 InterpretedRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg, int cp_offset)
489 {
490 checkRegister(reg);
491 Emit(BC_SET_REGISTER_TO_CP, reg);
492 Emit32(cp_offset); // Current position offset.
493 }
494
495 void
ClearRegisters(int reg_from,int reg_to)496 InterpretedRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to)
497 {
498 MOZ_ASSERT(reg_from <= reg_to);
499 for (int reg = reg_from; reg <= reg_to; reg++)
500 SetRegister(reg, -1);
501 }
502
503 void
WriteBacktrackStackPointerToRegister(int reg)504 InterpretedRegExpMacroAssembler::WriteBacktrackStackPointerToRegister(int reg)
505 {
506 checkRegister(reg);
507 Emit(BC_SET_REGISTER_TO_SP, reg);
508 }
509
510 void
PushBacktrack(jit::Label * label)511 InterpretedRegExpMacroAssembler::PushBacktrack(jit::Label* label)
512 {
513 Emit(BC_PUSH_BT, 0);
514 EmitOrLink(label);
515 }
516
517 void
BindBacktrack(jit::Label * label)518 InterpretedRegExpMacroAssembler::BindBacktrack(jit::Label* label)
519 {
520 Bind(label);
521 }
522
523 void
EmitOrLink(jit::Label * label)524 InterpretedRegExpMacroAssembler::EmitOrLink(jit::Label* label)
525 {
526 if (label == nullptr)
527 label = &backtrack_;
528 if (label->bound()) {
529 Emit32(label->offset());
530 } else {
531 int pos = label->used() ? label->offset() : INVALID_OFFSET;
532 label->use(pc_);
533 Emit32(pos);
534 }
535 }
536
537 void
Emit(uint32_t byte,uint32_t twenty_four_bits)538 InterpretedRegExpMacroAssembler::Emit(uint32_t byte, uint32_t twenty_four_bits)
539 {
540 uint32_t word = ((twenty_four_bits << BYTECODE_SHIFT) | byte);
541 Emit32(word);
542 }
543
544 void
Emit32(uint32_t word)545 InterpretedRegExpMacroAssembler::Emit32(uint32_t word)
546 {
547 MOZ_ASSERT(pc_ <= length_);
548 if (pc_ + 3 >= length_)
549 Expand();
550 *reinterpret_cast<uint32_t*>(buffer_ + pc_) = word;
551 pc_ += 4;
552 }
553
554 void
Emit16(uint32_t word)555 InterpretedRegExpMacroAssembler::Emit16(uint32_t word)
556 {
557 MOZ_ASSERT(pc_ <= length_);
558 if (pc_ + 1 >= length_)
559 Expand();
560 *reinterpret_cast<uint16_t*>(buffer_ + pc_) = word;
561 pc_ += 2;
562 }
563
564 void
Emit8(uint32_t word)565 InterpretedRegExpMacroAssembler::Emit8(uint32_t word)
566 {
567 MOZ_ASSERT(pc_ <= length_);
568 if (pc_ == length_)
569 Expand();
570 *reinterpret_cast<unsigned char*>(buffer_ + pc_) = word;
571 pc_ += 1;
572 }
573
574 void
Expand()575 InterpretedRegExpMacroAssembler::Expand()
576 {
577 AutoEnterOOMUnsafeRegion oomUnsafe;
578
579 int newLength = Max(100, length_ * 2);
580 if (newLength < length_ + 4)
581 oomUnsafe.crash("InterpretedRegExpMacroAssembler::Expand");
582
583 buffer_ = (uint8_t*) js_realloc(buffer_, newLength);
584 if (!buffer_)
585 oomUnsafe.crash("InterpretedRegExpMacroAssembler::Expand");
586 length_ = newLength;
587 }
588