1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99: */
3 
4 // Copyright 2012 the V8 project authors. All rights reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 //       notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 //       copyright notice, this list of conditions and the following
13 //       disclaimer in the documentation and/or other materials provided
14 //       with the distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 //       contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #include "irregexp/RegExpMacroAssembler.h"
32 
33 #include "irregexp/RegExpBytecode.h"
34 
35 using namespace js;
36 using namespace js::irregexp;
37 
38 template <typename CharT>
39 int
CaseInsensitiveCompareStrings(const CharT * substring1,const CharT * substring2,size_t byteLength)40 irregexp::CaseInsensitiveCompareStrings(const CharT* substring1, const CharT* substring2,
41 					size_t byteLength)
42 {
43     MOZ_ASSERT(byteLength % sizeof(CharT) == 0);
44     size_t length = byteLength / sizeof(CharT);
45 
46     for (size_t i = 0; i < length; i++) {
47         char16_t c1 = substring1[i];
48         char16_t c2 = substring2[i];
49         if (c1 != c2) {
50             c1 = unicode::ToLowerCase(c1);
51             c2 = unicode::ToLowerCase(c2);
52             if (c1 != c2)
53                 return 0;
54         }
55     }
56 
57     return 1;
58 }
59 
60 template int
61 irregexp::CaseInsensitiveCompareStrings(const Latin1Char* substring1, const Latin1Char* substring2,
62 					size_t byteLength);
63 
64 template int
65 irregexp::CaseInsensitiveCompareStrings(const char16_t* substring1, const char16_t* substring2,
66 					size_t byteLength);
67 
InterpretedRegExpMacroAssembler(LifoAlloc * alloc,RegExpShared * shared,size_t numSavedRegisters)68 InterpretedRegExpMacroAssembler::InterpretedRegExpMacroAssembler(LifoAlloc* alloc, RegExpShared* shared,
69                                                                  size_t numSavedRegisters)
70   : RegExpMacroAssembler(*alloc, shared, numSavedRegisters),
71     pc_(0),
72     advance_current_start_(0),
73     advance_current_offset_(0),
74     advance_current_end_(kInvalidPC),
75     buffer_(nullptr),
76     length_(0)
77 {
78     // The first int32 word is the number of registers.
79     Emit32(0);
80 }
81 
~InterpretedRegExpMacroAssembler()82 InterpretedRegExpMacroAssembler::~InterpretedRegExpMacroAssembler()
83 {
84     js_free(buffer_);
85 }
86 
87 RegExpCode
GenerateCode(JSContext * cx,bool match_only)88 InterpretedRegExpMacroAssembler::GenerateCode(JSContext* cx, bool match_only)
89 {
90     Bind(&backtrack_);
91     Emit(BC_POP_BT, 0);
92 
93     // Update the number of registers.
94     *(int32_t*)buffer_ = num_registers_;
95 
96     RegExpCode res;
97     res.byteCode = buffer_;
98     buffer_ = nullptr;
99     return res;
100 }
101 
102 void
AdvanceCurrentPosition(int by)103 InterpretedRegExpMacroAssembler::AdvanceCurrentPosition(int by)
104 {
105     MOZ_ASSERT(by >= kMinCPOffset);
106     MOZ_ASSERT(by <= kMaxCPOffset);
107     advance_current_start_ = pc_;
108     advance_current_offset_ = by;
109     Emit(BC_ADVANCE_CP, by);
110     advance_current_end_ = pc_;
111 }
112 
113 void
AdvanceRegister(int reg,int by)114 InterpretedRegExpMacroAssembler::AdvanceRegister(int reg, int by)
115 {
116     checkRegister(reg);
117     Emit(BC_ADVANCE_REGISTER, reg);
118     Emit32(by);
119 }
120 
121 void
Backtrack()122 InterpretedRegExpMacroAssembler::Backtrack()
123 {
124     Emit(BC_POP_BT, 0);
125 }
126 
127 void
Bind(jit::Label * label)128 InterpretedRegExpMacroAssembler::Bind(jit::Label* label)
129 {
130     advance_current_end_ = kInvalidPC;
131     MOZ_ASSERT(!label->bound());
132     if (label->used()) {
133         int pos = label->offset();
134         while (pos != jit::Label::INVALID_OFFSET) {
135             int fixup = pos;
136             pos = *reinterpret_cast<int32_t*>(buffer_ + fixup);
137             *reinterpret_cast<uint32_t*>(buffer_ + fixup) = pc_;
138         }
139     }
140     label->bind(pc_);
141 }
142 
143 void
CheckAtStart(jit::Label * on_at_start)144 InterpretedRegExpMacroAssembler::CheckAtStart(jit::Label* on_at_start)
145 {
146     Emit(BC_CHECK_AT_START, 0);
147     EmitOrLink(on_at_start);
148 }
149 
150 void
CheckCharacter(unsigned c,jit::Label * on_equal)151 InterpretedRegExpMacroAssembler::CheckCharacter(unsigned c, jit::Label* on_equal)
152 {
153     if (c > MAX_FIRST_ARG) {
154         Emit(BC_CHECK_4_CHARS, 0);
155         Emit32(c);
156     } else {
157         Emit(BC_CHECK_CHAR, c);
158     }
159     EmitOrLink(on_equal);
160 }
161 
162 void
CheckCharacterAfterAnd(unsigned c,unsigned and_with,jit::Label * on_equal)163 InterpretedRegExpMacroAssembler::CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal)
164 {
165     if (c > MAX_FIRST_ARG) {
166         Emit(BC_AND_CHECK_4_CHARS, 0);
167         Emit32(c);
168     } else {
169         Emit(BC_AND_CHECK_CHAR, c);
170     }
171     Emit32(and_with);
172     EmitOrLink(on_equal);
173 }
174 
175 void
CheckCharacterGT(char16_t limit,jit::Label * on_greater)176 InterpretedRegExpMacroAssembler::CheckCharacterGT(char16_t limit, jit::Label* on_greater)
177 {
178     Emit(BC_CHECK_GT, limit);
179     EmitOrLink(on_greater);
180 }
181 
182 void
CheckCharacterLT(char16_t limit,jit::Label * on_less)183 InterpretedRegExpMacroAssembler::CheckCharacterLT(char16_t limit, jit::Label* on_less)
184 {
185     Emit(BC_CHECK_LT, limit);
186     EmitOrLink(on_less);
187 }
188 
189 void
CheckGreedyLoop(jit::Label * on_tos_equals_current_position)190 InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_current_position)
191 {
192     Emit(BC_CHECK_GREEDY, 0);
193     EmitOrLink(on_tos_equals_current_position);
194 }
195 
196 void
CheckNotAtStart(jit::Label * on_not_at_start)197 InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start)
198 {
199     Emit(BC_CHECK_NOT_AT_START, 0);
200     EmitOrLink(on_not_at_start);
201 }
202 
203 void
CheckNotBackReference(int start_reg,jit::Label * on_no_match)204 InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match)
205 {
206     MOZ_ASSERT(start_reg >= 0);
207     MOZ_ASSERT(start_reg <= kMaxRegister);
208     Emit(BC_CHECK_NOT_BACK_REF, start_reg);
209     EmitOrLink(on_no_match);
210 }
211 
212 void
CheckNotBackReferenceIgnoreCase(int start_reg,jit::Label * on_no_match)213 InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match)
214 {
215     MOZ_ASSERT(start_reg >= 0);
216     MOZ_ASSERT(start_reg <= kMaxRegister);
217     Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
218     EmitOrLink(on_no_match);
219 }
220 
221 void
CheckNotCharacter(unsigned c,jit::Label * on_not_equal)222 InterpretedRegExpMacroAssembler::CheckNotCharacter(unsigned c, jit::Label* on_not_equal)
223 {
224     if (c > MAX_FIRST_ARG) {
225         Emit(BC_CHECK_NOT_4_CHARS, 0);
226         Emit32(c);
227     } else {
228         Emit(BC_CHECK_NOT_CHAR, c);
229     }
230     EmitOrLink(on_not_equal);
231 }
232 
233 void
CheckNotCharacterAfterAnd(unsigned c,unsigned and_with,jit::Label * on_not_equal)234 InterpretedRegExpMacroAssembler::CheckNotCharacterAfterAnd(unsigned c, unsigned and_with,
235                                                            jit::Label* on_not_equal)
236 {
237     if (c > MAX_FIRST_ARG) {
238         Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
239         Emit32(c);
240     } else {
241         Emit(BC_AND_CHECK_NOT_CHAR, c);
242     }
243     Emit32(and_with);
244     EmitOrLink(on_not_equal);
245 }
246 
247 void
CheckNotCharacterAfterMinusAnd(char16_t c,char16_t minus,char16_t and_with,jit::Label * on_not_equal)248 InterpretedRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
249                                                                 jit::Label* on_not_equal)
250 {
251     Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
252     Emit16(minus);
253     Emit16(and_with);
254     EmitOrLink(on_not_equal);
255 }
256 
257 void
CheckCharacterInRange(char16_t from,char16_t to,jit::Label * on_in_range)258 InterpretedRegExpMacroAssembler::CheckCharacterInRange(char16_t from, char16_t to,
259                                                        jit::Label* on_in_range)
260 {
261     Emit(BC_CHECK_CHAR_IN_RANGE, 0);
262     Emit16(from);
263     Emit16(to);
264     EmitOrLink(on_in_range);
265 }
266 
267 void
CheckCharacterNotInRange(char16_t from,char16_t to,jit::Label * on_not_in_range)268 InterpretedRegExpMacroAssembler::CheckCharacterNotInRange(char16_t from, char16_t to,
269                                                           jit::Label* on_not_in_range)
270 {
271     Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
272     Emit16(from);
273     Emit16(to);
274     EmitOrLink(on_not_in_range);
275 }
276 
277 void
CheckBitInTable(uint8_t * table,jit::Label * on_bit_set)278 InterpretedRegExpMacroAssembler::CheckBitInTable(uint8_t* table, jit::Label* on_bit_set)
279 {
280     static const int kBitsPerByte = 8;
281 
282     Emit(BC_CHECK_BIT_IN_TABLE, 0);
283     EmitOrLink(on_bit_set);
284     for (int i = 0; i < kTableSize; i += kBitsPerByte) {
285         int byte = 0;
286         for (int j = 0; j < kBitsPerByte; j++) {
287             if (table[i + j] != 0)
288                 byte |= 1 << j;
289         }
290         Emit8(byte);
291     }
292 }
293 
294 void
JumpOrBacktrack(jit::Label * to)295 InterpretedRegExpMacroAssembler::JumpOrBacktrack(jit::Label* to)
296 {
297     if (advance_current_end_ == pc_) {
298         // Combine advance current and goto.
299         pc_ = advance_current_start_;
300         Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
301         EmitOrLink(to);
302         advance_current_end_ = kInvalidPC;
303     } else {
304         // Regular goto.
305         Emit(BC_GOTO, 0);
306         EmitOrLink(to);
307     }
308 }
309 
310 void
Fail()311 InterpretedRegExpMacroAssembler::Fail()
312 {
313     Emit(BC_FAIL, 0);
314 }
315 
316 void
IfRegisterGE(int reg,int comparand,jit::Label * if_ge)317 InterpretedRegExpMacroAssembler::IfRegisterGE(int reg, int comparand, jit::Label* if_ge)
318 {
319     checkRegister(reg);
320     Emit(BC_CHECK_REGISTER_GE, reg);
321     Emit32(comparand);
322     EmitOrLink(if_ge);
323 }
324 
325 void
IfRegisterLT(int reg,int comparand,jit::Label * if_lt)326 InterpretedRegExpMacroAssembler::IfRegisterLT(int reg, int comparand, jit::Label* if_lt)
327 {
328     checkRegister(reg);
329     Emit(BC_CHECK_REGISTER_LT, reg);
330     Emit32(comparand);
331     EmitOrLink(if_lt);
332 }
333 
334 void
IfRegisterEqPos(int reg,jit::Label * if_eq)335 InterpretedRegExpMacroAssembler::IfRegisterEqPos(int reg, jit::Label* if_eq)
336 {
337     checkRegister(reg);
338     Emit(BC_CHECK_REGISTER_EQ_POS, reg);
339     EmitOrLink(if_eq);
340 }
341 
342 void
LoadCurrentCharacter(int cp_offset,jit::Label * on_end_of_input,bool check_bounds,int characters)343 InterpretedRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, jit::Label* on_end_of_input,
344                                                       bool check_bounds, int characters)
345 {
346     MOZ_ASSERT(cp_offset >= kMinCPOffset);
347     MOZ_ASSERT(cp_offset <= kMaxCPOffset);
348     int bytecode;
349     if (check_bounds) {
350         if (characters == 4) {
351             bytecode = BC_LOAD_4_CURRENT_CHARS;
352         } else if (characters == 2) {
353             bytecode = BC_LOAD_2_CURRENT_CHARS;
354         } else {
355             MOZ_ASSERT(characters == 1);
356             bytecode = BC_LOAD_CURRENT_CHAR;
357         }
358     } else {
359         if (characters == 4) {
360             bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
361         } else if (characters == 2) {
362             bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
363         } else {
364             MOZ_ASSERT(characters == 1);
365             bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
366         }
367     }
368     Emit(bytecode, cp_offset);
369     if (check_bounds)
370         EmitOrLink(on_end_of_input);
371 }
372 
373 void
PopCurrentPosition()374 InterpretedRegExpMacroAssembler::PopCurrentPosition()
375 {
376     Emit(BC_POP_CP, 0);
377 }
378 
379 void
PopRegister(int reg)380 InterpretedRegExpMacroAssembler::PopRegister(int reg)
381 {
382     checkRegister(reg);
383     Emit(BC_POP_REGISTER, reg);
384 }
385 
386 void
PushCurrentPosition()387 InterpretedRegExpMacroAssembler::PushCurrentPosition()
388 {
389     Emit(BC_PUSH_CP, 0);
390 }
391 
392 void
PushRegister(int reg,StackCheckFlag check_stack_limit)393 InterpretedRegExpMacroAssembler::PushRegister(int reg, StackCheckFlag check_stack_limit)
394 {
395     checkRegister(reg);
396     Emit(BC_PUSH_REGISTER, reg);
397 }
398 
399 void
ReadCurrentPositionFromRegister(int reg)400 InterpretedRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg)
401 {
402     checkRegister(reg);
403     Emit(BC_SET_CP_TO_REGISTER, reg);
404 }
405 
406 void
ReadBacktrackStackPointerFromRegister(int reg)407 InterpretedRegExpMacroAssembler::ReadBacktrackStackPointerFromRegister(int reg)
408 {
409     checkRegister(reg);
410     Emit(BC_SET_SP_TO_REGISTER, reg);
411 }
412 
413 void
SetCurrentPositionFromEnd(int by)414 InterpretedRegExpMacroAssembler::SetCurrentPositionFromEnd(int by)
415 {
416     MOZ_ASSERT(by >= 0 && by < (1 << 24));
417     Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
418 }
419 
420 void
SetRegister(int reg,int to)421 InterpretedRegExpMacroAssembler::SetRegister(int reg, int to)
422 {
423     checkRegister(reg);
424     Emit(BC_SET_REGISTER, reg);
425     Emit32(to);
426 }
427 
428 bool
Succeed()429 InterpretedRegExpMacroAssembler::Succeed()
430 {
431     Emit(BC_SUCCEED, 0);
432 
433     // Restart matching for global regexp not supported.
434     return false;
435 }
436 
437 void
WriteCurrentPositionToRegister(int reg,int cp_offset)438 InterpretedRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg, int cp_offset)
439 {
440     checkRegister(reg);
441     Emit(BC_SET_REGISTER_TO_CP, reg);
442     Emit32(cp_offset);  // Current position offset.
443 }
444 
445 void
ClearRegisters(int reg_from,int reg_to)446 InterpretedRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to)
447 {
448     MOZ_ASSERT(reg_from <= reg_to);
449     for (int reg = reg_from; reg <= reg_to; reg++)
450         SetRegister(reg, -1);
451 }
452 
453 void
WriteBacktrackStackPointerToRegister(int reg)454 InterpretedRegExpMacroAssembler::WriteBacktrackStackPointerToRegister(int reg)
455 {
456     checkRegister(reg);
457     Emit(BC_SET_REGISTER_TO_SP, reg);
458 }
459 
460 void
PushBacktrack(jit::Label * label)461 InterpretedRegExpMacroAssembler::PushBacktrack(jit::Label* label)
462 {
463     Emit(BC_PUSH_BT, 0);
464     EmitOrLink(label);
465 }
466 
467 void
BindBacktrack(jit::Label * label)468 InterpretedRegExpMacroAssembler::BindBacktrack(jit::Label* label)
469 {
470     Bind(label);
471 }
472 
473 void
EmitOrLink(jit::Label * label)474 InterpretedRegExpMacroAssembler::EmitOrLink(jit::Label* label)
475 {
476     if (label == nullptr)
477         label = &backtrack_;
478     if (label->bound()) {
479         Emit32(label->offset());
480     } else {
481         int pos = label->use(pc_);
482         Emit32(pos);
483     }
484 }
485 
486 void
Emit(uint32_t byte,uint32_t twenty_four_bits)487 InterpretedRegExpMacroAssembler::Emit(uint32_t byte, uint32_t twenty_four_bits)
488 {
489     uint32_t word = ((twenty_four_bits << BYTECODE_SHIFT) | byte);
490     Emit32(word);
491 }
492 
493 void
Emit32(uint32_t word)494 InterpretedRegExpMacroAssembler::Emit32(uint32_t word)
495 {
496     MOZ_ASSERT(pc_ <= length_);
497     if (pc_  + 3 >= length_)
498         Expand();
499     *reinterpret_cast<uint32_t*>(buffer_ + pc_) = word;
500     pc_ += 4;
501 }
502 
503 void
Emit16(uint32_t word)504 InterpretedRegExpMacroAssembler::Emit16(uint32_t word)
505 {
506     MOZ_ASSERT(pc_ <= length_);
507     if (pc_ + 1 >= length_)
508         Expand();
509     *reinterpret_cast<uint16_t*>(buffer_ + pc_) = word;
510     pc_ += 2;
511 }
512 
513 void
Emit8(uint32_t word)514 InterpretedRegExpMacroAssembler::Emit8(uint32_t word)
515 {
516     MOZ_ASSERT(pc_ <= length_);
517     if (pc_ == length_)
518         Expand();
519     *reinterpret_cast<unsigned char*>(buffer_ + pc_) = word;
520     pc_ += 1;
521 }
522 
523 void
Expand()524 InterpretedRegExpMacroAssembler::Expand()
525 {
526     AutoEnterOOMUnsafeRegion oomUnsafe;
527 
528     int newLength = Max(100, length_ * 2);
529     if (newLength < length_ + 4)
530         oomUnsafe.crash("InterpretedRegExpMacroAssembler::Expand");
531 
532     buffer_ = (uint8_t*) js_realloc(buffer_, newLength);
533     if (!buffer_)
534         oomUnsafe.crash("InterpretedRegExpMacroAssembler::Expand");
535     length_ = newLength;
536 }
537