1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "vm/RegExpObject.h"
8
9 #include "mozilla/MemoryReporting.h"
10 #include "mozilla/PodOperations.h"
11
12 #include <algorithm>
13 #include <type_traits>
14
15 #include "builtin/RegExp.h"
16 #include "builtin/SelfHostingDefines.h" // REGEXP_*_FLAG
17 #include "frontend/TokenStream.h"
18 #include "gc/HashUtil.h"
19 #ifndef ENABLE_NEW_REGEXP
20 # ifdef DEBUG
21 # include "irregexp/RegExpBytecode.h"
22 # endif
23 # include "irregexp/RegExpParser.h"
24 #endif
25 #include "jit/VMFunctions.h"
26 #include "js/RegExp.h"
27 #include "js/RegExpFlags.h" // JS::RegExpFlags
28 #include "js/StableStringChars.h"
29 #ifdef ENABLE_NEW_REGEXP
30 # include "new-regexp/regexp-stack.h"
31 # include "new-regexp/RegExpAPI.h"
32 #endif
33 #include "util/StringBuffer.h"
34 #include "vm/MatchPairs.h"
35 #include "vm/RegExpStatics.h"
36 #include "vm/StringType.h"
37 #include "vm/TraceLogging.h"
38 #ifdef DEBUG
39 # include "util/Unicode.h"
40 #endif
41 #include "vm/Xdr.h"
42
43 #include "vm/JSObject-inl.h"
44 #include "vm/NativeObject-inl.h"
45 #include "vm/Shape-inl.h"
46
47 using namespace js;
48
49 using JS::AutoStableStringChars;
50 using JS::CompileOptions;
51 using JS::RegExpFlag;
52 using JS::RegExpFlags;
53 using mozilla::ArrayLength;
54 using mozilla::DebugOnly;
55 using mozilla::PodCopy;
56
57 using JS::AutoCheckCannotGC;
58
// Compile-time checks that each JS::RegExpFlag bit has the same value as the
// corresponding REGEXP_*_FLAG constant used by self-hosted JS. One assertion
// per flag: /g, /i, /m, /s, /u and /y.
59 static_assert(RegExpFlag::Global == REGEXP_GLOBAL_FLAG,
60 "self-hosted JS and /g flag bits must agree");
61 static_assert(RegExpFlag::IgnoreCase == REGEXP_IGNORECASE_FLAG,
62 "self-hosted JS and /i flag bits must agree");
63 static_assert(RegExpFlag::Multiline == REGEXP_MULTILINE_FLAG,
64 "self-hosted JS and /m flag bits must agree");
65 static_assert(RegExpFlag::DotAll == REGEXP_DOTALL_FLAG,
66 "self-hosted JS and /s flag bits must agree");
67 static_assert(RegExpFlag::Unicode == REGEXP_UNICODE_FLAG,
68 "self-hosted JS and /u flag bits must agree");
69 static_assert(RegExpFlag::Sticky == REGEXP_STICKY_FLAG,
70 "self-hosted JS and /y flag bits must agree");
71
RegExpAlloc(JSContext * cx,NewObjectKind newKind,HandleObject proto)72 RegExpObject* js::RegExpAlloc(JSContext* cx, NewObjectKind newKind,
73 HandleObject proto /* = nullptr */) {
74 Rooted<RegExpObject*> regexp(
75 cx, NewObjectWithClassProtoAndKind<RegExpObject>(cx, proto, newKind));
76 if (!regexp) {
77 return nullptr;
78 }
79
80 regexp->initPrivate(nullptr);
81
82 if (!EmptyShape::ensureInitialCustomShape<RegExpObject>(cx, regexp)) {
83 return nullptr;
84 }
85
86 MOZ_ASSERT(regexp->lookupPure(cx->names().lastIndex)->slot() ==
87 RegExpObject::lastIndexSlot());
88
89 return regexp;
90 }
91
92 /* MatchPairs */
93
initArrayFrom(VectorMatchPairs & copyFrom)94 bool VectorMatchPairs::initArrayFrom(VectorMatchPairs& copyFrom) {
95 MOZ_ASSERT(copyFrom.pairCount() > 0);
96
97 if (!allocOrExpandArray(copyFrom.pairCount())) {
98 return false;
99 }
100
101 PodCopy(pairs_, copyFrom.pairs_, pairCount_);
102
103 return true;
104 }
105
allocOrExpandArray(size_t pairCount)106 bool VectorMatchPairs::allocOrExpandArray(size_t pairCount) {
107 if (!vec_.resizeUninitialized(pairCount)) {
108 return false;
109 }
110
111 pairs_ = &vec_[0];
112 pairCount_ = pairCount;
113 return true;
114 }
115
116 /* RegExpObject */
117
118 /* static */
getShared(JSContext * cx,Handle<RegExpObject * > regexp)119 RegExpShared* RegExpObject::getShared(JSContext* cx,
120 Handle<RegExpObject*> regexp) {
121 if (regexp->hasShared()) {
122 return regexp->sharedRef();
123 }
124
125 return createShared(cx, regexp);
126 }
127
128 /* static */
isOriginalFlagGetter(JSNative native,RegExpFlags * mask)129 bool RegExpObject::isOriginalFlagGetter(JSNative native, RegExpFlags* mask) {
130 if (native == regexp_global) {
131 *mask = RegExpFlag::Global;
132 return true;
133 }
134 if (native == regexp_ignoreCase) {
135 *mask = RegExpFlag::IgnoreCase;
136 return true;
137 }
138 if (native == regexp_multiline) {
139 *mask = RegExpFlag::Multiline;
140 return true;
141 }
142 if (native == regexp_dotAll) {
143 *mask = RegExpFlag::DotAll;
144 return true;
145 }
146 if (native == regexp_sticky) {
147 *mask = RegExpFlag::Sticky;
148 return true;
149 }
150 if (native == regexp_unicode) {
151 *mask = RegExpFlag::Unicode;
152 return true;
153 }
154
155 return false;
156 }
157
158 /* static */
trace(JSTracer * trc,JSObject * obj)159 void RegExpObject::trace(JSTracer* trc, JSObject* obj) {
160 obj->as<RegExpObject>().trace(trc);
161 }
162
IsMarkingTrace(JSTracer * trc)163 static inline bool IsMarkingTrace(JSTracer* trc) {
164 // Determine whether tracing is happening during normal marking. We need to
165 // test all the following conditions, since:
166 //
167 // 1. During TraceRuntime, RuntimeHeapIsBusy() is true, but the
168 // tracer might not be a marking tracer.
169 // 2. When a write barrier executes, isMarkingTracer is true, but
170 // RuntimeHeapIsBusy() will be false.
171
172 return JS::RuntimeHeapIsCollecting() && trc->isMarkingTracer();
173 }
174
trace(JSTracer * trc)175 void RegExpObject::trace(JSTracer* trc) {
176 TraceNullableEdge(trc, &sharedRef(), "RegExpObject shared");
177 }
178
// Class hooks for RegExpObject. Only the trace hook is provided; every other
// hook is left null.
179 static const JSClassOps RegExpObjectClassOps = {
180 nullptr, // addProperty
181 nullptr, // delProperty
182 nullptr, // enumerate
183 nullptr, // newEnumerate
184 nullptr, // resolve
185 nullptr, // mayResolve
186 nullptr, // finalize
187 nullptr, // call
188 nullptr, // hasInstance
189 nullptr, // construct
190 RegExpObject::trace, // trace
191 };
192
// Class specification shared by RegExpObject::class_ and
// RegExpObject::protoClass_. Entries are positional: the constructor is
// created from js::regexp_construct (arity 2, FUNCTION alloc kind), the
// prototype via GenericCreatePrototype<RegExpObject>, followed by a null
// entry and the regexp_static_props / regexp_methods / regexp_properties
// tables.
// NOTE(review): field names are not visible here; confirm the positional
// meaning against the ClassSpec declaration.
193 static const ClassSpec RegExpObjectClassSpec = {
194 GenericCreateConstructor<js::regexp_construct, 2, gc::AllocKind::FUNCTION>,
195 GenericCreatePrototype<RegExpObject>,
196 nullptr,
197 js::regexp_static_props,
198 js::regexp_methods,
199 js::regexp_properties};
200
// JSClass for RegExp instances: has a private slot, RESERVED_SLOTS reserved
// slots and a cached JSProto_RegExp prototype, and uses the class ops and
// class spec defined in this file.
201 const JSClass RegExpObject::class_ = {
202 js_RegExp_str,
203 JSCLASS_HAS_PRIVATE |
204 JSCLASS_HAS_RESERVED_SLOTS(RegExpObject::RESERVED_SLOTS) |
205 JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
206 &RegExpObjectClassOps, &RegExpObjectClassSpec};
207
// JSClass for the RegExp prototype object. It is named with js_Object_str,
// has no class ops (JS_NULL_CLASS_OPS) and shares RegExpObjectClassSpec with
// RegExpObject::class_.
208 const JSClass RegExpObject::protoClass_ = {
209 js_Object_str, JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp), JS_NULL_CLASS_OPS,
210 &RegExpObjectClassSpec};
211
212 template <typename CharT>
create(JSContext * cx,const CharT * chars,size_t length,RegExpFlags flags,frontend::TokenStreamAnyChars & tokenStream,NewObjectKind newKind)213 RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
214 size_t length, RegExpFlags flags,
215 frontend::TokenStreamAnyChars& tokenStream,
216 NewObjectKind newKind) {
217 static_assert(std::is_same_v<CharT, char16_t>,
218 "this code may need updating if/when CharT encodes UTF-8");
219
220 RootedAtom source(cx, AtomizeChars(cx, chars, length));
221 if (!source) {
222 return nullptr;
223 }
224
225 return create(cx, source, flags, tokenStream, newKind);
226 }
227
228 template RegExpObject* RegExpObject::create(
229 JSContext* cx, const char16_t* chars, size_t length, RegExpFlags flags,
230 frontend::TokenStreamAnyChars& tokenStream, NewObjectKind newKind);
231
232 template <typename CharT>
create(JSContext * cx,const CharT * chars,size_t length,RegExpFlags flags,NewObjectKind newKind)233 RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
234 size_t length, RegExpFlags flags,
235 NewObjectKind newKind) {
236 static_assert(std::is_same_v<CharT, char16_t>,
237 "this code may need updating if/when CharT encodes UTF-8");
238
239 RootedAtom source(cx, AtomizeChars(cx, chars, length));
240 if (!source) {
241 return nullptr;
242 }
243
244 return create(cx, source, flags, newKind);
245 }
246
247 template RegExpObject* RegExpObject::create(JSContext* cx,
248 const char16_t* chars,
249 size_t length, RegExpFlags flags,
250 NewObjectKind newKind);
251
create(JSContext * cx,HandleAtom source,RegExpFlags flags,frontend::TokenStreamAnyChars & tokenStream,NewObjectKind newKind)252 RegExpObject* RegExpObject::create(JSContext* cx, HandleAtom source,
253 RegExpFlags flags,
254 frontend::TokenStreamAnyChars& tokenStream,
255 NewObjectKind newKind) {
256 LifoAllocScope allocScope(&cx->tempLifoAlloc());
257 #ifdef ENABLE_NEW_REGEXP
258 if (!irregexp::CheckPatternSyntax(cx, tokenStream, source, flags)) {
259 return nullptr;
260 }
261 #else
262 if (!irregexp::ParsePatternSyntax(tokenStream, allocScope.alloc(), source,
263 flags.unicode())) {
264 return nullptr;
265 }
266 #endif
267 return createSyntaxChecked(cx, source, flags, newKind);
268 }
269
createSyntaxChecked(JSContext * cx,const char16_t * chars,size_t length,RegExpFlags flags,NewObjectKind newKind)270 RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
271 const char16_t* chars,
272 size_t length,
273 RegExpFlags flags,
274 NewObjectKind newKind) {
275 RootedAtom source(cx, AtomizeChars(cx, chars, length));
276 if (!source) {
277 return nullptr;
278 }
279
280 return createSyntaxChecked(cx, source, flags, newKind);
281 }
282
createSyntaxChecked(JSContext * cx,HandleAtom source,RegExpFlags flags,NewObjectKind newKind)283 RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
284 HandleAtom source,
285 RegExpFlags flags,
286 NewObjectKind newKind) {
287 Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, newKind));
288 if (!regexp) {
289 return nullptr;
290 }
291
292 regexp->initAndZeroLastIndex(source, flags, cx);
293
294 return regexp;
295 }
296
create(JSContext * cx,HandleAtom source,RegExpFlags flags,NewObjectKind newKind)297 RegExpObject* RegExpObject::create(JSContext* cx, HandleAtom source,
298 RegExpFlags flags, NewObjectKind newKind) {
299 CompileOptions dummyOptions(cx);
300 frontend::DummyTokenStream dummyTokenStream(cx, dummyOptions);
301
302 LifoAllocScope allocScope(&cx->tempLifoAlloc());
303 #ifdef ENABLE_NEW_REGEXP
304 if (!irregexp::CheckPatternSyntax(cx, dummyTokenStream, source, flags)) {
305 return nullptr;
306 }
307 #else
308 if (!irregexp::ParsePatternSyntax(dummyTokenStream, allocScope.alloc(),
309 source, flags.unicode())) {
310 return nullptr;
311 }
312 #endif
313
314 Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, newKind));
315 if (!regexp) {
316 return nullptr;
317 }
318
319 regexp->initAndZeroLastIndex(source, flags, cx);
320
321 return regexp;
322 }
323
324 /* static */
createShared(JSContext * cx,Handle<RegExpObject * > regexp)325 RegExpShared* RegExpObject::createShared(JSContext* cx,
326 Handle<RegExpObject*> regexp) {
327 MOZ_ASSERT(!regexp->hasShared());
328 RootedAtom source(cx, regexp->getSource());
329 RegExpShared* shared =
330 cx->zone()->regExps().get(cx, source, regexp->getFlags());
331 if (!shared) {
332 return nullptr;
333 }
334
335 regexp->setShared(*shared);
336 return shared;
337 }
338
assignInitialShape(JSContext * cx,Handle<RegExpObject * > self)339 Shape* RegExpObject::assignInitialShape(JSContext* cx,
340 Handle<RegExpObject*> self) {
341 MOZ_ASSERT(self->empty());
342
343 static_assert(LAST_INDEX_SLOT == 0);
344
345 /* The lastIndex property alone is writable but non-configurable. */
346 return NativeObject::addDataProperty(cx, self, cx->names().lastIndex,
347 LAST_INDEX_SLOT, JSPROP_PERMANENT);
348 }
349
initIgnoringLastIndex(JSAtom * source,RegExpFlags flags)350 void RegExpObject::initIgnoringLastIndex(JSAtom* source, RegExpFlags flags) {
351 // If this is a re-initialization with an existing RegExpShared, 'flags'
352 // may not match getShared()->flags, so forget the RegExpShared.
353 sharedRef() = nullptr;
354
355 setSource(source);
356 setFlags(flags);
357 }
358
initAndZeroLastIndex(JSAtom * source,RegExpFlags flags,JSContext * cx)359 void RegExpObject::initAndZeroLastIndex(JSAtom* source, RegExpFlags flags,
360 JSContext* cx) {
361 initIgnoringLastIndex(source, flags);
362 zeroLastIndex(cx);
363 }
364
IsRegExpLineTerminator(const JS::Latin1Char c)365 static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const JS::Latin1Char c) {
366 return c == '\n' || c == '\r';
367 }
368
IsRegExpLineTerminator(const char16_t c)369 static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const char16_t c) {
370 return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
371 }
372
AppendEscapedLineTerminator(StringBuffer & sb,const JS::Latin1Char c)373 static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(
374 StringBuffer& sb, const JS::Latin1Char c) {
375 switch (c) {
376 case '\n':
377 if (!sb.append('n')) {
378 return false;
379 }
380 break;
381 case '\r':
382 if (!sb.append('r')) {
383 return false;
384 }
385 break;
386 default:
387 MOZ_CRASH("Bad LineTerminator");
388 }
389 return true;
390 }
391
AppendEscapedLineTerminator(StringBuffer & sb,const char16_t c)392 static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(StringBuffer& sb,
393 const char16_t c) {
394 switch (c) {
395 case '\n':
396 if (!sb.append('n')) {
397 return false;
398 }
399 break;
400 case '\r':
401 if (!sb.append('r')) {
402 return false;
403 }
404 break;
405 case 0x2028:
406 if (!sb.append("u2028")) {
407 return false;
408 }
409 break;
410 case 0x2029:
411 if (!sb.append("u2029")) {
412 return false;
413 }
414 break;
415 default:
416 MOZ_CRASH("Bad LineTerminator");
417 }
418 return true;
419 }
420
421 template <typename CharT>
SetupBuffer(StringBuffer & sb,const CharT * oldChars,size_t oldLen,const CharT * it)422 static MOZ_ALWAYS_INLINE bool SetupBuffer(StringBuffer& sb,
423 const CharT* oldChars, size_t oldLen,
424 const CharT* it) {
425 if constexpr (std::is_same_v<CharT, char16_t>) {
426 if (!sb.ensureTwoByteChars()) {
427 return false;
428 }
429 }
430
431 if (!sb.reserve(oldLen + 1)) {
432 return false;
433 }
434
435 sb.infallibleAppend(oldChars, size_t(it - oldChars));
436 return true;
437 }
438
439 // Note: leaves the string buffer empty if no escaping need be performed.
440 template <typename CharT>
EscapeRegExpPattern(StringBuffer & sb,const CharT * oldChars,size_t oldLen)441 static bool EscapeRegExpPattern(StringBuffer& sb, const CharT* oldChars,
442 size_t oldLen) {
443 bool inBrackets = false;
444 bool previousCharacterWasBackslash = false;
445
446 for (const CharT* it = oldChars; it < oldChars + oldLen; ++it) {
447 CharT ch = *it;
448 if (!previousCharacterWasBackslash) {
449 if (inBrackets) {
450 if (ch == ']') {
451 inBrackets = false;
452 }
453 } else if (ch == '/') {
454 // There's a forward slash that needs escaping.
455 if (sb.empty()) {
456 // This is the first char we've seen that needs escaping,
457 // copy everything up to this point.
458 if (!SetupBuffer(sb, oldChars, oldLen, it)) {
459 return false;
460 }
461 }
462 if (!sb.append('\\')) {
463 return false;
464 }
465 } else if (ch == '[') {
466 inBrackets = true;
467 }
468 }
469
470 if (IsRegExpLineTerminator(ch)) {
471 // There's LineTerminator that needs escaping.
472 if (sb.empty()) {
473 // This is the first char we've seen that needs escaping,
474 // copy everything up to this point.
475 if (!SetupBuffer(sb, oldChars, oldLen, it)) {
476 return false;
477 }
478 }
479 if (!previousCharacterWasBackslash) {
480 if (!sb.append('\\')) {
481 return false;
482 }
483 }
484 if (!AppendEscapedLineTerminator(sb, ch)) {
485 return false;
486 }
487 } else if (!sb.empty()) {
488 if (!sb.append(ch)) {
489 return false;
490 }
491 }
492
493 if (previousCharacterWasBackslash) {
494 previousCharacterWasBackslash = false;
495 } else if (ch == '\\') {
496 previousCharacterWasBackslash = true;
497 }
498 }
499
500 return true;
501 }
502
503 // ES6 draft rev32 21.2.3.2.4.
EscapeRegExpPattern(JSContext * cx,HandleAtom src)504 JSAtom* js::EscapeRegExpPattern(JSContext* cx, HandleAtom src) {
505 // Step 2.
506 if (src->length() == 0) {
507 return cx->names().emptyRegExp;
508 }
509
510 // We may never need to use |sb|. Start using it lazily.
511 StringBuffer sb(cx);
512
513 if (src->hasLatin1Chars()) {
514 JS::AutoCheckCannotGC nogc;
515 if (!::EscapeRegExpPattern(sb, src->latin1Chars(nogc), src->length())) {
516 return nullptr;
517 }
518 } else {
519 JS::AutoCheckCannotGC nogc;
520 if (!::EscapeRegExpPattern(sb, src->twoByteChars(nogc), src->length())) {
521 return nullptr;
522 }
523 }
524
525 // Step 3.
526 return sb.empty() ? src : sb.finishAtom();
527 }
528
529 // ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
toString(JSContext * cx) const530 JSLinearString* RegExpObject::toString(JSContext* cx) const {
531 // Steps 3-4.
532 RootedAtom src(cx, getSource());
533 if (!src) {
534 return nullptr;
535 }
536 RootedAtom escapedSrc(cx, EscapeRegExpPattern(cx, src));
537
538 // Step 7.
539 JSStringBuilder sb(cx);
540 size_t len = escapedSrc->length();
541 if (!sb.reserve(len + 2)) {
542 return nullptr;
543 }
544 sb.infallibleAppend('/');
545 if (!sb.append(escapedSrc)) {
546 return nullptr;
547 }
548 sb.infallibleAppend('/');
549
550 // Steps 5-7.
551 if (global() && !sb.append('g')) {
552 return nullptr;
553 }
554 if (ignoreCase() && !sb.append('i')) {
555 return nullptr;
556 }
557 if (multiline() && !sb.append('m')) {
558 return nullptr;
559 }
560 if (dotAll() && !sb.append('s')) {
561 return nullptr;
562 }
563 if (unicode() && !sb.append('u')) {
564 return nullptr;
565 }
566 if (sticky() && !sb.append('y')) {
567 return nullptr;
568 }
569
570 return sb.finishString();
571 }
572
573 #if defined(DEBUG) && !defined(ENABLE_NEW_REGEXP)
// Debug-only disassembler (compiled only when DEBUG is defined and
// ENABLE_NEW_REGEXP is not). Makes sure |re| has bytecode for |input|, then
// prints the register count followed by one line per instruction to stderr.
// Returns false if compilation fails, true otherwise. An unknown opcode
// crashes.
574 /* static */
dumpBytecode(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input)575 bool RegExpShared::dumpBytecode(JSContext* cx, MutableHandleRegExpShared re,
576 HandleLinearString input) {
577 if (!RegExpShared::compileIfNecessary(cx, re, input, CodeKind::Bytecode)) {
578 return false;
579 }
580
// Use the compilation that matches whether |input| has Latin-1 chars.
581 const uint8_t* byteCode = re->compilation(input->hasLatin1Chars()).byteCode;
582 const uint8_t* pc = byteCode;
583
// Reads a signed 32-bit value at |pc|, which must be 4-byte aligned.
584 auto Load32Aligned = [](const uint8_t* pc) -> int32_t {
585 MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 3) == 0);
586 return *reinterpret_cast<const int32_t*>(pc);
587 };
588
// Reads an unsigned 16-bit value at |pc|, which must be 2-byte aligned.
589 auto Load16Aligned = [](const uint8_t* pc) -> int32_t {
590 MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 1) == 0);
591 return *reinterpret_cast<const uint16_t*>(pc);
592 };
593
// The first 32-bit word is printed as the register count; instructions are
// read starting right after it.
594 int32_t numRegisters = Load32Aligned(pc);
595 fprintf(stderr, "numRegisters: %d\n", numRegisters);
596 pc += 4;
597
598 fprintf(stderr, "loc op\n");
599 fprintf(stderr, "----- --\n");
600
// Prints |text| to stderr, one lowercased character at a time.
601 auto DumpLower = [](const char* text) {
602 while (*text) {
603 fprintf(stderr, "%c", unicode::ToLowerCase(*text));
604 text++;
605 }
606 };
607
// Helper macros for the switch below:
//   BYTECODE(NAME)       opens the case for BC_NAME and prints its name.
//   ADVANCE(NAME)        ends the line, steps |pc| over the instruction and
//                        raises |maxPc| to the new |pc|.
//   STOP(NAME)           ends the line and steps |pc| without raising |maxPc|.
//   JUMP(NAME, OFFSET)   ends the line, raises |maxPc| to the jump target
//                        only, then steps |pc|.
//   BRANCH(NAME, OFFSET) ends the line, steps |pc| and raises |maxPc| to the
//                        larger of the new |pc| and the branch target.
608 # define BYTECODE(NAME) \
609 case irregexp::BC_##NAME: \
610 DumpLower(#NAME);
611 # define ADVANCE(NAME) \
612 fprintf(stderr, "\n"); \
613 pc += irregexp::BC_##NAME##_LENGTH; \
614 maxPc = std::max(maxPc, pc); \
615 break;
616 # define STOP(NAME) \
617 fprintf(stderr, "\n"); \
618 pc += irregexp::BC_##NAME##_LENGTH; \
619 break;
620 # define JUMP(NAME, OFFSET) \
621 fprintf(stderr, "\n"); \
622 maxPc = std::max(maxPc, byteCode + OFFSET); \
623 pc += irregexp::BC_##NAME##_LENGTH; \
624 break;
625 # define BRANCH(NAME, OFFSET) \
626 fprintf(stderr, "\n"); \
627 pc += irregexp::BC_##NAME##_LENGTH; \
628 maxPc = std::max(maxPc, std::max(pc, byteCode + OFFSET)); \
629 break;
630
631 // Bytecode has no end marker, we need to calculate the bytecode length by
632 // tracing jumps and branches.
633 const uint8_t* maxPc = pc;
634 while (pc <= maxPc) {
635 fprintf(stderr, "%05d: ", int32_t(pc - byteCode));
636 int32_t insn = Load32Aligned(pc);
// The opcode is the masked part of the first word; several instructions print
// an operand taken from the bits above BYTECODE_SHIFT.
637 switch (insn & irregexp::BYTECODE_MASK) {
638 BYTECODE(BREAK) { STOP(BREAK); }
639 BYTECODE(PUSH_CP) { ADVANCE(PUSH_CP); }
640 BYTECODE(PUSH_BT) {
641 int32_t offset = Load32Aligned(pc + 4);
642 fprintf(stderr, " %d", offset);
643 // Pushed value is used by POP_BT for jumping.
644 // Resolve maxPc here.
645 BRANCH(PUSH_BT, offset);
646 }
647 BYTECODE(PUSH_REGISTER) {
648 fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
649 ADVANCE(PUSH_REGISTER);
650 }
651 BYTECODE(SET_REGISTER) {
652 fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
653 Load32Aligned(pc + 4));
654 ADVANCE(SET_REGISTER);
655 }
656 BYTECODE(ADVANCE_REGISTER) {
657 fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
658 Load32Aligned(pc + 4));
659 ADVANCE(ADVANCE_REGISTER);
660 }
661 BYTECODE(SET_REGISTER_TO_CP) {
662 fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
663 Load32Aligned(pc + 4));
664 ADVANCE(SET_REGISTER_TO_CP);
665 }
666 BYTECODE(SET_CP_TO_REGISTER) {
667 fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
668 ADVANCE(SET_CP_TO_REGISTER);
669 }
670 BYTECODE(SET_REGISTER_TO_SP) {
671 fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
672 ADVANCE(SET_REGISTER_TO_SP);
673 }
674 BYTECODE(SET_SP_TO_REGISTER) {
675 fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
676 ADVANCE(SET_SP_TO_REGISTER);
677 }
678 BYTECODE(POP_CP) { ADVANCE(POP_CP); }
679 BYTECODE(POP_BT) {
680 // Jump is already resolved in PUSH_BT.
681 STOP(POP_BT);
682 }
683 BYTECODE(POP_REGISTER) {
684 fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
685 ADVANCE(POP_REGISTER);
686 }
687 BYTECODE(FAIL) { ADVANCE(FAIL); }
688 BYTECODE(SUCCEED) { ADVANCE(SUCCEED); }
689 BYTECODE(ADVANCE_CP) {
690 fprintf(stderr, " %d", insn >> irregexp::BYTECODE_SHIFT);
691 ADVANCE(ADVANCE_CP);
692 }
693 BYTECODE(GOTO) {
694 int32_t offset = Load32Aligned(pc + 4);
695 fprintf(stderr, " %d", offset);
696 JUMP(GOTO, offset);
697 }
698 BYTECODE(ADVANCE_CP_AND_GOTO) {
699 int32_t offset = Load32Aligned(pc + 4);
700 fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
701 JUMP(ADVANCE_CP_AND_GOTO, offset);
702 }
703 BYTECODE(CHECK_GREEDY) {
704 int32_t offset = Load32Aligned(pc + 4);
705 fprintf(stderr, " %d", offset);
706 BRANCH(CHECK_GREEDY, offset);
707 }
708 BYTECODE(LOAD_CURRENT_CHAR) {
709 int32_t offset = Load32Aligned(pc + 4);
710 fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
711 BRANCH(LOAD_CURRENT_CHAR, offset);
712 }
713 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
714 fprintf(stderr, " %d", insn >> irregexp::BYTECODE_SHIFT);
715 ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
716 }
717 BYTECODE(LOAD_2_CURRENT_CHARS) {
718 int32_t offset = Load32Aligned(pc + 4);
719 fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
720 BRANCH(LOAD_2_CURRENT_CHARS, offset);
721 }
722 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
723 fprintf(stderr, " %d", insn >> irregexp::BYTECODE_SHIFT);
724 ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
725 }
726 BYTECODE(LOAD_4_CURRENT_CHARS) { ADVANCE(LOAD_4_CURRENT_CHARS); }
727 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
728 ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
729 }
730 BYTECODE(CHECK_4_CHARS) {
731 int32_t offset = Load32Aligned(pc + 8);
732 fprintf(stderr, " %d, %d", Load32Aligned(pc + 4), offset);
733 BRANCH(CHECK_4_CHARS, offset);
734 }
735 BYTECODE(CHECK_CHAR) {
736 int32_t offset = Load32Aligned(pc + 4);
737 fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
738 BRANCH(CHECK_CHAR, offset);
739 }
740 BYTECODE(CHECK_NOT_4_CHARS) {
741 int32_t offset = Load32Aligned(pc + 8);
742 fprintf(stderr, " %d, %d", Load32Aligned(pc + 4), offset);
743 BRANCH(CHECK_NOT_4_CHARS, offset);
744 }
745 BYTECODE(CHECK_NOT_CHAR) {
746 int32_t offset = Load32Aligned(pc + 4);
747 fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
748 BRANCH(CHECK_NOT_CHAR, offset);
749 }
750 BYTECODE(AND_CHECK_4_CHARS) {
751 int32_t offset = Load32Aligned(pc + 12);
752 fprintf(stderr, " %d, %d, %d", Load32Aligned(pc + 4),
753 Load32Aligned(pc + 8), offset);
754 BRANCH(AND_CHECK_4_CHARS, offset);
755 }
756 BYTECODE(AND_CHECK_CHAR) {
757 int32_t offset = Load32Aligned(pc + 8);
758 fprintf(stderr, " %d, %d, %d", insn >> irregexp::BYTECODE_SHIFT,
759 Load32Aligned(pc + 4), offset);
760 BRANCH(AND_CHECK_CHAR, offset);
761 }
762 BYTECODE(AND_CHECK_NOT_4_CHARS) {
763 int32_t offset = Load32Aligned(pc + 12);
764 fprintf(stderr, " %d, %d, %d", Load32Aligned(pc + 4),
765 Load32Aligned(pc + 8), offset);
766 BRANCH(AND_CHECK_NOT_4_CHARS, offset);
767 }
768 BYTECODE(AND_CHECK_NOT_CHAR) {
769 int32_t offset = Load32Aligned(pc + 8);
770 fprintf(stderr, " %d, %d, %d", insn >> irregexp::BYTECODE_SHIFT,
771 Load32Aligned(pc + 4), offset);
772 BRANCH(AND_CHECK_NOT_CHAR, offset);
773 }
774 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
775 int32_t offset = Load32Aligned(pc + 8);
776 fprintf(stderr, " %d, %d, %d, %d", insn >> irregexp::BYTECODE_SHIFT,
777 Load16Aligned(pc + 4), Load16Aligned(pc + 6), offset);
778 BRANCH(MINUS_AND_CHECK_NOT_CHAR, offset);
779 }
780 BYTECODE(CHECK_CHAR_IN_RANGE) {
781 int32_t offset = Load32Aligned(pc + 8);
782 fprintf(stderr, " %d, %d, %d", Load16Aligned(pc + 4),
783 Load16Aligned(pc + 6), offset);
784 BRANCH(CHECK_CHAR_IN_RANGE, offset);
785 }
786 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
787 int32_t offset = Load32Aligned(pc + 8);
788 fprintf(stderr, " %d, %d, %d", Load16Aligned(pc + 4),
789 Load16Aligned(pc + 6), offset);
790 BRANCH(CHECK_CHAR_NOT_IN_RANGE, offset);
791 }
792 BYTECODE(CHECK_BIT_IN_TABLE) {
793 int32_t offset = Load32Aligned(pc + 4);
794 fprintf(stderr,
795 " %d, "
796 "%02x %02x %02x %02x %02x %02x %02x %02x "
797 "%02x %02x %02x %02x %02x %02x %02x %02x",
798 offset, pc[8], pc[9], pc[10], pc[11], pc[12], pc[13], pc[14],
799 pc[15], pc[16], pc[17], pc[18], pc[19], pc[20], pc[21], pc[22],
800 pc[23]);
801 BRANCH(CHECK_BIT_IN_TABLE, offset);
802 }
803 BYTECODE(CHECK_LT) {
804 int32_t offset = Load32Aligned(pc + 4);
805 fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
806 BRANCH(CHECK_LT, offset);
807 }
808 BYTECODE(CHECK_GT) {
809 int32_t offset = Load32Aligned(pc + 4);
810 fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
811 BRANCH(CHECK_GT, offset);
812 }
813 BYTECODE(CHECK_REGISTER_LT) {
814 int32_t offset = Load32Aligned(pc + 8);
815 fprintf(stderr, " reg[%d], %d, %d", insn >> irregexp::BYTECODE_SHIFT,
816 Load32Aligned(pc + 4), offset);
817 BRANCH(CHECK_REGISTER_LT, offset);
818 }
819 BYTECODE(CHECK_REGISTER_GE) {
820 int32_t offset = Load32Aligned(pc + 8);
821 fprintf(stderr, " reg[%d], %d, %d", insn >> irregexp::BYTECODE_SHIFT,
822 Load32Aligned(pc + 4), offset);
823 BRANCH(CHECK_REGISTER_GE, offset);
824 }
825 BYTECODE(CHECK_REGISTER_EQ_POS) {
826 int32_t offset = Load32Aligned(pc + 4);
827 fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
828 offset);
829 BRANCH(CHECK_REGISTER_EQ_POS, offset);
830 }
831 BYTECODE(CHECK_NOT_REGS_EQUAL) {
832 int32_t offset = Load32Aligned(pc + 8);
833 fprintf(stderr, " reg[%d], %d, %d", insn >> irregexp::BYTECODE_SHIFT,
834 Load32Aligned(pc + 4), offset);
835 BRANCH(CHECK_NOT_REGS_EQUAL, offset);
836 }
837 BYTECODE(CHECK_NOT_BACK_REF) {
838 int32_t offset = Load32Aligned(pc + 4);
839 fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
840 offset);
841 BRANCH(CHECK_NOT_BACK_REF, offset);
842 }
843 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
844 int32_t offset = Load32Aligned(pc + 4);
845 fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
846 offset);
847 BRANCH(CHECK_NOT_BACK_REF_NO_CASE, offset);
848 }
849 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
850 int32_t offset = Load32Aligned(pc + 4);
851 fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
852 offset);
853 BRANCH(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, offset);
854 }
855 BYTECODE(CHECK_AT_START) {
856 int32_t offset = Load32Aligned(pc + 4);
857 fprintf(stderr, " %d", offset);
858 BRANCH(CHECK_AT_START, offset);
859 }
860 BYTECODE(CHECK_NOT_AT_START) {
861 int32_t offset = Load32Aligned(pc + 4);
862 fprintf(stderr, " %d", offset);
863 BRANCH(CHECK_NOT_AT_START, offset);
864 }
865 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
866 fprintf(stderr, " %u",
867 static_cast<uint32_t>(insn) >> irregexp::BYTECODE_SHIFT);
868 ADVANCE(SET_CURRENT_POSITION_FROM_END);
869 }
870 default:
871 MOZ_CRASH("Bad bytecode");
872 }
873 }
874
875 # undef BYTECODE
876 # undef ADVANCE
877 # undef STOP
878 # undef JUMP
879 # undef BRANCH
880
881 return true;
882 }
883
884 /* static */
dumpBytecode(JSContext * cx,Handle<RegExpObject * > regexp,HandleLinearString input)885 bool RegExpObject::dumpBytecode(JSContext* cx, Handle<RegExpObject*> regexp,
886 HandleLinearString input) {
887 RootedRegExpShared shared(cx, getShared(cx, regexp));
888 if (!shared) {
889 return false;
890 }
891
892 return RegExpShared::dumpBytecode(cx, &shared, input);
893 }
894 #endif // DEBUG && !ENABLE_NEW_REGEXP
895
896 template <typename CharT>
IsRegExpMetaChar(CharT ch)897 static MOZ_ALWAYS_INLINE bool IsRegExpMetaChar(CharT ch) {
898 switch (ch) {
899 /* ES 2016 draft Mar 25, 2016 21.2.1 SyntaxCharacter. */
900 case '^':
901 case '$':
902 case '\\':
903 case '.':
904 case '*':
905 case '+':
906 case '?':
907 case '(':
908 case ')':
909 case '[':
910 case ']':
911 case '{':
912 case '}':
913 case '|':
914 return true;
915 default:
916 return false;
917 }
918 }
919
920 template <typename CharT>
HasRegExpMetaChars(const CharT * chars,size_t length)921 bool js::HasRegExpMetaChars(const CharT* chars, size_t length) {
922 for (size_t i = 0; i < length; ++i) {
923 if (IsRegExpMetaChar<CharT>(chars[i])) {
924 return true;
925 }
926 }
927 return false;
928 }
929
930 template bool js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars,
931 size_t length);
932
933 template bool js::HasRegExpMetaChars<char16_t>(const char16_t* chars,
934 size_t length);
935
StringHasRegExpMetaChars(JSLinearString * str)936 bool js::StringHasRegExpMetaChars(JSLinearString* str) {
937 AutoCheckCannotGC nogc;
938 if (str->hasLatin1Chars()) {
939 return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
940 }
941
942 return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
943 }
944
945 /* RegExpShared */
946
RegExpShared(JSAtom * source,RegExpFlags flags)947 RegExpShared::RegExpShared(JSAtom* source, RegExpFlags flags)
948 : headerAndSource(source), pairCount_(0), flags(flags) {}
949
traceChildren(JSTracer * trc)950 void RegExpShared::traceChildren(JSTracer* trc) {
951 // Discard code to avoid holding onto ExecutablePools.
952 if (IsMarkingTrace(trc) && trc->runtime()->gc.isShrinkingGC()) {
953 discardJitCode();
954 }
955
956 TraceNullableEdge(trc, &headerAndSource, "RegExpShared source");
957 #ifdef ENABLE_NEW_REGEXP
958 if (kind() == RegExpShared::Kind::Atom) {
959 TraceNullableEdge(trc, &patternAtom_, "RegExpShared pattern atom");
960 } else {
961 for (auto& comp : compilationArray) {
962 TraceNullableEdge(trc, &comp.jitCode, "RegExpShared code");
963 }
964 TraceNullableEdge(trc, &groupsTemplate_, "RegExpShared groups template");
965 }
966 #else
967 for (auto& comp : compilationArray) {
968 TraceNullableEdge(trc, &comp.jitCode, "RegExpShared code");
969 }
970 #endif
971 }
972
discardJitCode()973 void RegExpShared::discardJitCode() {
974 for (auto& comp : compilationArray) {
975 comp.jitCode = nullptr;
976 }
977
978 // We can also purge the tables used by JIT code.
979 tables.clearAndFree();
980 }
981
finalize(JSFreeOp * fop)982 void RegExpShared::finalize(JSFreeOp* fop) {
983 for (auto& comp : compilationArray) {
984 if (comp.byteCode) {
985 size_t length = comp.byteCodeLength();
986 fop->free_(this, comp.byteCode, length, MemoryUse::RegExpSharedBytecode);
987 }
988 }
989 #ifdef ENABLE_NEW_REGEXP
990 if (namedCaptureIndices_) {
991 size_t length = numNamedCaptures() * sizeof(uint32_t);
992 fop->free_(this, namedCaptureIndices_, length,
993 MemoryUse::RegExpSharedNamedCaptureData);
994 }
995 #endif
996 tables.~JitCodeTables();
997 }
998
999 /* static */
compile(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)1000 bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
1001 HandleLinearString input,
1002 RegExpShared::CodeKind codeKind) {
1003 TraceLoggerThread* logger = TraceLoggerForCurrentThread(cx);
1004 AutoTraceLog logCompile(logger, TraceLogger_IrregexpCompile);
1005
1006 RootedAtom pattern(cx, re->getSource());
1007 return compile(cx, re, pattern, input, codeKind);
1008 }
1009
1010 #ifdef ENABLE_NEW_REGEXP
compile(JSContext * cx,MutableHandleRegExpShared re,HandleAtom pattern,HandleLinearString input,RegExpShared::CodeKind code)1011 bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
1012 HandleAtom pattern, HandleLinearString input,
1013 RegExpShared::CodeKind code) {
1014 MOZ_CRASH("TODO");
1015 }
1016 /* static */
compileIfNecessary(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)1017 bool RegExpShared::compileIfNecessary(JSContext* cx,
1018 MutableHandleRegExpShared re,
1019 HandleLinearString input,
1020 RegExpShared::CodeKind codeKind) {
1021 if (codeKind == RegExpShared::CodeKind::Any) {
1022 // We start by interpreting regexps, then compile them once they are
1023 // sufficiently hot. For very long input strings, we tier up eagerly.
1024 codeKind = RegExpShared::CodeKind::Bytecode;
1025 if (IsNativeRegExpEnabled() &&
1026 (re->markedForTierUp() || input->length() > 1000)) {
1027 codeKind = RegExpShared::CodeKind::Jitcode;
1028 }
1029 }
1030
1031 bool needsCompile = false;
1032 if (re->kind() == RegExpShared::Kind::Unparsed) {
1033 needsCompile = true;
1034 }
1035 if (re->kind() == RegExpShared::Kind::RegExp) {
1036 if (!re->isCompiled(input->hasLatin1Chars(), codeKind)) {
1037 needsCompile = true;
1038 }
1039 }
1040 if (needsCompile) {
1041 return irregexp::CompilePattern(cx, re, input, codeKind);
1042 }
1043 return true;
1044 }
1045
1046 /* static */
execute(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t start,VectorMatchPairs * matches)1047 RegExpRunStatus RegExpShared::execute(JSContext* cx,
1048 MutableHandleRegExpShared re,
1049 HandleLinearString input, size_t start,
1050 VectorMatchPairs* matches) {
1051 MOZ_ASSERT(matches);
1052
1053 // TODO: Add tracelogger support
1054
1055 /* Compile the code at point-of-use. */
1056 if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
1057 return RegExpRunStatus_Error;
1058 }
1059
1060 /*
1061 * Ensure sufficient memory for output vector.
1062 * No need to initialize it. The RegExp engine fills them in on a match.
1063 */
1064 if (!matches->allocOrExpandArray(re->pairCount())) {
1065 ReportOutOfMemory(cx);
1066 return RegExpRunStatus_Error;
1067 }
1068
1069 if (re->kind() == RegExpShared::Kind::Atom) {
1070 return RegExpShared::executeAtom(cx, re, input, start, matches);
1071 }
1072
1073 // Reset the Irregexp backtrack stack if it grows during execution.
1074 irregexp::RegExpStackScope stackScope(cx->isolate);
1075
1076 /*
1077 * Ensure sufficient memory for output vector.
1078 * No need to initialize it. The RegExp engine fills them in on a match.
1079 */
1080 if (!matches->allocOrExpandArray(re->pairCount())) {
1081 ReportOutOfMemory(cx);
1082 return RegExpRunStatus_Error;
1083 }
1084
1085 uint32_t interruptRetries = 0;
1086 const uint32_t maxInterruptRetries = 4;
1087 do {
1088 RegExpRunStatus result = irregexp::Execute(cx, re, input, start, matches);
1089
1090 if (result == RegExpRunStatus_Error) {
1091 /* Execute can return RegExpRunStatus_Error:
1092 *
1093 * 1. If the native stack overflowed
1094 * 2. If the backtrack stack overflowed
1095 * 3. If an interrupt was requested during execution.
1096 *
1097 * In the first two cases, we want to throw an error. In the
1098 * third case, we want to handle the interrupt and try again.
1099 * We cap the number of times we will retry.
1100 */
1101 if (cx->hasAnyPendingInterrupt()) {
1102 if (!CheckForInterrupt(cx)) {
1103 return RegExpRunStatus_Error;
1104 }
1105 if (interruptRetries++ < maxInterruptRetries) {
1106 continue;
1107 }
1108 }
1109 // If we have run out of retries, this regexp takes too long to execute.
1110 ReportOverRecursed(cx);
1111 return RegExpRunStatus_Error;
1112 }
1113
1114 MOZ_ASSERT(result == RegExpRunStatus_Success ||
1115 result == RegExpRunStatus_Success_NotFound);
1116
1117 return result;
1118 } while (true);
1119
1120 MOZ_CRASH("Unreachable");
1121 }
1122
useAtomMatch(HandleAtom pattern)1123 void RegExpShared::useAtomMatch(HandleAtom pattern) {
1124 MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
1125 kind_ = RegExpShared::Kind::Atom;
1126 patternAtom_ = pattern;
1127 pairCount_ = 1;
1128 }
1129
useRegExpMatch(size_t pairCount)1130 void RegExpShared::useRegExpMatch(size_t pairCount) {
1131 MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
1132 kind_ = RegExpShared::Kind::RegExp;
1133 pairCount_ = pairCount;
1134 ticks_ = jit::JitOptions.regexpWarmUpThreshold;
1135 }
1136
1137 /* static */
initializeNamedCaptures(JSContext * cx,HandleRegExpShared re,HandleNativeObject namedCaptures)1138 bool RegExpShared::initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
1139 HandleNativeObject namedCaptures) {
1140 MOZ_ASSERT(!re->groupsTemplate_);
1141 MOZ_ASSERT(!re->namedCaptureIndices_);
1142
1143 // The irregexp parser returns named capture information in the form
1144 // of an ArrayObject, where even elements store the capture name and
1145 // odd elements store the corresponding capture index. We create a
1146 // template object with a property for each capture name, and store
1147 // the capture indices as a heap-allocated array.
1148 uint32_t numNamedCaptures = namedCaptures->getDenseInitializedLength() / 2;
1149
1150 // Create a plain template object.
1151 RootedPlainObject templateObject(
1152 cx, NewTenuredObjectWithGivenProto<PlainObject>(cx, nullptr));
1153 if (!templateObject) {
1154 return false;
1155 }
1156
1157 // Create a new group for the template.
1158 Rooted<TaggedProto> proto(cx, templateObject->taggedProto());
1159 ObjectGroup* group = ObjectGroupRealm::makeGroup(
1160 cx, templateObject->realm(), templateObject->getClass(), proto);
1161 if (!group) {
1162 return false;
1163 }
1164 templateObject->setGroup(group);
1165
1166 // Initialize the properties of the template.
1167 RootedValue dummyString(cx, StringValue(cx->runtime()->emptyString));
1168 for (uint32_t i = 0; i < numNamedCaptures; i++) {
1169 RootedString name(cx, namedCaptures->getDenseElement(i * 2).toString());
1170 RootedId id(cx, NameToId(name->asAtom().asPropertyName()));
1171 if (!NativeDefineDataProperty(cx, templateObject, id, dummyString,
1172 JSPROP_ENUMERATE)) {
1173 return false;
1174 }
1175 AddTypePropertyId(cx, templateObject, id, UndefinedValue());
1176 }
1177
1178 // Allocate the capture index array.
1179 uint32_t arraySize = numNamedCaptures * sizeof(uint32_t);
1180 uint32_t* captureIndices = static_cast<uint32_t*>(js_malloc(arraySize));
1181 if (!captureIndices) {
1182 js::ReportOutOfMemory(cx);
1183 return false;
1184 }
1185
1186 // Populate the capture index array
1187 for (uint32_t i = 0; i < numNamedCaptures; i++) {
1188 captureIndices[i] = namedCaptures->getDenseElement(i * 2 + 1).toInt32();
1189 }
1190
1191 re->numNamedCaptures_ = numNamedCaptures;
1192 re->groupsTemplate_ = templateObject;
1193 re->namedCaptureIndices_ = captureIndices;
1194 js::AddCellMemory(re, arraySize, MemoryUse::RegExpSharedNamedCaptureData);
1195 return true;
1196 }
1197
tierUpTick()1198 void RegExpShared::tierUpTick() {
1199 MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);
1200 if (ticks_ > 0) {
1201 ticks_--;
1202 }
1203 }
1204
markedForTierUp() const1205 bool RegExpShared::markedForTierUp() const {
1206 if (!IsNativeRegExpEnabled()) {
1207 return false;
1208 }
1209 if (kind() != RegExpShared::Kind::RegExp) {
1210 return false;
1211 }
1212 return ticks_ == 0;
1213 }
1214
1215 #else // !ENABLE_NEW_REGEXP
1216
1217 /* static */
compile(JSContext * cx,MutableHandleRegExpShared re,HandleAtom pattern,HandleLinearString input,RegExpShared::CodeKind codeKind)1218 bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
1219 HandleAtom pattern, HandleLinearString input,
1220 RegExpShared::CodeKind codeKind) {
1221 if (!re->ignoreCase() && !StringHasRegExpMetaChars(pattern)) {
1222 re->canStringMatch = true;
1223 }
1224
1225 CompileOptions options(cx);
1226 frontend::DummyTokenStream dummyTokenStream(cx, options);
1227
1228 /* Parse the pattern. The RegExpCompileData is allocated in LifoAlloc and
1229 * will only be live while LifoAllocScope is on stack. */
1230 LifoAllocScope allocScope(&cx->tempLifoAlloc());
1231 irregexp::RegExpCompileData data;
1232 if (!irregexp::ParsePattern(dummyTokenStream, allocScope.alloc(), pattern,
1233 /*match_only =*/false, re->getFlags(), &data)) {
1234 return false;
1235 }
1236
1237 // Add one to account for the whole-match capture.
1238 re->pairCount_ = data.capture_count + 1;
1239
1240 bool forceBytecode = codeKind == RegExpShared::CodeKind::Bytecode;
1241 JitCodeTables tables;
1242 irregexp::RegExpCode code = irregexp::CompilePattern(
1243 cx, allocScope.alloc(), re, &data, input, false /* global() */,
1244 re->ignoreCase(), input->hasLatin1Chars(), /*match_only = */ false,
1245 forceBytecode, re->sticky(), re->unicode(), tables);
1246 if (code.empty()) {
1247 return false;
1248 }
1249
1250 MOZ_ASSERT(!code.jitCode || !code.byteCode);
1251
1252 RegExpCompilation& compilation = re->compilation(input->hasLatin1Chars());
1253 if (code.jitCode) {
1254 // First copy the tables. GC can purge the tables if the RegExpShared
1255 // has no JIT code, so it's important to do this right before setting
1256 // compilation.jitCode (to ensure no purging happens between adding the
1257 // tables and setting the JIT code).
1258 for (size_t i = 0; i < tables.length(); i++) {
1259 if (!re->addTable(std::move(tables[i]))) {
1260 ReportOutOfMemory(cx);
1261 return false;
1262 }
1263 }
1264 compilation.jitCode = code.jitCode;
1265 } else if (code.byteCode) {
1266 MOZ_ASSERT(tables.empty(), "RegExpInterpreter does not use data tables");
1267 compilation.byteCode = code.byteCode;
1268 AddCellMemory(re, compilation.byteCodeLength(),
1269 MemoryUse::RegExpSharedBytecode);
1270 }
1271
1272 return true;
1273 }
1274
1275 /* static */
compileIfNecessary(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)1276 bool RegExpShared::compileIfNecessary(JSContext* cx,
1277 MutableHandleRegExpShared re,
1278 HandleLinearString input,
1279 RegExpShared::CodeKind codeKind) {
1280 if (re->isCompiled(input->hasLatin1Chars(), codeKind)) {
1281 return true;
1282 }
1283 return compile(cx, re, input, codeKind);
1284 }
1285
1286 /* static */
execute(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t start,VectorMatchPairs * matches)1287 RegExpRunStatus RegExpShared::execute(JSContext* cx,
1288 MutableHandleRegExpShared re,
1289 HandleLinearString input, size_t start,
1290 VectorMatchPairs* matches) {
1291 MOZ_ASSERT(matches);
1292 TraceLoggerThread* logger = TraceLoggerForCurrentThread(cx);
1293
1294 /* Compile the code at point-of-use. */
1295 if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
1296 return RegExpRunStatus_Error;
1297 }
1298
1299 /*
1300 * Ensure sufficient memory for output vector.
1301 * No need to initialize it. The RegExp engine fills them in on a match.
1302 */
1303 if (!matches->allocOrExpandArray(re->pairCount())) {
1304 ReportOutOfMemory(cx);
1305 return RegExpRunStatus_Error;
1306 }
1307
1308 size_t length = input->length();
1309
1310 // Reset the Irregexp backtrack stack if it grows during execution.
1311 irregexp::RegExpStackScope stackScope(cx);
1312
1313 if (re->canStringMatch) {
1314 return executeAtom(cx, re, input, start, matches);
1315 }
1316
1317 do {
1318 jit::JitCode* code = re->compilation(input->hasLatin1Chars()).jitCode;
1319 if (!code) {
1320 break;
1321 }
1322
1323 RegExpRunStatus result;
1324 {
1325 AutoTraceLog logJIT(logger, TraceLogger_IrregexpExecute);
1326 AutoCheckCannotGC nogc;
1327 if (input->hasLatin1Chars()) {
1328 const Latin1Char* chars = input->latin1Chars(nogc);
1329 result = irregexp::ExecuteCode(cx, code, chars, start, length, matches,
1330 /*endIndex = */ nullptr);
1331 } else {
1332 const char16_t* chars = input->twoByteChars(nogc);
1333 result = irregexp::ExecuteCode(cx, code, chars, start, length, matches,
1334 /*endIndex = */ nullptr);
1335 }
1336 }
1337
1338 if (result == RegExpRunStatus_Error) {
1339 // An 'Error' result is returned if a stack overflow guard or
1340 // interrupt guard failed. If CheckOverRecursed doesn't throw, break
1341 // out and retry the regexp in the bytecode interpreter, which can
1342 // execute while tolerating future interrupts. Otherwise, if we keep
1343 // getting interrupted we will never finish executing the regexp.
1344 if (!jit::CheckOverRecursed(cx)) {
1345 return RegExpRunStatus_Error;
1346 }
1347 break;
1348 }
1349
1350 if (result == RegExpRunStatus_Success_NotFound) {
1351 return RegExpRunStatus_Success_NotFound;
1352 }
1353
1354 MOZ_ASSERT(result == RegExpRunStatus_Success);
1355
1356 matches->checkAgainst(length);
1357 return RegExpRunStatus_Success;
1358 } while (false);
1359
1360 // Compile bytecode for the RegExp if necessary.
1361 if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Bytecode)) {
1362 return RegExpRunStatus_Error;
1363 }
1364
1365 uint8_t* byteCode = re->compilation(input->hasLatin1Chars()).byteCode;
1366 AutoTraceLog logInterpreter(logger, TraceLogger_IrregexpExecute);
1367
1368 AutoStableStringChars inputChars(cx);
1369 if (!inputChars.init(cx, input)) {
1370 return RegExpRunStatus_Error;
1371 }
1372
1373 RegExpRunStatus result;
1374 if (inputChars.isLatin1()) {
1375 const Latin1Char* chars = inputChars.latin1Range().begin().get();
1376 result = irregexp::InterpretCode(cx, byteCode, chars, start, length,
1377 matches, /*endIndex = */ nullptr);
1378 } else {
1379 const char16_t* chars = inputChars.twoByteRange().begin().get();
1380 result = irregexp::InterpretCode(cx, byteCode, chars, start, length,
1381 matches, /*endIndex = */ nullptr);
1382 }
1383
1384 if (result == RegExpRunStatus_Success) {
1385 matches->checkAgainst(length);
1386 }
1387 return result;
1388 }
1389 #endif // !ENABLE_NEW_REGEXP
1390
1391 /* static */
executeAtom(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t start,VectorMatchPairs * matches)1392 RegExpRunStatus RegExpShared::executeAtom(JSContext* cx,
1393 MutableHandleRegExpShared re,
1394 HandleLinearString input,
1395 size_t start,
1396 VectorMatchPairs* matches) {
1397 MOZ_ASSERT(re->pairCount() == 1);
1398
1399 size_t length = input->length();
1400 size_t searchLength = re->patternAtom()->length();
1401
1402 if (re->sticky()) {
1403 // First part checks size_t overflow.
1404 if (searchLength + start < searchLength || searchLength + start > length) {
1405 return RegExpRunStatus_Success_NotFound;
1406 }
1407 if (!HasSubstringAt(input, re->patternAtom(), start)) {
1408 return RegExpRunStatus_Success_NotFound;
1409 }
1410
1411 (*matches)[0].start = start;
1412 (*matches)[0].limit = start + searchLength;
1413 matches->checkAgainst(length);
1414
1415 return RegExpRunStatus_Success;
1416 }
1417
1418 int res = StringFindPattern(input, re->patternAtom(), start);
1419 if (res == -1) {
1420 return RegExpRunStatus_Success_NotFound;
1421 }
1422
1423 (*matches)[0].start = res;
1424 (*matches)[0].limit = res + searchLength;
1425 matches->checkAgainst(length);
1426
1427 return RegExpRunStatus_Success;
1428 }
1429
sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)1430 size_t RegExpShared::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
1431 size_t n = 0;
1432
1433 for (size_t i = 0; i < ArrayLength(compilationArray); i++) {
1434 const RegExpCompilation& compilation = compilationArray[i];
1435 if (compilation.byteCode) {
1436 n += mallocSizeOf(compilation.byteCode);
1437 }
1438 }
1439
1440 n += tables.sizeOfExcludingThis(mallocSizeOf);
1441 for (size_t i = 0; i < tables.length(); i++) {
1442 n += mallocSizeOf(tables[i].get());
1443 }
1444
1445 return n;
1446 }
1447
1448 /* RegExpRealm */
1449
RegExpRealm()1450 RegExpRealm::RegExpRealm()
1451 : matchResultTemplateObject_(nullptr),
1452 optimizableRegExpPrototypeShape_(nullptr),
1453 optimizableRegExpInstanceShape_(nullptr) {}
1454
createMatchResultTemplateObject(JSContext * cx)1455 ArrayObject* RegExpRealm::createMatchResultTemplateObject(JSContext* cx) {
1456 MOZ_ASSERT(!matchResultTemplateObject_);
1457
1458 /* Create template array object */
1459 RootedArrayObject templateObject(
1460 cx, NewDenseUnallocatedArray(cx, RegExpObject::MaxPairCount, nullptr,
1461 TenuredObject));
1462 if (!templateObject) {
1463 return nullptr;
1464 }
1465
1466 // Create a new group for the template.
1467 Rooted<TaggedProto> proto(cx, templateObject->taggedProto());
1468 ObjectGroup* group = ObjectGroupRealm::makeGroup(
1469 cx, templateObject->realm(), templateObject->getClass(), proto);
1470 if (!group) {
1471 return nullptr;
1472 }
1473 templateObject->setGroup(group);
1474
1475 /* Set dummy index property */
1476 RootedValue index(cx, Int32Value(0));
1477 if (!NativeDefineDataProperty(cx, templateObject, cx->names().index, index,
1478 JSPROP_ENUMERATE)) {
1479 return nullptr;
1480 }
1481
1482 /* Set dummy input property */
1483 RootedValue inputVal(cx, StringValue(cx->runtime()->emptyString));
1484 if (!NativeDefineDataProperty(cx, templateObject, cx->names().input, inputVal,
1485 JSPROP_ENUMERATE)) {
1486 return nullptr;
1487 }
1488
1489 #ifdef ENABLE_NEW_REGEXP
1490 /* Set dummy groups property */
1491 RootedValue groupsVal(cx, UndefinedValue());
1492 if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
1493 groupsVal, JSPROP_ENUMERATE)) {
1494 return nullptr;
1495 }
1496 AddTypePropertyId(cx, templateObject, NameToId(cx->names().groups),
1497 TypeSet::AnyObjectType());
1498
1499 // Make sure that the properties are in the right slots.
1500 # ifdef DEBUG
1501 Shape* groupsShape = templateObject->lastProperty();
1502 MOZ_ASSERT(groupsShape->slot() == MatchResultObjectGroupsSlot &&
1503 groupsShape->propidRef() == NameToId(cx->names().groups));
1504 Shape* inputShape = groupsShape->previous().get();
1505 MOZ_ASSERT(inputShape->slot() == MatchResultObjectInputSlot &&
1506 inputShape->propidRef() == NameToId(cx->names().input));
1507 Shape* indexShape = inputShape->previous().get();
1508 MOZ_ASSERT(indexShape->slot() == MatchResultObjectIndexSlot &&
1509 indexShape->propidRef() == NameToId(cx->names().index));
1510 # endif
1511 #endif
1512
1513 // Make sure type information reflects the indexed properties which might
1514 // be added.
1515 AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::StringType());
1516 AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::UndefinedType());
1517
1518 matchResultTemplateObject_.set(templateObject);
1519
1520 return matchResultTemplateObject_;
1521 }
1522
traceWeak(JSTracer * trc)1523 void RegExpRealm::traceWeak(JSTracer* trc) {
1524 if (matchResultTemplateObject_) {
1525 TraceWeakEdge(trc, &matchResultTemplateObject_,
1526 "RegExpRealm::matchResultTemplateObject_");
1527 }
1528
1529 if (optimizableRegExpPrototypeShape_) {
1530 TraceWeakEdge(trc, &optimizableRegExpPrototypeShape_,
1531 "RegExpRealm::optimizableRegExpPrototypeShape_");
1532 }
1533
1534 if (optimizableRegExpInstanceShape_) {
1535 TraceWeakEdge(trc, &optimizableRegExpInstanceShape_,
1536 "RegExpRealm::optimizableRegExpInstanceShape_");
1537 }
1538 }
1539
get(JSContext * cx,HandleAtom source,RegExpFlags flags)1540 RegExpShared* RegExpZone::get(JSContext* cx, HandleAtom source,
1541 RegExpFlags flags) {
1542 DependentAddPtr<Set> p(cx, set_, Key(source, flags));
1543 if (p) {
1544 return *p;
1545 }
1546
1547 auto shared = Allocate<RegExpShared>(cx);
1548 if (!shared) {
1549 return nullptr;
1550 }
1551
1552 new (shared) RegExpShared(source, flags);
1553
1554 if (!p.add(cx, set_, Key(source, flags), shared)) {
1555 ReportOutOfMemory(cx);
1556 return nullptr;
1557 }
1558
1559 return shared;
1560 }
1561
sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)1562 size_t RegExpZone::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
1563 return set_.sizeOfExcludingThis(mallocSizeOf);
1564 }
1565
RegExpZone(Zone * zone)1566 RegExpZone::RegExpZone(Zone* zone) : set_(zone, zone) {}
1567
1568 /* Functions */
1569
CloneRegExpObject(JSContext * cx,Handle<RegExpObject * > regex)1570 JSObject* js::CloneRegExpObject(JSContext* cx, Handle<RegExpObject*> regex) {
1571 // Unlike RegExpAlloc, all clones must use |regex|'s group.
1572 RootedObjectGroup group(cx, regex->group());
1573 Rooted<RegExpObject*> clone(
1574 cx, NewObjectWithGroup<RegExpObject>(cx, group, GenericObject));
1575 if (!clone) {
1576 return nullptr;
1577 }
1578 clone->initPrivate(nullptr);
1579
1580 if (!EmptyShape::ensureInitialCustomShape<RegExpObject>(cx, clone)) {
1581 return nullptr;
1582 }
1583
1584 RegExpShared* shared = RegExpObject::getShared(cx, regex);
1585 if (!shared) {
1586 return nullptr;
1587 }
1588
1589 clone->initAndZeroLastIndex(shared->getSource(), shared->getFlags(), cx);
1590 clone->setShared(*shared);
1591
1592 return clone;
1593 }
1594
1595 template <typename CharT>
ParseRegExpFlags(const CharT * chars,size_t length,RegExpFlags * flagsOut,char16_t * invalidFlag)1596 static bool ParseRegExpFlags(const CharT* chars, size_t length,
1597 RegExpFlags* flagsOut, char16_t* invalidFlag) {
1598 *flagsOut = RegExpFlag::NoFlags;
1599
1600 for (size_t i = 0; i < length; i++) {
1601 uint8_t flag;
1602 switch (chars[i]) {
1603 case 'g':
1604 flag = RegExpFlag::Global;
1605 break;
1606 case 'i':
1607 flag = RegExpFlag::IgnoreCase;
1608 break;
1609 case 'm':
1610 flag = RegExpFlag::Multiline;
1611 break;
1612 case 's':
1613 flag = RegExpFlag::DotAll;
1614 break;
1615 case 'u':
1616 flag = RegExpFlag::Unicode;
1617 break;
1618 case 'y':
1619 flag = RegExpFlag::Sticky;
1620 break;
1621 default:
1622 *invalidFlag = chars[i];
1623 return false;
1624 }
1625 if (*flagsOut & flag) {
1626 *invalidFlag = chars[i];
1627 return false;
1628 }
1629 *flagsOut |= flag;
1630 }
1631
1632 return true;
1633 }
1634
ParseRegExpFlags(JSContext * cx,JSString * flagStr,RegExpFlags * flagsOut)1635 bool js::ParseRegExpFlags(JSContext* cx, JSString* flagStr,
1636 RegExpFlags* flagsOut) {
1637 JSLinearString* linear = flagStr->ensureLinear(cx);
1638 if (!linear) {
1639 return false;
1640 }
1641
1642 size_t len = linear->length();
1643
1644 bool ok;
1645 char16_t invalidFlag;
1646 if (linear->hasLatin1Chars()) {
1647 AutoCheckCannotGC nogc;
1648 ok = ::ParseRegExpFlags(linear->latin1Chars(nogc), len, flagsOut,
1649 &invalidFlag);
1650 } else {
1651 AutoCheckCannotGC nogc;
1652 ok = ::ParseRegExpFlags(linear->twoByteChars(nogc), len, flagsOut,
1653 &invalidFlag);
1654 }
1655
1656 if (!ok) {
1657 TwoByteChars range(&invalidFlag, 1);
1658 UniqueChars utf8(JS::CharsToNewUTF8CharsZ(cx, range).c_str());
1659 if (!utf8) {
1660 return false;
1661 }
1662 JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
1663 JSMSG_BAD_REGEXP_FLAG, utf8.get());
1664 return false;
1665 }
1666
1667 return true;
1668 }
1669
1670 template <XDRMode mode>
XDRScriptRegExpObject(XDRState<mode> * xdr,MutableHandle<RegExpObject * > objp)1671 XDRResult js::XDRScriptRegExpObject(XDRState<mode>* xdr,
1672 MutableHandle<RegExpObject*> objp) {
1673 /* NB: Keep this in sync with CloneScriptRegExpObject. */
1674
1675 RootedAtom source(xdr->cx());
1676 uint8_t flags = 0;
1677
1678 if (mode == XDR_ENCODE) {
1679 MOZ_ASSERT(objp);
1680 RegExpObject& reobj = *objp;
1681 source = reobj.getSource();
1682 flags = reobj.getFlags().value();
1683 }
1684 MOZ_TRY(XDRAtom(xdr, &source));
1685 MOZ_TRY(xdr->codeUint8(&flags));
1686 if (mode == XDR_DECODE) {
1687 RegExpObject* reobj = RegExpObject::create(
1688 xdr->cx(), source, RegExpFlags(flags), TenuredObject);
1689 if (!reobj) {
1690 return xdr->fail(JS::TranscodeResult_Throw);
1691 }
1692
1693 objp.set(reobj);
1694 }
1695 return Ok();
1696 }
1697
1698 template XDRResult js::XDRScriptRegExpObject(XDRState<XDR_ENCODE>* xdr,
1699 MutableHandle<RegExpObject*> objp);
1700
1701 template XDRResult js::XDRScriptRegExpObject(XDRState<XDR_DECODE>* xdr,
1702 MutableHandle<RegExpObject*> objp);
1703
CloneScriptRegExpObject(JSContext * cx,RegExpObject & reobj)1704 JSObject* js::CloneScriptRegExpObject(JSContext* cx, RegExpObject& reobj) {
1705 /* NB: Keep this in sync with XDRScriptRegExpObject. */
1706
1707 RootedAtom source(cx, reobj.getSource());
1708 cx->markAtom(source);
1709
1710 return RegExpObject::create(cx, source, reobj.getFlags(), TenuredObject);
1711 }
1712
RegExpToSharedNonInline(JSContext * cx,HandleObject obj)1713 JS_FRIEND_API RegExpShared* js::RegExpToSharedNonInline(JSContext* cx,
1714 HandleObject obj) {
1715 return RegExpToShared(cx, obj);
1716 }
1717
size(mozilla::MallocSizeOf mallocSizeOf) const1718 JS::ubi::Node::Size JS::ubi::Concrete<RegExpShared>::size(
1719 mozilla::MallocSizeOf mallocSizeOf) const {
1720 return js::gc::Arena::thingSize(gc::AllocKind::REGEXP_SHARED) +
1721 get().sizeOfExcludingThis(mallocSizeOf);
1722 }
1723
1724 /*
1725 * Regular Expressions.
1726 */
NewRegExpObject(JSContext * cx,const char * bytes,size_t length,RegExpFlags flags)1727 JS_PUBLIC_API JSObject* JS::NewRegExpObject(JSContext* cx, const char* bytes,
1728 size_t length, RegExpFlags flags) {
1729 AssertHeapIsIdle();
1730 CHECK_THREAD(cx);
1731
1732 UniqueTwoByteChars chars(InflateString(cx, bytes, length));
1733 if (!chars) {
1734 return nullptr;
1735 }
1736
1737 return RegExpObject::create(cx, chars.get(), length, flags, GenericObject);
1738 }
1739
NewUCRegExpObject(JSContext * cx,const char16_t * chars,size_t length,RegExpFlags flags)1740 JS_PUBLIC_API JSObject* JS::NewUCRegExpObject(JSContext* cx,
1741 const char16_t* chars,
1742 size_t length,
1743 RegExpFlags flags) {
1744 AssertHeapIsIdle();
1745 CHECK_THREAD(cx);
1746
1747 return RegExpObject::create(cx, chars, length, flags, GenericObject);
1748 }
1749
SetRegExpInput(JSContext * cx,HandleObject obj,HandleString input)1750 JS_PUBLIC_API bool JS::SetRegExpInput(JSContext* cx, HandleObject obj,
1751 HandleString input) {
1752 AssertHeapIsIdle();
1753 CHECK_THREAD(cx);
1754 cx->check(input);
1755
1756 Handle<GlobalObject*> global = obj.as<GlobalObject>();
1757 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1758 if (!res) {
1759 return false;
1760 }
1761
1762 res->reset(input);
1763 return true;
1764 }
1765
ClearRegExpStatics(JSContext * cx,HandleObject obj)1766 JS_PUBLIC_API bool JS::ClearRegExpStatics(JSContext* cx, HandleObject obj) {
1767 AssertHeapIsIdle();
1768 CHECK_THREAD(cx);
1769 MOZ_ASSERT(obj);
1770
1771 Handle<GlobalObject*> global = obj.as<GlobalObject>();
1772 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1773 if (!res) {
1774 return false;
1775 }
1776
1777 res->clear();
1778 return true;
1779 }
1780
ExecuteRegExp(JSContext * cx,HandleObject obj,HandleObject reobj,char16_t * chars,size_t length,size_t * indexp,bool test,MutableHandleValue rval)1781 JS_PUBLIC_API bool JS::ExecuteRegExp(JSContext* cx, HandleObject obj,
1782 HandleObject reobj, char16_t* chars,
1783 size_t length, size_t* indexp, bool test,
1784 MutableHandleValue rval) {
1785 AssertHeapIsIdle();
1786 CHECK_THREAD(cx);
1787
1788 Handle<GlobalObject*> global = obj.as<GlobalObject>();
1789 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1790 if (!res) {
1791 return false;
1792 }
1793
1794 RootedLinearString input(cx, NewStringCopyN<CanGC>(cx, chars, length));
1795 if (!input) {
1796 return false;
1797 }
1798
1799 return ExecuteRegExpLegacy(cx, res, reobj.as<RegExpObject>(), input, indexp,
1800 test, rval);
1801 }
1802
ExecuteRegExpNoStatics(JSContext * cx,HandleObject obj,const char16_t * chars,size_t length,size_t * indexp,bool test,MutableHandleValue rval)1803 JS_PUBLIC_API bool JS::ExecuteRegExpNoStatics(JSContext* cx, HandleObject obj,
1804 const char16_t* chars,
1805 size_t length, size_t* indexp,
1806 bool test,
1807 MutableHandleValue rval) {
1808 AssertHeapIsIdle();
1809 CHECK_THREAD(cx);
1810
1811 RootedLinearString input(cx, NewStringCopyN<CanGC>(cx, chars, length));
1812 if (!input) {
1813 return false;
1814 }
1815
1816 return ExecuteRegExpLegacy(cx, nullptr, obj.as<RegExpObject>(), input, indexp,
1817 test, rval);
1818 }
1819
ObjectIsRegExp(JSContext * cx,HandleObject obj,bool * isRegExp)1820 JS_PUBLIC_API bool JS::ObjectIsRegExp(JSContext* cx, HandleObject obj,
1821 bool* isRegExp) {
1822 cx->check(obj);
1823
1824 ESClass cls;
1825 if (!GetBuiltinClass(cx, obj, &cls)) {
1826 return false;
1827 }
1828
1829 *isRegExp = cls == ESClass::RegExp;
1830 return true;
1831 }
1832
GetRegExpFlags(JSContext * cx,HandleObject obj)1833 JS_PUBLIC_API RegExpFlags JS::GetRegExpFlags(JSContext* cx, HandleObject obj) {
1834 AssertHeapIsIdle();
1835 CHECK_THREAD(cx);
1836
1837 RegExpShared* shared = RegExpToShared(cx, obj);
1838 if (!shared) {
1839 return RegExpFlag::NoFlags;
1840 }
1841 return shared->getFlags();
1842 }
1843
GetRegExpSource(JSContext * cx,HandleObject obj)1844 JS_PUBLIC_API JSString* JS::GetRegExpSource(JSContext* cx, HandleObject obj) {
1845 AssertHeapIsIdle();
1846 CHECK_THREAD(cx);
1847
1848 RegExpShared* shared = RegExpToShared(cx, obj);
1849 if (!shared) {
1850 return nullptr;
1851 }
1852 return shared->getSource();
1853 }
1854
CheckRegExpSyntax(JSContext * cx,const char16_t * chars,size_t length,RegExpFlags flags,MutableHandleValue error)1855 JS_PUBLIC_API bool JS::CheckRegExpSyntax(JSContext* cx, const char16_t* chars,
1856 size_t length, RegExpFlags flags,
1857 MutableHandleValue error) {
1858 AssertHeapIsIdle();
1859 CHECK_THREAD(cx);
1860
1861 CompileOptions dummyOptions(cx);
1862 frontend::DummyTokenStream dummyTokenStream(cx, dummyOptions);
1863
1864 LifoAllocScope allocScope(&cx->tempLifoAlloc());
1865
1866 mozilla::Range<const char16_t> source(chars, length);
1867 #ifdef ENABLE_NEW_REGEXP
1868 bool success =
1869 irregexp::CheckPatternSyntax(cx, dummyTokenStream, source, flags);
1870 #else
1871 bool success = irregexp::ParsePatternSyntax(
1872 dummyTokenStream, allocScope.alloc(), source, flags.unicode());
1873 #endif
1874 error.set(UndefinedValue());
1875 if (!success) {
1876 // We can fail because of OOM or over-recursion even if the syntax is valid.
1877 if (cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed()) {
1878 return false;
1879 }
1880 if (!cx->getPendingException(error)) {
1881 return false;
1882 }
1883 cx->clearPendingException();
1884 }
1885 return true;
1886 }
1887