1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "vm/RegExpObject.h"
8 
9 #include "mozilla/MemoryReporting.h"
10 #include "mozilla/PodOperations.h"
11 
12 #include <algorithm>
13 #include <type_traits>
14 
15 #include "builtin/RegExp.h"
16 #include "builtin/SelfHostingDefines.h"  // REGEXP_*_FLAG
17 #include "frontend/TokenStream.h"
18 #include "gc/HashUtil.h"
19 #ifndef ENABLE_NEW_REGEXP
20 #  ifdef DEBUG
21 #    include "irregexp/RegExpBytecode.h"
22 #  endif
23 #  include "irregexp/RegExpParser.h"
24 #endif
25 #include "jit/VMFunctions.h"
26 #include "js/RegExp.h"
27 #include "js/RegExpFlags.h"  // JS::RegExpFlags
28 #include "js/StableStringChars.h"
29 #ifdef ENABLE_NEW_REGEXP
30 #  include "new-regexp/regexp-stack.h"
31 #  include "new-regexp/RegExpAPI.h"
32 #endif
33 #include "util/StringBuffer.h"
34 #include "vm/MatchPairs.h"
35 #include "vm/RegExpStatics.h"
36 #include "vm/StringType.h"
37 #include "vm/TraceLogging.h"
38 #ifdef DEBUG
39 #  include "util/Unicode.h"
40 #endif
41 #include "vm/Xdr.h"
42 
43 #include "vm/JSObject-inl.h"
44 #include "vm/NativeObject-inl.h"
45 #include "vm/Shape-inl.h"
46 
47 using namespace js;
48 
49 using JS::AutoStableStringChars;
50 using JS::CompileOptions;
51 using JS::RegExpFlag;
52 using JS::RegExpFlags;
53 using mozilla::ArrayLength;
54 using mozilla::DebugOnly;
55 using mozilla::PodCopy;
56 
57 using JS::AutoCheckCannotGC;
58 
// Compile-time checks that each JS::RegExpFlag bit has the same value as the
// corresponding REGEXP_*_FLAG constant (see builtin/SelfHostingDefines.h), so
// that self-hosted JS and C++ agree on the flag encoding.
59 static_assert(RegExpFlag::Global == REGEXP_GLOBAL_FLAG,
60               "self-hosted JS and /g flag bits must agree");
61 static_assert(RegExpFlag::IgnoreCase == REGEXP_IGNORECASE_FLAG,
62               "self-hosted JS and /i flag bits must agree");
63 static_assert(RegExpFlag::Multiline == REGEXP_MULTILINE_FLAG,
64               "self-hosted JS and /m flag bits must agree");
65 static_assert(RegExpFlag::DotAll == REGEXP_DOTALL_FLAG,
66               "self-hosted JS and /s flag bits must agree");
67 static_assert(RegExpFlag::Unicode == REGEXP_UNICODE_FLAG,
68               "self-hosted JS and /u flag bits must agree");
69 static_assert(RegExpFlag::Sticky == REGEXP_STICKY_FLAG,
70               "self-hosted JS and /y flag bits must agree");
71 
RegExpAlloc(JSContext * cx,NewObjectKind newKind,HandleObject proto)72 RegExpObject* js::RegExpAlloc(JSContext* cx, NewObjectKind newKind,
73                               HandleObject proto /* = nullptr */) {
74   Rooted<RegExpObject*> regexp(
75       cx, NewObjectWithClassProtoAndKind<RegExpObject>(cx, proto, newKind));
76   if (!regexp) {
77     return nullptr;
78   }
79 
80   regexp->initPrivate(nullptr);
81 
82   if (!EmptyShape::ensureInitialCustomShape<RegExpObject>(cx, regexp)) {
83     return nullptr;
84   }
85 
86   MOZ_ASSERT(regexp->lookupPure(cx->names().lastIndex)->slot() ==
87              RegExpObject::lastIndexSlot());
88 
89   return regexp;
90 }
91 
92 /* MatchPairs */
93 
initArrayFrom(VectorMatchPairs & copyFrom)94 bool VectorMatchPairs::initArrayFrom(VectorMatchPairs& copyFrom) {
95   MOZ_ASSERT(copyFrom.pairCount() > 0);
96 
97   if (!allocOrExpandArray(copyFrom.pairCount())) {
98     return false;
99   }
100 
101   PodCopy(pairs_, copyFrom.pairs_, pairCount_);
102 
103   return true;
104 }
105 
allocOrExpandArray(size_t pairCount)106 bool VectorMatchPairs::allocOrExpandArray(size_t pairCount) {
107   if (!vec_.resizeUninitialized(pairCount)) {
108     return false;
109   }
110 
111   pairs_ = &vec_[0];
112   pairCount_ = pairCount;
113   return true;
114 }
115 
116 /* RegExpObject */
117 
118 /* static */
getShared(JSContext * cx,Handle<RegExpObject * > regexp)119 RegExpShared* RegExpObject::getShared(JSContext* cx,
120                                       Handle<RegExpObject*> regexp) {
121   if (regexp->hasShared()) {
122     return regexp->sharedRef();
123   }
124 
125   return createShared(cx, regexp);
126 }
127 
128 /* static */
isOriginalFlagGetter(JSNative native,RegExpFlags * mask)129 bool RegExpObject::isOriginalFlagGetter(JSNative native, RegExpFlags* mask) {
130   if (native == regexp_global) {
131     *mask = RegExpFlag::Global;
132     return true;
133   }
134   if (native == regexp_ignoreCase) {
135     *mask = RegExpFlag::IgnoreCase;
136     return true;
137   }
138   if (native == regexp_multiline) {
139     *mask = RegExpFlag::Multiline;
140     return true;
141   }
142   if (native == regexp_dotAll) {
143     *mask = RegExpFlag::DotAll;
144     return true;
145   }
146   if (native == regexp_sticky) {
147     *mask = RegExpFlag::Sticky;
148     return true;
149   }
150   if (native == regexp_unicode) {
151     *mask = RegExpFlag::Unicode;
152     return true;
153   }
154 
155   return false;
156 }
157 
158 /* static */
trace(JSTracer * trc,JSObject * obj)159 void RegExpObject::trace(JSTracer* trc, JSObject* obj) {
160   obj->as<RegExpObject>().trace(trc);
161 }
162 
IsMarkingTrace(JSTracer * trc)163 static inline bool IsMarkingTrace(JSTracer* trc) {
164   // Determine whether tracing is happening during normal marking.  We need to
165   // test all the following conditions, since:
166   //
167   //   1. During TraceRuntime, RuntimeHeapIsBusy() is true, but the
168   //      tracer might not be a marking tracer.
169   //   2. When a write barrier executes, isMarkingTracer is true, but
170   //      RuntimeHeapIsBusy() will be false.
171 
172   return JS::RuntimeHeapIsCollecting() && trc->isMarkingTracer();
173 }
174 
trace(JSTracer * trc)175 void RegExpObject::trace(JSTracer* trc) {
176   TraceNullableEdge(trc, &sharedRef(), "RegExpObject shared");
177 }
178 
// Class hooks for RegExpObject. Only the trace hook is provided
// (RegExpObject::trace); every other hook is left null.
179 static const JSClassOps RegExpObjectClassOps = {
180     nullptr,              // addProperty
181     nullptr,              // delProperty
182     nullptr,              // enumerate
183     nullptr,              // newEnumerate
184     nullptr,              // resolve
185     nullptr,              // mayResolve
186     nullptr,              // finalize
187     nullptr,              // call
188     nullptr,              // hasInstance
189     nullptr,              // construct
190     RegExpObject::trace,  // trace
191 };
192 
// ClassSpec referenced by both RegExpObject::class_ and
// RegExpObject::protoClass_. It names js::regexp_construct as the constructor
// native, a generic prototype creator for RegExpObject, and the
// regexp_static_props / regexp_methods / regexp_properties tables. The third
// entry is left null.
193 static const ClassSpec RegExpObjectClassSpec = {
194     GenericCreateConstructor<js::regexp_construct, 2, gc::AllocKind::FUNCTION>,
195     GenericCreatePrototype<RegExpObject>,
196     nullptr,
197     js::regexp_static_props,
198     js::regexp_methods,
199     js::regexp_properties};
200 
// JSClass for RegExp instances: named js_RegExp_str, with a private pointer,
// RegExpObject::RESERVED_SLOTS reserved slots and a cached JSProto_RegExp
// prototype. Uses the class ops and class spec defined above.
201 const JSClass RegExpObject::class_ = {
202     js_RegExp_str,
203     JSCLASS_HAS_PRIVATE |
204         JSCLASS_HAS_RESERVED_SLOTS(RegExpObject::RESERVED_SLOTS) |
205         JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
206     &RegExpObjectClassOps, &RegExpObjectClassSpec};
207 
// JSClass for the RegExp prototype object. It is named js_Object_str and has
// no class ops, but shares the RegExp class spec and cached-proto key.
208 const JSClass RegExpObject::protoClass_ = {
209     js_Object_str, JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp), JS_NULL_CLASS_OPS,
210     &RegExpObjectClassSpec};
211 
212 template <typename CharT>
create(JSContext * cx,const CharT * chars,size_t length,RegExpFlags flags,frontend::TokenStreamAnyChars & tokenStream,NewObjectKind newKind)213 RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
214                                    size_t length, RegExpFlags flags,
215                                    frontend::TokenStreamAnyChars& tokenStream,
216                                    NewObjectKind newKind) {
217   static_assert(std::is_same_v<CharT, char16_t>,
218                 "this code may need updating if/when CharT encodes UTF-8");
219 
220   RootedAtom source(cx, AtomizeChars(cx, chars, length));
221   if (!source) {
222     return nullptr;
223   }
224 
225   return create(cx, source, flags, tokenStream, newKind);
226 }
227 
228 template RegExpObject* RegExpObject::create(
229     JSContext* cx, const char16_t* chars, size_t length, RegExpFlags flags,
230     frontend::TokenStreamAnyChars& tokenStream, NewObjectKind newKind);
231 
232 template <typename CharT>
create(JSContext * cx,const CharT * chars,size_t length,RegExpFlags flags,NewObjectKind newKind)233 RegExpObject* RegExpObject::create(JSContext* cx, const CharT* chars,
234                                    size_t length, RegExpFlags flags,
235                                    NewObjectKind newKind) {
236   static_assert(std::is_same_v<CharT, char16_t>,
237                 "this code may need updating if/when CharT encodes UTF-8");
238 
239   RootedAtom source(cx, AtomizeChars(cx, chars, length));
240   if (!source) {
241     return nullptr;
242   }
243 
244   return create(cx, source, flags, newKind);
245 }
246 
247 template RegExpObject* RegExpObject::create(JSContext* cx,
248                                             const char16_t* chars,
249                                             size_t length, RegExpFlags flags,
250                                             NewObjectKind newKind);
251 
create(JSContext * cx,HandleAtom source,RegExpFlags flags,frontend::TokenStreamAnyChars & tokenStream,NewObjectKind newKind)252 RegExpObject* RegExpObject::create(JSContext* cx, HandleAtom source,
253                                    RegExpFlags flags,
254                                    frontend::TokenStreamAnyChars& tokenStream,
255                                    NewObjectKind newKind) {
256   LifoAllocScope allocScope(&cx->tempLifoAlloc());
257 #ifdef ENABLE_NEW_REGEXP
258   if (!irregexp::CheckPatternSyntax(cx, tokenStream, source, flags)) {
259     return nullptr;
260   }
261 #else
262   if (!irregexp::ParsePatternSyntax(tokenStream, allocScope.alloc(), source,
263                                     flags.unicode())) {
264     return nullptr;
265   }
266 #endif
267   return createSyntaxChecked(cx, source, flags, newKind);
268 }
269 
createSyntaxChecked(JSContext * cx,const char16_t * chars,size_t length,RegExpFlags flags,NewObjectKind newKind)270 RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
271                                                 const char16_t* chars,
272                                                 size_t length,
273                                                 RegExpFlags flags,
274                                                 NewObjectKind newKind) {
275   RootedAtom source(cx, AtomizeChars(cx, chars, length));
276   if (!source) {
277     return nullptr;
278   }
279 
280   return createSyntaxChecked(cx, source, flags, newKind);
281 }
282 
createSyntaxChecked(JSContext * cx,HandleAtom source,RegExpFlags flags,NewObjectKind newKind)283 RegExpObject* RegExpObject::createSyntaxChecked(JSContext* cx,
284                                                 HandleAtom source,
285                                                 RegExpFlags flags,
286                                                 NewObjectKind newKind) {
287   Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, newKind));
288   if (!regexp) {
289     return nullptr;
290   }
291 
292   regexp->initAndZeroLastIndex(source, flags, cx);
293 
294   return regexp;
295 }
296 
create(JSContext * cx,HandleAtom source,RegExpFlags flags,NewObjectKind newKind)297 RegExpObject* RegExpObject::create(JSContext* cx, HandleAtom source,
298                                    RegExpFlags flags, NewObjectKind newKind) {
299   CompileOptions dummyOptions(cx);
300   frontend::DummyTokenStream dummyTokenStream(cx, dummyOptions);
301 
302   LifoAllocScope allocScope(&cx->tempLifoAlloc());
303 #ifdef ENABLE_NEW_REGEXP
304   if (!irregexp::CheckPatternSyntax(cx, dummyTokenStream, source, flags)) {
305     return nullptr;
306   }
307 #else
308   if (!irregexp::ParsePatternSyntax(dummyTokenStream, allocScope.alloc(),
309                                     source, flags.unicode())) {
310     return nullptr;
311   }
312 #endif
313 
314   Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, newKind));
315   if (!regexp) {
316     return nullptr;
317   }
318 
319   regexp->initAndZeroLastIndex(source, flags, cx);
320 
321   return regexp;
322 }
323 
324 /* static */
createShared(JSContext * cx,Handle<RegExpObject * > regexp)325 RegExpShared* RegExpObject::createShared(JSContext* cx,
326                                          Handle<RegExpObject*> regexp) {
327   MOZ_ASSERT(!regexp->hasShared());
328   RootedAtom source(cx, regexp->getSource());
329   RegExpShared* shared =
330       cx->zone()->regExps().get(cx, source, regexp->getFlags());
331   if (!shared) {
332     return nullptr;
333   }
334 
335   regexp->setShared(*shared);
336   return shared;
337 }
338 
assignInitialShape(JSContext * cx,Handle<RegExpObject * > self)339 Shape* RegExpObject::assignInitialShape(JSContext* cx,
340                                         Handle<RegExpObject*> self) {
341   MOZ_ASSERT(self->empty());
342 
343   static_assert(LAST_INDEX_SLOT == 0);
344 
345   /* The lastIndex property alone is writable but non-configurable. */
346   return NativeObject::addDataProperty(cx, self, cx->names().lastIndex,
347                                        LAST_INDEX_SLOT, JSPROP_PERMANENT);
348 }
349 
initIgnoringLastIndex(JSAtom * source,RegExpFlags flags)350 void RegExpObject::initIgnoringLastIndex(JSAtom* source, RegExpFlags flags) {
351   // If this is a re-initialization with an existing RegExpShared, 'flags'
352   // may not match getShared()->flags, so forget the RegExpShared.
353   sharedRef() = nullptr;
354 
355   setSource(source);
356   setFlags(flags);
357 }
358 
initAndZeroLastIndex(JSAtom * source,RegExpFlags flags,JSContext * cx)359 void RegExpObject::initAndZeroLastIndex(JSAtom* source, RegExpFlags flags,
360                                         JSContext* cx) {
361   initIgnoringLastIndex(source, flags);
362   zeroLastIndex(cx);
363 }
364 
IsRegExpLineTerminator(const JS::Latin1Char c)365 static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const JS::Latin1Char c) {
366   return c == '\n' || c == '\r';
367 }
368 
IsRegExpLineTerminator(const char16_t c)369 static MOZ_ALWAYS_INLINE bool IsRegExpLineTerminator(const char16_t c) {
370   return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
371 }
372 
AppendEscapedLineTerminator(StringBuffer & sb,const JS::Latin1Char c)373 static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(
374     StringBuffer& sb, const JS::Latin1Char c) {
375   switch (c) {
376     case '\n':
377       if (!sb.append('n')) {
378         return false;
379       }
380       break;
381     case '\r':
382       if (!sb.append('r')) {
383         return false;
384       }
385       break;
386     default:
387       MOZ_CRASH("Bad LineTerminator");
388   }
389   return true;
390 }
391 
AppendEscapedLineTerminator(StringBuffer & sb,const char16_t c)392 static MOZ_ALWAYS_INLINE bool AppendEscapedLineTerminator(StringBuffer& sb,
393                                                           const char16_t c) {
394   switch (c) {
395     case '\n':
396       if (!sb.append('n')) {
397         return false;
398       }
399       break;
400     case '\r':
401       if (!sb.append('r')) {
402         return false;
403       }
404       break;
405     case 0x2028:
406       if (!sb.append("u2028")) {
407         return false;
408       }
409       break;
410     case 0x2029:
411       if (!sb.append("u2029")) {
412         return false;
413       }
414       break;
415     default:
416       MOZ_CRASH("Bad LineTerminator");
417   }
418   return true;
419 }
420 
421 template <typename CharT>
SetupBuffer(StringBuffer & sb,const CharT * oldChars,size_t oldLen,const CharT * it)422 static MOZ_ALWAYS_INLINE bool SetupBuffer(StringBuffer& sb,
423                                           const CharT* oldChars, size_t oldLen,
424                                           const CharT* it) {
425   if constexpr (std::is_same_v<CharT, char16_t>) {
426     if (!sb.ensureTwoByteChars()) {
427       return false;
428     }
429   }
430 
431   if (!sb.reserve(oldLen + 1)) {
432     return false;
433   }
434 
435   sb.infallibleAppend(oldChars, size_t(it - oldChars));
436   return true;
437 }
438 
439 // Note: leaves the string buffer empty if no escaping need be performed.
440 template <typename CharT>
EscapeRegExpPattern(StringBuffer & sb,const CharT * oldChars,size_t oldLen)441 static bool EscapeRegExpPattern(StringBuffer& sb, const CharT* oldChars,
442                                 size_t oldLen) {
443   bool inBrackets = false;
444   bool previousCharacterWasBackslash = false;
445 
446   for (const CharT* it = oldChars; it < oldChars + oldLen; ++it) {
447     CharT ch = *it;
448     if (!previousCharacterWasBackslash) {
449       if (inBrackets) {
450         if (ch == ']') {
451           inBrackets = false;
452         }
453       } else if (ch == '/') {
454         // There's a forward slash that needs escaping.
455         if (sb.empty()) {
456           // This is the first char we've seen that needs escaping,
457           // copy everything up to this point.
458           if (!SetupBuffer(sb, oldChars, oldLen, it)) {
459             return false;
460           }
461         }
462         if (!sb.append('\\')) {
463           return false;
464         }
465       } else if (ch == '[') {
466         inBrackets = true;
467       }
468     }
469 
470     if (IsRegExpLineTerminator(ch)) {
471       // There's LineTerminator that needs escaping.
472       if (sb.empty()) {
473         // This is the first char we've seen that needs escaping,
474         // copy everything up to this point.
475         if (!SetupBuffer(sb, oldChars, oldLen, it)) {
476           return false;
477         }
478       }
479       if (!previousCharacterWasBackslash) {
480         if (!sb.append('\\')) {
481           return false;
482         }
483       }
484       if (!AppendEscapedLineTerminator(sb, ch)) {
485         return false;
486       }
487     } else if (!sb.empty()) {
488       if (!sb.append(ch)) {
489         return false;
490       }
491     }
492 
493     if (previousCharacterWasBackslash) {
494       previousCharacterWasBackslash = false;
495     } else if (ch == '\\') {
496       previousCharacterWasBackslash = true;
497     }
498   }
499 
500   return true;
501 }
502 
503 // ES6 draft rev32 21.2.3.2.4.
EscapeRegExpPattern(JSContext * cx,HandleAtom src)504 JSAtom* js::EscapeRegExpPattern(JSContext* cx, HandleAtom src) {
505   // Step 2.
506   if (src->length() == 0) {
507     return cx->names().emptyRegExp;
508   }
509 
510   // We may never need to use |sb|. Start using it lazily.
511   StringBuffer sb(cx);
512 
513   if (src->hasLatin1Chars()) {
514     JS::AutoCheckCannotGC nogc;
515     if (!::EscapeRegExpPattern(sb, src->latin1Chars(nogc), src->length())) {
516       return nullptr;
517     }
518   } else {
519     JS::AutoCheckCannotGC nogc;
520     if (!::EscapeRegExpPattern(sb, src->twoByteChars(nogc), src->length())) {
521       return nullptr;
522     }
523   }
524 
525   // Step 3.
526   return sb.empty() ? src : sb.finishAtom();
527 }
528 
529 // ES6 draft rev32 21.2.5.14. Optimized for RegExpObject.
toString(JSContext * cx) const530 JSLinearString* RegExpObject::toString(JSContext* cx) const {
531   // Steps 3-4.
532   RootedAtom src(cx, getSource());
533   if (!src) {
534     return nullptr;
535   }
536   RootedAtom escapedSrc(cx, EscapeRegExpPattern(cx, src));
537 
538   // Step 7.
539   JSStringBuilder sb(cx);
540   size_t len = escapedSrc->length();
541   if (!sb.reserve(len + 2)) {
542     return nullptr;
543   }
544   sb.infallibleAppend('/');
545   if (!sb.append(escapedSrc)) {
546     return nullptr;
547   }
548   sb.infallibleAppend('/');
549 
550   // Steps 5-7.
551   if (global() && !sb.append('g')) {
552     return nullptr;
553   }
554   if (ignoreCase() && !sb.append('i')) {
555     return nullptr;
556   }
557   if (multiline() && !sb.append('m')) {
558     return nullptr;
559   }
560   if (dotAll() && !sb.append('s')) {
561     return nullptr;
562   }
563   if (unicode() && !sb.append('u')) {
564     return nullptr;
565   }
566   if (sticky() && !sb.append('y')) {
567     return nullptr;
568   }
569 
570   return sb.finishString();
571 }
572 
573 #if defined(DEBUG) && !defined(ENABLE_NEW_REGEXP)
574 /* static */
// Debug-only helper that prints a disassembly of |re|'s interpreter bytecode
// to stderr. The regexp is first compiled to bytecode for |input| if
// necessary; the compilation dumped is the one selected by whether |input|
// has Latin-1 characters. Returns false if that compilation fails, true
// otherwise. Crashes on an unrecognised opcode.
dumpBytecode(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input)575 bool RegExpShared::dumpBytecode(JSContext* cx, MutableHandleRegExpShared re,
576                                 HandleLinearString input) {
577   if (!RegExpShared::compileIfNecessary(cx, re, input, CodeKind::Bytecode)) {
578     return false;
579   }
580 
581   const uint8_t* byteCode = re->compilation(input->hasLatin1Chars()).byteCode;
582   const uint8_t* pc = byteCode;
583 
  // Reads a 32-bit value at |pc|, which must be 4-byte aligned.
584   auto Load32Aligned = [](const uint8_t* pc) -> int32_t {
585     MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 3) == 0);
586     return *reinterpret_cast<const int32_t*>(pc);
587   };
588 
  // Reads an unsigned 16-bit value at |pc|, which must be 2-byte aligned, and
  // widens it to int32_t.
589   auto Load16Aligned = [](const uint8_t* pc) -> int32_t {
590     MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 1) == 0);
591     return *reinterpret_cast<const uint16_t*>(pc);
592   };
593 
  // The first 32-bit word is printed as the register count; instructions are
  // decoded starting right after it.
594   int32_t numRegisters = Load32Aligned(pc);
595   fprintf(stderr, "numRegisters: %d\n", numRegisters);
596   pc += 4;
597 
598   fprintf(stderr, "loc     op\n");
599   fprintf(stderr, "-----   --\n");
600 
  // Prints |text| to stderr one character at a time, lower-cased.
601   auto DumpLower = [](const char* text) {
602     while (*text) {
603       fprintf(stderr, "%c", unicode::ToLowerCase(*text));
604       text++;
605     }
606   };
607 
  // Helper macros for the decode switch below:
  //   BYTECODE(NAME)       - opens the case for BC_NAME and prints its name.
  //   ADVANCE(NAME)        - ends the line, steps past the instruction and
  //                          extends maxPc to the following instruction.
  //   STOP(NAME)           - ends the line and steps past the instruction
  //                          without extending maxPc.
  //   JUMP(NAME, OFFSET)   - ends the line, extends maxPc to the jump target
  //                          (byteCode + OFFSET) and steps past the
  //                          instruction.
  //   BRANCH(NAME, OFFSET) - ends the line, steps past the instruction and
  //                          extends maxPc to the later of the following
  //                          instruction and the branch target.
608 #  define BYTECODE(NAME)      \
609     case irregexp::BC_##NAME: \
610       DumpLower(#NAME);
611 #  define ADVANCE(NAME)                 \
612     fprintf(stderr, "\n");              \
613     pc += irregexp::BC_##NAME##_LENGTH; \
614     maxPc = std::max(maxPc, pc);        \
615     break;
616 #  define STOP(NAME)                    \
617     fprintf(stderr, "\n");              \
618     pc += irregexp::BC_##NAME##_LENGTH; \
619     break;
620 #  define JUMP(NAME, OFFSET)                    \
621     fprintf(stderr, "\n");                      \
622     maxPc = std::max(maxPc, byteCode + OFFSET); \
623     pc += irregexp::BC_##NAME##_LENGTH;         \
624     break;
625 #  define BRANCH(NAME, OFFSET)                                \
626     fprintf(stderr, "\n");                                    \
627     pc += irregexp::BC_##NAME##_LENGTH;                       \
628     maxPc = std::max(maxPc, std::max(pc, byteCode + OFFSET)); \
629     break;
630 
631   // Bytecode has no end marker, we need to calculate the bytecode length by
632   // tracing jumps and branches.
633   const uint8_t* maxPc = pc;
634   while (pc <= maxPc) {
    // Each line starts with the instruction's byte offset from |byteCode|.
635     fprintf(stderr, "%05d:  ", int32_t(pc - byteCode));
636     int32_t insn = Load32Aligned(pc);
    // The opcode is the masked part of the first word; the bits above
    // BYTECODE_SHIFT are printed as an operand by several cases below.
637     switch (insn & irregexp::BYTECODE_MASK) {
638       BYTECODE(BREAK) { STOP(BREAK); }
639       BYTECODE(PUSH_CP) { ADVANCE(PUSH_CP); }
640       BYTECODE(PUSH_BT) {
641         int32_t offset = Load32Aligned(pc + 4);
642         fprintf(stderr, " %d", offset);
643         // Pushed value is used by POP_BT for jumping.
644         // Resolve maxPc here.
645         BRANCH(PUSH_BT, offset);
646       }
647       BYTECODE(PUSH_REGISTER) {
648         fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
649         ADVANCE(PUSH_REGISTER);
650       }
651       BYTECODE(SET_REGISTER) {
652         fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
653                 Load32Aligned(pc + 4));
654         ADVANCE(SET_REGISTER);
655       }
656       BYTECODE(ADVANCE_REGISTER) {
657         fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
658                 Load32Aligned(pc + 4));
659         ADVANCE(ADVANCE_REGISTER);
660       }
661       BYTECODE(SET_REGISTER_TO_CP) {
662         fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
663                 Load32Aligned(pc + 4));
664         ADVANCE(SET_REGISTER_TO_CP);
665       }
666       BYTECODE(SET_CP_TO_REGISTER) {
667         fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
668         ADVANCE(SET_CP_TO_REGISTER);
669       }
670       BYTECODE(SET_REGISTER_TO_SP) {
671         fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
672         ADVANCE(SET_REGISTER_TO_SP);
673       }
674       BYTECODE(SET_SP_TO_REGISTER) {
675         fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
676         ADVANCE(SET_SP_TO_REGISTER);
677       }
678       BYTECODE(POP_CP) { ADVANCE(POP_CP); }
679       BYTECODE(POP_BT) {
680         // Jump is already resolved in PUSH_BT.
681         STOP(POP_BT);
682       }
683       BYTECODE(POP_REGISTER) {
684         fprintf(stderr, " reg[%d]", insn >> irregexp::BYTECODE_SHIFT);
685         ADVANCE(POP_REGISTER);
686       }
687       BYTECODE(FAIL) { ADVANCE(FAIL); }
688       BYTECODE(SUCCEED) { ADVANCE(SUCCEED); }
689       BYTECODE(ADVANCE_CP) {
690         fprintf(stderr, " %d", insn >> irregexp::BYTECODE_SHIFT);
691         ADVANCE(ADVANCE_CP);
692       }
693       BYTECODE(GOTO) {
694         int32_t offset = Load32Aligned(pc + 4);
695         fprintf(stderr, " %d", offset);
696         JUMP(GOTO, offset);
697       }
698       BYTECODE(ADVANCE_CP_AND_GOTO) {
699         int32_t offset = Load32Aligned(pc + 4);
700         fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
701         JUMP(ADVANCE_CP_AND_GOTO, offset);
702       }
703       BYTECODE(CHECK_GREEDY) {
704         int32_t offset = Load32Aligned(pc + 4);
705         fprintf(stderr, " %d", offset);
706         BRANCH(CHECK_GREEDY, offset);
707       }
708       BYTECODE(LOAD_CURRENT_CHAR) {
709         int32_t offset = Load32Aligned(pc + 4);
710         fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
711         BRANCH(LOAD_CURRENT_CHAR, offset);
712       }
713       BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
714         fprintf(stderr, " %d", insn >> irregexp::BYTECODE_SHIFT);
715         ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
716       }
717       BYTECODE(LOAD_2_CURRENT_CHARS) {
718         int32_t offset = Load32Aligned(pc + 4);
719         fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
720         BRANCH(LOAD_2_CURRENT_CHARS, offset);
721       }
722       BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
723         fprintf(stderr, " %d", insn >> irregexp::BYTECODE_SHIFT);
724         ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
725       }
726       BYTECODE(LOAD_4_CURRENT_CHARS) { ADVANCE(LOAD_4_CURRENT_CHARS); }
727       BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
728         ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
729       }
730       BYTECODE(CHECK_4_CHARS) {
731         int32_t offset = Load32Aligned(pc + 8);
732         fprintf(stderr, " %d, %d", Load32Aligned(pc + 4), offset);
733         BRANCH(CHECK_4_CHARS, offset);
734       }
735       BYTECODE(CHECK_CHAR) {
736         int32_t offset = Load32Aligned(pc + 4);
737         fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
738         BRANCH(CHECK_CHAR, offset);
739       }
740       BYTECODE(CHECK_NOT_4_CHARS) {
741         int32_t offset = Load32Aligned(pc + 8);
742         fprintf(stderr, " %d, %d", Load32Aligned(pc + 4), offset);
743         BRANCH(CHECK_NOT_4_CHARS, offset);
744       }
745       BYTECODE(CHECK_NOT_CHAR) {
746         int32_t offset = Load32Aligned(pc + 4);
747         fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
748         BRANCH(CHECK_NOT_CHAR, offset);
749       }
750       BYTECODE(AND_CHECK_4_CHARS) {
751         int32_t offset = Load32Aligned(pc + 12);
752         fprintf(stderr, " %d, %d, %d", Load32Aligned(pc + 4),
753                 Load32Aligned(pc + 8), offset);
754         BRANCH(AND_CHECK_4_CHARS, offset);
755       }
756       BYTECODE(AND_CHECK_CHAR) {
757         int32_t offset = Load32Aligned(pc + 8);
758         fprintf(stderr, " %d, %d, %d", insn >> irregexp::BYTECODE_SHIFT,
759                 Load32Aligned(pc + 4), offset);
760         BRANCH(AND_CHECK_CHAR, offset);
761       }
762       BYTECODE(AND_CHECK_NOT_4_CHARS) {
763         int32_t offset = Load32Aligned(pc + 12);
764         fprintf(stderr, " %d, %d, %d", Load32Aligned(pc + 4),
765                 Load32Aligned(pc + 8), offset);
766         BRANCH(AND_CHECK_NOT_4_CHARS, offset);
767       }
768       BYTECODE(AND_CHECK_NOT_CHAR) {
769         int32_t offset = Load32Aligned(pc + 8);
770         fprintf(stderr, " %d, %d, %d", insn >> irregexp::BYTECODE_SHIFT,
771                 Load32Aligned(pc + 4), offset);
772         BRANCH(AND_CHECK_NOT_CHAR, offset);
773       }
774       BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
775         int32_t offset = Load32Aligned(pc + 8);
776         fprintf(stderr, " %d, %d, %d, %d", insn >> irregexp::BYTECODE_SHIFT,
777                 Load16Aligned(pc + 4), Load16Aligned(pc + 6), offset);
778         BRANCH(MINUS_AND_CHECK_NOT_CHAR, offset);
779       }
780       BYTECODE(CHECK_CHAR_IN_RANGE) {
781         int32_t offset = Load32Aligned(pc + 8);
782         fprintf(stderr, " %d, %d, %d", Load16Aligned(pc + 4),
783                 Load16Aligned(pc + 6), offset);
784         BRANCH(CHECK_CHAR_IN_RANGE, offset);
785       }
786       BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
787         int32_t offset = Load32Aligned(pc + 8);
788         fprintf(stderr, " %d, %d, %d", Load16Aligned(pc + 4),
789                 Load16Aligned(pc + 6), offset);
790         BRANCH(CHECK_CHAR_NOT_IN_RANGE, offset);
791       }
792       BYTECODE(CHECK_BIT_IN_TABLE) {
793         int32_t offset = Load32Aligned(pc + 4);
        // Prints the branch offset followed by the 16 bytes at pc[8..23] in
        // hex.
794         fprintf(stderr,
795                 " %d, "
796                 "%02x %02x %02x %02x %02x %02x %02x %02x "
797                 "%02x %02x %02x %02x %02x %02x %02x %02x",
798                 offset, pc[8], pc[9], pc[10], pc[11], pc[12], pc[13], pc[14],
799                 pc[15], pc[16], pc[17], pc[18], pc[19], pc[20], pc[21], pc[22],
800                 pc[23]);
801         BRANCH(CHECK_BIT_IN_TABLE, offset);
802       }
803       BYTECODE(CHECK_LT) {
804         int32_t offset = Load32Aligned(pc + 4);
805         fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
806         BRANCH(CHECK_LT, offset);
807       }
808       BYTECODE(CHECK_GT) {
809         int32_t offset = Load32Aligned(pc + 4);
810         fprintf(stderr, " %d, %d", insn >> irregexp::BYTECODE_SHIFT, offset);
811         BRANCH(CHECK_GT, offset);
812       }
813       BYTECODE(CHECK_REGISTER_LT) {
814         int32_t offset = Load32Aligned(pc + 8);
815         fprintf(stderr, " reg[%d], %d, %d", insn >> irregexp::BYTECODE_SHIFT,
816                 Load32Aligned(pc + 4), offset);
817         BRANCH(CHECK_REGISTER_LT, offset);
818       }
819       BYTECODE(CHECK_REGISTER_GE) {
820         int32_t offset = Load32Aligned(pc + 8);
821         fprintf(stderr, " reg[%d], %d, %d", insn >> irregexp::BYTECODE_SHIFT,
822                 Load32Aligned(pc + 4), offset);
823         BRANCH(CHECK_REGISTER_GE, offset);
824       }
825       BYTECODE(CHECK_REGISTER_EQ_POS) {
826         int32_t offset = Load32Aligned(pc + 4);
827         fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
828                 offset);
829         BRANCH(CHECK_REGISTER_EQ_POS, offset);
830       }
831       BYTECODE(CHECK_NOT_REGS_EQUAL) {
832         int32_t offset = Load32Aligned(pc + 8);
833         fprintf(stderr, " reg[%d], %d, %d", insn >> irregexp::BYTECODE_SHIFT,
834                 Load32Aligned(pc + 4), offset);
835         BRANCH(CHECK_NOT_REGS_EQUAL, offset);
836       }
837       BYTECODE(CHECK_NOT_BACK_REF) {
838         int32_t offset = Load32Aligned(pc + 4);
839         fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
840                 offset);
841         BRANCH(CHECK_NOT_BACK_REF, offset);
842       }
843       BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
844         int32_t offset = Load32Aligned(pc + 4);
845         fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
846                 offset);
847         BRANCH(CHECK_NOT_BACK_REF_NO_CASE, offset);
848       }
849       BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
850         int32_t offset = Load32Aligned(pc + 4);
851         fprintf(stderr, " reg[%d], %d", insn >> irregexp::BYTECODE_SHIFT,
852                 offset);
853         BRANCH(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, offset);
854       }
855       BYTECODE(CHECK_AT_START) {
856         int32_t offset = Load32Aligned(pc + 4);
857         fprintf(stderr, " %d", offset);
858         BRANCH(CHECK_AT_START, offset);
859       }
860       BYTECODE(CHECK_NOT_AT_START) {
861         int32_t offset = Load32Aligned(pc + 4);
862         fprintf(stderr, " %d", offset);
863         BRANCH(CHECK_NOT_AT_START, offset);
864       }
865       BYTECODE(SET_CURRENT_POSITION_FROM_END) {
        // This operand is shifted as unsigned and printed with %u, unlike the
        // signed shifts used by the other cases.
866         fprintf(stderr, " %u",
867                 static_cast<uint32_t>(insn) >> irregexp::BYTECODE_SHIFT);
868         ADVANCE(SET_CURRENT_POSITION_FROM_END);
869       }
870       default:
871         MOZ_CRASH("Bad bytecode");
872     }
873   }
874 
875 #  undef BYTECODE
876 #  undef ADVANCE
877 #  undef STOP
878 #  undef JUMP
879 #  undef BRANCH
880 
881   return true;
882 }
883 
884 /* static */
dumpBytecode(JSContext * cx,Handle<RegExpObject * > regexp,HandleLinearString input)885 bool RegExpObject::dumpBytecode(JSContext* cx, Handle<RegExpObject*> regexp,
886                                 HandleLinearString input) {
887   RootedRegExpShared shared(cx, getShared(cx, regexp));
888   if (!shared) {
889     return false;
890   }
891 
892   return RegExpShared::dumpBytecode(cx, &shared, input);
893 }
894 #endif  // DEBUG && !ENABLE_NEW_REGEXP
895 
896 template <typename CharT>
IsRegExpMetaChar(CharT ch)897 static MOZ_ALWAYS_INLINE bool IsRegExpMetaChar(CharT ch) {
898   switch (ch) {
899     /* ES 2016 draft Mar 25, 2016 21.2.1 SyntaxCharacter. */
900     case '^':
901     case '$':
902     case '\\':
903     case '.':
904     case '*':
905     case '+':
906     case '?':
907     case '(':
908     case ')':
909     case '[':
910     case ']':
911     case '{':
912     case '}':
913     case '|':
914       return true;
915     default:
916       return false;
917   }
918 }
919 
920 template <typename CharT>
HasRegExpMetaChars(const CharT * chars,size_t length)921 bool js::HasRegExpMetaChars(const CharT* chars, size_t length) {
922   for (size_t i = 0; i < length; ++i) {
923     if (IsRegExpMetaChar<CharT>(chars[i])) {
924       return true;
925     }
926   }
927   return false;
928 }
929 
930 template bool js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars,
931                                                  size_t length);
932 
933 template bool js::HasRegExpMetaChars<char16_t>(const char16_t* chars,
934                                                size_t length);
935 
StringHasRegExpMetaChars(JSLinearString * str)936 bool js::StringHasRegExpMetaChars(JSLinearString* str) {
937   AutoCheckCannotGC nogc;
938   if (str->hasLatin1Chars()) {
939     return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
940   }
941 
942   return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
943 }
944 
945 /* RegExpShared */
946 
// Constructs a RegExpShared for the pattern |source| with the given |flags|.
// The pair count starts at zero; it is filled in later, once the pattern has
// been compiled (or classified as an atom match).
RegExpShared::RegExpShared(JSAtom* source, RegExpFlags flags)
    : headerAndSource(source), pairCount_(0), flags(flags) {}
949 
traceChildren(JSTracer * trc)950 void RegExpShared::traceChildren(JSTracer* trc) {
951   // Discard code to avoid holding onto ExecutablePools.
952   if (IsMarkingTrace(trc) && trc->runtime()->gc.isShrinkingGC()) {
953     discardJitCode();
954   }
955 
956   TraceNullableEdge(trc, &headerAndSource, "RegExpShared source");
957 #ifdef ENABLE_NEW_REGEXP
958   if (kind() == RegExpShared::Kind::Atom) {
959     TraceNullableEdge(trc, &patternAtom_, "RegExpShared pattern atom");
960   } else {
961     for (auto& comp : compilationArray) {
962       TraceNullableEdge(trc, &comp.jitCode, "RegExpShared code");
963     }
964     TraceNullableEdge(trc, &groupsTemplate_, "RegExpShared groups template");
965   }
966 #else
967   for (auto& comp : compilationArray) {
968     TraceNullableEdge(trc, &comp.jitCode, "RegExpShared code");
969   }
970 #endif
971 }
972 
discardJitCode()973 void RegExpShared::discardJitCode() {
974   for (auto& comp : compilationArray) {
975     comp.jitCode = nullptr;
976   }
977 
978   // We can also purge the tables used by JIT code.
979   tables.clearAndFree();
980 }
981 
finalize(JSFreeOp * fop)982 void RegExpShared::finalize(JSFreeOp* fop) {
983   for (auto& comp : compilationArray) {
984     if (comp.byteCode) {
985       size_t length = comp.byteCodeLength();
986       fop->free_(this, comp.byteCode, length, MemoryUse::RegExpSharedBytecode);
987     }
988   }
989 #ifdef ENABLE_NEW_REGEXP
990   if (namedCaptureIndices_) {
991     size_t length = numNamedCaptures() * sizeof(uint32_t);
992     fop->free_(this, namedCaptureIndices_, length,
993                MemoryUse::RegExpSharedNamedCaptureData);
994   }
995 #endif
996   tables.~JitCodeTables();
997 }
998 
999 /* static */
compile(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)1000 bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
1001                            HandleLinearString input,
1002                            RegExpShared::CodeKind codeKind) {
1003   TraceLoggerThread* logger = TraceLoggerForCurrentThread(cx);
1004   AutoTraceLog logCompile(logger, TraceLogger_IrregexpCompile);
1005 
1006   RootedAtom pattern(cx, re->getSource());
1007   return compile(cx, re, pattern, input, codeKind);
1008 }
1009 
1010 #ifdef ENABLE_NEW_REGEXP
// Pattern-taking compile overload for the ENABLE_NEW_REGEXP configuration.
// Not implemented: it crashes unconditionally. In this configuration,
// compileIfNecessary hands compilation to irregexp::CompilePattern instead.
bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
                           HandleAtom pattern, HandleLinearString input,
                           RegExpShared::CodeKind code) {
  MOZ_CRASH("TODO");
}
1016 /* static */
compileIfNecessary(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)1017 bool RegExpShared::compileIfNecessary(JSContext* cx,
1018                                       MutableHandleRegExpShared re,
1019                                       HandleLinearString input,
1020                                       RegExpShared::CodeKind codeKind) {
1021   if (codeKind == RegExpShared::CodeKind::Any) {
1022     // We start by interpreting regexps, then compile them once they are
1023     // sufficiently hot. For very long input strings, we tier up eagerly.
1024     codeKind = RegExpShared::CodeKind::Bytecode;
1025     if (IsNativeRegExpEnabled() &&
1026         (re->markedForTierUp() || input->length() > 1000)) {
1027       codeKind = RegExpShared::CodeKind::Jitcode;
1028     }
1029   }
1030 
1031   bool needsCompile = false;
1032   if (re->kind() == RegExpShared::Kind::Unparsed) {
1033     needsCompile = true;
1034   }
1035   if (re->kind() == RegExpShared::Kind::RegExp) {
1036     if (!re->isCompiled(input->hasLatin1Chars(), codeKind)) {
1037       needsCompile = true;
1038     }
1039   }
1040   if (needsCompile) {
1041     return irregexp::CompilePattern(cx, re, input, codeKind);
1042   }
1043   return true;
1044 }
1045 
1046 /* static */
execute(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t start,VectorMatchPairs * matches)1047 RegExpRunStatus RegExpShared::execute(JSContext* cx,
1048                                       MutableHandleRegExpShared re,
1049                                       HandleLinearString input, size_t start,
1050                                       VectorMatchPairs* matches) {
1051   MOZ_ASSERT(matches);
1052 
1053   // TODO: Add tracelogger support
1054 
1055   /* Compile the code at point-of-use. */
1056   if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
1057     return RegExpRunStatus_Error;
1058   }
1059 
1060   /*
1061    * Ensure sufficient memory for output vector.
1062    * No need to initialize it. The RegExp engine fills them in on a match.
1063    */
1064   if (!matches->allocOrExpandArray(re->pairCount())) {
1065     ReportOutOfMemory(cx);
1066     return RegExpRunStatus_Error;
1067   }
1068 
1069   if (re->kind() == RegExpShared::Kind::Atom) {
1070     return RegExpShared::executeAtom(cx, re, input, start, matches);
1071   }
1072 
1073   // Reset the Irregexp backtrack stack if it grows during execution.
1074   irregexp::RegExpStackScope stackScope(cx->isolate);
1075 
1076   /*
1077    * Ensure sufficient memory for output vector.
1078    * No need to initialize it. The RegExp engine fills them in on a match.
1079    */
1080   if (!matches->allocOrExpandArray(re->pairCount())) {
1081     ReportOutOfMemory(cx);
1082     return RegExpRunStatus_Error;
1083   }
1084 
1085   uint32_t interruptRetries = 0;
1086   const uint32_t maxInterruptRetries = 4;
1087   do {
1088     RegExpRunStatus result = irregexp::Execute(cx, re, input, start, matches);
1089 
1090     if (result == RegExpRunStatus_Error) {
1091       /* Execute can return RegExpRunStatus_Error:
1092        *
1093        *  1. If the native stack overflowed
1094        *  2. If the backtrack stack overflowed
1095        *  3. If an interrupt was requested during execution.
1096        *
1097        * In the first two cases, we want to throw an error. In the
1098        * third case, we want to handle the interrupt and try again.
1099        * We cap the number of times we will retry.
1100        */
1101       if (cx->hasAnyPendingInterrupt()) {
1102         if (!CheckForInterrupt(cx)) {
1103           return RegExpRunStatus_Error;
1104         }
1105         if (interruptRetries++ < maxInterruptRetries) {
1106           continue;
1107         }
1108       }
1109       // If we have run out of retries, this regexp takes too long to execute.
1110       ReportOverRecursed(cx);
1111       return RegExpRunStatus_Error;
1112     }
1113 
1114     MOZ_ASSERT(result == RegExpRunStatus_Success ||
1115                result == RegExpRunStatus_Success_NotFound);
1116 
1117     return result;
1118   } while (true);
1119 
1120   MOZ_CRASH("Unreachable");
1121 }
1122 
useAtomMatch(HandleAtom pattern)1123 void RegExpShared::useAtomMatch(HandleAtom pattern) {
1124   MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
1125   kind_ = RegExpShared::Kind::Atom;
1126   patternAtom_ = pattern;
1127   pairCount_ = 1;
1128 }
1129 
useRegExpMatch(size_t pairCount)1130 void RegExpShared::useRegExpMatch(size_t pairCount) {
1131   MOZ_ASSERT(kind() == RegExpShared::Kind::Unparsed);
1132   kind_ = RegExpShared::Kind::RegExp;
1133   pairCount_ = pairCount;
1134   ticks_ = jit::JitOptions.regexpWarmUpThreshold;
1135 }
1136 
1137 /* static */
initializeNamedCaptures(JSContext * cx,HandleRegExpShared re,HandleNativeObject namedCaptures)1138 bool RegExpShared::initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
1139                                            HandleNativeObject namedCaptures) {
1140   MOZ_ASSERT(!re->groupsTemplate_);
1141   MOZ_ASSERT(!re->namedCaptureIndices_);
1142 
1143   // The irregexp parser returns named capture information in the form
1144   // of an ArrayObject, where even elements store the capture name and
1145   // odd elements store the corresponding capture index. We create a
1146   // template object with a property for each capture name, and store
1147   // the capture indices as a heap-allocated array.
1148   uint32_t numNamedCaptures = namedCaptures->getDenseInitializedLength() / 2;
1149 
1150   // Create a plain template object.
1151   RootedPlainObject templateObject(
1152       cx, NewTenuredObjectWithGivenProto<PlainObject>(cx, nullptr));
1153   if (!templateObject) {
1154     return false;
1155   }
1156 
1157   // Create a new group for the template.
1158   Rooted<TaggedProto> proto(cx, templateObject->taggedProto());
1159   ObjectGroup* group = ObjectGroupRealm::makeGroup(
1160       cx, templateObject->realm(), templateObject->getClass(), proto);
1161   if (!group) {
1162     return false;
1163   }
1164   templateObject->setGroup(group);
1165 
1166   // Initialize the properties of the template.
1167   RootedValue dummyString(cx, StringValue(cx->runtime()->emptyString));
1168   for (uint32_t i = 0; i < numNamedCaptures; i++) {
1169     RootedString name(cx, namedCaptures->getDenseElement(i * 2).toString());
1170     RootedId id(cx, NameToId(name->asAtom().asPropertyName()));
1171     if (!NativeDefineDataProperty(cx, templateObject, id, dummyString,
1172                                   JSPROP_ENUMERATE)) {
1173       return false;
1174     }
1175     AddTypePropertyId(cx, templateObject, id, UndefinedValue());
1176   }
1177 
1178   // Allocate the capture index array.
1179   uint32_t arraySize = numNamedCaptures * sizeof(uint32_t);
1180   uint32_t* captureIndices = static_cast<uint32_t*>(js_malloc(arraySize));
1181   if (!captureIndices) {
1182     js::ReportOutOfMemory(cx);
1183     return false;
1184   }
1185 
1186   // Populate the capture index array
1187   for (uint32_t i = 0; i < numNamedCaptures; i++) {
1188     captureIndices[i] = namedCaptures->getDenseElement(i * 2 + 1).toInt32();
1189   }
1190 
1191   re->numNamedCaptures_ = numNamedCaptures;
1192   re->groupsTemplate_ = templateObject;
1193   re->namedCaptureIndices_ = captureIndices;
1194   js::AddCellMemory(re, arraySize, MemoryUse::RegExpSharedNamedCaptureData);
1195   return true;
1196 }
1197 
tierUpTick()1198 void RegExpShared::tierUpTick() {
1199   MOZ_ASSERT(kind() == RegExpShared::Kind::RegExp);
1200   if (ticks_ > 0) {
1201     ticks_--;
1202   }
1203 }
1204 
markedForTierUp() const1205 bool RegExpShared::markedForTierUp() const {
1206   if (!IsNativeRegExpEnabled()) {
1207     return false;
1208   }
1209   if (kind() != RegExpShared::Kind::RegExp) {
1210     return false;
1211   }
1212   return ticks_ == 0;
1213 }
1214 
1215 #else   // !ENABLE_NEW_REGEXP
1216 
/* static */
// Parses |pattern| and compiles it for |input|'s character width, storing the
// resulting JIT code or interpreter bytecode on |re|. Also records on |re|
// whether plain string matching can be used and how many match pairs the
// pattern produces. Returns false if parsing or compilation fails.
bool RegExpShared::compile(JSContext* cx, MutableHandleRegExpShared re,
                           HandleAtom pattern, HandleLinearString input,
                           RegExpShared::CodeKind codeKind) {
  // A case-sensitive pattern without any syntax characters can be matched as
  // a plain string (see executeAtom).
  if (!re->ignoreCase() && !StringHasRegExpMetaChars(pattern)) {
    re->canStringMatch = true;
  }

  CompileOptions options(cx);
  frontend::DummyTokenStream dummyTokenStream(cx, options);

  /* Parse the pattern. The RegExpCompileData is allocated in LifoAlloc and
   * will only be live while LifoAllocScope is on stack. */
  LifoAllocScope allocScope(&cx->tempLifoAlloc());
  irregexp::RegExpCompileData data;
  if (!irregexp::ParsePattern(dummyTokenStream, allocScope.alloc(), pattern,
                              /*match_only =*/false, re->getFlags(), &data)) {
    return false;
  }

  // Add one to account for the whole-match capture.
  re->pairCount_ = data.capture_count + 1;

  // Only an explicit Bytecode request forces the interpreter.
  bool forceBytecode = codeKind == RegExpShared::CodeKind::Bytecode;
  JitCodeTables tables;
  irregexp::RegExpCode code = irregexp::CompilePattern(
      cx, allocScope.alloc(), re, &data, input, false /* global() */,
      re->ignoreCase(), input->hasLatin1Chars(), /*match_only = */ false,
      forceBytecode, re->sticky(), re->unicode(), tables);
  if (code.empty()) {
    return false;
  }

  // The compiler produces either JIT code or bytecode, never both.
  MOZ_ASSERT(!code.jitCode || !code.byteCode);

  RegExpCompilation& compilation = re->compilation(input->hasLatin1Chars());
  if (code.jitCode) {
    // First copy the tables. GC can purge the tables if the RegExpShared
    // has no JIT code, so it's important to do this right before setting
    // compilation.jitCode (to ensure no purging happens between adding the
    // tables and setting the JIT code).
    for (size_t i = 0; i < tables.length(); i++) {
      if (!re->addTable(std::move(tables[i]))) {
        ReportOutOfMemory(cx);
        return false;
      }
    }
    compilation.jitCode = code.jitCode;
  } else if (code.byteCode) {
    MOZ_ASSERT(tables.empty(), "RegExpInterpreter does not use data tables");
    compilation.byteCode = code.byteCode;
    // Account for the bytecode buffer now owned by |re|.
    AddCellMemory(re, compilation.byteCodeLength(),
                  MemoryUse::RegExpSharedBytecode);
  }

  return true;
}
1274 
1275 /* static */
compileIfNecessary(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)1276 bool RegExpShared::compileIfNecessary(JSContext* cx,
1277                                       MutableHandleRegExpShared re,
1278                                       HandleLinearString input,
1279                                       RegExpShared::CodeKind codeKind) {
1280   if (re->isCompiled(input->hasLatin1Chars(), codeKind)) {
1281     return true;
1282   }
1283   return compile(cx, re, input, codeKind);
1284 }
1285 
/* static */
// Executes |re| against |input| starting at index |start|, compiling the
// regexp first if needed.
//
// Strategy: use plain string matching when the pattern allows it; otherwise
// run the JIT code if there is any, and fall back to the bytecode interpreter
// when there is no JIT code or the JIT run was cut short.
//
// Returns RegExpRunStatus_Success / RegExpRunStatus_Success_NotFound on
// completion, RegExpRunStatus_Error on failure. |matches| must be non-null;
// it is resized to hold re->pairCount() pairs.
RegExpRunStatus RegExpShared::execute(JSContext* cx,
                                      MutableHandleRegExpShared re,
                                      HandleLinearString input, size_t start,
                                      VectorMatchPairs* matches) {
  MOZ_ASSERT(matches);
  TraceLoggerThread* logger = TraceLoggerForCurrentThread(cx);

  /* Compile the code at point-of-use. */
  if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Any)) {
    return RegExpRunStatus_Error;
  }

  /*
   * Ensure sufficient memory for output vector.
   * No need to initialize it. The RegExp engine fills them in on a match.
   */
  if (!matches->allocOrExpandArray(re->pairCount())) {
    ReportOutOfMemory(cx);
    return RegExpRunStatus_Error;
  }

  size_t length = input->length();

  // Reset the Irregexp backtrack stack if it grows during execution.
  irregexp::RegExpStackScope stackScope(cx);

  // Fast path: the pattern is a plain string (set up by compile()).
  if (re->canStringMatch) {
    return executeAtom(cx, re, input, start, matches);
  }

  // JIT attempt. The do/while(false) lets |break| fall through to the
  // interpreter path below.
  do {
    jit::JitCode* code = re->compilation(input->hasLatin1Chars()).jitCode;
    if (!code) {
      break;
    }

    RegExpRunStatus result;
    {
      AutoTraceLog logJIT(logger, TraceLogger_IrregexpExecute);
      AutoCheckCannotGC nogc;
      if (input->hasLatin1Chars()) {
        const Latin1Char* chars = input->latin1Chars(nogc);
        result = irregexp::ExecuteCode(cx, code, chars, start, length, matches,
                                       /*endIndex = */ nullptr);
      } else {
        const char16_t* chars = input->twoByteChars(nogc);
        result = irregexp::ExecuteCode(cx, code, chars, start, length, matches,
                                       /*endIndex = */ nullptr);
      }
    }

    if (result == RegExpRunStatus_Error) {
      // An 'Error' result is returned if a stack overflow guard or
      // interrupt guard failed. If CheckOverRecursed doesn't throw, break
      // out and retry the regexp in the bytecode interpreter, which can
      // execute while tolerating future interrupts. Otherwise, if we keep
      // getting interrupted we will never finish executing the regexp.
      if (!jit::CheckOverRecursed(cx)) {
        return RegExpRunStatus_Error;
      }
      break;
    }

    if (result == RegExpRunStatus_Success_NotFound) {
      return RegExpRunStatus_Success_NotFound;
    }

    MOZ_ASSERT(result == RegExpRunStatus_Success);

    matches->checkAgainst(length);
    return RegExpRunStatus_Success;
  } while (false);

  // Compile bytecode for the RegExp if necessary.
  if (!compileIfNecessary(cx, re, input, RegExpShared::CodeKind::Bytecode)) {
    return RegExpRunStatus_Error;
  }

  uint8_t* byteCode = re->compilation(input->hasLatin1Chars()).byteCode;
  AutoTraceLog logInterpreter(logger, TraceLogger_IrregexpExecute);

  // Obtain the input characters through AutoStableStringChars for the
  // interpreter run.
  AutoStableStringChars inputChars(cx);
  if (!inputChars.init(cx, input)) {
    return RegExpRunStatus_Error;
  }

  RegExpRunStatus result;
  if (inputChars.isLatin1()) {
    const Latin1Char* chars = inputChars.latin1Range().begin().get();
    result = irregexp::InterpretCode(cx, byteCode, chars, start, length,
                                     matches, /*endIndex = */ nullptr);
  } else {
    const char16_t* chars = inputChars.twoByteRange().begin().get();
    result = irregexp::InterpretCode(cx, byteCode, chars, start, length,
                                     matches, /*endIndex = */ nullptr);
  }

  if (result == RegExpRunStatus_Success) {
    matches->checkAgainst(length);
  }
  return result;
}
1389 #endif  // !ENABLE_NEW_REGEXP
1390 
1391 /* static */
executeAtom(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t start,VectorMatchPairs * matches)1392 RegExpRunStatus RegExpShared::executeAtom(JSContext* cx,
1393                                           MutableHandleRegExpShared re,
1394                                           HandleLinearString input,
1395                                           size_t start,
1396                                           VectorMatchPairs* matches) {
1397   MOZ_ASSERT(re->pairCount() == 1);
1398 
1399   size_t length = input->length();
1400   size_t searchLength = re->patternAtom()->length();
1401 
1402   if (re->sticky()) {
1403     // First part checks size_t overflow.
1404     if (searchLength + start < searchLength || searchLength + start > length) {
1405       return RegExpRunStatus_Success_NotFound;
1406     }
1407     if (!HasSubstringAt(input, re->patternAtom(), start)) {
1408       return RegExpRunStatus_Success_NotFound;
1409     }
1410 
1411     (*matches)[0].start = start;
1412     (*matches)[0].limit = start + searchLength;
1413     matches->checkAgainst(length);
1414 
1415     return RegExpRunStatus_Success;
1416   }
1417 
1418   int res = StringFindPattern(input, re->patternAtom(), start);
1419   if (res == -1) {
1420     return RegExpRunStatus_Success_NotFound;
1421   }
1422 
1423   (*matches)[0].start = res;
1424   (*matches)[0].limit = res + searchLength;
1425   matches->checkAgainst(length);
1426 
1427   return RegExpRunStatus_Success;
1428 }
1429 
sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)1430 size_t RegExpShared::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
1431   size_t n = 0;
1432 
1433   for (size_t i = 0; i < ArrayLength(compilationArray); i++) {
1434     const RegExpCompilation& compilation = compilationArray[i];
1435     if (compilation.byteCode) {
1436       n += mallocSizeOf(compilation.byteCode);
1437     }
1438   }
1439 
1440   n += tables.sizeOfExcludingThis(mallocSizeOf);
1441   for (size_t i = 0; i < tables.length(); i++) {
1442     n += mallocSizeOf(tables[i].get());
1443   }
1444 
1445   return n;
1446 }
1447 
1448 /* RegExpRealm */
1449 
// Starts with no cached match-result template object and no cached
// optimizable-RegExp shapes.
RegExpRealm::RegExpRealm()
    : matchResultTemplateObject_(nullptr),
      optimizableRegExpPrototypeShape_(nullptr),
      optimizableRegExpInstanceShape_(nullptr) {}
1454 
// Creates and caches the template object for regexp match results: a tenured
// array with its own group and dummy |index| and |input| properties (plus
// |groups| when ENABLE_NEW_REGEXP is defined). Must only be called while no
// template is cached. Returns nullptr on failure.
//
// The properties are defined in a fixed order; the debug assertions below
// check the slots they end up in.
ArrayObject* RegExpRealm::createMatchResultTemplateObject(JSContext* cx) {
  MOZ_ASSERT(!matchResultTemplateObject_);

  /* Create template array object */
  RootedArrayObject templateObject(
      cx, NewDenseUnallocatedArray(cx, RegExpObject::MaxPairCount, nullptr,
                                   TenuredObject));
  if (!templateObject) {
    return nullptr;
  }

  // Create a new group for the template.
  Rooted<TaggedProto> proto(cx, templateObject->taggedProto());
  ObjectGroup* group = ObjectGroupRealm::makeGroup(
      cx, templateObject->realm(), templateObject->getClass(), proto);
  if (!group) {
    return nullptr;
  }
  templateObject->setGroup(group);

  /* Set dummy index property */
  RootedValue index(cx, Int32Value(0));
  if (!NativeDefineDataProperty(cx, templateObject, cx->names().index, index,
                                JSPROP_ENUMERATE)) {
    return nullptr;
  }

  /* Set dummy input property */
  RootedValue inputVal(cx, StringValue(cx->runtime()->emptyString));
  if (!NativeDefineDataProperty(cx, templateObject, cx->names().input, inputVal,
                                JSPROP_ENUMERATE)) {
    return nullptr;
  }

#ifdef ENABLE_NEW_REGEXP
  /* Set dummy groups property */
  RootedValue groupsVal(cx, UndefinedValue());
  if (!NativeDefineDataProperty(cx, templateObject, cx->names().groups,
                                groupsVal, JSPROP_ENUMERATE)) {
    return nullptr;
  }
  // The dummy value is undefined; also register that |groups| may hold an
  // object.
  AddTypePropertyId(cx, templateObject, NameToId(cx->names().groups),
                    TypeSet::AnyObjectType());

  // Make sure that the properties are in the right slots.
#  ifdef DEBUG
  // Walk the shape lineage backwards: groups, then input, then index.
  Shape* groupsShape = templateObject->lastProperty();
  MOZ_ASSERT(groupsShape->slot() == MatchResultObjectGroupsSlot &&
             groupsShape->propidRef() == NameToId(cx->names().groups));
  Shape* inputShape = groupsShape->previous().get();
  MOZ_ASSERT(inputShape->slot() == MatchResultObjectInputSlot &&
             inputShape->propidRef() == NameToId(cx->names().input));
  Shape* indexShape = inputShape->previous().get();
  MOZ_ASSERT(indexShape->slot() == MatchResultObjectIndexSlot &&
             indexShape->propidRef() == NameToId(cx->names().index));
#  endif
#endif

  // Make sure type information reflects the indexed properties which might
  // be added.
  AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::StringType());
  AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::UndefinedType());

  matchResultTemplateObject_.set(templateObject);

  return matchResultTemplateObject_;
}
1522 
traceWeak(JSTracer * trc)1523 void RegExpRealm::traceWeak(JSTracer* trc) {
1524   if (matchResultTemplateObject_) {
1525     TraceWeakEdge(trc, &matchResultTemplateObject_,
1526                   "RegExpRealm::matchResultTemplateObject_");
1527   }
1528 
1529   if (optimizableRegExpPrototypeShape_) {
1530     TraceWeakEdge(trc, &optimizableRegExpPrototypeShape_,
1531                   "RegExpRealm::optimizableRegExpPrototypeShape_");
1532   }
1533 
1534   if (optimizableRegExpInstanceShape_) {
1535     TraceWeakEdge(trc, &optimizableRegExpInstanceShape_,
1536                   "RegExpRealm::optimizableRegExpInstanceShape_");
1537   }
1538 }
1539 
get(JSContext * cx,HandleAtom source,RegExpFlags flags)1540 RegExpShared* RegExpZone::get(JSContext* cx, HandleAtom source,
1541                               RegExpFlags flags) {
1542   DependentAddPtr<Set> p(cx, set_, Key(source, flags));
1543   if (p) {
1544     return *p;
1545   }
1546 
1547   auto shared = Allocate<RegExpShared>(cx);
1548   if (!shared) {
1549     return nullptr;
1550   }
1551 
1552   new (shared) RegExpShared(source, flags);
1553 
1554   if (!p.add(cx, set_, Key(source, flags), shared)) {
1555     ReportOutOfMemory(cx);
1556     return nullptr;
1557   }
1558 
1559   return shared;
1560 }
1561 
// Memory reporting: the only malloc'ed data measured here is the set of
// RegExpShared entries.
size_t RegExpZone::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
  return set_.sizeOfExcludingThis(mallocSizeOf);
}
1565 
// Creates the per-zone set of RegExpShared instances; |zone| is passed for
// both of the set's constructor arguments.
RegExpZone::RegExpZone(Zone* zone) : set_(zone, zone) {}
1567 
1568 /* Functions */
1569 
CloneRegExpObject(JSContext * cx,Handle<RegExpObject * > regex)1570 JSObject* js::CloneRegExpObject(JSContext* cx, Handle<RegExpObject*> regex) {
1571   // Unlike RegExpAlloc, all clones must use |regex|'s group.
1572   RootedObjectGroup group(cx, regex->group());
1573   Rooted<RegExpObject*> clone(
1574       cx, NewObjectWithGroup<RegExpObject>(cx, group, GenericObject));
1575   if (!clone) {
1576     return nullptr;
1577   }
1578   clone->initPrivate(nullptr);
1579 
1580   if (!EmptyShape::ensureInitialCustomShape<RegExpObject>(cx, clone)) {
1581     return nullptr;
1582   }
1583 
1584   RegExpShared* shared = RegExpObject::getShared(cx, regex);
1585   if (!shared) {
1586     return nullptr;
1587   }
1588 
1589   clone->initAndZeroLastIndex(shared->getSource(), shared->getFlags(), cx);
1590   clone->setShared(*shared);
1591 
1592   return clone;
1593 }
1594 
1595 template <typename CharT>
ParseRegExpFlags(const CharT * chars,size_t length,RegExpFlags * flagsOut,char16_t * invalidFlag)1596 static bool ParseRegExpFlags(const CharT* chars, size_t length,
1597                              RegExpFlags* flagsOut, char16_t* invalidFlag) {
1598   *flagsOut = RegExpFlag::NoFlags;
1599 
1600   for (size_t i = 0; i < length; i++) {
1601     uint8_t flag;
1602     switch (chars[i]) {
1603       case 'g':
1604         flag = RegExpFlag::Global;
1605         break;
1606       case 'i':
1607         flag = RegExpFlag::IgnoreCase;
1608         break;
1609       case 'm':
1610         flag = RegExpFlag::Multiline;
1611         break;
1612       case 's':
1613         flag = RegExpFlag::DotAll;
1614         break;
1615       case 'u':
1616         flag = RegExpFlag::Unicode;
1617         break;
1618       case 'y':
1619         flag = RegExpFlag::Sticky;
1620         break;
1621       default:
1622         *invalidFlag = chars[i];
1623         return false;
1624     }
1625     if (*flagsOut & flag) {
1626       *invalidFlag = chars[i];
1627       return false;
1628     }
1629     *flagsOut |= flag;
1630   }
1631 
1632   return true;
1633 }
1634 
ParseRegExpFlags(JSContext * cx,JSString * flagStr,RegExpFlags * flagsOut)1635 bool js::ParseRegExpFlags(JSContext* cx, JSString* flagStr,
1636                           RegExpFlags* flagsOut) {
1637   JSLinearString* linear = flagStr->ensureLinear(cx);
1638   if (!linear) {
1639     return false;
1640   }
1641 
1642   size_t len = linear->length();
1643 
1644   bool ok;
1645   char16_t invalidFlag;
1646   if (linear->hasLatin1Chars()) {
1647     AutoCheckCannotGC nogc;
1648     ok = ::ParseRegExpFlags(linear->latin1Chars(nogc), len, flagsOut,
1649                             &invalidFlag);
1650   } else {
1651     AutoCheckCannotGC nogc;
1652     ok = ::ParseRegExpFlags(linear->twoByteChars(nogc), len, flagsOut,
1653                             &invalidFlag);
1654   }
1655 
1656   if (!ok) {
1657     TwoByteChars range(&invalidFlag, 1);
1658     UniqueChars utf8(JS::CharsToNewUTF8CharsZ(cx, range).c_str());
1659     if (!utf8) {
1660       return false;
1661     }
1662     JS_ReportErrorNumberUTF8(cx, GetErrorMessage, nullptr,
1663                              JSMSG_BAD_REGEXP_FLAG, utf8.get());
1664     return false;
1665   }
1666 
1667   return true;
1668 }
1669 
1670 template <XDRMode mode>
XDRScriptRegExpObject(XDRState<mode> * xdr,MutableHandle<RegExpObject * > objp)1671 XDRResult js::XDRScriptRegExpObject(XDRState<mode>* xdr,
1672                                     MutableHandle<RegExpObject*> objp) {
1673   /* NB: Keep this in sync with CloneScriptRegExpObject. */
1674 
1675   RootedAtom source(xdr->cx());
1676   uint8_t flags = 0;
1677 
1678   if (mode == XDR_ENCODE) {
1679     MOZ_ASSERT(objp);
1680     RegExpObject& reobj = *objp;
1681     source = reobj.getSource();
1682     flags = reobj.getFlags().value();
1683   }
1684   MOZ_TRY(XDRAtom(xdr, &source));
1685   MOZ_TRY(xdr->codeUint8(&flags));
1686   if (mode == XDR_DECODE) {
1687     RegExpObject* reobj = RegExpObject::create(
1688         xdr->cx(), source, RegExpFlags(flags), TenuredObject);
1689     if (!reobj) {
1690       return xdr->fail(JS::TranscodeResult_Throw);
1691     }
1692 
1693     objp.set(reobj);
1694   }
1695   return Ok();
1696 }
1697 
1698 template XDRResult js::XDRScriptRegExpObject(XDRState<XDR_ENCODE>* xdr,
1699                                              MutableHandle<RegExpObject*> objp);
1700 
1701 template XDRResult js::XDRScriptRegExpObject(XDRState<XDR_DECODE>* xdr,
1702                                              MutableHandle<RegExpObject*> objp);
1703 
CloneScriptRegExpObject(JSContext * cx,RegExpObject & reobj)1704 JSObject* js::CloneScriptRegExpObject(JSContext* cx, RegExpObject& reobj) {
1705   /* NB: Keep this in sync with XDRScriptRegExpObject. */
1706 
1707   RootedAtom source(cx, reobj.getSource());
1708   cx->markAtom(source);
1709 
1710   return RegExpObject::create(cx, source, reobj.getFlags(), TenuredObject);
1711 }
1712 
RegExpToSharedNonInline(JSContext * cx,HandleObject obj)1713 JS_FRIEND_API RegExpShared* js::RegExpToSharedNonInline(JSContext* cx,
1714                                                         HandleObject obj) {
1715   return RegExpToShared(cx, obj);
1716 }
1717 
size(mozilla::MallocSizeOf mallocSizeOf) const1718 JS::ubi::Node::Size JS::ubi::Concrete<RegExpShared>::size(
1719     mozilla::MallocSizeOf mallocSizeOf) const {
1720   return js::gc::Arena::thingSize(gc::AllocKind::REGEXP_SHARED) +
1721          get().sizeOfExcludingThis(mallocSizeOf);
1722 }
1723 
1724 /*
1725  * Regular Expressions.
1726  */
NewRegExpObject(JSContext * cx,const char * bytes,size_t length,RegExpFlags flags)1727 JS_PUBLIC_API JSObject* JS::NewRegExpObject(JSContext* cx, const char* bytes,
1728                                             size_t length, RegExpFlags flags) {
1729   AssertHeapIsIdle();
1730   CHECK_THREAD(cx);
1731 
1732   UniqueTwoByteChars chars(InflateString(cx, bytes, length));
1733   if (!chars) {
1734     return nullptr;
1735   }
1736 
1737   return RegExpObject::create(cx, chars.get(), length, flags, GenericObject);
1738 }
1739 
NewUCRegExpObject(JSContext * cx,const char16_t * chars,size_t length,RegExpFlags flags)1740 JS_PUBLIC_API JSObject* JS::NewUCRegExpObject(JSContext* cx,
1741                                               const char16_t* chars,
1742                                               size_t length,
1743                                               RegExpFlags flags) {
1744   AssertHeapIsIdle();
1745   CHECK_THREAD(cx);
1746 
1747   return RegExpObject::create(cx, chars, length, flags, GenericObject);
1748 }
1749 
SetRegExpInput(JSContext * cx,HandleObject obj,HandleString input)1750 JS_PUBLIC_API bool JS::SetRegExpInput(JSContext* cx, HandleObject obj,
1751                                       HandleString input) {
1752   AssertHeapIsIdle();
1753   CHECK_THREAD(cx);
1754   cx->check(input);
1755 
1756   Handle<GlobalObject*> global = obj.as<GlobalObject>();
1757   RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1758   if (!res) {
1759     return false;
1760   }
1761 
1762   res->reset(input);
1763   return true;
1764 }
1765 
ClearRegExpStatics(JSContext * cx,HandleObject obj)1766 JS_PUBLIC_API bool JS::ClearRegExpStatics(JSContext* cx, HandleObject obj) {
1767   AssertHeapIsIdle();
1768   CHECK_THREAD(cx);
1769   MOZ_ASSERT(obj);
1770 
1771   Handle<GlobalObject*> global = obj.as<GlobalObject>();
1772   RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1773   if (!res) {
1774     return false;
1775   }
1776 
1777   res->clear();
1778   return true;
1779 }
1780 
ExecuteRegExp(JSContext * cx,HandleObject obj,HandleObject reobj,char16_t * chars,size_t length,size_t * indexp,bool test,MutableHandleValue rval)1781 JS_PUBLIC_API bool JS::ExecuteRegExp(JSContext* cx, HandleObject obj,
1782                                      HandleObject reobj, char16_t* chars,
1783                                      size_t length, size_t* indexp, bool test,
1784                                      MutableHandleValue rval) {
1785   AssertHeapIsIdle();
1786   CHECK_THREAD(cx);
1787 
1788   Handle<GlobalObject*> global = obj.as<GlobalObject>();
1789   RegExpStatics* res = GlobalObject::getRegExpStatics(cx, global);
1790   if (!res) {
1791     return false;
1792   }
1793 
1794   RootedLinearString input(cx, NewStringCopyN<CanGC>(cx, chars, length));
1795   if (!input) {
1796     return false;
1797   }
1798 
1799   return ExecuteRegExpLegacy(cx, res, reobj.as<RegExpObject>(), input, indexp,
1800                              test, rval);
1801 }
1802 
ExecuteRegExpNoStatics(JSContext * cx,HandleObject obj,const char16_t * chars,size_t length,size_t * indexp,bool test,MutableHandleValue rval)1803 JS_PUBLIC_API bool JS::ExecuteRegExpNoStatics(JSContext* cx, HandleObject obj,
1804                                               const char16_t* chars,
1805                                               size_t length, size_t* indexp,
1806                                               bool test,
1807                                               MutableHandleValue rval) {
1808   AssertHeapIsIdle();
1809   CHECK_THREAD(cx);
1810 
1811   RootedLinearString input(cx, NewStringCopyN<CanGC>(cx, chars, length));
1812   if (!input) {
1813     return false;
1814   }
1815 
1816   return ExecuteRegExpLegacy(cx, nullptr, obj.as<RegExpObject>(), input, indexp,
1817                              test, rval);
1818 }
1819 
ObjectIsRegExp(JSContext * cx,HandleObject obj,bool * isRegExp)1820 JS_PUBLIC_API bool JS::ObjectIsRegExp(JSContext* cx, HandleObject obj,
1821                                       bool* isRegExp) {
1822   cx->check(obj);
1823 
1824   ESClass cls;
1825   if (!GetBuiltinClass(cx, obj, &cls)) {
1826     return false;
1827   }
1828 
1829   *isRegExp = cls == ESClass::RegExp;
1830   return true;
1831 }
1832 
GetRegExpFlags(JSContext * cx,HandleObject obj)1833 JS_PUBLIC_API RegExpFlags JS::GetRegExpFlags(JSContext* cx, HandleObject obj) {
1834   AssertHeapIsIdle();
1835   CHECK_THREAD(cx);
1836 
1837   RegExpShared* shared = RegExpToShared(cx, obj);
1838   if (!shared) {
1839     return RegExpFlag::NoFlags;
1840   }
1841   return shared->getFlags();
1842 }
1843 
GetRegExpSource(JSContext * cx,HandleObject obj)1844 JS_PUBLIC_API JSString* JS::GetRegExpSource(JSContext* cx, HandleObject obj) {
1845   AssertHeapIsIdle();
1846   CHECK_THREAD(cx);
1847 
1848   RegExpShared* shared = RegExpToShared(cx, obj);
1849   if (!shared) {
1850     return nullptr;
1851   }
1852   return shared->getSource();
1853 }
1854 
CheckRegExpSyntax(JSContext * cx,const char16_t * chars,size_t length,RegExpFlags flags,MutableHandleValue error)1855 JS_PUBLIC_API bool JS::CheckRegExpSyntax(JSContext* cx, const char16_t* chars,
1856                                          size_t length, RegExpFlags flags,
1857                                          MutableHandleValue error) {
1858   AssertHeapIsIdle();
1859   CHECK_THREAD(cx);
1860 
1861   CompileOptions dummyOptions(cx);
1862   frontend::DummyTokenStream dummyTokenStream(cx, dummyOptions);
1863 
1864   LifoAllocScope allocScope(&cx->tempLifoAlloc());
1865 
1866   mozilla::Range<const char16_t> source(chars, length);
1867 #ifdef ENABLE_NEW_REGEXP
1868   bool success =
1869       irregexp::CheckPatternSyntax(cx, dummyTokenStream, source, flags);
1870 #else
1871   bool success = irregexp::ParsePatternSyntax(
1872       dummyTokenStream, allocScope.alloc(), source, flags.unicode());
1873 #endif
1874   error.set(UndefinedValue());
1875   if (!success) {
1876     // We can fail because of OOM or over-recursion even if the syntax is valid.
1877     if (cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed()) {
1878       return false;
1879     }
1880     if (!cx->getPendingException(error)) {
1881       return false;
1882     }
1883     cx->clearPendingException();
1884   }
1885   return true;
1886 }
1887