1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /**
8  * The compiled representation of a RegExp, potentially shared among RegExp
9  * instances created during separate evaluations of a single RegExp literal in
10  * source code.
11  */
12 
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
15 
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
18 
19 #include "gc/Barrier.h"
20 #include "gc/Marking.h"
21 #include "gc/ZoneAllocator.h"
22 #include "jit/JitOptions.h"
23 #include "js/AllocPolicy.h"
24 #include "js/RegExpFlags.h"  // JS::RegExpFlag, JS::RegExpFlags
25 #include "js/UbiNode.h"
26 #include "js/Vector.h"
27 #ifdef ENABLE_NEW_REGEXP
28 #  include "new-regexp/RegExpTypes.h"
29 #endif
30 #include "vm/ArrayObject.h"
31 #include "vm/JSAtom.h"
32 
33 namespace js {
34 
35 class ArrayObject;
36 class RegExpRealm;
37 class RegExpShared;
38 class RegExpStatics;
39 class VectorMatchPairs;
40 
41 using RootedRegExpShared = JS::Rooted<RegExpShared*>;
42 using HandleRegExpShared = JS::Handle<RegExpShared*>;
43 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
44 
// Status code returned by RegExpShared::execute and RegExpShared::executeAtom.
// Every enumerator has an explicit value; they are listed here in numeric
// order.
enum RegExpRunStatus : int32_t {
  RegExpRunStatus_Error = -1,
  RegExpRunStatus_Success_NotFound = 0,
  RegExpRunStatus_Success = 1,
};
50 
51 #ifdef ENABLE_NEW_REGEXP
52 
IsNativeRegExpEnabled()53 inline bool IsNativeRegExpEnabled() {
54 #  ifdef JS_CODEGEN_NONE
55   return false;
56 #  else
57   return jit::JitOptions.nativeRegExp;
58 #  endif
59 }
60 
61 #else
62 /*
63  * Layout of the reg exp bytecode header.
64  */
// RegExpCompilation::byteCodeLength() reinterprets a bytecode pointer as this
// struct, so the order and types of the fields must stay as they are.
struct RegExpByteCodeHeader {
  // Number of instructions.
  uint32_t length;
  // Number of registers used.
  uint32_t numRegisters;
};
69 #endif  // ENABLE_NEW_REGEXP
70 
71 /*
72  * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
73  * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
74  * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
75  * table so that they can be reused when compiling the same regex string.
76  *
 * To save memory, a RegExpShared is not created for a RegExpObject until it is
 * needed for execution. When a RegExpShared needs to be created, it is looked
 * up in a per-zone table (see RegExpZone) to allow reuse between objects.
80  *
81  * During a GC, RegExpShared instances are marked and swept like GC things.
82  * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
83  * than explicitly tracing them, so that the RegExpShared and any jitcode can
84  * be reclaimed quicker. However, the RegExpShareds are traced through by
85  * objects when we are preserving jitcode in their zone, to avoid the same
86  * recompilation inefficiencies as normal Ion and baseline compilation.
87  */
88 class RegExpShared : public gc::TenuredCell {
89  public:
90   enum class Kind { Unparsed, Atom, RegExp };
91   enum class CodeKind { Bytecode, Jitcode, Any };
92 
93 #ifdef ENABLE_NEW_REGEXP
94   using ByteCode = js::irregexp::ByteArrayData;
95   using JitCodeTable = js::irregexp::ByteArray;
96 #else
97   using ByteCode = uint8_t;
98   using JitCodeTable = UniquePtr<uint8_t[], JS::FreePolicy>;
99 #endif
100   using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
101 
102  private:
103   friend class RegExpStatics;
104   friend class RegExpZone;
105 
106   struct RegExpCompilation {
107     WeakHeapPtr<jit::JitCode*> jitCode;
108     ByteCode* byteCode = nullptr;
109 
110     bool compiled(CodeKind kind = CodeKind::Any) const {
111       switch (kind) {
112         case CodeKind::Bytecode:
113           return !!byteCode;
114         case CodeKind::Jitcode:
115           return !!jitCode;
116         case CodeKind::Any:
117           return !!byteCode || !!jitCode;
118       }
119       MOZ_CRASH("Unreachable");
120     }
121 
byteCodeLengthRegExpCompilation122     size_t byteCodeLength() const {
123       MOZ_ASSERT(byteCode);
124 #ifdef ENABLE_NEW_REGEXP
125       return byteCode->length;
126 #else
127       auto header = reinterpret_cast<RegExpByteCodeHeader*>(byteCode);
128       return header->length;
129 #endif
130     }
131   };
132 
133   /* Source to the RegExp, for lazy compilation. */
134   using HeaderWithAtom = gc::CellHeaderWithTenuredGCPointer<JSAtom>;
135   HeaderWithAtom headerAndSource;
136 
137   RegExpCompilation compilationArray[2];
138 
139   uint32_t pairCount_;
140   JS::RegExpFlags flags;
141 
142 #ifdef ENABLE_NEW_REGEXP
143   RegExpShared::Kind kind_ = Kind::Unparsed;
144   GCPtrAtom patternAtom_;
145   uint32_t maxRegisters_ = 0;
146   uint32_t ticks_ = 0;
147 #else
148   bool canStringMatch = false;
149 #endif
150 
151 #ifdef ENABLE_NEW_REGEXP
152   uint32_t numNamedCaptures_ = {};
153   uint32_t* namedCaptureIndices_ = {};
154   GCPtr<PlainObject*> groupsTemplate_ = {};
155 #endif
156 
CompilationIndex(bool latin1)157   static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
158 
159   // Tables referenced by JIT code.
160   JitCodeTables tables;
161 
162   /* Internal functions. */
163   RegExpShared(JSAtom* source, JS::RegExpFlags flags);
164 
165   static bool compile(JSContext* cx, MutableHandleRegExpShared res,
166                       HandleLinearString input, CodeKind code);
167   static bool compile(JSContext* cx, MutableHandleRegExpShared res,
168                       HandleAtom pattern, HandleLinearString input,
169                       CodeKind code);
170 
compilation(bool latin1)171   const RegExpCompilation& compilation(bool latin1) const {
172     return compilationArray[CompilationIndex(latin1)];
173   }
174 
compilation(bool latin1)175   RegExpCompilation& compilation(bool latin1) {
176     return compilationArray[CompilationIndex(latin1)];
177   }
178 
179  public:
180   ~RegExpShared() = delete;
181 
182   static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
183                                  HandleLinearString input, CodeKind code);
184 
185   static RegExpRunStatus executeAtom(JSContext* cx,
186                                      MutableHandleRegExpShared re,
187                                      HandleLinearString input, size_t start,
188                                      VectorMatchPairs* matches);
189 
190   // Execute this RegExp on input starting from searchIndex, filling in matches.
191   static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
192                                  HandleLinearString input, size_t searchIndex,
193                                  VectorMatchPairs* matches);
194 
195   // Register a table with this RegExpShared, and take ownership.
addTable(JitCodeTable table)196   bool addTable(JitCodeTable table) { return tables.append(std::move(table)); }
197 
198   /* Accessors */
199 
pairCount()200   size_t pairCount() const {
201 #ifdef ENABLE_NEW_REGEXP
202     MOZ_ASSERT(kind() != Kind::Unparsed);
203 #else
204     MOZ_ASSERT(isCompiled());
205 #endif
206     return pairCount_;
207   }
208 
209 #ifdef ENABLE_NEW_REGEXP
kind()210   RegExpShared::Kind kind() const { return kind_; }
211 
212   // Use simple string matching for this regexp.
213   void useAtomMatch(HandleAtom pattern);
214 
215   // Use the regular expression engine for this regexp.
216   void useRegExpMatch(size_t parenCount);
217 
218   static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
219                                       HandleNativeObject namedCaptures);
getGroupsTemplate()220   PlainObject* getGroupsTemplate() { return groupsTemplate_; }
221 
222   void tierUpTick();
223   bool markedForTierUp() const;
224 
setByteCode(ByteCode * code,bool latin1)225   void setByteCode(ByteCode* code, bool latin1) {
226     compilation(latin1).byteCode = code;
227   }
getByteCode(bool latin1)228   ByteCode* getByteCode(bool latin1) const {
229     return compilation(latin1).byteCode;
230   }
setJitCode(jit::JitCode * code,bool latin1)231   void setJitCode(jit::JitCode* code, bool latin1) {
232     compilation(latin1).jitCode = code;
233   }
getJitCode(bool latin1)234   jit::JitCode* getJitCode(bool latin1) const {
235     return compilation(latin1).jitCode;
236   }
getMaxRegisters()237   uint32_t getMaxRegisters() const { return maxRegisters_; }
updateMaxRegisters(uint32_t numRegisters)238   void updateMaxRegisters(uint32_t numRegisters) {
239     maxRegisters_ = std::max(maxRegisters_, numRegisters);
240   }
241 
numNamedCaptures()242   uint32_t numNamedCaptures() const { return numNamedCaptures_; }
getNamedCaptureIndex(uint32_t idx)243   int32_t getNamedCaptureIndex(uint32_t idx) const {
244     MOZ_ASSERT(idx < numNamedCaptures());
245     MOZ_ASSERT(namedCaptureIndices_);
246     return namedCaptureIndices_[idx];
247   }
248 
249 #endif
250 
getSource()251   JSAtom* getSource() const { return headerAndSource.ptr(); }
252 
253 #ifdef ENABLE_NEW_REGEXP
patternAtom()254   JSAtom* patternAtom() const { return patternAtom_; }
255 #else
patternAtom()256   JSAtom* patternAtom() const { return getSource(); }
257 #endif
258 
getFlags()259   JS::RegExpFlags getFlags() const { return flags; }
260 
global()261   bool global() const { return flags.global(); }
ignoreCase()262   bool ignoreCase() const { return flags.ignoreCase(); }
multiline()263   bool multiline() const { return flags.multiline(); }
dotAll()264   bool dotAll() const { return flags.dotAll(); }
unicode()265   bool unicode() const { return flags.unicode(); }
sticky()266   bool sticky() const { return flags.sticky(); }
267 
268   bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
269     return compilation(latin1).compiled(codeKind);
270   }
isCompiled()271   bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
272 
273   void traceChildren(JSTracer* trc);
274   void discardJitCode();
275   void finalize(JSFreeOp* fop);
276 
offsetOfSource()277   static size_t offsetOfSource() {
278     return offsetof(RegExpShared, headerAndSource) +
279            HeaderWithAtom::offsetOfPtr();
280   }
281 
offsetOfFlags()282   static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
283 
offsetOfPairCount()284   static size_t offsetOfPairCount() {
285     return offsetof(RegExpShared, pairCount_);
286   }
287 
offsetOfJitCode(bool latin1)288   static size_t offsetOfJitCode(bool latin1) {
289     return offsetof(RegExpShared, compilationArray) +
290            (CompilationIndex(latin1) * sizeof(RegExpCompilation)) +
291            offsetof(RegExpCompilation, jitCode);
292   }
293 #ifdef ENABLE_NEW_REGEXP
offsetOfGroupsTemplate()294   static size_t offsetOfGroupsTemplate() {
295     return offsetof(RegExpShared, groupsTemplate_);
296   }
297 #endif
298 
299   size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
300 
301 #ifdef DEBUG
302   static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res,
303                            HandleLinearString input);
304 #endif
305 
306  public:
307   static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared;
cellHeader()308   const gc::CellHeader& cellHeader() const { return headerAndSource; }
309 };
310 
311 class RegExpZone {
312   struct Key {
313     JSAtom* atom = nullptr;
314     JS::RegExpFlags flags = JS::RegExpFlag::NoFlags;
315 
316     Key() = default;
KeyKey317     Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {}
KeyKey318     MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared)
319         : atom(shared.unbarrieredGet()->getSource()),
320           flags(shared.unbarrieredGet()->getFlags()) {}
321 
322     using Lookup = Key;
hashKey323     static HashNumber hash(const Lookup& l) {
324       HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom);
325       return mozilla::AddToHash(hash, l.flags.value());
326     }
matchKey327     static bool match(Key l, Key r) {
328       return l.atom == r.atom && l.flags == r.flags;
329     }
330   };
331 
332   /*
333    * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
334    * that was not marked is deleted and removed from the set.
335    */
336   using Set = JS::WeakCache<
337       JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>;
338   Set set_;
339 
340  public:
341   explicit RegExpZone(Zone* zone);
342 
~RegExpZone()343   ~RegExpZone() { MOZ_ASSERT(set_.empty()); }
344 
empty()345   bool empty() const { return set_.empty(); }
346 
maybeGet(JSAtom * source,JS::RegExpFlags flags)347   RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const {
348     Set::Ptr p = set_.lookup(Key(source, flags));
349     return p ? *p : nullptr;
350   }
351 
352   RegExpShared* get(JSContext* cx, HandleAtom source, JS::RegExpFlags flags);
353 
354 #ifdef DEBUG
clear()355   void clear() { set_.clear(); }
356 #endif
357 
358   size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
359 };
360 
361 class RegExpRealm {
362   /*
363    * This is the template object where the result of re.exec() is based on,
364    * if there is a result. This is used in CreateRegExpMatchResult to set
365    * the input/index properties faster.
366    */
367   WeakHeapPtr<ArrayObject*> matchResultTemplateObject_;
368 
369   /*
370    * The shape of RegExp.prototype object that satisfies following:
371    *   * RegExp.prototype.flags getter is not modified
372    *   * RegExp.prototype.global getter is not modified
373    *   * RegExp.prototype.ignoreCase getter is not modified
374    *   * RegExp.prototype.multiline getter is not modified
375    *   * RegExp.prototype.dotAll getter is not modified
376    *   * RegExp.prototype.sticky getter is not modified
377    *   * RegExp.prototype.unicode getter is not modified
378    *   * RegExp.prototype.exec is an own data property
379    *   * RegExp.prototype[@@match] is an own data property
380    *   * RegExp.prototype[@@search] is an own data property
381    */
382   WeakHeapPtr<Shape*> optimizableRegExpPrototypeShape_;
383 
384   /*
385    * The shape of RegExp instance that satisfies following:
386    *   * lastProperty is lastIndex
387    *   * prototype is RegExp.prototype
388    */
389   WeakHeapPtr<Shape*> optimizableRegExpInstanceShape_;
390 
391   ArrayObject* createMatchResultTemplateObject(JSContext* cx);
392 
393  public:
394   explicit RegExpRealm();
395 
396   void traceWeak(JSTracer* trc);
397 
398   static const size_t MatchResultObjectIndexSlot = 0;
399   static const size_t MatchResultObjectInputSlot = 1;
400 #ifdef ENABLE_NEW_REGEXP
401   static const size_t MatchResultObjectGroupsSlot = 2;
402 #endif
403 
offsetOfMatchResultObjectIndexSlot()404   static size_t offsetOfMatchResultObjectIndexSlot() {
405     return sizeof(Value) * MatchResultObjectIndexSlot;
406   }
offsetOfMatchResultObjectInputSlot()407   static size_t offsetOfMatchResultObjectInputSlot() {
408     return sizeof(Value) * MatchResultObjectInputSlot;
409   }
410 #ifdef ENABLE_NEW_REGEXP
offsetOfMatchResultObjectGroupsSlot()411   static size_t offsetOfMatchResultObjectGroupsSlot() {
412     return sizeof(Value) * MatchResultObjectGroupsSlot;
413   }
414 #endif
415 
416   /* Get or create template object used to base the result of .exec() on. */
getOrCreateMatchResultTemplateObject(JSContext * cx)417   ArrayObject* getOrCreateMatchResultTemplateObject(JSContext* cx) {
418     if (matchResultTemplateObject_) {
419       return matchResultTemplateObject_;
420     }
421     return createMatchResultTemplateObject(cx);
422   }
423 
getOptimizableRegExpPrototypeShape()424   Shape* getOptimizableRegExpPrototypeShape() {
425     return optimizableRegExpPrototypeShape_;
426   }
setOptimizableRegExpPrototypeShape(Shape * shape)427   void setOptimizableRegExpPrototypeShape(Shape* shape) {
428     optimizableRegExpPrototypeShape_ = shape;
429   }
getOptimizableRegExpInstanceShape()430   Shape* getOptimizableRegExpInstanceShape() {
431     return optimizableRegExpInstanceShape_;
432   }
setOptimizableRegExpInstanceShape(Shape * shape)433   void setOptimizableRegExpInstanceShape(Shape* shape) {
434     optimizableRegExpInstanceShape_ = shape;
435   }
436 
offsetOfOptimizableRegExpPrototypeShape()437   static size_t offsetOfOptimizableRegExpPrototypeShape() {
438     return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_);
439   }
offsetOfOptimizableRegExpInstanceShape()440   static size_t offsetOfOptimizableRegExpInstanceShape() {
441     return offsetof(RegExpRealm, optimizableRegExpInstanceShape_);
442   }
443 };
444 
445 } /* namespace js */
446 
447 namespace JS {
448 namespace ubi {
449 
450 template <>
451 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> {
452  protected:
Concrete(js::RegExpShared * ptr)453   explicit Concrete(js::RegExpShared* ptr)
454       : TracerConcrete<js::RegExpShared>(ptr) {}
455 
456  public:
construct(void * storage,js::RegExpShared * ptr)457   static void construct(void* storage, js::RegExpShared* ptr) {
458     new (storage) Concrete(ptr);
459   }
460 
coarseType()461   CoarseType coarseType() const final { return CoarseType::Other; }
462 
463   Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
464 
typeName()465   const char16_t* typeName() const override { return concreteTypeName; }
466   static const char16_t concreteTypeName[];
467 };
468 
469 }  // namespace ubi
470 }  // namespace JS
471 
472 #endif /* vm_RegExpShared_h */
473