1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /**
8  * The compiled representation of a RegExp, potentially shared among RegExp
9  * instances created during separate evaluations of a single RegExp literal in
10  * source code.
11  */
12 
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
15 
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
18 
19 #include "gc/Barrier.h"
20 #include "gc/Marking.h"
21 #include "gc/ZoneAllocator.h"
22 #include "irregexp/RegExpTypes.h"
23 #include "jit/JitCode.h"
24 #include "jit/JitOptions.h"
25 #include "js/AllocPolicy.h"
26 #include "js/RegExpFlags.h"  // JS::RegExpFlag, JS::RegExpFlags
27 #include "js/UbiNode.h"
28 #include "js/Vector.h"
29 #include "vm/ArrayObject.h"
30 #include "vm/JSAtom.h"
31 
32 namespace js {
33 
34 class ArrayObject;
35 class RegExpRealm;
36 class RegExpShared;
37 class RegExpStatics;
38 class VectorMatchPairs;
39 
40 using RootedRegExpShared = JS::Rooted<RegExpShared*>;
41 using HandleRegExpShared = JS::Handle<RegExpShared*>;
42 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
43 
// Result code of running a regular expression against an input string.
// Every enumerator has an explicitly assigned value and the underlying type
// is int32_t; the enumerators are listed in ascending numeric order.
enum RegExpRunStatus : int32_t {
  // The run failed.
  RegExpRunStatus_Error = -1,
  // The run completed without finding a match.
  RegExpRunStatus_Success_NotFound = 0,
  // The run completed and found a match.
  RegExpRunStatus_Success = 1,
};
49 
IsNativeRegExpEnabled()50 inline bool IsNativeRegExpEnabled() {
51   return jit::HasJitBackend() && jit::JitOptions.nativeRegExp;
52 }
53 
54 /*
55  * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
56  * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
57  * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
58  * table so that they can be reused when compiling the same regex string.
59  *
60  * To save memory, a RegExpShared is not created for a RegExpObject until it is
61  * needed for execution. When a RegExpShared needs to be created, it is looked
62  * up in a per-compartment table to allow reuse between objects.
63  *
64  * During a GC, RegExpShared instances are marked and swept like GC things.
65  * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
66  * than explicitly tracing them, so that the RegExpShared and any jitcode can
67  * be reclaimed quicker. However, the RegExpShareds are traced through by
68  * objects when we are preserving jitcode in their zone, to avoid the same
69  * recompilation inefficiencies as normal Ion and baseline compilation.
70  */
71 class RegExpShared
72     : public gc::CellWithTenuredGCPointer<gc::TenuredCell, JSAtom> {
73  public:
74   enum class Kind { Unparsed, Atom, RegExp };
75   enum class CodeKind { Bytecode, Jitcode, Any };
76 
77   using ByteCode = js::irregexp::ByteArrayData;
78   using JitCodeTable = js::irregexp::ByteArray;
79   using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
80 
81  private:
82   friend class RegExpStatics;
83   friend class RegExpZone;
84 
85   struct RegExpCompilation {
86     WeakHeapPtr<jit::JitCode*> jitCode;
87     ByteCode* byteCode = nullptr;
88 
89     bool compiled(CodeKind kind = CodeKind::Any) const {
90       switch (kind) {
91         case CodeKind::Bytecode:
92           return !!byteCode;
93         case CodeKind::Jitcode:
94           return !!jitCode;
95         case CodeKind::Any:
96           return !!byteCode || !!jitCode;
97       }
98       MOZ_CRASH("Unreachable");
99     }
100 
byteCodeLengthRegExpCompilation101     size_t byteCodeLength() const {
102       MOZ_ASSERT(byteCode);
103       return byteCode->length;
104     }
105   };
106 
107  public:
108   /* Source to the RegExp, for lazy compilation. Stored in the cell header. */
getSource()109   JSAtom* getSource() const { return headerPtr(); }
110 
111  private:
112   RegExpCompilation compilationArray[2];
113 
114   uint32_t pairCount_;
115   JS::RegExpFlags flags;
116 
117   RegExpShared::Kind kind_ = Kind::Unparsed;
118   GCPtrAtom patternAtom_;
119   uint32_t maxRegisters_ = 0;
120   uint32_t ticks_ = 0;
121 
122   uint32_t numNamedCaptures_ = {};
123   uint32_t* namedCaptureIndices_ = {};
124   GCPtr<PlainObject*> groupsTemplate_ = {};
125 
CompilationIndex(bool latin1)126   static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
127 
128   // Tables referenced by JIT code.
129   JitCodeTables tables;
130 
131   /* Internal functions. */
132   RegExpShared(JSAtom* source, JS::RegExpFlags flags);
133 
compilation(bool latin1)134   const RegExpCompilation& compilation(bool latin1) const {
135     return compilationArray[CompilationIndex(latin1)];
136   }
137 
compilation(bool latin1)138   RegExpCompilation& compilation(bool latin1) {
139     return compilationArray[CompilationIndex(latin1)];
140   }
141 
142  public:
143   ~RegExpShared() = delete;
144 
145   static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
146                                  HandleLinearString input, CodeKind code);
147 
148   static RegExpRunStatus executeAtom(MutableHandleRegExpShared re,
149                                      HandleLinearString input, size_t start,
150                                      VectorMatchPairs* matches);
151 
152   // Execute this RegExp on input starting from searchIndex, filling in matches.
153   static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
154                                  HandleLinearString input, size_t searchIndex,
155                                  VectorMatchPairs* matches);
156 
157   // Register a table with this RegExpShared, and take ownership.
addTable(JitCodeTable table)158   bool addTable(JitCodeTable table) { return tables.append(std::move(table)); }
159 
160   /* Accessors */
161 
pairCount()162   size_t pairCount() const {
163     MOZ_ASSERT(kind() != Kind::Unparsed);
164     return pairCount_;
165   }
166 
kind()167   RegExpShared::Kind kind() const { return kind_; }
168 
169   // Use simple string matching for this regexp.
170   void useAtomMatch(HandleAtom pattern);
171 
172   // Use the regular expression engine for this regexp.
173   void useRegExpMatch(size_t parenCount);
174 
175   static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
176                                       HandleNativeObject namedCaptures);
getGroupsTemplate()177   PlainObject* getGroupsTemplate() { return groupsTemplate_; }
178 
179   void tierUpTick();
180   bool markedForTierUp() const;
181 
setByteCode(ByteCode * code,bool latin1)182   void setByteCode(ByteCode* code, bool latin1) {
183     compilation(latin1).byteCode = code;
184   }
getByteCode(bool latin1)185   ByteCode* getByteCode(bool latin1) const {
186     return compilation(latin1).byteCode;
187   }
setJitCode(jit::JitCode * code,bool latin1)188   void setJitCode(jit::JitCode* code, bool latin1) {
189     compilation(latin1).jitCode = code;
190   }
getJitCode(bool latin1)191   jit::JitCode* getJitCode(bool latin1) const {
192     return compilation(latin1).jitCode;
193   }
getMaxRegisters()194   uint32_t getMaxRegisters() const { return maxRegisters_; }
updateMaxRegisters(uint32_t numRegisters)195   void updateMaxRegisters(uint32_t numRegisters) {
196     maxRegisters_ = std::max(maxRegisters_, numRegisters);
197   }
198 
numNamedCaptures()199   uint32_t numNamedCaptures() const { return numNamedCaptures_; }
getNamedCaptureIndex(uint32_t idx)200   int32_t getNamedCaptureIndex(uint32_t idx) const {
201     MOZ_ASSERT(idx < numNamedCaptures());
202     MOZ_ASSERT(namedCaptureIndices_);
203     return namedCaptureIndices_[idx];
204   }
205 
patternAtom()206   JSAtom* patternAtom() const { return patternAtom_; }
207 
getFlags()208   JS::RegExpFlags getFlags() const { return flags; }
209 
hasIndices()210   bool hasIndices() const { return flags.hasIndices(); }
global()211   bool global() const { return flags.global(); }
ignoreCase()212   bool ignoreCase() const { return flags.ignoreCase(); }
multiline()213   bool multiline() const { return flags.multiline(); }
dotAll()214   bool dotAll() const { return flags.dotAll(); }
unicode()215   bool unicode() const { return flags.unicode(); }
sticky()216   bool sticky() const { return flags.sticky(); }
217 
218   bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
219     return compilation(latin1).compiled(codeKind);
220   }
isCompiled()221   bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
222 
223   void traceChildren(JSTracer* trc);
224   void discardJitCode();
225   void finalize(JSFreeOp* fop);
226 
offsetOfSource()227   static size_t offsetOfSource() { return offsetOfHeaderPtr(); }
228 
offsetOfPatternAtom()229   static size_t offsetOfPatternAtom() {
230     return offsetof(RegExpShared, patternAtom_);
231   }
232 
offsetOfFlags()233   static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
234 
offsetOfPairCount()235   static size_t offsetOfPairCount() {
236     return offsetof(RegExpShared, pairCount_);
237   }
238 
offsetOfJitCode(bool latin1)239   static size_t offsetOfJitCode(bool latin1) {
240     return offsetof(RegExpShared, compilationArray) +
241            (CompilationIndex(latin1) * sizeof(RegExpCompilation)) +
242            offsetof(RegExpCompilation, jitCode);
243   }
244 
offsetOfGroupsTemplate()245   static size_t offsetOfGroupsTemplate() {
246     return offsetof(RegExpShared, groupsTemplate_);
247   }
248 
249   size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
250 
251 #ifdef DEBUG
252   static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res,
253                            HandleLinearString input);
254 #endif
255 
256  public:
257   static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared;
258 };
259 
260 class RegExpZone {
261   struct Key {
262     JSAtom* atom = nullptr;
263     JS::RegExpFlags flags = JS::RegExpFlag::NoFlags;
264 
265     Key() = default;
KeyKey266     Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {}
KeyKey267     MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared)
268         : atom(shared.unbarrieredGet()->getSource()),
269           flags(shared.unbarrieredGet()->getFlags()) {}
270 
271     using Lookup = Key;
hashKey272     static HashNumber hash(const Lookup& l) {
273       HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom);
274       return mozilla::AddToHash(hash, l.flags.value());
275     }
matchKey276     static bool match(Key l, Key r) {
277       return l.atom == r.atom && l.flags == r.flags;
278     }
279   };
280 
281   /*
282    * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
283    * that was not marked is deleted and removed from the set.
284    */
285   using Set = JS::WeakCache<
286       JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>;
287   Set set_;
288 
289  public:
290   explicit RegExpZone(Zone* zone);
291 
~RegExpZone()292   ~RegExpZone() { MOZ_ASSERT(set_.empty()); }
293 
empty()294   bool empty() const { return set_.empty(); }
295 
maybeGet(JSAtom * source,JS::RegExpFlags flags)296   RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const {
297     Set::Ptr p = set_.lookup(Key(source, flags));
298     return p ? *p : nullptr;
299   }
300 
301   RegExpShared* get(JSContext* cx, HandleAtom source, JS::RegExpFlags flags);
302 
303 #ifdef DEBUG
clear()304   void clear() { set_.clear(); }
305 #endif
306 
307   size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
308 };
309 
310 class RegExpRealm {
311  public:
312   enum ResultTemplateKind { Normal, WithIndices, Indices, NumKinds };
313 
314  private:
315   /*
316    * The template objects that the result of re.exec() is based on, if
317    * there is a result. These are used in CreateRegExpMatchResult.
318    * There are three template objects, each of which is an ArrayObject
319    * with some additional properties. We decide which to use based on
320    * the |hasIndices| (/d) flag.
321    *
322    *  Normal: Has |index|, |input|, and |groups| properties.
323    *          Used for the result object if |hasIndices| is not set.
324    *
325    *  WithIndices: Has |index|, |input|, |groups|, and |indices| properties.
326    *               Used for the result object if |hasIndices| is set.
327    *
328    *  Indices: Has a |groups| property. If |hasIndices| is set, used
329    *           for the |.indices| property of the result object.
330    */
331   WeakHeapPtr<ArrayObject*>
332       matchResultTemplateObjects_[ResultTemplateKind::NumKinds];
333 
334   /*
335    * The shape of RegExp.prototype object that satisfies following:
336    *   * RegExp.prototype.flags getter is not modified
337    *   * RegExp.prototype.global getter is not modified
338    *   * RegExp.prototype.ignoreCase getter is not modified
339    *   * RegExp.prototype.multiline getter is not modified
340    *   * RegExp.prototype.dotAll getter is not modified
341    *   * RegExp.prototype.sticky getter is not modified
342    *   * RegExp.prototype.unicode getter is not modified
343    *   * RegExp.prototype.exec is an own data property
344    *   * RegExp.prototype[@@match] is an own data property
345    *   * RegExp.prototype[@@search] is an own data property
346    */
347   WeakHeapPtr<Shape*> optimizableRegExpPrototypeShape_;
348 
349   /*
350    * The shape of RegExp instance that satisfies following:
351    *   * lastProperty is lastIndex
352    *   * prototype is RegExp.prototype
353    */
354   WeakHeapPtr<Shape*> optimizableRegExpInstanceShape_;
355 
356   ArrayObject* createMatchResultTemplateObject(JSContext* cx,
357                                                ResultTemplateKind kind);
358 
359  public:
360   explicit RegExpRealm();
361 
362   void traceWeak(JSTracer* trc);
363 
364   static const size_t MatchResultObjectIndexSlot = 0;
365   static const size_t MatchResultObjectInputSlot = 1;
366   static const size_t MatchResultObjectGroupsSlot = 2;
367   static const size_t MatchResultObjectIndicesSlot = 3;
368 
369   static const size_t IndicesGroupsSlot = 0;
370 
offsetOfMatchResultObjectIndexSlot()371   static size_t offsetOfMatchResultObjectIndexSlot() {
372     return sizeof(Value) * MatchResultObjectIndexSlot;
373   }
offsetOfMatchResultObjectInputSlot()374   static size_t offsetOfMatchResultObjectInputSlot() {
375     return sizeof(Value) * MatchResultObjectInputSlot;
376   }
offsetOfMatchResultObjectGroupsSlot()377   static size_t offsetOfMatchResultObjectGroupsSlot() {
378     return sizeof(Value) * MatchResultObjectGroupsSlot;
379   }
offsetOfMatchResultObjectIndicesSlot()380   static size_t offsetOfMatchResultObjectIndicesSlot() {
381     return sizeof(Value) * MatchResultObjectIndicesSlot;
382   }
383 
384   /* Get or create template object used to base the result of .exec() on. */
385   ArrayObject* getOrCreateMatchResultTemplateObject(
386       JSContext* cx, ResultTemplateKind kind = ResultTemplateKind::Normal) {
387     if (matchResultTemplateObjects_[kind]) {
388       return matchResultTemplateObjects_[kind];
389     }
390     return createMatchResultTemplateObject(cx, kind);
391   }
392 
getOptimizableRegExpPrototypeShape()393   Shape* getOptimizableRegExpPrototypeShape() {
394     return optimizableRegExpPrototypeShape_;
395   }
setOptimizableRegExpPrototypeShape(Shape * shape)396   void setOptimizableRegExpPrototypeShape(Shape* shape) {
397     optimizableRegExpPrototypeShape_ = shape;
398   }
getOptimizableRegExpInstanceShape()399   Shape* getOptimizableRegExpInstanceShape() {
400     return optimizableRegExpInstanceShape_;
401   }
setOptimizableRegExpInstanceShape(Shape * shape)402   void setOptimizableRegExpInstanceShape(Shape* shape) {
403     optimizableRegExpInstanceShape_ = shape;
404   }
405 
offsetOfOptimizableRegExpPrototypeShape()406   static size_t offsetOfOptimizableRegExpPrototypeShape() {
407     return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_);
408   }
offsetOfOptimizableRegExpInstanceShape()409   static size_t offsetOfOptimizableRegExpInstanceShape() {
410     return offsetof(RegExpRealm, optimizableRegExpInstanceShape_);
411   }
412 };
413 
414 RegExpRunStatus ExecuteRegExpAtomRaw(RegExpShared* re, JSLinearString* input,
415                                      size_t start, MatchPairs* matchPairs);
416 
417 } /* namespace js */
418 
419 namespace JS {
420 namespace ubi {
421 
422 template <>
423 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> {
424  protected:
Concrete(js::RegExpShared * ptr)425   explicit Concrete(js::RegExpShared* ptr)
426       : TracerConcrete<js::RegExpShared>(ptr) {}
427 
428  public:
construct(void * storage,js::RegExpShared * ptr)429   static void construct(void* storage, js::RegExpShared* ptr) {
430     new (storage) Concrete(ptr);
431   }
432 
coarseType()433   CoarseType coarseType() const final { return CoarseType::Other; }
434 
435   Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
436 
typeName()437   const char16_t* typeName() const override { return concreteTypeName; }
438   static const char16_t concreteTypeName[];
439 };
440 
441 }  // namespace ubi
442 }  // namespace JS
443 
444 #endif /* vm_RegExpShared_h */
445