1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /**
8  * The compiled representation of a RegExp, potentially shared among RegExp
9  * instances created during separate evaluations of a single RegExp literal in
10  * source code.
11  */
12 
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
15 
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
18 
19 #include "gc/Barrier.h"
20 #include "gc/Marking.h"
21 #include "gc/ZoneAllocator.h"
22 #include "irregexp/RegExpTypes.h"
23 #include "jit/JitCode.h"
24 #include "jit/JitOptions.h"
25 #include "js/AllocPolicy.h"
26 #include "js/RegExpFlags.h"  // JS::RegExpFlag, JS::RegExpFlags
27 #include "js/UbiNode.h"
28 #include "js/Vector.h"
29 #include "vm/ArrayObject.h"
30 #include "vm/JSAtom.h"
31 
32 namespace js {
33 
34 class ArrayObject;
35 class RegExpRealm;
36 class RegExpShared;
37 class RegExpStatics;
38 class VectorMatchPairs;
39 
40 using RootedRegExpShared = JS::Rooted<RegExpShared*>;
41 using HandleRegExpShared = JS::Handle<RegExpShared*>;
42 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
43 
44 enum RegExpRunStatus : int32_t {
45   RegExpRunStatus_Error = -1,
46   RegExpRunStatus_Success = 1,
47   RegExpRunStatus_Success_NotFound = 0,
48 };
49 
IsNativeRegExpEnabled()50 inline bool IsNativeRegExpEnabled() {
51 #ifdef JS_CODEGEN_NONE
52   return false;
53 #else
54   return jit::JitOptions.nativeRegExp;
55 #endif
56 }
57 
58 /*
59  * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
60  * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
61  * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
62  * table so that they can be reused when compiling the same regex string.
63  *
64  * To save memory, a RegExpShared is not created for a RegExpObject until it is
65  * needed for execution. When a RegExpShared needs to be created, it is looked
66  * up in a per-compartment table to allow reuse between objects.
67  *
68  * During a GC, RegExpShared instances are marked and swept like GC things.
69  * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
70  * than explicitly tracing them, so that the RegExpShared and any jitcode can
71  * be reclaimed quicker. However, the RegExpShareds are traced through by
72  * objects when we are preserving jitcode in their zone, to avoid the same
73  * recompilation inefficiencies as normal Ion and baseline compilation.
74  */
75 class RegExpShared
76     : public gc::CellWithTenuredGCPointer<gc::TenuredCell, JSAtom> {
77  public:
78   enum class Kind { Unparsed, Atom, RegExp };
79   enum class CodeKind { Bytecode, Jitcode, Any };
80 
81   using ByteCode = js::irregexp::ByteArrayData;
82   using JitCodeTable = js::irregexp::ByteArray;
83   using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
84 
85  private:
86   friend class RegExpStatics;
87   friend class RegExpZone;
88 
89   struct RegExpCompilation {
90     WeakHeapPtr<jit::JitCode*> jitCode;
91     ByteCode* byteCode = nullptr;
92 
93     bool compiled(CodeKind kind = CodeKind::Any) const {
94       switch (kind) {
95         case CodeKind::Bytecode:
96           return !!byteCode;
97         case CodeKind::Jitcode:
98           return !!jitCode;
99         case CodeKind::Any:
100           return !!byteCode || !!jitCode;
101       }
102       MOZ_CRASH("Unreachable");
103     }
104 
byteCodeLengthRegExpCompilation105     size_t byteCodeLength() const {
106       MOZ_ASSERT(byteCode);
107       return byteCode->length;
108     }
109   };
110 
111  public:
112   /* Source to the RegExp, for lazy compilation. Stored in the cell header. */
getSource()113   JSAtom* getSource() const { return headerPtr(); }
114 
115  private:
116   RegExpCompilation compilationArray[2];
117 
118   uint32_t pairCount_;
119   JS::RegExpFlags flags;
120 
121   RegExpShared::Kind kind_ = Kind::Unparsed;
122   GCPtrAtom patternAtom_;
123   uint32_t maxRegisters_ = 0;
124   uint32_t ticks_ = 0;
125 
126   uint32_t numNamedCaptures_ = {};
127   uint32_t* namedCaptureIndices_ = {};
128   GCPtr<PlainObject*> groupsTemplate_ = {};
129 
CompilationIndex(bool latin1)130   static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
131 
132   // Tables referenced by JIT code.
133   JitCodeTables tables;
134 
135   /* Internal functions. */
136   RegExpShared(JSAtom* source, JS::RegExpFlags flags);
137 
compilation(bool latin1)138   const RegExpCompilation& compilation(bool latin1) const {
139     return compilationArray[CompilationIndex(latin1)];
140   }
141 
compilation(bool latin1)142   RegExpCompilation& compilation(bool latin1) {
143     return compilationArray[CompilationIndex(latin1)];
144   }
145 
146  public:
147   ~RegExpShared() = delete;
148 
149   static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
150                                  HandleLinearString input, CodeKind code);
151 
152   static RegExpRunStatus executeAtom(MutableHandleRegExpShared re,
153                                      HandleLinearString input, size_t start,
154                                      VectorMatchPairs* matches);
155 
156   // Execute this RegExp on input starting from searchIndex, filling in matches.
157   static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
158                                  HandleLinearString input, size_t searchIndex,
159                                  VectorMatchPairs* matches);
160 
161   // Register a table with this RegExpShared, and take ownership.
addTable(JitCodeTable table)162   bool addTable(JitCodeTable table) { return tables.append(std::move(table)); }
163 
164   /* Accessors */
165 
pairCount()166   size_t pairCount() const {
167     MOZ_ASSERT(kind() != Kind::Unparsed);
168     return pairCount_;
169   }
170 
kind()171   RegExpShared::Kind kind() const { return kind_; }
172 
173   // Use simple string matching for this regexp.
174   void useAtomMatch(HandleAtom pattern);
175 
176   // Use the regular expression engine for this regexp.
177   void useRegExpMatch(size_t parenCount);
178 
179   static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
180                                       HandleNativeObject namedCaptures);
getGroupsTemplate()181   PlainObject* getGroupsTemplate() { return groupsTemplate_; }
182 
183   void tierUpTick();
184   bool markedForTierUp() const;
185 
setByteCode(ByteCode * code,bool latin1)186   void setByteCode(ByteCode* code, bool latin1) {
187     compilation(latin1).byteCode = code;
188   }
getByteCode(bool latin1)189   ByteCode* getByteCode(bool latin1) const {
190     return compilation(latin1).byteCode;
191   }
setJitCode(jit::JitCode * code,bool latin1)192   void setJitCode(jit::JitCode* code, bool latin1) {
193     compilation(latin1).jitCode = code;
194   }
getJitCode(bool latin1)195   jit::JitCode* getJitCode(bool latin1) const {
196     return compilation(latin1).jitCode;
197   }
getMaxRegisters()198   uint32_t getMaxRegisters() const { return maxRegisters_; }
updateMaxRegisters(uint32_t numRegisters)199   void updateMaxRegisters(uint32_t numRegisters) {
200     maxRegisters_ = std::max(maxRegisters_, numRegisters);
201   }
202 
numNamedCaptures()203   uint32_t numNamedCaptures() const { return numNamedCaptures_; }
getNamedCaptureIndex(uint32_t idx)204   int32_t getNamedCaptureIndex(uint32_t idx) const {
205     MOZ_ASSERT(idx < numNamedCaptures());
206     MOZ_ASSERT(namedCaptureIndices_);
207     return namedCaptureIndices_[idx];
208   }
209 
patternAtom()210   JSAtom* patternAtom() const { return patternAtom_; }
211 
getFlags()212   JS::RegExpFlags getFlags() const { return flags; }
213 
hasIndices()214   bool hasIndices() const { return flags.hasIndices(); }
global()215   bool global() const { return flags.global(); }
ignoreCase()216   bool ignoreCase() const { return flags.ignoreCase(); }
multiline()217   bool multiline() const { return flags.multiline(); }
dotAll()218   bool dotAll() const { return flags.dotAll(); }
unicode()219   bool unicode() const { return flags.unicode(); }
sticky()220   bool sticky() const { return flags.sticky(); }
221 
222   bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
223     return compilation(latin1).compiled(codeKind);
224   }
isCompiled()225   bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
226 
227   void traceChildren(JSTracer* trc);
228   void discardJitCode();
229   void finalize(JSFreeOp* fop);
230 
offsetOfSource()231   static size_t offsetOfSource() { return offsetOfHeaderPtr(); }
232 
offsetOfPatternAtom()233   static size_t offsetOfPatternAtom() {
234     return offsetof(RegExpShared, patternAtom_);
235   }
236 
offsetOfFlags()237   static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
238 
offsetOfPairCount()239   static size_t offsetOfPairCount() {
240     return offsetof(RegExpShared, pairCount_);
241   }
242 
offsetOfJitCode(bool latin1)243   static size_t offsetOfJitCode(bool latin1) {
244     return offsetof(RegExpShared, compilationArray) +
245            (CompilationIndex(latin1) * sizeof(RegExpCompilation)) +
246            offsetof(RegExpCompilation, jitCode);
247   }
248 
offsetOfGroupsTemplate()249   static size_t offsetOfGroupsTemplate() {
250     return offsetof(RegExpShared, groupsTemplate_);
251   }
252 
253   size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
254 
255 #ifdef DEBUG
256   static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res,
257                            HandleLinearString input);
258 #endif
259 
260  public:
261   static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared;
262 };
263 
264 class RegExpZone {
265   struct Key {
266     JSAtom* atom = nullptr;
267     JS::RegExpFlags flags = JS::RegExpFlag::NoFlags;
268 
269     Key() = default;
KeyKey270     Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {}
KeyKey271     MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared)
272         : atom(shared.unbarrieredGet()->getSource()),
273           flags(shared.unbarrieredGet()->getFlags()) {}
274 
275     using Lookup = Key;
hashKey276     static HashNumber hash(const Lookup& l) {
277       HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom);
278       return mozilla::AddToHash(hash, l.flags.value());
279     }
matchKey280     static bool match(Key l, Key r) {
281       return l.atom == r.atom && l.flags == r.flags;
282     }
283   };
284 
285   /*
286    * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
287    * that was not marked is deleted and removed from the set.
288    */
289   using Set = JS::WeakCache<
290       JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>;
291   Set set_;
292 
293  public:
294   explicit RegExpZone(Zone* zone);
295 
~RegExpZone()296   ~RegExpZone() { MOZ_ASSERT(set_.empty()); }
297 
empty()298   bool empty() const { return set_.empty(); }
299 
maybeGet(JSAtom * source,JS::RegExpFlags flags)300   RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const {
301     Set::Ptr p = set_.lookup(Key(source, flags));
302     return p ? *p : nullptr;
303   }
304 
305   RegExpShared* get(JSContext* cx, HandleAtom source, JS::RegExpFlags flags);
306 
307 #ifdef DEBUG
clear()308   void clear() { set_.clear(); }
309 #endif
310 
311   size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
312 };
313 
314 class RegExpRealm {
315  public:
316   enum ResultTemplateKind { Normal, WithIndices, Indices, NumKinds };
317 
318  private:
319   /*
320    * The template objects that the result of re.exec() is based on, if
321    * there is a result. These are used in CreateRegExpMatchResult.
322    * There are three template objects, each of which is an ArrayObject
323    * with some additional properties. We decide which to use based on
324    * the |hasIndices| (/d) flag.
325    *
326    *  Normal: Has |index|, |input|, and |groups| properties.
327    *          Used for the result object if |hasIndices| is not set.
328    *
329    *  WithIndices: Has |index|, |input|, |groups|, and |indices| properties.
330    *               Used for the result object if |hasIndices| is set.
331    *
332    *  Indices: Has a |groups| property. If |hasIndices| is set, used
333    *           for the |.indices| property of the result object.
334    */
335   WeakHeapPtr<ArrayObject*>
336       matchResultTemplateObjects_[ResultTemplateKind::NumKinds];
337 
338   /*
339    * The shape of RegExp.prototype object that satisfies following:
340    *   * RegExp.prototype.flags getter is not modified
341    *   * RegExp.prototype.global getter is not modified
342    *   * RegExp.prototype.ignoreCase getter is not modified
343    *   * RegExp.prototype.multiline getter is not modified
344    *   * RegExp.prototype.dotAll getter is not modified
345    *   * RegExp.prototype.sticky getter is not modified
346    *   * RegExp.prototype.unicode getter is not modified
347    *   * RegExp.prototype.exec is an own data property
348    *   * RegExp.prototype[@@match] is an own data property
349    *   * RegExp.prototype[@@search] is an own data property
350    */
351   WeakHeapPtr<Shape*> optimizableRegExpPrototypeShape_;
352 
353   /*
354    * The shape of RegExp instance that satisfies following:
355    *   * lastProperty is lastIndex
356    *   * prototype is RegExp.prototype
357    */
358   WeakHeapPtr<Shape*> optimizableRegExpInstanceShape_;
359 
360   ArrayObject* createMatchResultTemplateObject(JSContext* cx,
361                                                ResultTemplateKind kind);
362 
363  public:
364   explicit RegExpRealm();
365 
366   void traceWeak(JSTracer* trc);
367 
368   static const size_t MatchResultObjectIndexSlot = 0;
369   static const size_t MatchResultObjectInputSlot = 1;
370   static const size_t MatchResultObjectGroupsSlot = 2;
371   static const size_t MatchResultObjectIndicesSlot = 3;
372 
373   static const size_t IndicesGroupsSlot = 0;
374 
offsetOfMatchResultObjectIndexSlot()375   static size_t offsetOfMatchResultObjectIndexSlot() {
376     return sizeof(Value) * MatchResultObjectIndexSlot;
377   }
offsetOfMatchResultObjectInputSlot()378   static size_t offsetOfMatchResultObjectInputSlot() {
379     return sizeof(Value) * MatchResultObjectInputSlot;
380   }
offsetOfMatchResultObjectGroupsSlot()381   static size_t offsetOfMatchResultObjectGroupsSlot() {
382     return sizeof(Value) * MatchResultObjectGroupsSlot;
383   }
offsetOfMatchResultObjectIndicesSlot()384   static size_t offsetOfMatchResultObjectIndicesSlot() {
385     return sizeof(Value) * MatchResultObjectIndicesSlot;
386   }
387 
388   /* Get or create template object used to base the result of .exec() on. */
389   ArrayObject* getOrCreateMatchResultTemplateObject(
390       JSContext* cx, ResultTemplateKind kind = ResultTemplateKind::Normal) {
391     if (matchResultTemplateObjects_[kind]) {
392       return matchResultTemplateObjects_[kind];
393     }
394     return createMatchResultTemplateObject(cx, kind);
395   }
396 
getOptimizableRegExpPrototypeShape()397   Shape* getOptimizableRegExpPrototypeShape() {
398     return optimizableRegExpPrototypeShape_;
399   }
setOptimizableRegExpPrototypeShape(Shape * shape)400   void setOptimizableRegExpPrototypeShape(Shape* shape) {
401     optimizableRegExpPrototypeShape_ = shape;
402   }
getOptimizableRegExpInstanceShape()403   Shape* getOptimizableRegExpInstanceShape() {
404     return optimizableRegExpInstanceShape_;
405   }
setOptimizableRegExpInstanceShape(Shape * shape)406   void setOptimizableRegExpInstanceShape(Shape* shape) {
407     optimizableRegExpInstanceShape_ = shape;
408   }
409 
offsetOfOptimizableRegExpPrototypeShape()410   static size_t offsetOfOptimizableRegExpPrototypeShape() {
411     return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_);
412   }
offsetOfOptimizableRegExpInstanceShape()413   static size_t offsetOfOptimizableRegExpInstanceShape() {
414     return offsetof(RegExpRealm, optimizableRegExpInstanceShape_);
415   }
416 };
417 
418 RegExpRunStatus ExecuteRegExpAtomRaw(RegExpShared* re, JSLinearString* input,
419                                      size_t start, MatchPairs* matchPairs);
420 
421 } /* namespace js */
422 
423 namespace JS {
424 namespace ubi {
425 
426 template <>
427 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> {
428  protected:
Concrete(js::RegExpShared * ptr)429   explicit Concrete(js::RegExpShared* ptr)
430       : TracerConcrete<js::RegExpShared>(ptr) {}
431 
432  public:
construct(void * storage,js::RegExpShared * ptr)433   static void construct(void* storage, js::RegExpShared* ptr) {
434     new (storage) Concrete(ptr);
435   }
436 
coarseType()437   CoarseType coarseType() const final { return CoarseType::Other; }
438 
439   Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
440 
typeName()441   const char16_t* typeName() const override { return concreteTypeName; }
442   static const char16_t concreteTypeName[];
443 };
444 
445 }  // namespace ubi
446 }  // namespace JS
447 
448 #endif /* vm_RegExpShared_h */
449