1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 /**
8 * The compiled representation of a RegExp, potentially shared among RegExp
9 * instances created during separate evaluations of a single RegExp literal in
10 * source code.
11 */
12
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
15
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
18
19 #include "gc/Barrier.h"
20 #include "gc/Marking.h"
21 #include "gc/ZoneAllocator.h"
22 #include "irregexp/RegExpTypes.h"
23 #include "jit/JitCode.h"
24 #include "jit/JitOptions.h"
25 #include "js/AllocPolicy.h"
26 #include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
27 #include "js/UbiNode.h"
28 #include "js/Vector.h"
29 #include "vm/ArrayObject.h"
30 #include "vm/JSAtom.h"
31
32 namespace js {
33
34 class ArrayObject;
35 class RegExpRealm;
36 class RegExpShared;
37 class RegExpStatics;
38 class VectorMatchPairs;
39
40 using RootedRegExpShared = JS::Rooted<RegExpShared*>;
41 using HandleRegExpShared = JS::Handle<RegExpShared*>;
42 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
43
/*
 * Status code returned by the regexp execution entry points declared in this
 * header.
 *
 * The underlying type is fixed to int32_t and every enumerator carries an
 * explicit value, so the numeric values do not depend on declaration order.
 */
enum RegExpRunStatus : int32_t {
  RegExpRunStatus_Error = -1,
  RegExpRunStatus_Success_NotFound = 0,
  RegExpRunStatus_Success = 1,
};
49
IsNativeRegExpEnabled()50 inline bool IsNativeRegExpEnabled() {
51 return jit::HasJitBackend() && jit::JitOptions.nativeRegExp;
52 }
53
54 /*
55 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
56 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
57 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
58 * table so that they can be reused when compiling the same regex string.
59 *
60 * To save memory, a RegExpShared is not created for a RegExpObject until it is
61 * needed for execution. When a RegExpShared needs to be created, it is looked
62 * up in a per-compartment table to allow reuse between objects.
63 *
64 * During a GC, RegExpShared instances are marked and swept like GC things.
65 * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
66 * than explicitly tracing them, so that the RegExpShared and any jitcode can
67 * be reclaimed quicker. However, the RegExpShareds are traced through by
68 * objects when we are preserving jitcode in their zone, to avoid the same
69 * recompilation inefficiencies as normal Ion and baseline compilation.
70 */
71 class RegExpShared
72 : public gc::CellWithTenuredGCPointer<gc::TenuredCell, JSAtom> {
73 public:
74 enum class Kind { Unparsed, Atom, RegExp };
75 enum class CodeKind { Bytecode, Jitcode, Any };
76
77 using ByteCode = js::irregexp::ByteArrayData;
78 using JitCodeTable = js::irregexp::ByteArray;
79 using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
80
81 private:
82 friend class RegExpStatics;
83 friend class RegExpZone;
84
85 struct RegExpCompilation {
86 WeakHeapPtr<jit::JitCode*> jitCode;
87 ByteCode* byteCode = nullptr;
88
89 bool compiled(CodeKind kind = CodeKind::Any) const {
90 switch (kind) {
91 case CodeKind::Bytecode:
92 return !!byteCode;
93 case CodeKind::Jitcode:
94 return !!jitCode;
95 case CodeKind::Any:
96 return !!byteCode || !!jitCode;
97 }
98 MOZ_CRASH("Unreachable");
99 }
100
byteCodeLengthRegExpCompilation101 size_t byteCodeLength() const {
102 MOZ_ASSERT(byteCode);
103 return byteCode->length;
104 }
105 };
106
107 public:
108 /* Source to the RegExp, for lazy compilation. Stored in the cell header. */
getSource()109 JSAtom* getSource() const { return headerPtr(); }
110
111 private:
112 RegExpCompilation compilationArray[2];
113
114 uint32_t pairCount_;
115 JS::RegExpFlags flags;
116
117 RegExpShared::Kind kind_ = Kind::Unparsed;
118 GCPtrAtom patternAtom_;
119 uint32_t maxRegisters_ = 0;
120 uint32_t ticks_ = 0;
121
122 uint32_t numNamedCaptures_ = {};
123 uint32_t* namedCaptureIndices_ = {};
124 GCPtr<PlainObject*> groupsTemplate_ = {};
125
CompilationIndex(bool latin1)126 static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
127
128 // Tables referenced by JIT code.
129 JitCodeTables tables;
130
131 /* Internal functions. */
132 RegExpShared(JSAtom* source, JS::RegExpFlags flags);
133
compilation(bool latin1)134 const RegExpCompilation& compilation(bool latin1) const {
135 return compilationArray[CompilationIndex(latin1)];
136 }
137
compilation(bool latin1)138 RegExpCompilation& compilation(bool latin1) {
139 return compilationArray[CompilationIndex(latin1)];
140 }
141
142 public:
143 ~RegExpShared() = delete;
144
145 static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
146 HandleLinearString input, CodeKind code);
147
148 static RegExpRunStatus executeAtom(MutableHandleRegExpShared re,
149 HandleLinearString input, size_t start,
150 VectorMatchPairs* matches);
151
152 // Execute this RegExp on input starting from searchIndex, filling in matches.
153 static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
154 HandleLinearString input, size_t searchIndex,
155 VectorMatchPairs* matches);
156
157 // Register a table with this RegExpShared, and take ownership.
addTable(JitCodeTable table)158 bool addTable(JitCodeTable table) { return tables.append(std::move(table)); }
159
160 /* Accessors */
161
pairCount()162 size_t pairCount() const {
163 MOZ_ASSERT(kind() != Kind::Unparsed);
164 return pairCount_;
165 }
166
kind()167 RegExpShared::Kind kind() const { return kind_; }
168
169 // Use simple string matching for this regexp.
170 void useAtomMatch(HandleAtom pattern);
171
172 // Use the regular expression engine for this regexp.
173 void useRegExpMatch(size_t parenCount);
174
175 static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
176 HandleNativeObject namedCaptures);
getGroupsTemplate()177 PlainObject* getGroupsTemplate() { return groupsTemplate_; }
178
179 void tierUpTick();
180 bool markedForTierUp() const;
181
setByteCode(ByteCode * code,bool latin1)182 void setByteCode(ByteCode* code, bool latin1) {
183 compilation(latin1).byteCode = code;
184 }
getByteCode(bool latin1)185 ByteCode* getByteCode(bool latin1) const {
186 return compilation(latin1).byteCode;
187 }
setJitCode(jit::JitCode * code,bool latin1)188 void setJitCode(jit::JitCode* code, bool latin1) {
189 compilation(latin1).jitCode = code;
190 }
getJitCode(bool latin1)191 jit::JitCode* getJitCode(bool latin1) const {
192 return compilation(latin1).jitCode;
193 }
getMaxRegisters()194 uint32_t getMaxRegisters() const { return maxRegisters_; }
updateMaxRegisters(uint32_t numRegisters)195 void updateMaxRegisters(uint32_t numRegisters) {
196 maxRegisters_ = std::max(maxRegisters_, numRegisters);
197 }
198
numNamedCaptures()199 uint32_t numNamedCaptures() const { return numNamedCaptures_; }
getNamedCaptureIndex(uint32_t idx)200 int32_t getNamedCaptureIndex(uint32_t idx) const {
201 MOZ_ASSERT(idx < numNamedCaptures());
202 MOZ_ASSERT(namedCaptureIndices_);
203 return namedCaptureIndices_[idx];
204 }
205
patternAtom()206 JSAtom* patternAtom() const { return patternAtom_; }
207
getFlags()208 JS::RegExpFlags getFlags() const { return flags; }
209
hasIndices()210 bool hasIndices() const { return flags.hasIndices(); }
global()211 bool global() const { return flags.global(); }
ignoreCase()212 bool ignoreCase() const { return flags.ignoreCase(); }
multiline()213 bool multiline() const { return flags.multiline(); }
dotAll()214 bool dotAll() const { return flags.dotAll(); }
unicode()215 bool unicode() const { return flags.unicode(); }
sticky()216 bool sticky() const { return flags.sticky(); }
217
218 bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
219 return compilation(latin1).compiled(codeKind);
220 }
isCompiled()221 bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
222
223 void traceChildren(JSTracer* trc);
224 void discardJitCode();
225 void finalize(JSFreeOp* fop);
226
offsetOfSource()227 static size_t offsetOfSource() { return offsetOfHeaderPtr(); }
228
offsetOfPatternAtom()229 static size_t offsetOfPatternAtom() {
230 return offsetof(RegExpShared, patternAtom_);
231 }
232
offsetOfFlags()233 static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
234
offsetOfPairCount()235 static size_t offsetOfPairCount() {
236 return offsetof(RegExpShared, pairCount_);
237 }
238
offsetOfJitCode(bool latin1)239 static size_t offsetOfJitCode(bool latin1) {
240 return offsetof(RegExpShared, compilationArray) +
241 (CompilationIndex(latin1) * sizeof(RegExpCompilation)) +
242 offsetof(RegExpCompilation, jitCode);
243 }
244
offsetOfGroupsTemplate()245 static size_t offsetOfGroupsTemplate() {
246 return offsetof(RegExpShared, groupsTemplate_);
247 }
248
249 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
250
251 #ifdef DEBUG
252 static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res,
253 HandleLinearString input);
254 #endif
255
256 public:
257 static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared;
258 };
259
260 class RegExpZone {
261 struct Key {
262 JSAtom* atom = nullptr;
263 JS::RegExpFlags flags = JS::RegExpFlag::NoFlags;
264
265 Key() = default;
KeyKey266 Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {}
KeyKey267 MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared)
268 : atom(shared.unbarrieredGet()->getSource()),
269 flags(shared.unbarrieredGet()->getFlags()) {}
270
271 using Lookup = Key;
hashKey272 static HashNumber hash(const Lookup& l) {
273 HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom);
274 return mozilla::AddToHash(hash, l.flags.value());
275 }
matchKey276 static bool match(Key l, Key r) {
277 return l.atom == r.atom && l.flags == r.flags;
278 }
279 };
280
281 /*
282 * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
283 * that was not marked is deleted and removed from the set.
284 */
285 using Set = JS::WeakCache<
286 JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>;
287 Set set_;
288
289 public:
290 explicit RegExpZone(Zone* zone);
291
~RegExpZone()292 ~RegExpZone() { MOZ_ASSERT(set_.empty()); }
293
empty()294 bool empty() const { return set_.empty(); }
295
maybeGet(JSAtom * source,JS::RegExpFlags flags)296 RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const {
297 Set::Ptr p = set_.lookup(Key(source, flags));
298 return p ? *p : nullptr;
299 }
300
301 RegExpShared* get(JSContext* cx, HandleAtom source, JS::RegExpFlags flags);
302
303 #ifdef DEBUG
clear()304 void clear() { set_.clear(); }
305 #endif
306
307 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
308 };
309
310 class RegExpRealm {
311 public:
312 enum ResultTemplateKind { Normal, WithIndices, Indices, NumKinds };
313
314 private:
315 /*
316 * The template objects that the result of re.exec() is based on, if
317 * there is a result. These are used in CreateRegExpMatchResult.
318 * There are three template objects, each of which is an ArrayObject
319 * with some additional properties. We decide which to use based on
320 * the |hasIndices| (/d) flag.
321 *
322 * Normal: Has |index|, |input|, and |groups| properties.
323 * Used for the result object if |hasIndices| is not set.
324 *
325 * WithIndices: Has |index|, |input|, |groups|, and |indices| properties.
326 * Used for the result object if |hasIndices| is set.
327 *
328 * Indices: Has a |groups| property. If |hasIndices| is set, used
329 * for the |.indices| property of the result object.
330 */
331 WeakHeapPtr<ArrayObject*>
332 matchResultTemplateObjects_[ResultTemplateKind::NumKinds];
333
334 /*
335 * The shape of RegExp.prototype object that satisfies following:
336 * * RegExp.prototype.flags getter is not modified
337 * * RegExp.prototype.global getter is not modified
338 * * RegExp.prototype.ignoreCase getter is not modified
339 * * RegExp.prototype.multiline getter is not modified
340 * * RegExp.prototype.dotAll getter is not modified
341 * * RegExp.prototype.sticky getter is not modified
342 * * RegExp.prototype.unicode getter is not modified
343 * * RegExp.prototype.exec is an own data property
344 * * RegExp.prototype[@@match] is an own data property
345 * * RegExp.prototype[@@search] is an own data property
346 */
347 WeakHeapPtr<Shape*> optimizableRegExpPrototypeShape_;
348
349 /*
350 * The shape of RegExp instance that satisfies following:
351 * * lastProperty is lastIndex
352 * * prototype is RegExp.prototype
353 */
354 WeakHeapPtr<Shape*> optimizableRegExpInstanceShape_;
355
356 ArrayObject* createMatchResultTemplateObject(JSContext* cx,
357 ResultTemplateKind kind);
358
359 public:
360 explicit RegExpRealm();
361
362 void traceWeak(JSTracer* trc);
363
364 static const size_t MatchResultObjectIndexSlot = 0;
365 static const size_t MatchResultObjectInputSlot = 1;
366 static const size_t MatchResultObjectGroupsSlot = 2;
367 static const size_t MatchResultObjectIndicesSlot = 3;
368
369 static const size_t IndicesGroupsSlot = 0;
370
offsetOfMatchResultObjectIndexSlot()371 static size_t offsetOfMatchResultObjectIndexSlot() {
372 return sizeof(Value) * MatchResultObjectIndexSlot;
373 }
offsetOfMatchResultObjectInputSlot()374 static size_t offsetOfMatchResultObjectInputSlot() {
375 return sizeof(Value) * MatchResultObjectInputSlot;
376 }
offsetOfMatchResultObjectGroupsSlot()377 static size_t offsetOfMatchResultObjectGroupsSlot() {
378 return sizeof(Value) * MatchResultObjectGroupsSlot;
379 }
offsetOfMatchResultObjectIndicesSlot()380 static size_t offsetOfMatchResultObjectIndicesSlot() {
381 return sizeof(Value) * MatchResultObjectIndicesSlot;
382 }
383
384 /* Get or create template object used to base the result of .exec() on. */
385 ArrayObject* getOrCreateMatchResultTemplateObject(
386 JSContext* cx, ResultTemplateKind kind = ResultTemplateKind::Normal) {
387 if (matchResultTemplateObjects_[kind]) {
388 return matchResultTemplateObjects_[kind];
389 }
390 return createMatchResultTemplateObject(cx, kind);
391 }
392
getOptimizableRegExpPrototypeShape()393 Shape* getOptimizableRegExpPrototypeShape() {
394 return optimizableRegExpPrototypeShape_;
395 }
setOptimizableRegExpPrototypeShape(Shape * shape)396 void setOptimizableRegExpPrototypeShape(Shape* shape) {
397 optimizableRegExpPrototypeShape_ = shape;
398 }
getOptimizableRegExpInstanceShape()399 Shape* getOptimizableRegExpInstanceShape() {
400 return optimizableRegExpInstanceShape_;
401 }
setOptimizableRegExpInstanceShape(Shape * shape)402 void setOptimizableRegExpInstanceShape(Shape* shape) {
403 optimizableRegExpInstanceShape_ = shape;
404 }
405
offsetOfOptimizableRegExpPrototypeShape()406 static size_t offsetOfOptimizableRegExpPrototypeShape() {
407 return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_);
408 }
offsetOfOptimizableRegExpInstanceShape()409 static size_t offsetOfOptimizableRegExpInstanceShape() {
410 return offsetof(RegExpRealm, optimizableRegExpInstanceShape_);
411 }
412 };
413
414 RegExpRunStatus ExecuteRegExpAtomRaw(RegExpShared* re, JSLinearString* input,
415 size_t start, MatchPairs* matchPairs);
416
417 } /* namespace js */
418
419 namespace JS {
420 namespace ubi {
421
422 template <>
423 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> {
424 protected:
Concrete(js::RegExpShared * ptr)425 explicit Concrete(js::RegExpShared* ptr)
426 : TracerConcrete<js::RegExpShared>(ptr) {}
427
428 public:
construct(void * storage,js::RegExpShared * ptr)429 static void construct(void* storage, js::RegExpShared* ptr) {
430 new (storage) Concrete(ptr);
431 }
432
coarseType()433 CoarseType coarseType() const final { return CoarseType::Other; }
434
435 Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
436
typeName()437 const char16_t* typeName() const override { return concreteTypeName; }
438 static const char16_t concreteTypeName[];
439 };
440
441 } // namespace ubi
442 } // namespace JS
443
444 #endif /* vm_RegExpShared_h */
445