1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 /**
8 * The compiled representation of a RegExp, potentially shared among RegExp
9 * instances created during separate evaluations of a single RegExp literal in
10 * source code.
11 */
12
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
15
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
18
19 #include "gc/Barrier.h"
20 #include "gc/Marking.h"
21 #include "gc/ZoneAllocator.h"
22 #include "irregexp/RegExpTypes.h"
23 #include "jit/JitCode.h"
24 #include "jit/JitOptions.h"
25 #include "js/AllocPolicy.h"
26 #include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
27 #include "js/UbiNode.h"
28 #include "js/Vector.h"
29 #include "vm/ArrayObject.h"
30 #include "vm/JSAtom.h"
31
32 namespace js {
33
34 class ArrayObject;
35 class RegExpRealm;
36 class RegExpShared;
37 class RegExpStatics;
38 class VectorMatchPairs;
39
40 using RootedRegExpShared = JS::Rooted<RegExpShared*>;
41 using HandleRegExpShared = JS::Handle<RegExpShared*>;
42 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
43
44 enum RegExpRunStatus : int32_t {
45 RegExpRunStatus_Error = -1,
46 RegExpRunStatus_Success = 1,
47 RegExpRunStatus_Success_NotFound = 0,
48 };
49
IsNativeRegExpEnabled()50 inline bool IsNativeRegExpEnabled() {
51 #ifdef JS_CODEGEN_NONE
52 return false;
53 #else
54 return jit::JitOptions.nativeRegExp;
55 #endif
56 }
57
58 /*
59 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
60 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
61 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
62 * table so that they can be reused when compiling the same regex string.
63 *
64 * To save memory, a RegExpShared is not created for a RegExpObject until it is
65 * needed for execution. When a RegExpShared needs to be created, it is looked
66 * up in a per-compartment table to allow reuse between objects.
67 *
68 * During a GC, RegExpShared instances are marked and swept like GC things.
69 * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
70 * than explicitly tracing them, so that the RegExpShared and any jitcode can
71 * be reclaimed quicker. However, the RegExpShareds are traced through by
72 * objects when we are preserving jitcode in their zone, to avoid the same
73 * recompilation inefficiencies as normal Ion and baseline compilation.
74 */
75 class RegExpShared
76 : public gc::CellWithTenuredGCPointer<gc::TenuredCell, JSAtom> {
77 public:
78 enum class Kind { Unparsed, Atom, RegExp };
79 enum class CodeKind { Bytecode, Jitcode, Any };
80
81 using ByteCode = js::irregexp::ByteArrayData;
82 using JitCodeTable = js::irregexp::ByteArray;
83 using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
84
85 private:
86 friend class RegExpStatics;
87 friend class RegExpZone;
88
89 struct RegExpCompilation {
90 WeakHeapPtr<jit::JitCode*> jitCode;
91 ByteCode* byteCode = nullptr;
92
93 bool compiled(CodeKind kind = CodeKind::Any) const {
94 switch (kind) {
95 case CodeKind::Bytecode:
96 return !!byteCode;
97 case CodeKind::Jitcode:
98 return !!jitCode;
99 case CodeKind::Any:
100 return !!byteCode || !!jitCode;
101 }
102 MOZ_CRASH("Unreachable");
103 }
104
byteCodeLengthRegExpCompilation105 size_t byteCodeLength() const {
106 MOZ_ASSERT(byteCode);
107 return byteCode->length;
108 }
109 };
110
111 public:
112 /* Source to the RegExp, for lazy compilation. Stored in the cell header. */
getSource()113 JSAtom* getSource() const { return headerPtr(); }
114
115 private:
116 RegExpCompilation compilationArray[2];
117
118 uint32_t pairCount_;
119 JS::RegExpFlags flags;
120
121 RegExpShared::Kind kind_ = Kind::Unparsed;
122 GCPtrAtom patternAtom_;
123 uint32_t maxRegisters_ = 0;
124 uint32_t ticks_ = 0;
125
126 uint32_t numNamedCaptures_ = {};
127 uint32_t* namedCaptureIndices_ = {};
128 GCPtr<PlainObject*> groupsTemplate_ = {};
129
CompilationIndex(bool latin1)130 static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
131
132 // Tables referenced by JIT code.
133 JitCodeTables tables;
134
135 /* Internal functions. */
136 RegExpShared(JSAtom* source, JS::RegExpFlags flags);
137
compilation(bool latin1)138 const RegExpCompilation& compilation(bool latin1) const {
139 return compilationArray[CompilationIndex(latin1)];
140 }
141
compilation(bool latin1)142 RegExpCompilation& compilation(bool latin1) {
143 return compilationArray[CompilationIndex(latin1)];
144 }
145
146 public:
147 ~RegExpShared() = delete;
148
149 static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
150 HandleLinearString input, CodeKind code);
151
152 static RegExpRunStatus executeAtom(MutableHandleRegExpShared re,
153 HandleLinearString input, size_t start,
154 VectorMatchPairs* matches);
155
156 // Execute this RegExp on input starting from searchIndex, filling in matches.
157 static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
158 HandleLinearString input, size_t searchIndex,
159 VectorMatchPairs* matches);
160
161 // Register a table with this RegExpShared, and take ownership.
addTable(JitCodeTable table)162 bool addTable(JitCodeTable table) { return tables.append(std::move(table)); }
163
164 /* Accessors */
165
pairCount()166 size_t pairCount() const {
167 MOZ_ASSERT(kind() != Kind::Unparsed);
168 return pairCount_;
169 }
170
kind()171 RegExpShared::Kind kind() const { return kind_; }
172
173 // Use simple string matching for this regexp.
174 void useAtomMatch(HandleAtom pattern);
175
176 // Use the regular expression engine for this regexp.
177 void useRegExpMatch(size_t parenCount);
178
179 static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
180 HandleNativeObject namedCaptures);
getGroupsTemplate()181 PlainObject* getGroupsTemplate() { return groupsTemplate_; }
182
183 void tierUpTick();
184 bool markedForTierUp() const;
185
setByteCode(ByteCode * code,bool latin1)186 void setByteCode(ByteCode* code, bool latin1) {
187 compilation(latin1).byteCode = code;
188 }
getByteCode(bool latin1)189 ByteCode* getByteCode(bool latin1) const {
190 return compilation(latin1).byteCode;
191 }
setJitCode(jit::JitCode * code,bool latin1)192 void setJitCode(jit::JitCode* code, bool latin1) {
193 compilation(latin1).jitCode = code;
194 }
getJitCode(bool latin1)195 jit::JitCode* getJitCode(bool latin1) const {
196 return compilation(latin1).jitCode;
197 }
getMaxRegisters()198 uint32_t getMaxRegisters() const { return maxRegisters_; }
updateMaxRegisters(uint32_t numRegisters)199 void updateMaxRegisters(uint32_t numRegisters) {
200 maxRegisters_ = std::max(maxRegisters_, numRegisters);
201 }
202
numNamedCaptures()203 uint32_t numNamedCaptures() const { return numNamedCaptures_; }
getNamedCaptureIndex(uint32_t idx)204 int32_t getNamedCaptureIndex(uint32_t idx) const {
205 MOZ_ASSERT(idx < numNamedCaptures());
206 MOZ_ASSERT(namedCaptureIndices_);
207 return namedCaptureIndices_[idx];
208 }
209
patternAtom()210 JSAtom* patternAtom() const { return patternAtom_; }
211
getFlags()212 JS::RegExpFlags getFlags() const { return flags; }
213
hasIndices()214 bool hasIndices() const { return flags.hasIndices(); }
global()215 bool global() const { return flags.global(); }
ignoreCase()216 bool ignoreCase() const { return flags.ignoreCase(); }
multiline()217 bool multiline() const { return flags.multiline(); }
dotAll()218 bool dotAll() const { return flags.dotAll(); }
unicode()219 bool unicode() const { return flags.unicode(); }
sticky()220 bool sticky() const { return flags.sticky(); }
221
222 bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
223 return compilation(latin1).compiled(codeKind);
224 }
isCompiled()225 bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
226
227 void traceChildren(JSTracer* trc);
228 void discardJitCode();
229 void finalize(JSFreeOp* fop);
230
offsetOfSource()231 static size_t offsetOfSource() { return offsetOfHeaderPtr(); }
232
offsetOfPatternAtom()233 static size_t offsetOfPatternAtom() {
234 return offsetof(RegExpShared, patternAtom_);
235 }
236
offsetOfFlags()237 static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
238
offsetOfPairCount()239 static size_t offsetOfPairCount() {
240 return offsetof(RegExpShared, pairCount_);
241 }
242
offsetOfJitCode(bool latin1)243 static size_t offsetOfJitCode(bool latin1) {
244 return offsetof(RegExpShared, compilationArray) +
245 (CompilationIndex(latin1) * sizeof(RegExpCompilation)) +
246 offsetof(RegExpCompilation, jitCode);
247 }
248
offsetOfGroupsTemplate()249 static size_t offsetOfGroupsTemplate() {
250 return offsetof(RegExpShared, groupsTemplate_);
251 }
252
253 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
254
255 #ifdef DEBUG
256 static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res,
257 HandleLinearString input);
258 #endif
259
260 public:
261 static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared;
262 };
263
264 class RegExpZone {
265 struct Key {
266 JSAtom* atom = nullptr;
267 JS::RegExpFlags flags = JS::RegExpFlag::NoFlags;
268
269 Key() = default;
KeyKey270 Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {}
KeyKey271 MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared)
272 : atom(shared.unbarrieredGet()->getSource()),
273 flags(shared.unbarrieredGet()->getFlags()) {}
274
275 using Lookup = Key;
hashKey276 static HashNumber hash(const Lookup& l) {
277 HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom);
278 return mozilla::AddToHash(hash, l.flags.value());
279 }
matchKey280 static bool match(Key l, Key r) {
281 return l.atom == r.atom && l.flags == r.flags;
282 }
283 };
284
285 /*
286 * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
287 * that was not marked is deleted and removed from the set.
288 */
289 using Set = JS::WeakCache<
290 JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>;
291 Set set_;
292
293 public:
294 explicit RegExpZone(Zone* zone);
295
~RegExpZone()296 ~RegExpZone() { MOZ_ASSERT(set_.empty()); }
297
empty()298 bool empty() const { return set_.empty(); }
299
maybeGet(JSAtom * source,JS::RegExpFlags flags)300 RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const {
301 Set::Ptr p = set_.lookup(Key(source, flags));
302 return p ? *p : nullptr;
303 }
304
305 RegExpShared* get(JSContext* cx, HandleAtom source, JS::RegExpFlags flags);
306
307 #ifdef DEBUG
clear()308 void clear() { set_.clear(); }
309 #endif
310
311 size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
312 };
313
314 class RegExpRealm {
315 public:
316 enum ResultTemplateKind { Normal, WithIndices, Indices, NumKinds };
317
318 private:
319 /*
320 * The template objects that the result of re.exec() is based on, if
321 * there is a result. These are used in CreateRegExpMatchResult.
322 * There are three template objects, each of which is an ArrayObject
323 * with some additional properties. We decide which to use based on
324 * the |hasIndices| (/d) flag.
325 *
326 * Normal: Has |index|, |input|, and |groups| properties.
327 * Used for the result object if |hasIndices| is not set.
328 *
329 * WithIndices: Has |index|, |input|, |groups|, and |indices| properties.
330 * Used for the result object if |hasIndices| is set.
331 *
332 * Indices: Has a |groups| property. If |hasIndices| is set, used
333 * for the |.indices| property of the result object.
334 */
335 WeakHeapPtr<ArrayObject*>
336 matchResultTemplateObjects_[ResultTemplateKind::NumKinds];
337
338 /*
339 * The shape of RegExp.prototype object that satisfies following:
340 * * RegExp.prototype.flags getter is not modified
341 * * RegExp.prototype.global getter is not modified
342 * * RegExp.prototype.ignoreCase getter is not modified
343 * * RegExp.prototype.multiline getter is not modified
344 * * RegExp.prototype.dotAll getter is not modified
345 * * RegExp.prototype.sticky getter is not modified
346 * * RegExp.prototype.unicode getter is not modified
347 * * RegExp.prototype.exec is an own data property
348 * * RegExp.prototype[@@match] is an own data property
349 * * RegExp.prototype[@@search] is an own data property
350 */
351 WeakHeapPtr<Shape*> optimizableRegExpPrototypeShape_;
352
353 /*
354 * The shape of RegExp instance that satisfies following:
355 * * lastProperty is lastIndex
356 * * prototype is RegExp.prototype
357 */
358 WeakHeapPtr<Shape*> optimizableRegExpInstanceShape_;
359
360 ArrayObject* createMatchResultTemplateObject(JSContext* cx,
361 ResultTemplateKind kind);
362
363 public:
364 explicit RegExpRealm();
365
366 void traceWeak(JSTracer* trc);
367
368 static const size_t MatchResultObjectIndexSlot = 0;
369 static const size_t MatchResultObjectInputSlot = 1;
370 static const size_t MatchResultObjectGroupsSlot = 2;
371 static const size_t MatchResultObjectIndicesSlot = 3;
372
373 static const size_t IndicesGroupsSlot = 0;
374
offsetOfMatchResultObjectIndexSlot()375 static size_t offsetOfMatchResultObjectIndexSlot() {
376 return sizeof(Value) * MatchResultObjectIndexSlot;
377 }
offsetOfMatchResultObjectInputSlot()378 static size_t offsetOfMatchResultObjectInputSlot() {
379 return sizeof(Value) * MatchResultObjectInputSlot;
380 }
offsetOfMatchResultObjectGroupsSlot()381 static size_t offsetOfMatchResultObjectGroupsSlot() {
382 return sizeof(Value) * MatchResultObjectGroupsSlot;
383 }
offsetOfMatchResultObjectIndicesSlot()384 static size_t offsetOfMatchResultObjectIndicesSlot() {
385 return sizeof(Value) * MatchResultObjectIndicesSlot;
386 }
387
388 /* Get or create template object used to base the result of .exec() on. */
389 ArrayObject* getOrCreateMatchResultTemplateObject(
390 JSContext* cx, ResultTemplateKind kind = ResultTemplateKind::Normal) {
391 if (matchResultTemplateObjects_[kind]) {
392 return matchResultTemplateObjects_[kind];
393 }
394 return createMatchResultTemplateObject(cx, kind);
395 }
396
getOptimizableRegExpPrototypeShape()397 Shape* getOptimizableRegExpPrototypeShape() {
398 return optimizableRegExpPrototypeShape_;
399 }
setOptimizableRegExpPrototypeShape(Shape * shape)400 void setOptimizableRegExpPrototypeShape(Shape* shape) {
401 optimizableRegExpPrototypeShape_ = shape;
402 }
getOptimizableRegExpInstanceShape()403 Shape* getOptimizableRegExpInstanceShape() {
404 return optimizableRegExpInstanceShape_;
405 }
setOptimizableRegExpInstanceShape(Shape * shape)406 void setOptimizableRegExpInstanceShape(Shape* shape) {
407 optimizableRegExpInstanceShape_ = shape;
408 }
409
offsetOfOptimizableRegExpPrototypeShape()410 static size_t offsetOfOptimizableRegExpPrototypeShape() {
411 return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_);
412 }
offsetOfOptimizableRegExpInstanceShape()413 static size_t offsetOfOptimizableRegExpInstanceShape() {
414 return offsetof(RegExpRealm, optimizableRegExpInstanceShape_);
415 }
416 };
417
418 RegExpRunStatus ExecuteRegExpAtomRaw(RegExpShared* re, JSLinearString* input,
419 size_t start, MatchPairs* matchPairs);
420
421 } /* namespace js */
422
423 namespace JS {
424 namespace ubi {
425
426 template <>
427 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> {
428 protected:
Concrete(js::RegExpShared * ptr)429 explicit Concrete(js::RegExpShared* ptr)
430 : TracerConcrete<js::RegExpShared>(ptr) {}
431
432 public:
construct(void * storage,js::RegExpShared * ptr)433 static void construct(void* storage, js::RegExpShared* ptr) {
434 new (storage) Concrete(ptr);
435 }
436
coarseType()437 CoarseType coarseType() const final { return CoarseType::Other; }
438
439 Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
440
typeName()441 const char16_t* typeName() const override { return concreteTypeName; }
442 static const char16_t concreteTypeName[];
443 };
444
445 } // namespace ubi
446 } // namespace JS
447
448 #endif /* vm_RegExpShared_h */
449