1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 /**
8 * The compiled representation of a RegExp, potentially shared among RegExp
9 * instances created during separate evaluations of a single RegExp literal in
10 * source code.
11 */
12
13 #ifndef vm_RegExpShared_h
14 #define vm_RegExpShared_h
15
16 #include "mozilla/Assertions.h"
17 #include "mozilla/MemoryReporting.h"
18
19 #include "gc/Barrier.h"
20 #include "gc/Marking.h"
21 #include "gc/ZoneAllocator.h"
22 #include "jit/JitOptions.h"
23 #include "js/AllocPolicy.h"
24 #include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
25 #include "js/UbiNode.h"
26 #include "js/Vector.h"
27 #ifdef ENABLE_NEW_REGEXP
28 # include "new-regexp/RegExpTypes.h"
29 #endif
30 #include "vm/ArrayObject.h"
31 #include "vm/JSAtom.h"
32
33 namespace js {
34
35 class ArrayObject;
36 class RegExpRealm;
37 class RegExpShared;
38 class RegExpStatics;
39 class VectorMatchPairs;
40
41 using RootedRegExpShared = JS::Rooted<RegExpShared*>;
42 using HandleRegExpShared = JS::Handle<RegExpShared*>;
43 using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
44
// Outcome of running a regular expression. The numeric values are spelled out
// explicitly and the underlying type is fixed to int32_t.
enum RegExpRunStatus : int32_t {
  // The run failed with an error.
  RegExpRunStatus_Error = -1,
  // The run completed without finding a match.
  RegExpRunStatus_Success_NotFound = 0,
  // The run completed and found a match.
  RegExpRunStatus_Success = 1,
};
50
51 #ifdef ENABLE_NEW_REGEXP
52
IsNativeRegExpEnabled()53 inline bool IsNativeRegExpEnabled() {
54 # ifdef JS_CODEGEN_NONE
55 return false;
56 # else
57 return jit::JitOptions.nativeRegExp;
58 # endif
59 }
60
61 #else
/*
 * Header found at the start of a regexp bytecode buffer. The buffer is
 * reinterpreted as this struct, so the field order and sizes must not change.
 */
struct RegExpByteCodeHeader {
  // Number of instructions.
  uint32_t length;

  // Number of registers used.
  uint32_t numRegisters;
};
69 #endif // ENABLE_NEW_REGEXP
70
/*
 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
 * table so that they can be reused when compiling the same regex string.
 *
 * To save memory, a RegExpShared is not created for a RegExpObject until it is
 * needed for execution. When a RegExpShared needs to be created, it is looked
 * up in a per-zone table (see RegExpZone) to allow reuse between objects.
 *
 * During a GC, RegExpShared instances are marked and swept like GC things.
 * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
 * than explicitly tracing them, so that the RegExpShared and any jitcode can
 * be reclaimed quicker. However, the RegExpShareds are traced through by
 * objects when we are preserving jitcode in their zone, to avoid the same
 * recompilation inefficiencies as normal Ion and baseline compilation.
 */
88 class RegExpShared : public gc::TenuredCell {
89 public:
90 enum class Kind { Unparsed, Atom, RegExp };
91 enum class CodeKind { Bytecode, Jitcode, Any };
92
93 #ifdef ENABLE_NEW_REGEXP
94 using ByteCode = js::irregexp::ByteArrayData;
95 using JitCodeTable = js::irregexp::ByteArray;
96 #else
97 using ByteCode = uint8_t;
98 using JitCodeTable = UniquePtr<uint8_t[], JS::FreePolicy>;
99 #endif
100 using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
101
102 private:
103 friend class RegExpStatics;
104 friend class RegExpZone;
105
106 struct RegExpCompilation {
107 WeakHeapPtr<jit::JitCode*> jitCode;
108 ByteCode* byteCode = nullptr;
109
110 bool compiled(CodeKind kind = CodeKind::Any) const {
111 switch (kind) {
112 case CodeKind::Bytecode:
113 return !!byteCode;
114 case CodeKind::Jitcode:
115 return !!jitCode;
116 case CodeKind::Any:
117 return !!byteCode || !!jitCode;
118 }
119 MOZ_CRASH("Unreachable");
120 }
121
byteCodeLengthRegExpCompilation122 size_t byteCodeLength() const {
123 MOZ_ASSERT(byteCode);
124 #ifdef ENABLE_NEW_REGEXP
125 return byteCode->length;
126 #else
127 auto header = reinterpret_cast<RegExpByteCodeHeader*>(byteCode);
128 return header->length;
129 #endif
130 }
131 };
132
133 /* Source to the RegExp, for lazy compilation. */
134 using HeaderWithAtom = gc::CellHeaderWithTenuredGCPointer<JSAtom>;
135 HeaderWithAtom headerAndSource;
136
137 RegExpCompilation compilationArray[2];
138
139 uint32_t pairCount_;
140 JS::RegExpFlags flags;
141
142 #ifdef ENABLE_NEW_REGEXP
143 RegExpShared::Kind kind_ = Kind::Unparsed;
144 GCPtrAtom patternAtom_;
145 uint32_t maxRegisters_ = 0;
146 uint32_t ticks_ = 0;
147 #else
148 bool canStringMatch = false;
149 #endif
150
151 #ifdef ENABLE_NEW_REGEXP
152 uint32_t numNamedCaptures_ = {};
153 uint32_t* namedCaptureIndices_ = {};
154 GCPtr<PlainObject*> groupsTemplate_ = {};
155 #endif
156
CompilationIndex(bool latin1)157 static int CompilationIndex(bool latin1) { return latin1 ? 0 : 1; }
158
159 // Tables referenced by JIT code.
160 JitCodeTables tables;
161
162 /* Internal functions. */
163 RegExpShared(JSAtom* source, JS::RegExpFlags flags);
164
165 static bool compile(JSContext* cx, MutableHandleRegExpShared res,
166 HandleLinearString input, CodeKind code);
167 static bool compile(JSContext* cx, MutableHandleRegExpShared res,
168 HandleAtom pattern, HandleLinearString input,
169 CodeKind code);
170
compilation(bool latin1)171 const RegExpCompilation& compilation(bool latin1) const {
172 return compilationArray[CompilationIndex(latin1)];
173 }
174
compilation(bool latin1)175 RegExpCompilation& compilation(bool latin1) {
176 return compilationArray[CompilationIndex(latin1)];
177 }
178
179 public:
180 ~RegExpShared() = delete;
181
182 static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
183 HandleLinearString input, CodeKind code);
184
185 static RegExpRunStatus executeAtom(JSContext* cx,
186 MutableHandleRegExpShared re,
187 HandleLinearString input, size_t start,
188 VectorMatchPairs* matches);
189
190 // Execute this RegExp on input starting from searchIndex, filling in matches.
191 static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
192 HandleLinearString input, size_t searchIndex,
193 VectorMatchPairs* matches);
194
195 // Register a table with this RegExpShared, and take ownership.
addTable(JitCodeTable table)196 bool addTable(JitCodeTable table) { return tables.append(std::move(table)); }
197
198 /* Accessors */
199
pairCount()200 size_t pairCount() const {
201 #ifdef ENABLE_NEW_REGEXP
202 MOZ_ASSERT(kind() != Kind::Unparsed);
203 #else
204 MOZ_ASSERT(isCompiled());
205 #endif
206 return pairCount_;
207 }
208
209 #ifdef ENABLE_NEW_REGEXP
kind()210 RegExpShared::Kind kind() const { return kind_; }
211
212 // Use simple string matching for this regexp.
213 void useAtomMatch(HandleAtom pattern);
214
215 // Use the regular expression engine for this regexp.
216 void useRegExpMatch(size_t parenCount);
217
218 static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re,
219 HandleNativeObject namedCaptures);
getGroupsTemplate()220 PlainObject* getGroupsTemplate() { return groupsTemplate_; }
221
222 void tierUpTick();
223 bool markedForTierUp() const;
224
setByteCode(ByteCode * code,bool latin1)225 void setByteCode(ByteCode* code, bool latin1) {
226 compilation(latin1).byteCode = code;
227 }
getByteCode(bool latin1)228 ByteCode* getByteCode(bool latin1) const {
229 return compilation(latin1).byteCode;
230 }
setJitCode(jit::JitCode * code,bool latin1)231 void setJitCode(jit::JitCode* code, bool latin1) {
232 compilation(latin1).jitCode = code;
233 }
getJitCode(bool latin1)234 jit::JitCode* getJitCode(bool latin1) const {
235 return compilation(latin1).jitCode;
236 }
getMaxRegisters()237 uint32_t getMaxRegisters() const { return maxRegisters_; }
updateMaxRegisters(uint32_t numRegisters)238 void updateMaxRegisters(uint32_t numRegisters) {
239 maxRegisters_ = std::max(maxRegisters_, numRegisters);
240 }
241
numNamedCaptures()242 uint32_t numNamedCaptures() const { return numNamedCaptures_; }
getNamedCaptureIndex(uint32_t idx)243 int32_t getNamedCaptureIndex(uint32_t idx) const {
244 MOZ_ASSERT(idx < numNamedCaptures());
245 MOZ_ASSERT(namedCaptureIndices_);
246 return namedCaptureIndices_[idx];
247 }
248
249 #endif
250
getSource()251 JSAtom* getSource() const { return headerAndSource.ptr(); }
252
253 #ifdef ENABLE_NEW_REGEXP
patternAtom()254 JSAtom* patternAtom() const { return patternAtom_; }
255 #else
patternAtom()256 JSAtom* patternAtom() const { return getSource(); }
257 #endif
258
getFlags()259 JS::RegExpFlags getFlags() const { return flags; }
260
global()261 bool global() const { return flags.global(); }
ignoreCase()262 bool ignoreCase() const { return flags.ignoreCase(); }
multiline()263 bool multiline() const { return flags.multiline(); }
dotAll()264 bool dotAll() const { return flags.dotAll(); }
unicode()265 bool unicode() const { return flags.unicode(); }
sticky()266 bool sticky() const { return flags.sticky(); }
267
268 bool isCompiled(bool latin1, CodeKind codeKind = CodeKind::Any) const {
269 return compilation(latin1).compiled(codeKind);
270 }
isCompiled()271 bool isCompiled() const { return isCompiled(true) || isCompiled(false); }
272
273 void traceChildren(JSTracer* trc);
274 void discardJitCode();
275 void finalize(JSFreeOp* fop);
276
offsetOfSource()277 static size_t offsetOfSource() {
278 return offsetof(RegExpShared, headerAndSource) +
279 HeaderWithAtom::offsetOfPtr();
280 }
281
offsetOfFlags()282 static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); }
283
offsetOfPairCount()284 static size_t offsetOfPairCount() {
285 return offsetof(RegExpShared, pairCount_);
286 }
287
offsetOfJitCode(bool latin1)288 static size_t offsetOfJitCode(bool latin1) {
289 return offsetof(RegExpShared, compilationArray) +
290 (CompilationIndex(latin1) * sizeof(RegExpCompilation)) +
291 offsetof(RegExpCompilation, jitCode);
292 }
293 #ifdef ENABLE_NEW_REGEXP
offsetOfGroupsTemplate()294 static size_t offsetOfGroupsTemplate() {
295 return offsetof(RegExpShared, groupsTemplate_);
296 }
297 #endif
298
299 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
300
301 #ifdef DEBUG
302 static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res,
303 HandleLinearString input);
304 #endif
305
306 public:
307 static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared;
cellHeader()308 const gc::CellHeader& cellHeader() const { return headerAndSource; }
309 };
310
311 class RegExpZone {
312 struct Key {
313 JSAtom* atom = nullptr;
314 JS::RegExpFlags flags = JS::RegExpFlag::NoFlags;
315
316 Key() = default;
KeyKey317 Key(JSAtom* atom, JS::RegExpFlags flags) : atom(atom), flags(flags) {}
KeyKey318 MOZ_IMPLICIT Key(const WeakHeapPtr<RegExpShared*>& shared)
319 : atom(shared.unbarrieredGet()->getSource()),
320 flags(shared.unbarrieredGet()->getFlags()) {}
321
322 using Lookup = Key;
hashKey323 static HashNumber hash(const Lookup& l) {
324 HashNumber hash = DefaultHasher<JSAtom*>::hash(l.atom);
325 return mozilla::AddToHash(hash, l.flags.value());
326 }
matchKey327 static bool match(Key l, Key r) {
328 return l.atom == r.atom && l.flags == r.flags;
329 }
330 };
331
332 /*
333 * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
334 * that was not marked is deleted and removed from the set.
335 */
336 using Set = JS::WeakCache<
337 JS::GCHashSet<WeakHeapPtr<RegExpShared*>, Key, ZoneAllocPolicy>>;
338 Set set_;
339
340 public:
341 explicit RegExpZone(Zone* zone);
342
~RegExpZone()343 ~RegExpZone() { MOZ_ASSERT(set_.empty()); }
344
empty()345 bool empty() const { return set_.empty(); }
346
maybeGet(JSAtom * source,JS::RegExpFlags flags)347 RegExpShared* maybeGet(JSAtom* source, JS::RegExpFlags flags) const {
348 Set::Ptr p = set_.lookup(Key(source, flags));
349 return p ? *p : nullptr;
350 }
351
352 RegExpShared* get(JSContext* cx, HandleAtom source, JS::RegExpFlags flags);
353
354 #ifdef DEBUG
clear()355 void clear() { set_.clear(); }
356 #endif
357
358 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
359 };
360
361 class RegExpRealm {
362 /*
363 * This is the template object where the result of re.exec() is based on,
364 * if there is a result. This is used in CreateRegExpMatchResult to set
365 * the input/index properties faster.
366 */
367 WeakHeapPtr<ArrayObject*> matchResultTemplateObject_;
368
369 /*
370 * The shape of RegExp.prototype object that satisfies following:
371 * * RegExp.prototype.flags getter is not modified
372 * * RegExp.prototype.global getter is not modified
373 * * RegExp.prototype.ignoreCase getter is not modified
374 * * RegExp.prototype.multiline getter is not modified
375 * * RegExp.prototype.dotAll getter is not modified
376 * * RegExp.prototype.sticky getter is not modified
377 * * RegExp.prototype.unicode getter is not modified
378 * * RegExp.prototype.exec is an own data property
379 * * RegExp.prototype[@@match] is an own data property
380 * * RegExp.prototype[@@search] is an own data property
381 */
382 WeakHeapPtr<Shape*> optimizableRegExpPrototypeShape_;
383
384 /*
385 * The shape of RegExp instance that satisfies following:
386 * * lastProperty is lastIndex
387 * * prototype is RegExp.prototype
388 */
389 WeakHeapPtr<Shape*> optimizableRegExpInstanceShape_;
390
391 ArrayObject* createMatchResultTemplateObject(JSContext* cx);
392
393 public:
394 explicit RegExpRealm();
395
396 void traceWeak(JSTracer* trc);
397
398 static const size_t MatchResultObjectIndexSlot = 0;
399 static const size_t MatchResultObjectInputSlot = 1;
400 #ifdef ENABLE_NEW_REGEXP
401 static const size_t MatchResultObjectGroupsSlot = 2;
402 #endif
403
offsetOfMatchResultObjectIndexSlot()404 static size_t offsetOfMatchResultObjectIndexSlot() {
405 return sizeof(Value) * MatchResultObjectIndexSlot;
406 }
offsetOfMatchResultObjectInputSlot()407 static size_t offsetOfMatchResultObjectInputSlot() {
408 return sizeof(Value) * MatchResultObjectInputSlot;
409 }
410 #ifdef ENABLE_NEW_REGEXP
offsetOfMatchResultObjectGroupsSlot()411 static size_t offsetOfMatchResultObjectGroupsSlot() {
412 return sizeof(Value) * MatchResultObjectGroupsSlot;
413 }
414 #endif
415
416 /* Get or create template object used to base the result of .exec() on. */
getOrCreateMatchResultTemplateObject(JSContext * cx)417 ArrayObject* getOrCreateMatchResultTemplateObject(JSContext* cx) {
418 if (matchResultTemplateObject_) {
419 return matchResultTemplateObject_;
420 }
421 return createMatchResultTemplateObject(cx);
422 }
423
getOptimizableRegExpPrototypeShape()424 Shape* getOptimizableRegExpPrototypeShape() {
425 return optimizableRegExpPrototypeShape_;
426 }
setOptimizableRegExpPrototypeShape(Shape * shape)427 void setOptimizableRegExpPrototypeShape(Shape* shape) {
428 optimizableRegExpPrototypeShape_ = shape;
429 }
getOptimizableRegExpInstanceShape()430 Shape* getOptimizableRegExpInstanceShape() {
431 return optimizableRegExpInstanceShape_;
432 }
setOptimizableRegExpInstanceShape(Shape * shape)433 void setOptimizableRegExpInstanceShape(Shape* shape) {
434 optimizableRegExpInstanceShape_ = shape;
435 }
436
offsetOfOptimizableRegExpPrototypeShape()437 static size_t offsetOfOptimizableRegExpPrototypeShape() {
438 return offsetof(RegExpRealm, optimizableRegExpPrototypeShape_);
439 }
offsetOfOptimizableRegExpInstanceShape()440 static size_t offsetOfOptimizableRegExpInstanceShape() {
441 return offsetof(RegExpRealm, optimizableRegExpInstanceShape_);
442 }
443 };
444
445 } /* namespace js */
446
447 namespace JS {
448 namespace ubi {
449
450 template <>
451 class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared> {
452 protected:
Concrete(js::RegExpShared * ptr)453 explicit Concrete(js::RegExpShared* ptr)
454 : TracerConcrete<js::RegExpShared>(ptr) {}
455
456 public:
construct(void * storage,js::RegExpShared * ptr)457 static void construct(void* storage, js::RegExpShared* ptr) {
458 new (storage) Concrete(ptr);
459 }
460
coarseType()461 CoarseType coarseType() const final { return CoarseType::Other; }
462
463 Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
464
typeName()465 const char16_t* typeName() const override { return concreteTypeName; }
466 static const char16_t concreteTypeName[];
467 };
468
469 } // namespace ubi
470 } // namespace JS
471
472 #endif /* vm_RegExpShared_h */
473