1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "builtin/RegExp.h"
8
9 #include "mozilla/Casting.h"
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/TextUtils.h"
12
13 #include "jsapi.h"
14
15 #include "frontend/TokenStream.h"
16 #include "irregexp/RegExpAPI.h"
17 #include "jit/InlinableNatives.h"
18 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_NEWREGEXP_FLAGGED
19 #include "js/PropertySpec.h"
20 #include "js/RegExpFlags.h" // JS::RegExpFlag, JS::RegExpFlags
21 #include "util/StringBuffer.h"
22 #include "util/Unicode.h"
23 #include "vm/JSContext.h"
24 #include "vm/RegExpStatics.h"
25 #include "vm/SelfHosting.h"
26 #include "vm/WellKnownAtom.h" // js_*_str
27
28 #include "vm/EnvironmentObject-inl.h"
29 #include "vm/JSObject-inl.h"
30 #include "vm/NativeObject-inl.h"
31 #include "vm/ObjectOperations-inl.h"
32 #include "vm/PlainObject-inl.h"
33
34 using namespace js;
35
36 using mozilla::AssertedCast;
37 using mozilla::CheckedInt;
38 using mozilla::IsAsciiDigit;
39
40 using JS::CompileOptions;
41 using JS::RegExpFlag;
42 using JS::RegExpFlags;
43
44 // Allocate an object for the |.groups| or |.indices.groups| property
45 // of a regexp match result.
CreateGroupsObject(JSContext * cx,HandlePlainObject groupsTemplate)46 static PlainObject* CreateGroupsObject(JSContext* cx,
47 HandlePlainObject groupsTemplate) {
48 if (groupsTemplate->inDictionaryMode()) {
49 return NewPlainObjectWithProto(cx, nullptr);
50 }
51
52 // The groups template object is stored in RegExpShared, which is shared
53 // across compartments and realms. So watch out for the case when the template
54 // object's realm is different from the current realm.
55 if (cx->realm() != groupsTemplate->realm()) {
56 return PlainObject::createWithTemplateFromDifferentRealm(cx,
57 groupsTemplate);
58 }
59
60 return PlainObject::createWithTemplate(cx, groupsTemplate);
61 }
62
63 /*
64 * Implements RegExpBuiltinExec: Steps 18-35
65 * https://tc39.es/ecma262/#sec-regexpbuiltinexec
66 */
CreateRegExpMatchResult(JSContext * cx,HandleRegExpShared re,HandleString input,const MatchPairs & matches,MutableHandleValue rval)67 bool js::CreateRegExpMatchResult(JSContext* cx, HandleRegExpShared re,
68 HandleString input, const MatchPairs& matches,
69 MutableHandleValue rval) {
70 MOZ_ASSERT(re);
71 MOZ_ASSERT(input);
72
73 /*
74 * Create the (slow) result array for a match.
75 *
76 * Array contents:
77 * 0: matched string
78 * 1..pairCount-1: paren matches
79 * input: input string
80 * index: start index for the match
81 * groups: named capture groups for the match
82 * indices: capture indices for the match, if required
83 */
84
85 bool hasIndices = re->hasIndices();
86
87 // Get the templateObject that defines the shape and type of the output
88 // object.
89 RegExpRealm::ResultTemplateKind kind =
90 hasIndices ? RegExpRealm::ResultTemplateKind::WithIndices
91 : RegExpRealm::ResultTemplateKind::Normal;
92 ArrayObject* templateObject =
93 cx->realm()->regExps.getOrCreateMatchResultTemplateObject(cx, kind);
94 if (!templateObject) {
95 return false;
96 }
97
98 // Steps 18-19
99 size_t numPairs = matches.length();
100 MOZ_ASSERT(numPairs > 0);
101
102 // Steps 20-21: Allocate the match result object.
103 RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(
104 cx, numPairs, templateObject));
105 if (!arr) {
106 return false;
107 }
108
109 // Steps 28-29 and 33 a-d: Initialize the elements of the match result.
110 // Store a Value for each match pair.
111 for (size_t i = 0; i < numPairs; i++) {
112 const MatchPair& pair = matches[i];
113
114 if (pair.isUndefined()) {
115 MOZ_ASSERT(i != 0); // Since we had a match, first pair must be present.
116 arr->setDenseInitializedLength(i + 1);
117 arr->initDenseElement(i, UndefinedValue());
118 } else {
119 JSLinearString* str =
120 NewDependentString(cx, input, pair.start, pair.length());
121 if (!str) {
122 return false;
123 }
124 arr->setDenseInitializedLength(i + 1);
125 arr->initDenseElement(i, StringValue(str));
126 }
127 }
128
129 // Step 34a (reordered): Allocate and initialize the indices object if needed.
130 // This is an inlined implementation of MakeIndicesArray:
131 // https://tc39.es/ecma262/#sec-makeindicesarray
132 RootedArrayObject indices(cx);
133 RootedPlainObject indicesGroups(cx);
134 if (hasIndices) {
135 // MakeIndicesArray: step 8
136 ArrayObject* indicesTemplate =
137 cx->realm()->regExps.getOrCreateMatchResultTemplateObject(
138 cx, RegExpRealm::ResultTemplateKind::Indices);
139 indices =
140 NewDenseFullyAllocatedArrayWithTemplate(cx, numPairs, indicesTemplate);
141 if (!indices) {
142 return false;
143 }
144
145 // MakeIndicesArray: steps 10-12
146 if (re->numNamedCaptures() > 0) {
147 RootedPlainObject groupsTemplate(cx, re->getGroupsTemplate());
148 indicesGroups = CreateGroupsObject(cx, groupsTemplate);
149 if (!indicesGroups) {
150 return false;
151 }
152 indices->setSlot(RegExpRealm::IndicesGroupsSlot,
153 ObjectValue(*indicesGroups));
154 } else {
155 indices->setSlot(RegExpRealm::IndicesGroupsSlot, UndefinedValue());
156 }
157
158 // MakeIndicesArray: step 13 a-d. (Step 13.e is implemented below.)
159 for (size_t i = 0; i < numPairs; i++) {
160 const MatchPair& pair = matches[i];
161
162 if (pair.isUndefined()) {
163 // Since we had a match, first pair must be present.
164 MOZ_ASSERT(i != 0);
165 indices->setDenseInitializedLength(i + 1);
166 indices->initDenseElement(i, UndefinedValue());
167 } else {
168 RootedArrayObject indexPair(cx, NewDenseFullyAllocatedArray(cx, 2));
169 if (!indexPair) {
170 return false;
171 }
172 indexPair->setDenseInitializedLength(2);
173 indexPair->initDenseElement(0, Int32Value(pair.start));
174 indexPair->initDenseElement(1, Int32Value(pair.limit));
175
176 indices->setDenseInitializedLength(i + 1);
177 indices->initDenseElement(i, ObjectValue(*indexPair));
178 }
179 }
180 }
181
182 // Steps 30-31 (reordered): Allocate the groups object (if needed).
183 RootedPlainObject groups(cx);
184 bool groupsInDictionaryMode = false;
185 if (re->numNamedCaptures() > 0) {
186 RootedPlainObject groupsTemplate(cx, re->getGroupsTemplate());
187 groupsInDictionaryMode = groupsTemplate->inDictionaryMode();
188 groups = CreateGroupsObject(cx, groupsTemplate);
189 if (!groups) {
190 return false;
191 }
192 }
193
194 // Step 33 e-f: Initialize the properties of |groups| and |indices.groups|.
195 // The groups template object stores the names of the named captures
196 // in the the order in which they are defined. The named capture
197 // indices vector stores the corresponding capture indices. In
198 // dictionary mode, we have to define the properties explicitly. If
199 // we are not in dictionary mode, we simply fill in the slots with
200 // the correct values.
201 if (groupsInDictionaryMode) {
202 RootedIdVector keys(cx);
203 RootedPlainObject groupsTemplate(cx, re->getGroupsTemplate());
204 if (!GetPropertyKeys(cx, groupsTemplate, 0, &keys)) {
205 return false;
206 }
207 MOZ_ASSERT(keys.length() == re->numNamedCaptures());
208 RootedId key(cx);
209 RootedValue val(cx);
210 for (uint32_t i = 0; i < keys.length(); i++) {
211 key = keys[i];
212 uint32_t idx = re->getNamedCaptureIndex(i);
213 val = arr->getDenseElement(idx);
214 if (!NativeDefineDataProperty(cx, groups, key, val, JSPROP_ENUMERATE)) {
215 return false;
216 }
217 // MakeIndicesArray: Step 13.e (reordered)
218 if (hasIndices) {
219 val = indices->getDenseElement(idx);
220 if (!NativeDefineDataProperty(cx, indicesGroups, key, val,
221 JSPROP_ENUMERATE)) {
222 return false;
223 }
224 }
225 }
226 } else {
227 for (uint32_t i = 0; i < re->numNamedCaptures(); i++) {
228 uint32_t idx = re->getNamedCaptureIndex(i);
229 groups->setSlot(i, arr->getDenseElement(idx));
230
231 // MakeIndicesArray: Step 13.e (reordered)
232 if (hasIndices) {
233 indicesGroups->setSlot(i, indices->getDenseElement(idx));
234 }
235 }
236 }
237
238 // Step 22 (reordered).
239 // Set the |index| property.
240 arr->setSlot(RegExpRealm::MatchResultObjectIndexSlot,
241 Int32Value(matches[0].start));
242
243 // Step 23 (reordered).
244 // Set the |input| property.
245 arr->setSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(input));
246
247 // Step 32 (reordered)
248 // Set the |groups| property.
249 arr->setSlot(RegExpRealm::MatchResultObjectGroupsSlot,
250 groups ? ObjectValue(*groups) : UndefinedValue());
251
252 // Step 34b
253 // Set the |indices| property.
254 if (re->hasIndices()) {
255 arr->setSlot(RegExpRealm::MatchResultObjectIndicesSlot,
256 ObjectValue(*indices));
257 }
258
259 #ifdef DEBUG
260 RootedValue test(cx);
261 RootedId id(cx, NameToId(cx->names().index));
262 if (!NativeGetProperty(cx, arr, id, &test)) {
263 return false;
264 }
265 MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectIndexSlot));
266 id = NameToId(cx->names().input);
267 if (!NativeGetProperty(cx, arr, id, &test)) {
268 return false;
269 }
270 MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectInputSlot));
271 #endif
272
273 // Step 35.
274 rval.setObject(*arr);
275 return true;
276 }
277
CreateRegExpSearchResult(const MatchPairs & matches)278 static int32_t CreateRegExpSearchResult(const MatchPairs& matches) {
279 /* Fit the start and limit of match into a int32_t. */
280 uint32_t position = matches[0].start;
281 uint32_t lastIndex = matches[0].limit;
282 MOZ_ASSERT(position < 0x8000);
283 MOZ_ASSERT(lastIndex < 0x8000);
284 return position | (lastIndex << 15);
285 }
286
287 /*
288 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
289 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
290 */
ExecuteRegExpImpl(JSContext * cx,RegExpStatics * res,MutableHandleRegExpShared re,HandleLinearString input,size_t searchIndex,VectorMatchPairs * matches)291 static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
292 MutableHandleRegExpShared re,
293 HandleLinearString input,
294 size_t searchIndex,
295 VectorMatchPairs* matches) {
296 RegExpRunStatus status =
297 RegExpShared::execute(cx, re, input, searchIndex, matches);
298
299 /* Out of spec: Update RegExpStatics. */
300 if (status == RegExpRunStatus_Success && res) {
301 if (!res->updateFromMatchPairs(cx, input, *matches)) {
302 return RegExpRunStatus_Error;
303 }
304 }
305 return status;
306 }
307
308 /* Legacy ExecuteRegExp behavior is baked into the JSAPI. */
ExecuteRegExpLegacy(JSContext * cx,RegExpStatics * res,Handle<RegExpObject * > reobj,HandleLinearString input,size_t * lastIndex,bool test,MutableHandleValue rval)309 bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res,
310 Handle<RegExpObject*> reobj,
311 HandleLinearString input, size_t* lastIndex,
312 bool test, MutableHandleValue rval) {
313 RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj));
314 if (!shared) {
315 return false;
316 }
317
318 VectorMatchPairs matches;
319
320 RegExpRunStatus status =
321 ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches);
322 if (status == RegExpRunStatus_Error) {
323 return false;
324 }
325
326 if (status == RegExpRunStatus_Success_NotFound) {
327 /* ExecuteRegExp() previously returned an array or null. */
328 rval.setNull();
329 return true;
330 }
331
332 *lastIndex = matches[0].limit;
333
334 if (test) {
335 /* Forbid an array, as an optimization. */
336 rval.setBoolean(true);
337 return true;
338 }
339
340 return CreateRegExpMatchResult(cx, shared, input, matches, rval);
341 }
342
CheckPatternSyntaxSlow(JSContext * cx,HandleAtom pattern,RegExpFlags flags)343 static bool CheckPatternSyntaxSlow(JSContext* cx, HandleAtom pattern,
344 RegExpFlags flags) {
345 LifoAllocScope allocScope(&cx->tempLifoAlloc());
346 CompileOptions options(cx);
347 frontend::DummyTokenStream dummyTokenStream(cx, options);
348 return irregexp::CheckPatternSyntax(cx, dummyTokenStream, pattern, flags);
349 }
350
CheckPatternSyntax(JSContext * cx,HandleAtom pattern,RegExpFlags flags)351 static RegExpShared* CheckPatternSyntax(JSContext* cx, HandleAtom pattern,
352 RegExpFlags flags) {
353 // If we already have a RegExpShared for this pattern/flags, we can
354 // avoid the much slower CheckPatternSyntaxSlow call.
355
356 RootedRegExpShared shared(cx, cx->zone()->regExps().maybeGet(pattern, flags));
357 if (shared) {
358 #ifdef DEBUG
359 // Assert the pattern is valid.
360 if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
361 MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed());
362 return nullptr;
363 }
364 #endif
365 return shared;
366 }
367
368 if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
369 return nullptr;
370 }
371
372 // Allocate and return a new RegExpShared so we will hit the fast path
373 // next time.
374 return cx->zone()->regExps().get(cx, pattern, flags);
375 }
376
377 /*
378 * ES 2016 draft Mar 25, 2016 21.2.3.2.2.
379 *
380 * Steps 14-15 set |obj|'s "lastIndex" property to zero. Some of
381 * RegExpInitialize's callers have a fresh RegExp not yet exposed to script:
382 * in these cases zeroing "lastIndex" is infallible. But others have a RegExp
383 * whose "lastIndex" property might have been made non-writable: here, zeroing
384 * "lastIndex" can fail. We efficiently solve this problem by completely
385 * removing "lastIndex" zeroing from the provided function.
386 *
387 * CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES!
388 *
389 * Because this function only ever returns a user-provided |obj| in the spec,
390 * we omit it and just return the usual success/failure.
391 */
RegExpInitializeIgnoringLastIndex(JSContext * cx,Handle<RegExpObject * > obj,HandleValue patternValue,HandleValue flagsValue)392 static bool RegExpInitializeIgnoringLastIndex(JSContext* cx,
393 Handle<RegExpObject*> obj,
394 HandleValue patternValue,
395 HandleValue flagsValue) {
396 RootedAtom pattern(cx);
397 if (patternValue.isUndefined()) {
398 /* Step 1. */
399 pattern = cx->names().empty;
400 } else {
401 /* Step 2. */
402 pattern = ToAtom<CanGC>(cx, patternValue);
403 if (!pattern) {
404 return false;
405 }
406 }
407
408 /* Step 3. */
409 RegExpFlags flags = RegExpFlag::NoFlags;
410 if (!flagsValue.isUndefined()) {
411 /* Step 4. */
412 RootedString flagStr(cx, ToString<CanGC>(cx, flagsValue));
413 if (!flagStr) {
414 return false;
415 }
416
417 /* Step 5. */
418 if (!ParseRegExpFlags(cx, flagStr, &flags)) {
419 return false;
420 }
421 }
422
423 /* Steps 7-8. */
424 RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags);
425 if (!shared) {
426 return false;
427 }
428
429 /* Steps 9-12. */
430 obj->initIgnoringLastIndex(pattern, flags);
431
432 obj->setShared(shared);
433
434 return true;
435 }
436
437 /* ES 2016 draft Mar 25, 2016 21.2.3.2.3. */
RegExpCreate(JSContext * cx,HandleValue patternValue,HandleValue flagsValue,MutableHandleValue rval)438 bool js::RegExpCreate(JSContext* cx, HandleValue patternValue,
439 HandleValue flagsValue, MutableHandleValue rval) {
440 /* Step 1. */
441 Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject));
442 if (!regexp) {
443 return false;
444 }
445
446 /* Step 2. */
447 if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue,
448 flagsValue)) {
449 return false;
450 }
451 regexp->zeroLastIndex(cx);
452
453 rval.setObject(*regexp);
454 return true;
455 }
456
IsRegExpObject(HandleValue v)457 MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) {
458 return v.isObject() && v.toObject().is<RegExpObject>();
459 }
460
461 /* ES6 draft rc3 7.2.8. */
IsRegExp(JSContext * cx,HandleValue value,bool * result)462 bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) {
463 /* Step 1. */
464 if (!value.isObject()) {
465 *result = false;
466 return true;
467 }
468 RootedObject obj(cx, &value.toObject());
469
470 /* Steps 2-3. */
471 RootedValue isRegExp(cx);
472 RootedId matchId(cx, PropertyKey::Symbol(cx->wellKnownSymbols().match));
473 if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) {
474 return false;
475 }
476
477 /* Step 4. */
478 if (!isRegExp.isUndefined()) {
479 *result = ToBoolean(isRegExp);
480 return true;
481 }
482
483 /* Steps 5-6. */
484 ESClass cls;
485 if (!GetClassOfValue(cx, value, &cls)) {
486 return false;
487 }
488
489 *result = cls == ESClass::RegExp;
490 return true;
491 }
492
493 /* ES6 B.2.5.1. */
regexp_compile_impl(JSContext * cx,const CallArgs & args)494 MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx,
495 const CallArgs& args) {
496 MOZ_ASSERT(IsRegExpObject(args.thisv()));
497
498 Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());
499
500 // Step 3.
501 RootedValue patternValue(cx, args.get(0));
502 ESClass cls;
503 if (!GetClassOfValue(cx, patternValue, &cls)) {
504 return false;
505 }
506 if (cls == ESClass::RegExp) {
507 // Step 3a.
508 if (args.hasDefined(1)) {
509 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
510 JSMSG_NEWREGEXP_FLAGGED);
511 return false;
512 }
513
514 // Beware! |patternObj| might be a proxy into another compartment, so
515 // don't assume |patternObj.is<RegExpObject>()|. For the same reason,
516 // don't reuse the RegExpShared below.
517 RootedObject patternObj(cx, &patternValue.toObject());
518
519 RootedAtom sourceAtom(cx);
520 RegExpFlags flags = RegExpFlag::NoFlags;
521 {
522 // Step 3b.
523 RegExpShared* shared = RegExpToShared(cx, patternObj);
524 if (!shared) {
525 return false;
526 }
527
528 sourceAtom = shared->getSource();
529 flags = shared->getFlags();
530 }
531
532 // Step 5, minus lastIndex zeroing.
533 regexp->initIgnoringLastIndex(sourceAtom, flags);
534 } else {
535 // Step 4.
536 RootedValue P(cx, patternValue);
537 RootedValue F(cx, args.get(1));
538
539 // Step 5, minus lastIndex zeroing.
540 if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
541 return false;
542 }
543 }
544
545 // The final niggling bit of step 5.
546 //
547 // |regexp| is user-exposed, but if its "lastIndex" property hasn't been
548 // made non-writable, we can still use a fast path to zero it.
549 if (regexp->lookupPure(cx->names().lastIndex)->writable()) {
550 regexp->zeroLastIndex(cx);
551 } else {
552 RootedValue zero(cx, Int32Value(0));
553 if (!SetProperty(cx, regexp, cx->names().lastIndex, zero)) {
554 return false;
555 }
556 }
557
558 args.rval().setObject(*regexp);
559 return true;
560 }
561
regexp_compile(JSContext * cx,unsigned argc,Value * vp)562 static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) {
563 CallArgs args = CallArgsFromVp(argc, vp);
564
565 /* Steps 1-2. */
566 return CallNonGenericMethod<IsRegExpObject, regexp_compile_impl>(cx, args);
567 }
568
/*
 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1.
 *
 * Native for the RegExp constructor. It handles being called both as a
 * plain function and as a constructor (see the |args.isConstructing()|
 * check). On success the resulting object is stored in |args.rval()|;
 * returns false on failure.
 */
bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  // Step 1.
  bool patternIsRegExp;
  if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) {
    return false;
  }

  // We can delay step 3 and step 4a until later, during
  // GetPrototypeFromBuiltinConstructor calls. Accessing the new.target
  // and the callee from the stack is unobservable.
  if (!args.isConstructing()) {
    // Step 3.b.
    if (patternIsRegExp && !args.hasDefined(1)) {
      RootedObject patternObj(cx, &args[0].toObject());

      // Step 3.b.i.
      RootedValue patternConstructor(cx);
      if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor,
                       &patternConstructor)) {
        return false;
      }

      // Step 3.b.ii.
      // The pattern's "constructor" is this very function: return the
      // pattern object unchanged instead of creating a new RegExp.
      if (patternConstructor.isObject() &&
          patternConstructor.toObject() == args.callee()) {
        args.rval().set(args[0]);
        return true;
      }
    }
  }

  RootedValue patternValue(cx, args.get(0));

  // Step 4.
  ESClass cls;
  if (!GetClassOfValue(cx, patternValue, &cls)) {
    return false;
  }
  if (cls == ESClass::RegExp) {
    // Beware!  |patternObj| might be a proxy into another compartment, so
    // don't assume |patternObj.is<RegExpObject>()|.
    RootedObject patternObj(cx, &patternValue.toObject());

    RootedAtom sourceAtom(cx);
    // |flags| is assigned from |shared| below before it is first read.
    RegExpFlags flags;
    RootedRegExpShared shared(cx);
    {
      // Step 4.a.
      shared = RegExpToShared(cx, patternObj);
      if (!shared) {
        return false;
      }
      sourceAtom = shared->getSource();

      // Step 4.b.
      // Get original flags in all cases, to compare with passed flags.
      flags = shared->getFlags();

      // If the RegExpShared is in another Zone, don't reuse it.
      if (cx->zone() != shared->zone()) {
        shared = nullptr;
      }
    }

    // Step 7.
    RootedObject proto(cx);
    if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
      return false;
    }

    Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
    if (!regexp) {
      return false;
    }

    // Step 8.
    if (args.hasDefined(1)) {
      // Step 4.c / 21.2.3.2.2 RegExpInitialize step 4.
      RegExpFlags flagsArg = RegExpFlag::NoFlags;
      RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
      if (!flagStr) {
        return false;
      }
      if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) {
        return false;
      }

      // Don't reuse the RegExpShared if we have different flags.
      if (flags != flagsArg) {
        shared = nullptr;
      }

      if (!flags.unicode() && flagsArg.unicode()) {
        // Have to check syntax again when adding 'u' flag.

        // ES 2017 draft rev 9b49a888e9dfe2667008a01b2754c3662059ae56
        // 21.2.3.2.2 step 7.
        shared = CheckPatternSyntax(cx, sourceAtom, flagsArg);
        if (!shared) {
          return false;
        }
      }
      // From here on the explicitly passed flags replace the pattern's own.
      flags = flagsArg;
    }

    regexp->initAndZeroLastIndex(sourceAtom, flags, cx);

    // |shared| is null here if it was rejected for reuse above; in that
    // case no RegExpShared is attached to the new object at this point.
    if (shared) {
      regexp->setShared(shared);
    }

    args.rval().setObject(*regexp);
    return true;
  }

  // |patternValue| is not of class RegExp: take the generic path, computing
  // the pattern (P) and flags (F) values to initialize from.
  RootedValue P(cx);
  RootedValue F(cx);

  // Step 5.
  if (patternIsRegExp) {
    RootedObject patternObj(cx, &patternValue.toObject());

    // Step 5.a.
    if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P)) {
      return false;
    }

    // Step 5.b.
    // Only consult the pattern's "flags" property when no flags argument
    // was supplied.
    F = args.get(1);
    if (F.isUndefined()) {
      if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F)) {
        return false;
      }
    }
  } else {
    // Steps 6.a-b.
    P = patternValue;
    F = args.get(1);
  }

  // Step 7.
  RootedObject proto(cx);
  if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
    return false;
  }

  Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
  if (!regexp) {
    return false;
  }

  // Step 8.
  if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
    return false;
  }
  // The initializer above does not touch lastIndex, so zero it here.
  regexp->zeroLastIndex(cx);

  args.rval().setObject(*regexp);
  return true;
}
734
735 /*
736 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1
737 * steps 4, 7-8.
738 */
regexp_construct_raw_flags(JSContext * cx,unsigned argc,Value * vp)739 bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) {
740 CallArgs args = CallArgsFromVp(argc, vp);
741 MOZ_ASSERT(args.length() == 2);
742 MOZ_ASSERT(!args.isConstructing());
743
744 // Step 4.a.
745 RootedAtom sourceAtom(cx, AtomizeString(cx, args[0].toString()));
746 if (!sourceAtom) {
747 return false;
748 }
749
750 // Step 4.c.
751 RegExpFlags flags = AssertedCast<uint8_t>(int32_t(args[1].toNumber()));
752
753 // Step 7.
754 RegExpObject* regexp = RegExpAlloc(cx, GenericObject);
755 if (!regexp) {
756 return false;
757 }
758
759 // Step 8.
760 regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
761 args.rval().setObject(*regexp);
762 return true;
763 }
764
765 // This is a specialized implementation of "UnwrapAndTypeCheckThis" for RegExp
766 // getters that need to return a special value for same-realm
767 // %RegExp.prototype%.
768 template <typename Fn>
RegExpGetter(JSContext * cx,CallArgs & args,const char * methodName,Fn && fn,HandleValue fallbackValue=UndefinedHandleValue)769 static bool RegExpGetter(JSContext* cx, CallArgs& args, const char* methodName,
770 Fn&& fn,
771 HandleValue fallbackValue = UndefinedHandleValue) {
772 JSObject* obj = nullptr;
773 if (args.thisv().isObject()) {
774 obj = &args.thisv().toObject();
775 if (IsWrapper(obj)) {
776 obj = CheckedUnwrapStatic(obj);
777 if (!obj) {
778 ReportAccessDenied(cx);
779 return false;
780 }
781 }
782 }
783
784 if (obj) {
785 // Step 4ff
786 if (obj->is<RegExpObject>()) {
787 return fn(&obj->as<RegExpObject>());
788 }
789
790 // Step 3.a. "If SameValue(R, %RegExp.prototype%) is true, return
791 // undefined."
792 // Or `return "(?:)"` for get RegExp.prototype.source.
793 if (obj == cx->global()->maybeGetRegExpPrototype()) {
794 args.rval().set(fallbackValue);
795 return true;
796 }
797
798 // fall-through
799 }
800
801 // Step 2. and Step 3.b.
802 JS_ReportErrorNumberLatin1(cx, GetErrorMessage, nullptr,
803 JSMSG_INCOMPATIBLE_REGEXP_GETTER, methodName,
804 InformalValueTypeName(args.thisv()));
805 return false;
806 }
807
regexp_hasIndices(JSContext * cx,unsigned argc,JS::Value * vp)808 bool js::regexp_hasIndices(JSContext* cx, unsigned argc, JS::Value* vp) {
809 CallArgs args = CallArgsFromVp(argc, vp);
810 return RegExpGetter(cx, args, "hasIndices", [args](RegExpObject* unwrapped) {
811 args.rval().setBoolean(unwrapped->hasIndices());
812 return true;
813 });
814 }
815
816 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
817 // 21.2.5.5 get RegExp.prototype.global
regexp_global(JSContext * cx,unsigned argc,JS::Value * vp)818 bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) {
819 CallArgs args = CallArgsFromVp(argc, vp);
820 return RegExpGetter(cx, args, "global", [args](RegExpObject* unwrapped) {
821 args.rval().setBoolean(unwrapped->global());
822 return true;
823 });
824 }
825
826 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
827 // 21.2.5.6 get RegExp.prototype.ignoreCase
regexp_ignoreCase(JSContext * cx,unsigned argc,JS::Value * vp)828 bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) {
829 CallArgs args = CallArgsFromVp(argc, vp);
830 return RegExpGetter(cx, args, "ignoreCase", [args](RegExpObject* unwrapped) {
831 args.rval().setBoolean(unwrapped->ignoreCase());
832 return true;
833 });
834 }
835
836 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
837 // 21.2.5.9 get RegExp.prototype.multiline
regexp_multiline(JSContext * cx,unsigned argc,JS::Value * vp)838 bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) {
839 CallArgs args = CallArgsFromVp(argc, vp);
840 return RegExpGetter(cx, args, "multiline", [args](RegExpObject* unwrapped) {
841 args.rval().setBoolean(unwrapped->multiline());
842 return true;
843 });
844 }
845
846 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
847 // 21.2.5.12 get RegExp.prototype.source
regexp_source(JSContext * cx,unsigned argc,JS::Value * vp)848 static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) {
849 CallArgs args = CallArgsFromVp(argc, vp);
850 // Step 3.a. Return "(?:)" for %RegExp.prototype%.
851 RootedValue fallback(cx, StringValue(cx->names().emptyRegExp));
852 return RegExpGetter(
853 cx, args, "source",
854 [cx, args](RegExpObject* unwrapped) {
855 RootedAtom src(cx, unwrapped->getSource());
856 MOZ_ASSERT(src);
857 // Mark potentially cross-compartment JSAtom.
858 cx->markAtom(src);
859
860 // Step 7.
861 JSString* escaped = EscapeRegExpPattern(cx, src);
862 if (!escaped) {
863 return false;
864 }
865
866 args.rval().setString(escaped);
867 return true;
868 },
869 fallback);
870 }
871
872 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
873 // 21.2.5.3 get RegExp.prototype.dotAll
regexp_dotAll(JSContext * cx,unsigned argc,JS::Value * vp)874 bool js::regexp_dotAll(JSContext* cx, unsigned argc, JS::Value* vp) {
875 CallArgs args = CallArgsFromVp(argc, vp);
876 return RegExpGetter(cx, args, "dotAll", [args](RegExpObject* unwrapped) {
877 args.rval().setBoolean(unwrapped->dotAll());
878 return true;
879 });
880 }
881
882 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
883 // 21.2.5.14 get RegExp.prototype.sticky
regexp_sticky(JSContext * cx,unsigned argc,JS::Value * vp)884 bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) {
885 CallArgs args = CallArgsFromVp(argc, vp);
886 return RegExpGetter(cx, args, "sticky", [args](RegExpObject* unwrapped) {
887 args.rval().setBoolean(unwrapped->sticky());
888 return true;
889 });
890 }
891
892 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
893 // 21.2.5.17 get RegExp.prototype.unicode
regexp_unicode(JSContext * cx,unsigned argc,JS::Value * vp)894 bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) {
895 CallArgs args = CallArgsFromVp(argc, vp);
896 return RegExpGetter(cx, args, "unicode", [args](RegExpObject* unwrapped) {
897 args.rval().setBoolean(unwrapped->unicode());
898 return true;
899 });
900 }
901
// Getter-only accessor properties for RegExp objects. "flags" is backed by
// the self-hosted function $RegExpFlagsGetter; every other entry is backed
// by one of the native getters defined above.
// NOTE(review): presumably installed on RegExp.prototype; the installation
// site is not visible here.
const JSPropertySpec js::regexp_properties[] = {
    JS_SELF_HOSTED_GET("flags", "$RegExpFlagsGetter", 0),
    JS_PSG("hasIndices", regexp_hasIndices, 0),
    JS_PSG("global", regexp_global, 0),
    JS_PSG("ignoreCase", regexp_ignoreCase, 0),
    JS_PSG("multiline", regexp_multiline, 0),
    JS_PSG("dotAll", regexp_dotAll, 0),
    JS_PSG("source", regexp_source, 0),
    JS_PSG("sticky", regexp_sticky, 0),
    JS_PSG("unicode", regexp_unicode, 0),
    JS_PS_END};
913
// Methods for RegExp objects. Only "compile" is a native (regexp_compile);
// all other entries name self-hosted functions. Both the toSource and
// toString entries map to the same self-hosted "$RegExpToString".
// NOTE(review): presumably installed on RegExp.prototype; the installation
// site is not visible here.
const JSFunctionSpec js::regexp_methods[] = {
    JS_SELF_HOSTED_FN(js_toSource_str, "$RegExpToString", 0, 0),
    JS_SELF_HOSTED_FN(js_toString_str, "$RegExpToString", 0, 0),
    JS_FN("compile", regexp_compile, 2, 0),
    JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0),
    JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0),
    JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0),
    JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0),
    JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0),
    JS_FS_END};
926
// Body shared by the static paren getters below. It asks |res| to create the
// value for paren |parenNum| in args.rval(), and replaces an undefined
// result with the runtime's empty string. The expansion ends in a
// |return true| without a trailing semicolon and expects |cx|, |res| and
// |args| to be in scope, as provided by DEFINE_STATIC_GETTER.
#define STATIC_PAREN_GETTER_CODE(parenNum)                        \
  if (!res->createParen(cx, parenNum, args.rval())) return false; \
  if (args.rval().isUndefined())                                  \
    args.rval().setString(cx->runtime()->emptyString);            \
  return true
932
/*
 * RegExp static properties.
 *
 * RegExp class static properties and their Perl counterparts:
 *
 *  RegExp.input                $_
 *  RegExp.lastMatch            $&
 *  RegExp.lastParen            $+
 *  RegExp.leftContext          $`
 *  RegExp.rightContext         $'
 */

// Defines a static native named |name| that builds |args| from the call,
// fetches the current global's RegExpStatics into |res| (returning false if
// that fails) and then runs |code|. |code| must itself return from the
// function, since nothing follows it in the generated body.
#define DEFINE_STATIC_GETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    CallArgs args = CallArgsFromVp(argc, vp);                              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    code;                                                                  \
  }
952
// Natives for the named static values; each forwards to the matching
// create* method on the global's RegExpStatics and returns its result.
DEFINE_STATIC_GETTER(static_input_getter,
                     return res->createPendingInput(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastMatch_getter,
                     return res->createLastMatch(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastParen_getter,
                     return res->createLastParen(cx, args.rval()))
DEFINE_STATIC_GETTER(static_leftContext_getter,
                     return res->createLeftContext(cx, args.rval()))
DEFINE_STATIC_GETTER(static_rightContext_getter,
                     return res->createRightContext(cx, args.rval()))

// Natives for parens 1 through 9; an undefined paren value is returned as
// the empty string (see STATIC_PAREN_GETTER_CODE).
DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1))
DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2))
DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3))
DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4))
DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))
973
// Defines a static native function called |name| that fetches the current
// global's RegExpStatics into |res|, returns false if that lookup yields
// null, executes |code| and then returns true. Unlike the getter macro it
// does not build a CallArgs.
// NOTE(review): no expansion of this macro is visible in this part of the
// file; confirm whether it is still needed.
#define DEFINE_STATIC_SETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) {                                                            \
      return false;                                                        \
    }                                                                      \
    code;                                                                  \
    return true;                                                           \
  }
981
static_input_setter(JSContext * cx,unsigned argc,Value * vp)982 static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
983 CallArgs args = CallArgsFromVp(argc, vp);
984 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
985 if (!res) {
986 return false;
987 }
988
989 RootedString str(cx, ToString<CanGC>(cx, args.get(0)));
990 if (!str) {
991 return false;
992 }
993
994 res->setPendingInput(str);
995 args.rval().setString(str);
996 return true;
997 }
998
// Static accessor properties for RegExp: the long names, $1..$9, the
// Perl-style short aliases, and the species getter.
const JSPropertySpec js::regexp_static_props[] = {
    // Long names: permanent and enumerable. Only "input" has a setter.
    JS_PSGS("input", static_input_getter, static_input_setter,
            JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastMatch", static_lastMatch_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastParen", static_lastParen_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("leftContext", static_leftContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("rightContext", static_rightContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    // Numbered groups: permanent and enumerable, getter only.
    JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    // Short aliases: they reuse the accessors above and are permanent but
    // not enumerable.
    JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
    JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
    JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
    JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
    JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),
    // Species getter, implemented by the self-hosted "$RegExpSpecies".
    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
    JS_PS_END};
1026
1027 template <typename CharT>
IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,size_t index)1028 static bool IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,
1029 size_t index) {
1030 JS::AutoCheckCannotGC nogc;
1031 MOZ_ASSERT(index > 0 && index < input->length());
1032 const CharT* inputChars = input->chars<CharT>(nogc);
1033
1034 return unicode::IsTrailSurrogate(inputChars[index]) &&
1035 unicode::IsLeadSurrogate(inputChars[index - 1]);
1036 }
1037
IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,int32_t index)1038 static bool IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,
1039 int32_t index) {
1040 if (index <= 0 || size_t(index) >= input->length()) {
1041 return false;
1042 }
1043
1044 return input->hasLatin1Chars()
1045 ? IsTrailSurrogateWithLeadSurrogateImpl<Latin1Char>(input, index)
1046 : IsTrailSurrogateWithLeadSurrogateImpl<char16_t>(input, index);
1047 }
1048
1049 /*
1050 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1051 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
1052 */
ExecuteRegExp(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,VectorMatchPairs * matches)1053 static RegExpRunStatus ExecuteRegExp(JSContext* cx, HandleObject regexp,
1054 HandleString string, int32_t lastIndex,
1055 VectorMatchPairs* matches) {
1056 /*
1057 * WARNING: Despite the presence of spec step comment numbers, this
1058 * algorithm isn't consistent with any ES6 version, draft or
1059 * otherwise. YOU HAVE BEEN WARNED.
1060 */
1061
1062 /* Steps 1-2 performed by the caller. */
1063 Handle<RegExpObject*> reobj = regexp.as<RegExpObject>();
1064
1065 RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj));
1066 if (!re) {
1067 return RegExpRunStatus_Error;
1068 }
1069
1070 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
1071 if (!res) {
1072 return RegExpRunStatus_Error;
1073 }
1074
1075 RootedLinearString input(cx, string->ensureLinear(cx));
1076 if (!input) {
1077 return RegExpRunStatus_Error;
1078 }
1079
1080 /* Handled by caller */
1081 MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());
1082
1083 /* Steps 4-8 performed by the caller. */
1084
1085 /* Step 10. */
1086 if (reobj->unicode()) {
1087 /*
1088 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
1089 * 21.2.2.2 step 2.
1090 * Let listIndex be the index into Input of the character that was
1091 * obtained from element index of str.
1092 *
1093 * In the spec, pattern match is performed with decoded Unicode code
1094 * points, but our implementation performs it with UTF-16 encoded
1095 * string. In step 2, we should decrement lastIndex (index) if it
1096 * points the trail surrogate that has corresponding lead surrogate.
1097 *
1098 * var r = /\uD83D\uDC38/ug;
1099 * r.lastIndex = 1;
1100 * var str = "\uD83D\uDC38";
1101 * var result = r.exec(str); // pattern match starts from index 0
1102 * print(result.index); // prints 0
1103 *
1104 * Note: this doesn't match the current spec text and result in
1105 * different values for `result.index` under certain conditions.
1106 * However, the spec will change to match our implementation's
1107 * behavior. See https://github.com/tc39/ecma262/issues/128.
1108 */
1109 if (IsTrailSurrogateWithLeadSurrogate(input, lastIndex)) {
1110 lastIndex--;
1111 }
1112 }
1113
1114 /* Steps 3, 11-14, except 12.a.i, 12.c.i.1. */
1115 RegExpRunStatus status =
1116 ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches);
1117 if (status == RegExpRunStatus_Error) {
1118 return RegExpRunStatus_Error;
1119 }
1120
1121 /* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */
1122
1123 return status;
1124 }
1125
1126 /*
1127 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1128 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1129 */
RegExpMatcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,MutableHandleValue rval)1130 static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp,
1131 HandleString string, int32_t lastIndex,
1132 MutableHandleValue rval) {
1133 /* Execute regular expression and gather matches. */
1134 VectorMatchPairs matches;
1135
1136 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1137 RegExpRunStatus status =
1138 ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
1139 if (status == RegExpRunStatus_Error) {
1140 return false;
1141 }
1142
1143 /* Steps 12.a, 12.c. */
1144 if (status == RegExpRunStatus_Success_NotFound) {
1145 rval.setNull();
1146 return true;
1147 }
1148
1149 /* Steps 16-25 */
1150 RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
1151 return CreateRegExpMatchResult(cx, shared, string, matches, rval);
1152 }
1153
1154 /*
1155 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1156 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1157 */
RegExpMatcher(JSContext * cx,unsigned argc,Value * vp)1158 bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) {
1159 CallArgs args = CallArgsFromVp(argc, vp);
1160 MOZ_ASSERT(args.length() == 3);
1161 MOZ_ASSERT(IsRegExpObject(args[0]));
1162 MOZ_ASSERT(args[1].isString());
1163 MOZ_ASSERT(args[2].isNumber());
1164
1165 RootedObject regexp(cx, &args[0].toObject());
1166 RootedString string(cx, args[1].toString());
1167
1168 int32_t lastIndex;
1169 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1170
1171 /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
1172 return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
1173 }
1174
1175 /*
1176 * Separate interface for use by the JITs.
1177 * This code cannot re-enter JIT code.
1178 */
RegExpMatcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t maybeLastIndex,MatchPairs * maybeMatches,MutableHandleValue output)1179 bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp,
1180 HandleString input, int32_t maybeLastIndex,
1181 MatchPairs* maybeMatches, MutableHandleValue output) {
1182 // RegExp execution was successful only if the pairs have actually been
1183 // filled in. Note that IC code always passes a nullptr maybeMatches.
1184 if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
1185 RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
1186 return CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output);
1187 }
1188
1189 // |maybeLastIndex| only contains a valid value when the RegExp execution
1190 // was not successful.
1191 MOZ_ASSERT(maybeLastIndex >= 0);
1192 return RegExpMatcherImpl(cx, regexp, input, maybeLastIndex, output);
1193 }
1194
1195 /*
1196 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1197 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1198 * This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub,
1199 * changes to this code need to get reflected in there too.
1200 */
RegExpSearcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,int32_t * result)1201 static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
1202 HandleString string, int32_t lastIndex,
1203 int32_t* result) {
1204 /* Execute regular expression and gather matches. */
1205 VectorMatchPairs matches;
1206
1207 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1208 RegExpRunStatus status =
1209 ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
1210 if (status == RegExpRunStatus_Error) {
1211 return false;
1212 }
1213
1214 /* Steps 12.a, 12.c. */
1215 if (status == RegExpRunStatus_Success_NotFound) {
1216 *result = -1;
1217 return true;
1218 }
1219
1220 /* Steps 16-25 */
1221 *result = CreateRegExpSearchResult(matches);
1222 return true;
1223 }
1224
1225 /*
1226 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1227 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1228 */
RegExpSearcher(JSContext * cx,unsigned argc,Value * vp)1229 bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) {
1230 CallArgs args = CallArgsFromVp(argc, vp);
1231 MOZ_ASSERT(args.length() == 3);
1232 MOZ_ASSERT(IsRegExpObject(args[0]));
1233 MOZ_ASSERT(args[1].isString());
1234 MOZ_ASSERT(args[2].isNumber());
1235
1236 RootedObject regexp(cx, &args[0].toObject());
1237 RootedString string(cx, args[1].toString());
1238
1239 int32_t lastIndex;
1240 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1241
1242 /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
1243 int32_t result = 0;
1244 if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) {
1245 return false;
1246 }
1247
1248 args.rval().setInt32(result);
1249 return true;
1250 }
1251
1252 /*
1253 * Separate interface for use by the JITs.
1254 * This code cannot re-enter JIT code.
1255 */
RegExpSearcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,MatchPairs * maybeMatches,int32_t * result)1256 bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
1257 HandleString input, int32_t lastIndex,
1258 MatchPairs* maybeMatches, int32_t* result) {
1259 MOZ_ASSERT(lastIndex >= 0);
1260
1261 // RegExp execution was successful only if the pairs have actually been
1262 // filled in. Note that IC code always passes a nullptr maybeMatches.
1263 if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
1264 *result = CreateRegExpSearchResult(*maybeMatches);
1265 return true;
1266 }
1267 return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
1268 }
1269
1270 /*
1271 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1272 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
1273 */
RegExpTester(JSContext * cx,unsigned argc,Value * vp)1274 bool js::RegExpTester(JSContext* cx, unsigned argc, Value* vp) {
1275 CallArgs args = CallArgsFromVp(argc, vp);
1276 MOZ_ASSERT(args.length() == 3);
1277 MOZ_ASSERT(IsRegExpObject(args[0]));
1278 MOZ_ASSERT(args[1].isString());
1279 MOZ_ASSERT(args[2].isNumber());
1280
1281 RootedObject regexp(cx, &args[0].toObject());
1282 RootedString string(cx, args[1].toString());
1283
1284 int32_t lastIndex;
1285 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1286
1287 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1288 VectorMatchPairs matches;
1289 RegExpRunStatus status =
1290 ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
1291
1292 if (status == RegExpRunStatus_Error) {
1293 return false;
1294 }
1295
1296 if (status == RegExpRunStatus_Success) {
1297 int32_t endIndex = matches[0].limit;
1298 args.rval().setInt32(endIndex);
1299 } else {
1300 args.rval().setInt32(-1);
1301 }
1302 return true;
1303 }
1304
1305 /*
1306 * Separate interface for use by the JITs.
1307 * This code cannot re-enter JIT code.
1308 */
RegExpTesterRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,int32_t * endIndex)1309 bool js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
1310 int32_t lastIndex, int32_t* endIndex) {
1311 MOZ_ASSERT(lastIndex >= 0);
1312
1313 VectorMatchPairs matches;
1314 RegExpRunStatus status =
1315 ExecuteRegExp(cx, regexp, input, lastIndex, &matches);
1316
1317 if (status == RegExpRunStatus_Success) {
1318 *endIndex = matches[0].limit;
1319 return true;
1320 }
1321 if (status == RegExpRunStatus_Success_NotFound) {
1322 *endIndex = -1;
1323 return true;
1324 }
1325
1326 return false;
1327 }
1328
// Vector of capture values used by the replacement code below. The code that
// reads it treats each element as either undefined or a string value.
using CapturesVector = GCVector<Value, 4>;
1330
1331 struct JSSubString {
1332 JSLinearString* base = nullptr;
1333 size_t offset = 0;
1334 size_t length = 0;
1335
1336 JSSubString() = default;
1337
initEmptyJSSubString1338 void initEmpty(JSLinearString* base) {
1339 this->base = base;
1340 offset = length = 0;
1341 }
initJSSubString1342 void init(JSLinearString* base, size_t offset, size_t length) {
1343 this->base = base;
1344 this->offset = offset;
1345 this->length = length;
1346 }
1347 };
1348
GetParen(JSLinearString * matched,const JS::Value & capture,JSSubString * out)1349 static void GetParen(JSLinearString* matched, const JS::Value& capture,
1350 JSSubString* out) {
1351 if (capture.isUndefined()) {
1352 out->initEmpty(matched);
1353 return;
1354 }
1355 JSLinearString& captureLinear = capture.toString()->asLinear();
1356 out->init(&captureLinear, 0, captureLinear.length());
1357 }
1358
1359 template <typename CharT>
InterpretDollar(JSLinearString * matched,JSLinearString * string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,JSLinearString * replacement,const CharT * replacementBegin,const CharT * currentDollar,const CharT * replacementEnd,JSSubString * out,size_t * skip,uint32_t * currentNamedCapture)1360 static bool InterpretDollar(JSLinearString* matched, JSLinearString* string,
1361 size_t position, size_t tailPos,
1362 Handle<CapturesVector> captures,
1363 Handle<CapturesVector> namedCaptures,
1364 JSLinearString* replacement,
1365 const CharT* replacementBegin,
1366 const CharT* currentDollar,
1367 const CharT* replacementEnd, JSSubString* out,
1368 size_t* skip, uint32_t* currentNamedCapture) {
1369 MOZ_ASSERT(*currentDollar == '$');
1370
1371 /* If there is only a dollar, bail now. */
1372 if (currentDollar + 1 >= replacementEnd) {
1373 return false;
1374 }
1375
1376 // ES 2021 Table 57: Replacement Text Symbol Substitutions
1377 // https://tc39.es/ecma262/#table-replacement-text-symbol-substitutions
1378 char16_t c = currentDollar[1];
1379 if (IsAsciiDigit(c)) {
1380 /* $n, $nn */
1381 unsigned num = AsciiDigitToNumber(c);
1382 if (num > captures.length()) {
1383 // The result is implementation-defined. Do not substitute.
1384 return false;
1385 }
1386
1387 const CharT* currentChar = currentDollar + 2;
1388 if (currentChar < replacementEnd) {
1389 c = *currentChar;
1390 if (IsAsciiDigit(c)) {
1391 unsigned tmpNum = 10 * num + AsciiDigitToNumber(c);
1392 // If num > captures.length(), the result is implementation-defined.
1393 // Consume next character only if num <= captures.length().
1394 if (tmpNum <= captures.length()) {
1395 currentChar++;
1396 num = tmpNum;
1397 }
1398 }
1399 }
1400
1401 if (num == 0) {
1402 // The result is implementation-defined. Do not substitute.
1403 return false;
1404 }
1405
1406 *skip = currentChar - currentDollar;
1407
1408 MOZ_ASSERT(num <= captures.length());
1409
1410 GetParen(matched, captures[num - 1], out);
1411 return true;
1412 }
1413
1414 // '$<': Named Captures
1415 if (c == '<') {
1416 // Step 1.
1417 if (namedCaptures.length() == 0) {
1418 return false;
1419 }
1420
1421 // Step 2.b
1422 const CharT* nameStart = currentDollar + 2;
1423 const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
1424
1425 // Step 2.c
1426 if (!nameEnd) {
1427 return false;
1428 }
1429
1430 // Step 2.d
1431 // We precompute named capture replacements in InitNamedCaptures.
1432 // They are stored in the order in which we will need them, so here
1433 // we can just take the next one in the list.
1434 size_t nameLength = nameEnd - nameStart;
1435 *skip = nameLength + 3; // $<...>
1436
1437 // Steps 2.d.iii-iv
1438 GetParen(matched, namedCaptures[*currentNamedCapture], out);
1439 *currentNamedCapture += 1;
1440 return true;
1441 }
1442
1443 switch (c) {
1444 default:
1445 return false;
1446 case '$':
1447 out->init(replacement, currentDollar - replacementBegin, 1);
1448 break;
1449 case '&':
1450 out->init(matched, 0, matched->length());
1451 break;
1452 case '`':
1453 out->init(string, 0, position);
1454 break;
1455 case '\'':
1456 if (tailPos >= string->length()) {
1457 out->initEmpty(matched);
1458 } else {
1459 out->init(string, tailPos, string->length() - tailPos);
1460 }
1461 break;
1462 }
1463
1464 *skip = 2;
1465 return true;
1466 }
1467
1468 template <typename CharT>
FindReplaceLengthString(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1469 static bool FindReplaceLengthString(JSContext* cx, HandleLinearString matched,
1470 HandleLinearString string, size_t position,
1471 size_t tailPos,
1472 Handle<CapturesVector> captures,
1473 Handle<CapturesVector> namedCaptures,
1474 HandleLinearString replacement,
1475 size_t firstDollarIndex, size_t* sizep) {
1476 CheckedInt<uint32_t> replen = replacement->length();
1477
1478 JS::AutoCheckCannotGC nogc;
1479 MOZ_ASSERT(firstDollarIndex < replacement->length());
1480 const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1481 const CharT* currentDollar = replacementBegin + firstDollarIndex;
1482 const CharT* replacementEnd = replacementBegin + replacement->length();
1483 uint32_t currentNamedCapture = 0;
1484 do {
1485 JSSubString sub;
1486 size_t skip;
1487 if (InterpretDollar(matched, string, position, tailPos, captures,
1488 namedCaptures, replacement, replacementBegin,
1489 currentDollar, replacementEnd, &sub, &skip,
1490 ¤tNamedCapture)) {
1491 if (sub.length > skip) {
1492 replen += sub.length - skip;
1493 } else {
1494 replen -= skip - sub.length;
1495 }
1496 currentDollar += skip;
1497 } else {
1498 currentDollar++;
1499 }
1500
1501 currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1502 } while (currentDollar);
1503
1504 if (!replen.isValid()) {
1505 ReportAllocationOverflow(cx);
1506 return false;
1507 }
1508
1509 *sizep = replen.value();
1510 return true;
1511 }
1512
FindReplaceLength(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1513 static bool FindReplaceLength(JSContext* cx, HandleLinearString matched,
1514 HandleLinearString string, size_t position,
1515 size_t tailPos, Handle<CapturesVector> captures,
1516 Handle<CapturesVector> namedCaptures,
1517 HandleLinearString replacement,
1518 size_t firstDollarIndex, size_t* sizep) {
1519 return replacement->hasLatin1Chars()
1520 ? FindReplaceLengthString<Latin1Char>(
1521 cx, matched, string, position, tailPos, captures,
1522 namedCaptures, replacement, firstDollarIndex, sizep)
1523 : FindReplaceLengthString<char16_t>(
1524 cx, matched, string, position, tailPos, captures,
1525 namedCaptures, replacement, firstDollarIndex, sizep);
1526 }
1527
1528 /*
1529 * Precondition: |sb| already has necessary growth space reserved (as
1530 * derived from FindReplaceLength), and has been inflated to TwoByte if
1531 * necessary.
1532 */
1533 template <typename CharT>
DoReplace(HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,HandleLinearString replacement,size_t firstDollarIndex,StringBuffer & sb)1534 static void DoReplace(HandleLinearString matched, HandleLinearString string,
1535 size_t position, size_t tailPos,
1536 Handle<CapturesVector> captures,
1537 Handle<CapturesVector> namedCaptures,
1538 HandleLinearString replacement, size_t firstDollarIndex,
1539 StringBuffer& sb) {
1540 JS::AutoCheckCannotGC nogc;
1541 const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1542 const CharT* currentChar = replacementBegin;
1543
1544 MOZ_ASSERT(firstDollarIndex < replacement->length());
1545 const CharT* currentDollar = replacementBegin + firstDollarIndex;
1546 const CharT* replacementEnd = replacementBegin + replacement->length();
1547 uint32_t currentNamedCapture = 0;
1548 do {
1549 /* Move one of the constant portions of the replacement value. */
1550 size_t len = currentDollar - currentChar;
1551 sb.infallibleAppend(currentChar, len);
1552 currentChar = currentDollar;
1553
1554 JSSubString sub;
1555 size_t skip;
1556 if (InterpretDollar(matched, string, position, tailPos, captures,
1557 namedCaptures, replacement, replacementBegin,
1558 currentDollar, replacementEnd, &sub, &skip,
1559 ¤tNamedCapture)) {
1560 sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
1561 currentChar += skip;
1562 currentDollar += skip;
1563 } else {
1564 currentDollar++;
1565 }
1566
1567 currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1568 } while (currentDollar);
1569 sb.infallibleAppend(currentChar,
1570 replacement->length() - (currentChar - replacementBegin));
1571 }
1572
1573 /*
1574 * This function finds the list of named captures of the form
1575 * "$<name>" in a replacement string and converts them into jsids, for
1576 * use in InitNamedReplacements.
1577 */
1578 template <typename CharT>
CollectNames(JSContext * cx,HandleLinearString replacement,size_t firstDollarIndex,MutableHandle<GCVector<jsid>> names)1579 static bool CollectNames(JSContext* cx, HandleLinearString replacement,
1580 size_t firstDollarIndex,
1581 MutableHandle<GCVector<jsid>> names) {
1582 JS::AutoCheckCannotGC nogc;
1583 MOZ_ASSERT(firstDollarIndex < replacement->length());
1584
1585 const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1586 const CharT* currentDollar = replacementBegin + firstDollarIndex;
1587 const CharT* replacementEnd = replacementBegin + replacement->length();
1588
1589 // https://tc39.es/ecma262/#table-45, "$<" section
1590 while (currentDollar && currentDollar + 1 < replacementEnd) {
1591 if (currentDollar[1] == '<') {
1592 // Step 2.b
1593 const CharT* nameStart = currentDollar + 2;
1594 const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
1595
1596 // Step 2.c
1597 if (!nameEnd) {
1598 return true;
1599 }
1600
1601 // Step 2.d.i
1602 size_t nameLength = nameEnd - nameStart;
1603 JSAtom* atom = AtomizeChars(cx, nameStart, nameLength);
1604 if (!atom || !names.append(AtomToId(atom))) {
1605 return false;
1606 }
1607 currentDollar = nameEnd + 1;
1608 } else {
1609 currentDollar += 2;
1610 }
1611 currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1612 }
1613 return true;
1614 }
1615
1616 /*
1617 * When replacing named captures, the spec requires us to perform
1618 * `Get(match.groups, name)` for each "$<name>". These `Get`s can be
1619 * script-visible; for example, RegExp can be extended with an `exec`
1620 * method that wraps `groups` in a proxy. To make sure that we do the
1621 * right thing, if a regexp has named captures, we find the named
1622 * capture replacements before beginning the actual replacement.
1623 * This guarantees that we will call GetProperty once and only once for
1624 * each "$<name>" in the replacement string, in the correct order.
1625 *
1626 * This function precomputes the results of step 2 of the '$<' case
1627 * here: https://tc39.es/proposal-regexp-named-groups/#table-45, so
1628 * that when we need to access the nth named capture in InterpretDollar,
1629 * we can just use the nth value stored in namedCaptures.
1630 */
InitNamedCaptures(JSContext * cx,HandleLinearString replacement,HandleObject groups,size_t firstDollarIndex,MutableHandle<CapturesVector> namedCaptures)1631 static bool InitNamedCaptures(JSContext* cx, HandleLinearString replacement,
1632 HandleObject groups, size_t firstDollarIndex,
1633 MutableHandle<CapturesVector> namedCaptures) {
1634 Rooted<GCVector<jsid>> names(cx, cx);
1635 if (replacement->hasLatin1Chars()) {
1636 if (!CollectNames<Latin1Char>(cx, replacement, firstDollarIndex, &names)) {
1637 return false;
1638 }
1639 } else {
1640 if (!CollectNames<char16_t>(cx, replacement, firstDollarIndex, &names)) {
1641 return false;
1642 }
1643 }
1644
1645 // https://tc39.es/ecma262/#table-45, "$<" section
1646 RootedId id(cx);
1647 RootedValue capture(cx);
1648 for (uint32_t i = 0; i < names.length(); i++) {
1649 // Step 2.d.i
1650 id = names[i];
1651
1652 // Step 2.d.ii
1653 if (!GetProperty(cx, groups, groups, id, &capture)) {
1654 return false;
1655 }
1656
1657 // Step 2.d.iii
1658 if (capture.isUndefined()) {
1659 if (!namedCaptures.append(capture)) {
1660 return false;
1661 }
1662 } else {
1663 // Step 2.d.iv
1664 JSString* str = ToString<CanGC>(cx, capture);
1665 if (!str) {
1666 return false;
1667 }
1668 JSLinearString* linear = str->ensureLinear(cx);
1669 if (!linear) {
1670 return false;
1671 }
1672 if (!namedCaptures.append(StringValue(linear))) {
1673 return false;
1674 }
1675 }
1676 }
1677
1678 return true;
1679 }
1680
NeedTwoBytes(HandleLinearString string,HandleLinearString replacement,HandleLinearString matched,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures)1681 static bool NeedTwoBytes(HandleLinearString string,
1682 HandleLinearString replacement,
1683 HandleLinearString matched,
1684 Handle<CapturesVector> captures,
1685 Handle<CapturesVector> namedCaptures) {
1686 if (string->hasTwoByteChars()) {
1687 return true;
1688 }
1689 if (replacement->hasTwoByteChars()) {
1690 return true;
1691 }
1692 if (matched->hasTwoByteChars()) {
1693 return true;
1694 }
1695
1696 for (const Value& capture : captures) {
1697 if (capture.isUndefined()) {
1698 continue;
1699 }
1700 if (capture.toString()->hasTwoByteChars()) {
1701 return true;
1702 }
1703 }
1704
1705 for (const Value& capture : namedCaptures) {
1706 if (capture.isUndefined()) {
1707 continue;
1708 }
1709 if (capture.toString()->hasTwoByteChars()) {
1710 return true;
1711 }
1712 }
1713
1714 return false;
1715 }
1716
1717 /* ES 2021 21.1.3.17.1 */
1718 // https://tc39.es/ecma262/#sec-getsubstitution
RegExpGetSubstitution(JSContext * cx,HandleArrayObject matchResult,HandleLinearString string,size_t position,HandleLinearString replacement,size_t firstDollarIndex,HandleValue groups,MutableHandleValue rval)1719 bool js::RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult,
1720 HandleLinearString string, size_t position,
1721 HandleLinearString replacement,
1722 size_t firstDollarIndex, HandleValue groups,
1723 MutableHandleValue rval) {
1724 MOZ_ASSERT(firstDollarIndex < replacement->length());
1725
1726 // Step 1 (skipped).
1727
1728 // Step 10 (reordered).
1729 uint32_t matchResultLength = matchResult->length();
1730 MOZ_ASSERT(matchResultLength > 0);
1731 MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength());
1732
1733 const Value& matchedValue = matchResult->getDenseElement(0);
1734 RootedLinearString matched(cx, matchedValue.toString()->ensureLinear(cx));
1735 if (!matched) {
1736 return false;
1737 }
1738
1739 // Step 2.
1740 size_t matchLength = matched->length();
1741
1742 // Steps 3-5 (skipped).
1743
1744 // Step 6.
1745 MOZ_ASSERT(position <= string->length());
1746
1747 uint32_t nCaptures = matchResultLength - 1;
1748 Rooted<CapturesVector> captures(cx, CapturesVector(cx));
1749 if (!captures.reserve(nCaptures)) {
1750 return false;
1751 }
1752
1753 // Step 7.
1754 for (uint32_t i = 1; i <= nCaptures; i++) {
1755 const Value& capture = matchResult->getDenseElement(i);
1756
1757 if (capture.isUndefined()) {
1758 captures.infallibleAppend(capture);
1759 continue;
1760 }
1761
1762 JSLinearString* captureLinear = capture.toString()->ensureLinear(cx);
1763 if (!captureLinear) {
1764 return false;
1765 }
1766 captures.infallibleAppend(StringValue(captureLinear));
1767 }
1768
1769 Rooted<CapturesVector> namedCaptures(cx, cx);
1770 if (groups.isObject()) {
1771 RootedObject groupsObj(cx, &groups.toObject());
1772 if (!InitNamedCaptures(cx, replacement, groupsObj, firstDollarIndex,
1773 &namedCaptures)) {
1774 return false;
1775 }
1776 } else {
1777 MOZ_ASSERT(groups.isUndefined());
1778 }
1779
1780 // Step 8 (skipped).
1781
1782 // Step 9.
1783 CheckedInt<uint32_t> checkedTailPos(0);
1784 checkedTailPos += position;
1785 checkedTailPos += matchLength;
1786 if (!checkedTailPos.isValid()) {
1787 ReportAllocationOverflow(cx);
1788 return false;
1789 }
1790 uint32_t tailPos = checkedTailPos.value();
1791
1792 // Step 11.
1793 size_t reserveLength;
1794 if (!FindReplaceLength(cx, matched, string, position, tailPos, captures,
1795 namedCaptures, replacement, firstDollarIndex,
1796 &reserveLength)) {
1797 return false;
1798 }
1799
1800 JSStringBuilder result(cx);
1801 if (NeedTwoBytes(string, replacement, matched, captures, namedCaptures)) {
1802 if (!result.ensureTwoByteChars()) {
1803 return false;
1804 }
1805 }
1806
1807 if (!result.reserve(reserveLength)) {
1808 return false;
1809 }
1810
1811 if (replacement->hasLatin1Chars()) {
1812 DoReplace<Latin1Char>(matched, string, position, tailPos, captures,
1813 namedCaptures, replacement, firstDollarIndex, result);
1814 } else {
1815 DoReplace<char16_t>(matched, string, position, tailPos, captures,
1816 namedCaptures, replacement, firstDollarIndex, result);
1817 }
1818
1819 // Step 12.
1820 JSString* resultString = result.finishString();
1821 if (!resultString) {
1822 return false;
1823 }
1824
1825 rval.setString(resultString);
1826 return true;
1827 }
1828
GetFirstDollarIndex(JSContext * cx,unsigned argc,Value * vp)1829 bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) {
1830 CallArgs args = CallArgsFromVp(argc, vp);
1831 MOZ_ASSERT(args.length() == 1);
1832 JSString* str = args[0].toString();
1833
1834 // Should be handled in different path.
1835 MOZ_ASSERT(str->length() != 0);
1836
1837 int32_t index = -1;
1838 if (!GetFirstDollarIndexRaw(cx, str, &index)) {
1839 return false;
1840 }
1841
1842 args.rval().setInt32(index);
1843 return true;
1844 }
1845
// Scans the first |textLen| characters of |text| and returns the position of
// the first '$' character, or -1 if none of them is a '$'.
template <typename TextChar>
static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text,
                                                     uint32_t textLen) {
  for (uint32_t pos = 0; pos < textLen; ++pos) {
    if (text[pos] == '$') {
      return static_cast<int>(pos);
    }
  }
  return -1;
}
1857
GetFirstDollarIndexRawFlat(JSLinearString * text)1858 int32_t js::GetFirstDollarIndexRawFlat(JSLinearString* text) {
1859 uint32_t len = text->length();
1860
1861 JS::AutoCheckCannotGC nogc;
1862 if (text->hasLatin1Chars()) {
1863 return GetFirstDollarIndexImpl(text->latin1Chars(nogc), len);
1864 }
1865
1866 return GetFirstDollarIndexImpl(text->twoByteChars(nogc), len);
1867 }
1868
GetFirstDollarIndexRaw(JSContext * cx,JSString * str,int32_t * index)1869 bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) {
1870 JSLinearString* text = str->ensureLinear(cx);
1871 if (!text) {
1872 return false;
1873 }
1874
1875 *index = GetFirstDollarIndexRawFlat(text);
1876 return true;
1877 }
1878
RegExpPrototypeOptimizable(JSContext * cx,unsigned argc,Value * vp)1879 bool js::RegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) {
1880 // This can only be called from self-hosted code.
1881 CallArgs args = CallArgsFromVp(argc, vp);
1882 MOZ_ASSERT(args.length() == 1);
1883
1884 args.rval().setBoolean(
1885 RegExpPrototypeOptimizableRaw(cx, &args[0].toObject()));
1886 return true;
1887 }
1888
RegExpPrototypeOptimizableRaw(JSContext * cx,JSObject * proto)1889 bool js::RegExpPrototypeOptimizableRaw(JSContext* cx, JSObject* proto) {
1890 AutoUnsafeCallWithABI unsafe;
1891 AutoAssertNoPendingException aanpe(cx);
1892 if (!proto->is<NativeObject>()) {
1893 return false;
1894 }
1895
1896 NativeObject* nproto = static_cast<NativeObject*>(proto);
1897
1898 Shape* shape = cx->realm()->regExps.getOptimizableRegExpPrototypeShape();
1899 if (shape == nproto->shape()) {
1900 return true;
1901 }
1902
1903 JSFunction* flagsGetter;
1904 if (!GetOwnGetterPure(cx, proto, NameToId(cx->names().flags), &flagsGetter)) {
1905 return false;
1906 }
1907
1908 if (!flagsGetter) {
1909 return false;
1910 }
1911
1912 if (!IsSelfHostedFunctionWithName(flagsGetter,
1913 cx->names().RegExpFlagsGetter)) {
1914 return false;
1915 }
1916
1917 JSNative globalGetter;
1918 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().global),
1919 &globalGetter)) {
1920 return false;
1921 }
1922
1923 if (globalGetter != regexp_global) {
1924 return false;
1925 }
1926
1927 JSNative hasIndicesGetter;
1928 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().hasIndices),
1929 &hasIndicesGetter)) {
1930 return false;
1931 }
1932
1933 if (hasIndicesGetter != regexp_hasIndices) {
1934 return false;
1935 }
1936
1937 JSNative ignoreCaseGetter;
1938 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().ignoreCase),
1939 &ignoreCaseGetter)) {
1940 return false;
1941 }
1942
1943 if (ignoreCaseGetter != regexp_ignoreCase) {
1944 return false;
1945 }
1946
1947 JSNative multilineGetter;
1948 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().multiline),
1949 &multilineGetter)) {
1950 return false;
1951 }
1952
1953 if (multilineGetter != regexp_multiline) {
1954 return false;
1955 }
1956
1957 JSNative stickyGetter;
1958 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().sticky),
1959 &stickyGetter)) {
1960 return false;
1961 }
1962
1963 if (stickyGetter != regexp_sticky) {
1964 return false;
1965 }
1966
1967 JSNative unicodeGetter;
1968 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().unicode),
1969 &unicodeGetter)) {
1970 return false;
1971 }
1972
1973 if (unicodeGetter != regexp_unicode) {
1974 return false;
1975 }
1976
1977 JSNative dotAllGetter;
1978 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().dotAll),
1979 &dotAllGetter)) {
1980 return false;
1981 }
1982
1983 if (dotAllGetter != regexp_dotAll) {
1984 return false;
1985 }
1986
1987 // Check if @@match, @@search, and exec are own data properties,
1988 // those values should be tested in selfhosted JS.
1989 bool has = false;
1990 if (!HasOwnDataPropertyPure(
1991 cx, proto, PropertyKey::Symbol(cx->wellKnownSymbols().match), &has)) {
1992 return false;
1993 }
1994 if (!has) {
1995 return false;
1996 }
1997
1998 if (!HasOwnDataPropertyPure(
1999 cx, proto, PropertyKey::Symbol(cx->wellKnownSymbols().search),
2000 &has)) {
2001 return false;
2002 }
2003 if (!has) {
2004 return false;
2005 }
2006
2007 if (!HasOwnDataPropertyPure(cx, proto, NameToId(cx->names().exec), &has)) {
2008 return false;
2009 }
2010 if (!has) {
2011 return false;
2012 }
2013
2014 cx->realm()->regExps.setOptimizableRegExpPrototypeShape(nproto->shape());
2015 return true;
2016 }
2017
RegExpInstanceOptimizable(JSContext * cx,unsigned argc,Value * vp)2018 bool js::RegExpInstanceOptimizable(JSContext* cx, unsigned argc, Value* vp) {
2019 // This can only be called from self-hosted code.
2020 CallArgs args = CallArgsFromVp(argc, vp);
2021 MOZ_ASSERT(args.length() == 2);
2022
2023 args.rval().setBoolean(RegExpInstanceOptimizableRaw(cx, &args[0].toObject(),
2024 &args[1].toObject()));
2025 return true;
2026 }
2027
RegExpInstanceOptimizableRaw(JSContext * cx,JSObject * obj,JSObject * proto)2028 bool js::RegExpInstanceOptimizableRaw(JSContext* cx, JSObject* obj,
2029 JSObject* proto) {
2030 AutoUnsafeCallWithABI unsafe;
2031 AutoAssertNoPendingException aanpe(cx);
2032
2033 RegExpObject* rx = &obj->as<RegExpObject>();
2034
2035 Shape* shape = cx->realm()->regExps.getOptimizableRegExpInstanceShape();
2036 if (shape == rx->shape()) {
2037 return true;
2038 }
2039
2040 if (!rx->hasStaticPrototype()) {
2041 return false;
2042 }
2043
2044 if (rx->staticPrototype() != proto) {
2045 return false;
2046 }
2047
2048 if (!RegExpObject::isInitialShape(rx)) {
2049 return false;
2050 }
2051
2052 cx->realm()->regExps.setOptimizableRegExpInstanceShape(rx->shape());
2053 return true;
2054 }
2055
2056 /*
2057 * Pattern match the script to check if it is is indexing into a particular
2058 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
2059 * such cases, which are used by javascript packers (particularly the popular
2060 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
2061 * code patterns generated by such packers here.
2062 */
intrinsic_GetElemBaseForLambda(JSContext * cx,unsigned argc,Value * vp)2063 bool js::intrinsic_GetElemBaseForLambda(JSContext* cx, unsigned argc,
2064 Value* vp) {
2065 // This can only be called from self-hosted code.
2066 CallArgs args = CallArgsFromVp(argc, vp);
2067 MOZ_ASSERT(args.length() == 1);
2068
2069 JSObject& lambda = args[0].toObject();
2070 args.rval().setUndefined();
2071
2072 if (!lambda.is<JSFunction>()) {
2073 return true;
2074 }
2075
2076 RootedFunction fun(cx, &lambda.as<JSFunction>());
2077 if (!fun->isInterpreted() || fun->isClassConstructor()) {
2078 return true;
2079 }
2080
2081 JSScript* script = JSFunction::getOrCreateScript(cx, fun);
2082 if (!script) {
2083 return false;
2084 }
2085
2086 jsbytecode* pc = script->code();
2087
2088 /*
2089 * JSOp::GetAliasedVar tells us exactly where to find the base object 'b'.
2090 * Rule out the (unlikely) possibility of a function with environment
2091 * objects since it would make our environment walk off.
2092 */
2093 if (JSOp(*pc) != JSOp::GetAliasedVar || fun->needsSomeEnvironmentObject()) {
2094 return true;
2095 }
2096 EnvironmentCoordinate ec(pc);
2097 EnvironmentObject* env = &fun->environment()->as<EnvironmentObject>();
2098 for (unsigned i = 0; i < ec.hops(); ++i) {
2099 env = &env->enclosingEnvironment().as<EnvironmentObject>();
2100 }
2101 Value b = env->aliasedBinding(ec);
2102 pc += JSOpLength_GetAliasedVar;
2103
2104 /* Look for 'a' to be the lambda's first argument. */
2105 if (JSOp(*pc) != JSOp::GetArg || GET_ARGNO(pc) != 0) {
2106 return true;
2107 }
2108 pc += JSOpLength_GetArg;
2109
2110 /* 'b[a]' */
2111 if (JSOp(*pc) != JSOp::GetElem) {
2112 return true;
2113 }
2114 pc += JSOpLength_GetElem;
2115
2116 /* 'return b[a]' */
2117 if (JSOp(*pc) != JSOp::Return) {
2118 return true;
2119 }
2120
2121 /* 'b' must behave like a normal object. */
2122 if (!b.isObject()) {
2123 return true;
2124 }
2125
2126 JSObject& bobj = b.toObject();
2127 const JSClass* clasp = bobj.getClass();
2128 if (!clasp->isNativeObject() || clasp->getOpsLookupProperty() ||
2129 clasp->getOpsGetProperty()) {
2130 return true;
2131 }
2132
2133 args.rval().setObject(bobj);
2134 return true;
2135 }
2136
2137 /*
2138 * Emulates `b[a]` property access, that is detected in GetElemBaseForLambda.
2139 * It returns the property value only if the property is data property and the
2140 * property value is a string. Otherwise it returns undefined.
2141 */
intrinsic_GetStringDataProperty(JSContext * cx,unsigned argc,Value * vp)2142 bool js::intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc,
2143 Value* vp) {
2144 CallArgs args = CallArgsFromVp(argc, vp);
2145 MOZ_ASSERT(args.length() == 2);
2146
2147 RootedObject obj(cx, &args[0].toObject());
2148 if (!obj->is<NativeObject>()) {
2149 // The object is already checked to be native in GetElemBaseForLambda,
2150 // but it can be swapped to another class that is non-native.
2151 // Return undefined to mark failure to get the property.
2152 args.rval().setUndefined();
2153 return true;
2154 }
2155
2156 JSAtom* atom = AtomizeString(cx, args[1].toString());
2157 if (!atom) {
2158 return false;
2159 }
2160
2161 Value v;
2162 if (GetPropertyPure(cx, obj, AtomToId(atom), &v) && v.isString()) {
2163 args.rval().set(v);
2164 } else {
2165 args.rval().setUndefined();
2166 }
2167
2168 return true;
2169 }
2170