1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "builtin/RegExp.h"
8 
9 #include "mozilla/Casting.h"
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/TextUtils.h"
12 
13 #include "jsapi.h"
14 
15 #include "frontend/TokenStream.h"
16 #include "irregexp/RegExpAPI.h"
17 #include "jit/InlinableNatives.h"
18 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_NEWREGEXP_FLAGGED
19 #include "js/PropertySpec.h"
20 #include "js/RegExpFlags.h"  // JS::RegExpFlag, JS::RegExpFlags
21 #include "util/StringBuffer.h"
22 #include "util/Unicode.h"
23 #include "vm/JSContext.h"
24 #include "vm/RegExpStatics.h"
25 #include "vm/SelfHosting.h"
26 #include "vm/WellKnownAtom.h"  // js_*_str
27 
28 #include "vm/EnvironmentObject-inl.h"
29 #include "vm/JSObject-inl.h"
30 #include "vm/NativeObject-inl.h"
31 #include "vm/ObjectOperations-inl.h"
32 #include "vm/PlainObject-inl.h"
33 
34 using namespace js;
35 
36 using mozilla::AssertedCast;
37 using mozilla::CheckedInt;
38 using mozilla::IsAsciiDigit;
39 
40 using JS::CompileOptions;
41 using JS::RegExpFlag;
42 using JS::RegExpFlags;
43 
44 // Allocate an object for the |.groups| or |.indices.groups| property
45 // of a regexp match result.
CreateGroupsObject(JSContext * cx,HandlePlainObject groupsTemplate)46 static PlainObject* CreateGroupsObject(JSContext* cx,
47                                        HandlePlainObject groupsTemplate) {
48   if (groupsTemplate->inDictionaryMode()) {
49     return NewPlainObjectWithProto(cx, nullptr);
50   }
51 
52   // The groups template object is stored in RegExpShared, which is shared
53   // across compartments and realms. So watch out for the case when the template
54   // object's realm is different from the current realm.
55   if (cx->realm() != groupsTemplate->realm()) {
56     return PlainObject::createWithTemplateFromDifferentRealm(cx,
57                                                              groupsTemplate);
58   }
59 
60   return PlainObject::createWithTemplate(cx, groupsTemplate);
61 }
62 
63 /*
64  * Implements RegExpBuiltinExec: Steps 18-35
65  * https://tc39.es/ecma262/#sec-regexpbuiltinexec
66  */
CreateRegExpMatchResult(JSContext * cx,HandleRegExpShared re,HandleString input,const MatchPairs & matches,MutableHandleValue rval)67 bool js::CreateRegExpMatchResult(JSContext* cx, HandleRegExpShared re,
68                                  HandleString input, const MatchPairs& matches,
69                                  MutableHandleValue rval) {
70   MOZ_ASSERT(re);
71   MOZ_ASSERT(input);
72 
73   /*
74    * Create the (slow) result array for a match.
75    *
76    * Array contents:
77    *  0:              matched string
78    *  1..pairCount-1: paren matches
79    *  input:          input string
80    *  index:          start index for the match
81    *  groups:         named capture groups for the match
82    *  indices:        capture indices for the match, if required
83    */
84 
85   bool hasIndices = re->hasIndices();
86 
87   // Get the templateObject that defines the shape and type of the output
88   // object.
89   RegExpRealm::ResultTemplateKind kind =
90       hasIndices ? RegExpRealm::ResultTemplateKind::WithIndices
91                  : RegExpRealm::ResultTemplateKind::Normal;
92   ArrayObject* templateObject =
93       cx->realm()->regExps.getOrCreateMatchResultTemplateObject(cx, kind);
94   if (!templateObject) {
95     return false;
96   }
97 
98   // Steps 18-19
99   size_t numPairs = matches.length();
100   MOZ_ASSERT(numPairs > 0);
101 
102   // Steps 20-21: Allocate the match result object.
103   RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(
104                                 cx, numPairs, templateObject));
105   if (!arr) {
106     return false;
107   }
108 
109   // Steps 28-29 and 33 a-d: Initialize the elements of the match result.
110   // Store a Value for each match pair.
111   for (size_t i = 0; i < numPairs; i++) {
112     const MatchPair& pair = matches[i];
113 
114     if (pair.isUndefined()) {
115       MOZ_ASSERT(i != 0);  // Since we had a match, first pair must be present.
116       arr->setDenseInitializedLength(i + 1);
117       arr->initDenseElement(i, UndefinedValue());
118     } else {
119       JSLinearString* str =
120           NewDependentString(cx, input, pair.start, pair.length());
121       if (!str) {
122         return false;
123       }
124       arr->setDenseInitializedLength(i + 1);
125       arr->initDenseElement(i, StringValue(str));
126     }
127   }
128 
129   // Step 34a (reordered): Allocate and initialize the indices object if needed.
130   // This is an inlined implementation of MakeIndicesArray:
131   // https://tc39.es/ecma262/#sec-makeindicesarray
132   RootedArrayObject indices(cx);
133   RootedPlainObject indicesGroups(cx);
134   if (hasIndices) {
135     // MakeIndicesArray: step 8
136     ArrayObject* indicesTemplate =
137         cx->realm()->regExps.getOrCreateMatchResultTemplateObject(
138             cx, RegExpRealm::ResultTemplateKind::Indices);
139     indices =
140         NewDenseFullyAllocatedArrayWithTemplate(cx, numPairs, indicesTemplate);
141     if (!indices) {
142       return false;
143     }
144 
145     // MakeIndicesArray: steps 10-12
146     if (re->numNamedCaptures() > 0) {
147       RootedPlainObject groupsTemplate(cx, re->getGroupsTemplate());
148       indicesGroups = CreateGroupsObject(cx, groupsTemplate);
149       if (!indicesGroups) {
150         return false;
151       }
152       indices->setSlot(RegExpRealm::IndicesGroupsSlot,
153                        ObjectValue(*indicesGroups));
154     } else {
155       indices->setSlot(RegExpRealm::IndicesGroupsSlot, UndefinedValue());
156     }
157 
158     // MakeIndicesArray: step 13 a-d. (Step 13.e is implemented below.)
159     for (size_t i = 0; i < numPairs; i++) {
160       const MatchPair& pair = matches[i];
161 
162       if (pair.isUndefined()) {
163         // Since we had a match, first pair must be present.
164         MOZ_ASSERT(i != 0);
165         indices->setDenseInitializedLength(i + 1);
166         indices->initDenseElement(i, UndefinedValue());
167       } else {
168         RootedArrayObject indexPair(cx, NewDenseFullyAllocatedArray(cx, 2));
169         if (!indexPair) {
170           return false;
171         }
172         indexPair->setDenseInitializedLength(2);
173         indexPair->initDenseElement(0, Int32Value(pair.start));
174         indexPair->initDenseElement(1, Int32Value(pair.limit));
175 
176         indices->setDenseInitializedLength(i + 1);
177         indices->initDenseElement(i, ObjectValue(*indexPair));
178       }
179     }
180   }
181 
182   // Steps 30-31 (reordered): Allocate the groups object (if needed).
183   RootedPlainObject groups(cx);
184   bool groupsInDictionaryMode = false;
185   if (re->numNamedCaptures() > 0) {
186     RootedPlainObject groupsTemplate(cx, re->getGroupsTemplate());
187     groupsInDictionaryMode = groupsTemplate->inDictionaryMode();
188     groups = CreateGroupsObject(cx, groupsTemplate);
189     if (!groups) {
190       return false;
191     }
192   }
193 
194   // Step 33 e-f: Initialize the properties of |groups| and |indices.groups|.
195   // The groups template object stores the names of the named captures
196   // in the the order in which they are defined. The named capture
197   // indices vector stores the corresponding capture indices. In
198   // dictionary mode, we have to define the properties explicitly. If
199   // we are not in dictionary mode, we simply fill in the slots with
200   // the correct values.
201   if (groupsInDictionaryMode) {
202     RootedIdVector keys(cx);
203     RootedPlainObject groupsTemplate(cx, re->getGroupsTemplate());
204     if (!GetPropertyKeys(cx, groupsTemplate, 0, &keys)) {
205       return false;
206     }
207     MOZ_ASSERT(keys.length() == re->numNamedCaptures());
208     RootedId key(cx);
209     RootedValue val(cx);
210     for (uint32_t i = 0; i < keys.length(); i++) {
211       key = keys[i];
212       uint32_t idx = re->getNamedCaptureIndex(i);
213       val = arr->getDenseElement(idx);
214       if (!NativeDefineDataProperty(cx, groups, key, val, JSPROP_ENUMERATE)) {
215         return false;
216       }
217       // MakeIndicesArray: Step 13.e (reordered)
218       if (hasIndices) {
219         val = indices->getDenseElement(idx);
220         if (!NativeDefineDataProperty(cx, indicesGroups, key, val,
221                                       JSPROP_ENUMERATE)) {
222           return false;
223         }
224       }
225     }
226   } else {
227     for (uint32_t i = 0; i < re->numNamedCaptures(); i++) {
228       uint32_t idx = re->getNamedCaptureIndex(i);
229       groups->setSlot(i, arr->getDenseElement(idx));
230 
231       // MakeIndicesArray: Step 13.e (reordered)
232       if (hasIndices) {
233         indicesGroups->setSlot(i, indices->getDenseElement(idx));
234       }
235     }
236   }
237 
238   // Step 22 (reordered).
239   // Set the |index| property.
240   arr->setSlot(RegExpRealm::MatchResultObjectIndexSlot,
241                Int32Value(matches[0].start));
242 
243   // Step 23 (reordered).
244   // Set the |input| property.
245   arr->setSlot(RegExpRealm::MatchResultObjectInputSlot, StringValue(input));
246 
247   // Step 32 (reordered)
248   // Set the |groups| property.
249   arr->setSlot(RegExpRealm::MatchResultObjectGroupsSlot,
250                groups ? ObjectValue(*groups) : UndefinedValue());
251 
252   // Step 34b
253   // Set the |indices| property.
254   if (re->hasIndices()) {
255     arr->setSlot(RegExpRealm::MatchResultObjectIndicesSlot,
256                  ObjectValue(*indices));
257   }
258 
259 #ifdef DEBUG
260   RootedValue test(cx);
261   RootedId id(cx, NameToId(cx->names().index));
262   if (!NativeGetProperty(cx, arr, id, &test)) {
263     return false;
264   }
265   MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectIndexSlot));
266   id = NameToId(cx->names().input);
267   if (!NativeGetProperty(cx, arr, id, &test)) {
268     return false;
269   }
270   MOZ_ASSERT(test == arr->getSlot(RegExpRealm::MatchResultObjectInputSlot));
271 #endif
272 
273   // Step 35.
274   rval.setObject(*arr);
275   return true;
276 }
277 
CreateRegExpSearchResult(const MatchPairs & matches)278 static int32_t CreateRegExpSearchResult(const MatchPairs& matches) {
279   /* Fit the start and limit of match into a int32_t. */
280   uint32_t position = matches[0].start;
281   uint32_t lastIndex = matches[0].limit;
282   MOZ_ASSERT(position < 0x8000);
283   MOZ_ASSERT(lastIndex < 0x8000);
284   return position | (lastIndex << 15);
285 }
286 
287 /*
288  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
289  * steps 3, 9-14, except 12.a.i, 12.c.i.1.
290  */
ExecuteRegExpImpl(JSContext * cx,RegExpStatics * res,MutableHandleRegExpShared re,HandleLinearString input,size_t searchIndex,VectorMatchPairs * matches)291 static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
292                                          MutableHandleRegExpShared re,
293                                          HandleLinearString input,
294                                          size_t searchIndex,
295                                          VectorMatchPairs* matches) {
296   RegExpRunStatus status =
297       RegExpShared::execute(cx, re, input, searchIndex, matches);
298 
299   /* Out of spec: Update RegExpStatics. */
300   if (status == RegExpRunStatus_Success && res) {
301     if (!res->updateFromMatchPairs(cx, input, *matches)) {
302       return RegExpRunStatus_Error;
303     }
304   }
305   return status;
306 }
307 
308 /* Legacy ExecuteRegExp behavior is baked into the JSAPI. */
ExecuteRegExpLegacy(JSContext * cx,RegExpStatics * res,Handle<RegExpObject * > reobj,HandleLinearString input,size_t * lastIndex,bool test,MutableHandleValue rval)309 bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res,
310                              Handle<RegExpObject*> reobj,
311                              HandleLinearString input, size_t* lastIndex,
312                              bool test, MutableHandleValue rval) {
313   RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj));
314   if (!shared) {
315     return false;
316   }
317 
318   VectorMatchPairs matches;
319 
320   RegExpRunStatus status =
321       ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches);
322   if (status == RegExpRunStatus_Error) {
323     return false;
324   }
325 
326   if (status == RegExpRunStatus_Success_NotFound) {
327     /* ExecuteRegExp() previously returned an array or null. */
328     rval.setNull();
329     return true;
330   }
331 
332   *lastIndex = matches[0].limit;
333 
334   if (test) {
335     /* Forbid an array, as an optimization. */
336     rval.setBoolean(true);
337     return true;
338   }
339 
340   return CreateRegExpMatchResult(cx, shared, input, matches, rval);
341 }
342 
CheckPatternSyntaxSlow(JSContext * cx,HandleAtom pattern,RegExpFlags flags)343 static bool CheckPatternSyntaxSlow(JSContext* cx, HandleAtom pattern,
344                                    RegExpFlags flags) {
345   LifoAllocScope allocScope(&cx->tempLifoAlloc());
346   CompileOptions options(cx);
347   frontend::DummyTokenStream dummyTokenStream(cx, options);
348   return irregexp::CheckPatternSyntax(cx, dummyTokenStream, pattern, flags);
349 }
350 
CheckPatternSyntax(JSContext * cx,HandleAtom pattern,RegExpFlags flags)351 static RegExpShared* CheckPatternSyntax(JSContext* cx, HandleAtom pattern,
352                                         RegExpFlags flags) {
353   // If we already have a RegExpShared for this pattern/flags, we can
354   // avoid the much slower CheckPatternSyntaxSlow call.
355 
356   RootedRegExpShared shared(cx, cx->zone()->regExps().maybeGet(pattern, flags));
357   if (shared) {
358 #ifdef DEBUG
359     // Assert the pattern is valid.
360     if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
361       MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed());
362       return nullptr;
363     }
364 #endif
365     return shared;
366   }
367 
368   if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
369     return nullptr;
370   }
371 
372   // Allocate and return a new RegExpShared so we will hit the fast path
373   // next time.
374   return cx->zone()->regExps().get(cx, pattern, flags);
375 }
376 
377 /*
378  * ES 2016 draft Mar 25, 2016 21.2.3.2.2.
379  *
380  * Steps 14-15 set |obj|'s "lastIndex" property to zero.  Some of
381  * RegExpInitialize's callers have a fresh RegExp not yet exposed to script:
382  * in these cases zeroing "lastIndex" is infallible.  But others have a RegExp
383  * whose "lastIndex" property might have been made non-writable: here, zeroing
384  * "lastIndex" can fail.  We efficiently solve this problem by completely
385  * removing "lastIndex" zeroing from the provided function.
386  *
387  * CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES!
388  *
389  * Because this function only ever returns a user-provided |obj| in the spec,
390  * we omit it and just return the usual success/failure.
391  */
RegExpInitializeIgnoringLastIndex(JSContext * cx,Handle<RegExpObject * > obj,HandleValue patternValue,HandleValue flagsValue)392 static bool RegExpInitializeIgnoringLastIndex(JSContext* cx,
393                                               Handle<RegExpObject*> obj,
394                                               HandleValue patternValue,
395                                               HandleValue flagsValue) {
396   RootedAtom pattern(cx);
397   if (patternValue.isUndefined()) {
398     /* Step 1. */
399     pattern = cx->names().empty;
400   } else {
401     /* Step 2. */
402     pattern = ToAtom<CanGC>(cx, patternValue);
403     if (!pattern) {
404       return false;
405     }
406   }
407 
408   /* Step 3. */
409   RegExpFlags flags = RegExpFlag::NoFlags;
410   if (!flagsValue.isUndefined()) {
411     /* Step 4. */
412     RootedString flagStr(cx, ToString<CanGC>(cx, flagsValue));
413     if (!flagStr) {
414       return false;
415     }
416 
417     /* Step 5. */
418     if (!ParseRegExpFlags(cx, flagStr, &flags)) {
419       return false;
420     }
421   }
422 
423   /* Steps 7-8. */
424   RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags);
425   if (!shared) {
426     return false;
427   }
428 
429   /* Steps 9-12. */
430   obj->initIgnoringLastIndex(pattern, flags);
431 
432   obj->setShared(shared);
433 
434   return true;
435 }
436 
437 /* ES 2016 draft Mar 25, 2016 21.2.3.2.3. */
RegExpCreate(JSContext * cx,HandleValue patternValue,HandleValue flagsValue,MutableHandleValue rval)438 bool js::RegExpCreate(JSContext* cx, HandleValue patternValue,
439                       HandleValue flagsValue, MutableHandleValue rval) {
440   /* Step 1. */
441   Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject));
442   if (!regexp) {
443     return false;
444   }
445 
446   /* Step 2. */
447   if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue,
448                                          flagsValue)) {
449     return false;
450   }
451   regexp->zeroLastIndex(cx);
452 
453   rval.setObject(*regexp);
454   return true;
455 }
456 
IsRegExpObject(HandleValue v)457 MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) {
458   return v.isObject() && v.toObject().is<RegExpObject>();
459 }
460 
461 /* ES6 draft rc3 7.2.8. */
IsRegExp(JSContext * cx,HandleValue value,bool * result)462 bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) {
463   /* Step 1. */
464   if (!value.isObject()) {
465     *result = false;
466     return true;
467   }
468   RootedObject obj(cx, &value.toObject());
469 
470   /* Steps 2-3. */
471   RootedValue isRegExp(cx);
472   RootedId matchId(cx, PropertyKey::Symbol(cx->wellKnownSymbols().match));
473   if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) {
474     return false;
475   }
476 
477   /* Step 4. */
478   if (!isRegExp.isUndefined()) {
479     *result = ToBoolean(isRegExp);
480     return true;
481   }
482 
483   /* Steps 5-6. */
484   ESClass cls;
485   if (!GetClassOfValue(cx, value, &cls)) {
486     return false;
487   }
488 
489   *result = cls == ESClass::RegExp;
490   return true;
491 }
492 
493 /* ES6 B.2.5.1. */
regexp_compile_impl(JSContext * cx,const CallArgs & args)494 MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx,
495                                            const CallArgs& args) {
496   MOZ_ASSERT(IsRegExpObject(args.thisv()));
497 
498   Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());
499 
500   // Step 3.
501   RootedValue patternValue(cx, args.get(0));
502   ESClass cls;
503   if (!GetClassOfValue(cx, patternValue, &cls)) {
504     return false;
505   }
506   if (cls == ESClass::RegExp) {
507     // Step 3a.
508     if (args.hasDefined(1)) {
509       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
510                                 JSMSG_NEWREGEXP_FLAGGED);
511       return false;
512     }
513 
514     // Beware!  |patternObj| might be a proxy into another compartment, so
515     // don't assume |patternObj.is<RegExpObject>()|.  For the same reason,
516     // don't reuse the RegExpShared below.
517     RootedObject patternObj(cx, &patternValue.toObject());
518 
519     RootedAtom sourceAtom(cx);
520     RegExpFlags flags = RegExpFlag::NoFlags;
521     {
522       // Step 3b.
523       RegExpShared* shared = RegExpToShared(cx, patternObj);
524       if (!shared) {
525         return false;
526       }
527 
528       sourceAtom = shared->getSource();
529       flags = shared->getFlags();
530     }
531 
532     // Step 5, minus lastIndex zeroing.
533     regexp->initIgnoringLastIndex(sourceAtom, flags);
534   } else {
535     // Step 4.
536     RootedValue P(cx, patternValue);
537     RootedValue F(cx, args.get(1));
538 
539     // Step 5, minus lastIndex zeroing.
540     if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
541       return false;
542     }
543   }
544 
545   // The final niggling bit of step 5.
546   //
547   // |regexp| is user-exposed, but if its "lastIndex" property hasn't been
548   // made non-writable, we can still use a fast path to zero it.
549   if (regexp->lookupPure(cx->names().lastIndex)->writable()) {
550     regexp->zeroLastIndex(cx);
551   } else {
552     RootedValue zero(cx, Int32Value(0));
553     if (!SetProperty(cx, regexp, cx->names().lastIndex, zero)) {
554       return false;
555     }
556   }
557 
558   args.rval().setObject(*regexp);
559   return true;
560 }
561 
regexp_compile(JSContext * cx,unsigned argc,Value * vp)562 static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) {
563   CallArgs args = CallArgsFromVp(argc, vp);
564 
565   /* Steps 1-2. */
566   return CallNonGenericMethod<IsRegExpObject, regexp_compile_impl>(cx, args);
567 }
568 
569 /*
570  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1.
571  */
regexp_construct(JSContext * cx,unsigned argc,Value * vp)572 bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) {
573   CallArgs args = CallArgsFromVp(argc, vp);
574 
575   // Steps 1.
576   bool patternIsRegExp;
577   if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) {
578     return false;
579   }
580 
581   // We can delay step 3 and step 4a until later, during
582   // GetPrototypeFromBuiltinConstructor calls. Accessing the new.target
583   // and the callee from the stack is unobservable.
584   if (!args.isConstructing()) {
585     // Step 3.b.
586     if (patternIsRegExp && !args.hasDefined(1)) {
587       RootedObject patternObj(cx, &args[0].toObject());
588 
589       // Step 3.b.i.
590       RootedValue patternConstructor(cx);
591       if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor,
592                        &patternConstructor)) {
593         return false;
594       }
595 
596       // Step 3.b.ii.
597       if (patternConstructor.isObject() &&
598           patternConstructor.toObject() == args.callee()) {
599         args.rval().set(args[0]);
600         return true;
601       }
602     }
603   }
604 
605   RootedValue patternValue(cx, args.get(0));
606 
607   // Step 4.
608   ESClass cls;
609   if (!GetClassOfValue(cx, patternValue, &cls)) {
610     return false;
611   }
612   if (cls == ESClass::RegExp) {
613     // Beware!  |patternObj| might be a proxy into another compartment, so
614     // don't assume |patternObj.is<RegExpObject>()|.
615     RootedObject patternObj(cx, &patternValue.toObject());
616 
617     RootedAtom sourceAtom(cx);
618     RegExpFlags flags;
619     RootedRegExpShared shared(cx);
620     {
621       // Step 4.a.
622       shared = RegExpToShared(cx, patternObj);
623       if (!shared) {
624         return false;
625       }
626       sourceAtom = shared->getSource();
627 
628       // Step 4.b.
629       // Get original flags in all cases, to compare with passed flags.
630       flags = shared->getFlags();
631 
632       // If the RegExpShared is in another Zone, don't reuse it.
633       if (cx->zone() != shared->zone()) {
634         shared = nullptr;
635       }
636     }
637 
638     // Step 7.
639     RootedObject proto(cx);
640     if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
641       return false;
642     }
643 
644     Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
645     if (!regexp) {
646       return false;
647     }
648 
649     // Step 8.
650     if (args.hasDefined(1)) {
651       // Step 4.c / 21.2.3.2.2 RegExpInitialize step 4.
652       RegExpFlags flagsArg = RegExpFlag::NoFlags;
653       RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
654       if (!flagStr) {
655         return false;
656       }
657       if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) {
658         return false;
659       }
660 
661       // Don't reuse the RegExpShared if we have different flags.
662       if (flags != flagsArg) {
663         shared = nullptr;
664       }
665 
666       if (!flags.unicode() && flagsArg.unicode()) {
667         // Have to check syntax again when adding 'u' flag.
668 
669         // ES 2017 draft rev 9b49a888e9dfe2667008a01b2754c3662059ae56
670         // 21.2.3.2.2 step 7.
671         shared = CheckPatternSyntax(cx, sourceAtom, flagsArg);
672         if (!shared) {
673           return false;
674         }
675       }
676       flags = flagsArg;
677     }
678 
679     regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
680 
681     if (shared) {
682       regexp->setShared(shared);
683     }
684 
685     args.rval().setObject(*regexp);
686     return true;
687   }
688 
689   RootedValue P(cx);
690   RootedValue F(cx);
691 
692   // Step 5.
693   if (patternIsRegExp) {
694     RootedObject patternObj(cx, &patternValue.toObject());
695 
696     // Step 5.a.
697     if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P)) {
698       return false;
699     }
700 
701     // Step 5.b.
702     F = args.get(1);
703     if (F.isUndefined()) {
704       if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F)) {
705         return false;
706       }
707     }
708   } else {
709     // Steps 6.a-b.
710     P = patternValue;
711     F = args.get(1);
712   }
713 
714   // Step 7.
715   RootedObject proto(cx);
716   if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_RegExp, &proto)) {
717     return false;
718   }
719 
720   Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
721   if (!regexp) {
722     return false;
723   }
724 
725   // Step 8.
726   if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) {
727     return false;
728   }
729   regexp->zeroLastIndex(cx);
730 
731   args.rval().setObject(*regexp);
732   return true;
733 }
734 
735 /*
736  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1
737  * steps 4, 7-8.
738  */
regexp_construct_raw_flags(JSContext * cx,unsigned argc,Value * vp)739 bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) {
740   CallArgs args = CallArgsFromVp(argc, vp);
741   MOZ_ASSERT(args.length() == 2);
742   MOZ_ASSERT(!args.isConstructing());
743 
744   // Step 4.a.
745   RootedAtom sourceAtom(cx, AtomizeString(cx, args[0].toString()));
746   if (!sourceAtom) {
747     return false;
748   }
749 
750   // Step 4.c.
751   RegExpFlags flags = AssertedCast<uint8_t>(int32_t(args[1].toNumber()));
752 
753   // Step 7.
754   RegExpObject* regexp = RegExpAlloc(cx, GenericObject);
755   if (!regexp) {
756     return false;
757   }
758 
759   // Step 8.
760   regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
761   args.rval().setObject(*regexp);
762   return true;
763 }
764 
765 // This is a specialized implementation of "UnwrapAndTypeCheckThis" for RegExp
766 // getters that need to return a special value for same-realm
767 // %RegExp.prototype%.
768 template <typename Fn>
RegExpGetter(JSContext * cx,CallArgs & args,const char * methodName,Fn && fn,HandleValue fallbackValue=UndefinedHandleValue)769 static bool RegExpGetter(JSContext* cx, CallArgs& args, const char* methodName,
770                          Fn&& fn,
771                          HandleValue fallbackValue = UndefinedHandleValue) {
772   JSObject* obj = nullptr;
773   if (args.thisv().isObject()) {
774     obj = &args.thisv().toObject();
775     if (IsWrapper(obj)) {
776       obj = CheckedUnwrapStatic(obj);
777       if (!obj) {
778         ReportAccessDenied(cx);
779         return false;
780       }
781     }
782   }
783 
784   if (obj) {
785     // Step 4ff
786     if (obj->is<RegExpObject>()) {
787       return fn(&obj->as<RegExpObject>());
788     }
789 
790     // Step 3.a. "If SameValue(R, %RegExp.prototype%) is true, return
791     // undefined."
792     // Or `return "(?:)"` for get RegExp.prototype.source.
793     if (obj == cx->global()->maybeGetRegExpPrototype()) {
794       args.rval().set(fallbackValue);
795       return true;
796     }
797 
798     // fall-through
799   }
800 
801   // Step 2. and Step 3.b.
802   JS_ReportErrorNumberLatin1(cx, GetErrorMessage, nullptr,
803                              JSMSG_INCOMPATIBLE_REGEXP_GETTER, methodName,
804                              InformalValueTypeName(args.thisv()));
805   return false;
806 }
807 
regexp_hasIndices(JSContext * cx,unsigned argc,JS::Value * vp)808 bool js::regexp_hasIndices(JSContext* cx, unsigned argc, JS::Value* vp) {
809   CallArgs args = CallArgsFromVp(argc, vp);
810   return RegExpGetter(cx, args, "hasIndices", [args](RegExpObject* unwrapped) {
811     args.rval().setBoolean(unwrapped->hasIndices());
812     return true;
813   });
814 }
815 
816 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
817 // 21.2.5.5 get RegExp.prototype.global
regexp_global(JSContext * cx,unsigned argc,JS::Value * vp)818 bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) {
819   CallArgs args = CallArgsFromVp(argc, vp);
820   return RegExpGetter(cx, args, "global", [args](RegExpObject* unwrapped) {
821     args.rval().setBoolean(unwrapped->global());
822     return true;
823   });
824 }
825 
826 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
827 // 21.2.5.6 get RegExp.prototype.ignoreCase
regexp_ignoreCase(JSContext * cx,unsigned argc,JS::Value * vp)828 bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) {
829   CallArgs args = CallArgsFromVp(argc, vp);
830   return RegExpGetter(cx, args, "ignoreCase", [args](RegExpObject* unwrapped) {
831     args.rval().setBoolean(unwrapped->ignoreCase());
832     return true;
833   });
834 }
835 
836 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
837 // 21.2.5.9 get RegExp.prototype.multiline
regexp_multiline(JSContext * cx,unsigned argc,JS::Value * vp)838 bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) {
839   CallArgs args = CallArgsFromVp(argc, vp);
840   return RegExpGetter(cx, args, "multiline", [args](RegExpObject* unwrapped) {
841     args.rval().setBoolean(unwrapped->multiline());
842     return true;
843   });
844 }
845 
846 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
847 // 21.2.5.12 get RegExp.prototype.source
regexp_source(JSContext * cx,unsigned argc,JS::Value * vp)848 static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) {
849   CallArgs args = CallArgsFromVp(argc, vp);
850   // Step 3.a. Return "(?:)" for %RegExp.prototype%.
851   RootedValue fallback(cx, StringValue(cx->names().emptyRegExp));
852   return RegExpGetter(
853       cx, args, "source",
854       [cx, args](RegExpObject* unwrapped) {
855         RootedAtom src(cx, unwrapped->getSource());
856         MOZ_ASSERT(src);
857         // Mark potentially cross-compartment JSAtom.
858         cx->markAtom(src);
859 
860         // Step 7.
861         JSString* escaped = EscapeRegExpPattern(cx, src);
862         if (!escaped) {
863           return false;
864         }
865 
866         args.rval().setString(escaped);
867         return true;
868       },
869       fallback);
870 }
871 
872 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
873 // 21.2.5.3 get RegExp.prototype.dotAll
regexp_dotAll(JSContext * cx,unsigned argc,JS::Value * vp)874 bool js::regexp_dotAll(JSContext* cx, unsigned argc, JS::Value* vp) {
875   CallArgs args = CallArgsFromVp(argc, vp);
876   return RegExpGetter(cx, args, "dotAll", [args](RegExpObject* unwrapped) {
877     args.rval().setBoolean(unwrapped->dotAll());
878     return true;
879   });
880 }
881 
882 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
883 // 21.2.5.14 get RegExp.prototype.sticky
regexp_sticky(JSContext * cx,unsigned argc,JS::Value * vp)884 bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) {
885   CallArgs args = CallArgsFromVp(argc, vp);
886   return RegExpGetter(cx, args, "sticky", [args](RegExpObject* unwrapped) {
887     args.rval().setBoolean(unwrapped->sticky());
888     return true;
889   });
890 }
891 
892 // ES2021 draft rev 0b3a808af87a9123890767152a26599cc8fde161
893 // 21.2.5.17 get RegExp.prototype.unicode
regexp_unicode(JSContext * cx,unsigned argc,JS::Value * vp)894 bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) {
895   CallArgs args = CallArgsFromVp(argc, vp);
896   return RegExpGetter(cx, args, "unicode", [args](RegExpObject* unwrapped) {
897     args.rval().setBoolean(unwrapped->unicode());
898     return true;
899   });
900 }
901 
// Accessor property table for RegExp objects. "flags" is bound to the
// self-hosted getter $RegExpFlagsGetter; every other entry binds a property
// name to a native getter (regexp_*). All entries are getter-only and are
// declared with attribute flags 0.
// NOTE(review): presumably installed on RegExp.prototype -- confirm where this
// table is referenced.
const JSPropertySpec js::regexp_properties[] = {
    JS_SELF_HOSTED_GET("flags", "$RegExpFlagsGetter", 0),
    JS_PSG("hasIndices", regexp_hasIndices, 0),
    JS_PSG("global", regexp_global, 0),
    JS_PSG("ignoreCase", regexp_ignoreCase, 0),
    JS_PSG("multiline", regexp_multiline, 0),
    JS_PSG("dotAll", regexp_dotAll, 0),
    JS_PSG("source", regexp_source, 0),
    JS_PSG("sticky", regexp_sticky, 0),
    JS_PSG("unicode", regexp_unicode, 0),
    JS_PS_END};
913 
// Method table for RegExp objects. Only "compile" is a native function
// (regexp_compile); all other entries, including the Symbol-keyed ones
// (match, matchAll, replace, search, split), name self-hosted functions.
// toSource and toString both map to the same self-hosted $RegExpToString.
// The third argument of each entry is the declared argument count.
const JSFunctionSpec js::regexp_methods[] = {
    JS_SELF_HOSTED_FN(js_toSource_str, "$RegExpToString", 0, 0),
    JS_SELF_HOSTED_FN(js_toString_str, "$RegExpToString", 0, 0),
    JS_FN("compile", regexp_compile, 2, 0),
    JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0),
    JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0),
    JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(matchAll, "RegExpMatchAll", 1, 0),
    JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0),
    JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0),
    JS_FS_END};
926 
// Body of a RegExp.$N static getter, meant to be passed as the |code|
// argument of DEFINE_STATIC_GETTER. It expects |cx|, |res| (RegExpStatics*)
// and |args| (CallArgs) to be in scope. It asks |res| to create the value for
// paren |parenNum| in args.rval(), replaces an undefined result with the
// empty string, and returns true; a createParen failure returns false.
#define STATIC_PAREN_GETTER_CODE(parenNum)                        \
  if (!res->createParen(cx, parenNum, args.rval())) return false; \
  if (args.rval().isUndefined())                                  \
    args.rval().setString(cx->runtime()->emptyString);            \
  return true
932 
933 /*
934  * RegExp static properties.
935  *
936  * RegExp class static properties and their Perl counterparts:
937  *
938  *  RegExp.input                $_
939  *  RegExp.lastMatch            $&
940  *  RegExp.lastParen            $+
941  *  RegExp.leftContext          $`
942  *  RegExp.rightContext         $'
943  */
944 
// Defines a file-local native getter called |name|. The generated function
// builds |args| from (argc, vp), fetches the current global's RegExpStatics
// into |res| (returning false if that fails), and then runs |code|. |code|
// must return from the function itself; nothing follows it.
#define DEFINE_STATIC_GETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    CallArgs args = CallArgsFromVp(argc, vp);                              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    code;                                                                  \
  }
952 
// Getters for the named statics: each one forwards to the corresponding
// RegExpStatics::create* method, which writes the result into args.rval().
DEFINE_STATIC_GETTER(static_input_getter,
                     return res->createPendingInput(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastMatch_getter,
                     return res->createLastMatch(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastParen_getter,
                     return res->createLastParen(cx, args.rval()))
DEFINE_STATIC_GETTER(static_leftContext_getter,
                     return res->createLeftContext(cx, args.rval()))
DEFINE_STATIC_GETTER(static_rightContext_getter,
                     return res->createRightContext(cx, args.rval()))

// Getters for the numbered paren statics 1 through 9; an undefined paren is
// reported as the empty string (see STATIC_PAREN_GETTER_CODE).
DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1))
DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2))
DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3))
DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4))
DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))
973 
// Defines a file-local native setter called |name|. The generated function
// fetches the current global's RegExpStatics into |res| (returning false if
// that fails), runs |code|, and then returns true. Unlike
// DEFINE_STATIC_GETTER it does not create a CallArgs, so |code| only has
// |cx|, |argc|, |vp| and |res| available.
// NOTE(review): no instantiation is visible near this definition
// (static_input_setter is written out by hand) -- confirm whether this macro
// is still needed.
#define DEFINE_STATIC_SETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    code;                                                                  \
    return true;                                                           \
  }
981 
static_input_setter(JSContext * cx,unsigned argc,Value * vp)982 static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
983   CallArgs args = CallArgsFromVp(argc, vp);
984   RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
985   if (!res) {
986     return false;
987   }
988 
989   RootedString str(cx, ToString<CanGC>(cx, args.get(0)));
990   if (!str) {
991     return false;
992   }
993 
994   res->setPendingInput(str);
995   args.rval().setString(str);
996   return true;
997 }
998 
// Table of RegExp static accessor properties. The long names (input,
// lastMatch, lastParen, leftContext, rightContext) and $1..$9 are permanent
// and enumerable; the Perl-style aliases ($_, $&, $+, $`, $') reuse the same
// getters but are permanent only. "input" and "$_" are the only entries with
// a setter. The final entry is the self-hosted Symbol.species getter.
// NOTE(review): presumably installed on the RegExp constructor -- confirm
// where this table is referenced.
const JSPropertySpec js::regexp_static_props[] = {
    JS_PSGS("input", static_input_getter, static_input_setter,
            JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastMatch", static_lastMatch_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastParen", static_lastParen_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("leftContext", static_leftContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("rightContext", static_rightContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
    JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
    JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
    JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
    JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),
    JS_SELF_HOSTED_SYM_GET(species, "$RegExpSpecies", 0),
    JS_PS_END};
1026 
1027 template <typename CharT>
IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,size_t index)1028 static bool IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,
1029                                                   size_t index) {
1030   JS::AutoCheckCannotGC nogc;
1031   MOZ_ASSERT(index > 0 && index < input->length());
1032   const CharT* inputChars = input->chars<CharT>(nogc);
1033 
1034   return unicode::IsTrailSurrogate(inputChars[index]) &&
1035          unicode::IsLeadSurrogate(inputChars[index - 1]);
1036 }
1037 
IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,int32_t index)1038 static bool IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,
1039                                               int32_t index) {
1040   if (index <= 0 || size_t(index) >= input->length()) {
1041     return false;
1042   }
1043 
1044   return input->hasLatin1Chars()
1045              ? IsTrailSurrogateWithLeadSurrogateImpl<Latin1Char>(input, index)
1046              : IsTrailSurrogateWithLeadSurrogateImpl<char16_t>(input, index);
1047 }
1048 
1049 /*
1050  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1051  * steps 3, 9-14, except 12.a.i, 12.c.i.1.
1052  */
ExecuteRegExp(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,VectorMatchPairs * matches)1053 static RegExpRunStatus ExecuteRegExp(JSContext* cx, HandleObject regexp,
1054                                      HandleString string, int32_t lastIndex,
1055                                      VectorMatchPairs* matches) {
1056   /*
1057    * WARNING: Despite the presence of spec step comment numbers, this
1058    *          algorithm isn't consistent with any ES6 version, draft or
1059    *          otherwise.  YOU HAVE BEEN WARNED.
1060    */
1061 
1062   /* Steps 1-2 performed by the caller. */
1063   Handle<RegExpObject*> reobj = regexp.as<RegExpObject>();
1064 
1065   RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj));
1066   if (!re) {
1067     return RegExpRunStatus_Error;
1068   }
1069 
1070   RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
1071   if (!res) {
1072     return RegExpRunStatus_Error;
1073   }
1074 
1075   RootedLinearString input(cx, string->ensureLinear(cx));
1076   if (!input) {
1077     return RegExpRunStatus_Error;
1078   }
1079 
1080   /* Handled by caller */
1081   MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());
1082 
1083   /* Steps 4-8 performed by the caller. */
1084 
1085   /* Step 10. */
1086   if (reobj->unicode()) {
1087     /*
1088      * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
1089      * 21.2.2.2 step 2.
1090      *   Let listIndex be the index into Input of the character that was
1091      *   obtained from element index of str.
1092      *
1093      * In the spec, pattern match is performed with decoded Unicode code
1094      * points, but our implementation performs it with UTF-16 encoded
1095      * string.  In step 2, we should decrement lastIndex (index) if it
1096      * points the trail surrogate that has corresponding lead surrogate.
1097      *
1098      *   var r = /\uD83D\uDC38/ug;
1099      *   r.lastIndex = 1;
1100      *   var str = "\uD83D\uDC38";
1101      *   var result = r.exec(str); // pattern match starts from index 0
1102      *   print(result.index);      // prints 0
1103      *
1104      * Note: this doesn't match the current spec text and result in
1105      * different values for `result.index` under certain conditions.
1106      * However, the spec will change to match our implementation's
1107      * behavior. See https://github.com/tc39/ecma262/issues/128.
1108      */
1109     if (IsTrailSurrogateWithLeadSurrogate(input, lastIndex)) {
1110       lastIndex--;
1111     }
1112   }
1113 
1114   /* Steps 3, 11-14, except 12.a.i, 12.c.i.1. */
1115   RegExpRunStatus status =
1116       ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches);
1117   if (status == RegExpRunStatus_Error) {
1118     return RegExpRunStatus_Error;
1119   }
1120 
1121   /* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */
1122 
1123   return status;
1124 }
1125 
1126 /*
1127  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1128  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1129  */
RegExpMatcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,MutableHandleValue rval)1130 static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp,
1131                               HandleString string, int32_t lastIndex,
1132                               MutableHandleValue rval) {
1133   /* Execute regular expression and gather matches. */
1134   VectorMatchPairs matches;
1135 
1136   /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1137   RegExpRunStatus status =
1138       ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
1139   if (status == RegExpRunStatus_Error) {
1140     return false;
1141   }
1142 
1143   /* Steps 12.a, 12.c. */
1144   if (status == RegExpRunStatus_Success_NotFound) {
1145     rval.setNull();
1146     return true;
1147   }
1148 
1149   /* Steps 16-25 */
1150   RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
1151   return CreateRegExpMatchResult(cx, shared, string, matches, rval);
1152 }
1153 
1154 /*
1155  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1156  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1157  */
RegExpMatcher(JSContext * cx,unsigned argc,Value * vp)1158 bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) {
1159   CallArgs args = CallArgsFromVp(argc, vp);
1160   MOZ_ASSERT(args.length() == 3);
1161   MOZ_ASSERT(IsRegExpObject(args[0]));
1162   MOZ_ASSERT(args[1].isString());
1163   MOZ_ASSERT(args[2].isNumber());
1164 
1165   RootedObject regexp(cx, &args[0].toObject());
1166   RootedString string(cx, args[1].toString());
1167 
1168   int32_t lastIndex;
1169   MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1170 
1171   /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
1172   return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
1173 }
1174 
1175 /*
1176  * Separate interface for use by the JITs.
1177  * This code cannot re-enter JIT code.
1178  */
RegExpMatcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t maybeLastIndex,MatchPairs * maybeMatches,MutableHandleValue output)1179 bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp,
1180                           HandleString input, int32_t maybeLastIndex,
1181                           MatchPairs* maybeMatches, MutableHandleValue output) {
1182   // RegExp execution was successful only if the pairs have actually been
1183   // filled in. Note that IC code always passes a nullptr maybeMatches.
1184   if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
1185     RootedRegExpShared shared(cx, regexp->as<RegExpObject>().getShared());
1186     return CreateRegExpMatchResult(cx, shared, input, *maybeMatches, output);
1187   }
1188 
1189   // |maybeLastIndex| only contains a valid value when the RegExp execution
1190   // was not successful.
1191   MOZ_ASSERT(maybeLastIndex >= 0);
1192   return RegExpMatcherImpl(cx, regexp, input, maybeLastIndex, output);
1193 }
1194 
1195 /*
1196  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1197  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1198  * This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub,
1199  * changes to this code need to get reflected in there too.
1200  */
RegExpSearcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,int32_t * result)1201 static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
1202                                HandleString string, int32_t lastIndex,
1203                                int32_t* result) {
1204   /* Execute regular expression and gather matches. */
1205   VectorMatchPairs matches;
1206 
1207   /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1208   RegExpRunStatus status =
1209       ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
1210   if (status == RegExpRunStatus_Error) {
1211     return false;
1212   }
1213 
1214   /* Steps 12.a, 12.c. */
1215   if (status == RegExpRunStatus_Success_NotFound) {
1216     *result = -1;
1217     return true;
1218   }
1219 
1220   /* Steps 16-25 */
1221   *result = CreateRegExpSearchResult(matches);
1222   return true;
1223 }
1224 
1225 /*
1226  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1227  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
1228  */
RegExpSearcher(JSContext * cx,unsigned argc,Value * vp)1229 bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) {
1230   CallArgs args = CallArgsFromVp(argc, vp);
1231   MOZ_ASSERT(args.length() == 3);
1232   MOZ_ASSERT(IsRegExpObject(args[0]));
1233   MOZ_ASSERT(args[1].isString());
1234   MOZ_ASSERT(args[2].isNumber());
1235 
1236   RootedObject regexp(cx, &args[0].toObject());
1237   RootedString string(cx, args[1].toString());
1238 
1239   int32_t lastIndex;
1240   MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1241 
1242   /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
1243   int32_t result = 0;
1244   if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) {
1245     return false;
1246   }
1247 
1248   args.rval().setInt32(result);
1249   return true;
1250 }
1251 
1252 /*
1253  * Separate interface for use by the JITs.
1254  * This code cannot re-enter JIT code.
1255  */
RegExpSearcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,MatchPairs * maybeMatches,int32_t * result)1256 bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
1257                            HandleString input, int32_t lastIndex,
1258                            MatchPairs* maybeMatches, int32_t* result) {
1259   MOZ_ASSERT(lastIndex >= 0);
1260 
1261   // RegExp execution was successful only if the pairs have actually been
1262   // filled in. Note that IC code always passes a nullptr maybeMatches.
1263   if (maybeMatches && maybeMatches->pairsRaw()[0] > MatchPair::NoMatch) {
1264     *result = CreateRegExpSearchResult(*maybeMatches);
1265     return true;
1266   }
1267   return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
1268 }
1269 
1270 /*
1271  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1272  * steps 3, 9-14, except 12.a.i, 12.c.i.1.
1273  */
RegExpTester(JSContext * cx,unsigned argc,Value * vp)1274 bool js::RegExpTester(JSContext* cx, unsigned argc, Value* vp) {
1275   CallArgs args = CallArgsFromVp(argc, vp);
1276   MOZ_ASSERT(args.length() == 3);
1277   MOZ_ASSERT(IsRegExpObject(args[0]));
1278   MOZ_ASSERT(args[1].isString());
1279   MOZ_ASSERT(args[2].isNumber());
1280 
1281   RootedObject regexp(cx, &args[0].toObject());
1282   RootedString string(cx, args[1].toString());
1283 
1284   int32_t lastIndex;
1285   MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1286 
1287   /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1288   VectorMatchPairs matches;
1289   RegExpRunStatus status =
1290       ExecuteRegExp(cx, regexp, string, lastIndex, &matches);
1291 
1292   if (status == RegExpRunStatus_Error) {
1293     return false;
1294   }
1295 
1296   if (status == RegExpRunStatus_Success) {
1297     int32_t endIndex = matches[0].limit;
1298     args.rval().setInt32(endIndex);
1299   } else {
1300     args.rval().setInt32(-1);
1301   }
1302   return true;
1303 }
1304 
1305 /*
1306  * Separate interface for use by the JITs.
1307  * This code cannot re-enter JIT code.
1308  */
RegExpTesterRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,int32_t * endIndex)1309 bool js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
1310                          int32_t lastIndex, int32_t* endIndex) {
1311   MOZ_ASSERT(lastIndex >= 0);
1312 
1313   VectorMatchPairs matches;
1314   RegExpRunStatus status =
1315       ExecuteRegExp(cx, regexp, input, lastIndex, &matches);
1316 
1317   if (status == RegExpRunStatus_Success) {
1318     *endIndex = matches[0].limit;
1319     return true;
1320   }
1321   if (status == RegExpRunStatus_Success_NotFound) {
1322     *endIndex = -1;
1323     return true;
1324   }
1325 
1326   return false;
1327 }
1328 
// Vector of capture values used by the replacement helpers below, both for
// the numbered captures and for the precomputed named captures. Entries are
// read as either undefined or a string (see GetParen).
// NOTE(review): the `4` is GCVector's second template argument -- presumably
// its inline capacity; confirm in js/GCVector.h.
using CapturesVector = GCVector<Value, 4>;
1330 
1331 struct JSSubString {
1332   JSLinearString* base = nullptr;
1333   size_t offset = 0;
1334   size_t length = 0;
1335 
1336   JSSubString() = default;
1337 
initEmptyJSSubString1338   void initEmpty(JSLinearString* base) {
1339     this->base = base;
1340     offset = length = 0;
1341   }
initJSSubString1342   void init(JSLinearString* base, size_t offset, size_t length) {
1343     this->base = base;
1344     this->offset = offset;
1345     this->length = length;
1346   }
1347 };
1348 
GetParen(JSLinearString * matched,const JS::Value & capture,JSSubString * out)1349 static void GetParen(JSLinearString* matched, const JS::Value& capture,
1350                      JSSubString* out) {
1351   if (capture.isUndefined()) {
1352     out->initEmpty(matched);
1353     return;
1354   }
1355   JSLinearString& captureLinear = capture.toString()->asLinear();
1356   out->init(&captureLinear, 0, captureLinear.length());
1357 }
1358 
1359 template <typename CharT>
InterpretDollar(JSLinearString * matched,JSLinearString * string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,JSLinearString * replacement,const CharT * replacementBegin,const CharT * currentDollar,const CharT * replacementEnd,JSSubString * out,size_t * skip,uint32_t * currentNamedCapture)1360 static bool InterpretDollar(JSLinearString* matched, JSLinearString* string,
1361                             size_t position, size_t tailPos,
1362                             Handle<CapturesVector> captures,
1363                             Handle<CapturesVector> namedCaptures,
1364                             JSLinearString* replacement,
1365                             const CharT* replacementBegin,
1366                             const CharT* currentDollar,
1367                             const CharT* replacementEnd, JSSubString* out,
1368                             size_t* skip, uint32_t* currentNamedCapture) {
1369   MOZ_ASSERT(*currentDollar == '$');
1370 
1371   /* If there is only a dollar, bail now. */
1372   if (currentDollar + 1 >= replacementEnd) {
1373     return false;
1374   }
1375 
1376   // ES 2021 Table 57: Replacement Text Symbol Substitutions
1377   // https://tc39.es/ecma262/#table-replacement-text-symbol-substitutions
1378   char16_t c = currentDollar[1];
1379   if (IsAsciiDigit(c)) {
1380     /* $n, $nn */
1381     unsigned num = AsciiDigitToNumber(c);
1382     if (num > captures.length()) {
1383       // The result is implementation-defined. Do not substitute.
1384       return false;
1385     }
1386 
1387     const CharT* currentChar = currentDollar + 2;
1388     if (currentChar < replacementEnd) {
1389       c = *currentChar;
1390       if (IsAsciiDigit(c)) {
1391         unsigned tmpNum = 10 * num + AsciiDigitToNumber(c);
1392         // If num > captures.length(), the result is implementation-defined.
1393         // Consume next character only if num <= captures.length().
1394         if (tmpNum <= captures.length()) {
1395           currentChar++;
1396           num = tmpNum;
1397         }
1398       }
1399     }
1400 
1401     if (num == 0) {
1402       // The result is implementation-defined. Do not substitute.
1403       return false;
1404     }
1405 
1406     *skip = currentChar - currentDollar;
1407 
1408     MOZ_ASSERT(num <= captures.length());
1409 
1410     GetParen(matched, captures[num - 1], out);
1411     return true;
1412   }
1413 
1414   // '$<': Named Captures
1415   if (c == '<') {
1416     // Step 1.
1417     if (namedCaptures.length() == 0) {
1418       return false;
1419     }
1420 
1421     // Step 2.b
1422     const CharT* nameStart = currentDollar + 2;
1423     const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
1424 
1425     // Step 2.c
1426     if (!nameEnd) {
1427       return false;
1428     }
1429 
1430     // Step 2.d
1431     // We precompute named capture replacements in InitNamedCaptures.
1432     // They are stored in the order in which we will need them, so here
1433     // we can just take the next one in the list.
1434     size_t nameLength = nameEnd - nameStart;
1435     *skip = nameLength + 3;  // $<...>
1436 
1437     // Steps 2.d.iii-iv
1438     GetParen(matched, namedCaptures[*currentNamedCapture], out);
1439     *currentNamedCapture += 1;
1440     return true;
1441   }
1442 
1443   switch (c) {
1444     default:
1445       return false;
1446     case '$':
1447       out->init(replacement, currentDollar - replacementBegin, 1);
1448       break;
1449     case '&':
1450       out->init(matched, 0, matched->length());
1451       break;
1452     case '`':
1453       out->init(string, 0, position);
1454       break;
1455     case '\'':
1456       if (tailPos >= string->length()) {
1457         out->initEmpty(matched);
1458       } else {
1459         out->init(string, tailPos, string->length() - tailPos);
1460       }
1461       break;
1462   }
1463 
1464   *skip = 2;
1465   return true;
1466 }
1467 
1468 template <typename CharT>
FindReplaceLengthString(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1469 static bool FindReplaceLengthString(JSContext* cx, HandleLinearString matched,
1470                                     HandleLinearString string, size_t position,
1471                                     size_t tailPos,
1472                                     Handle<CapturesVector> captures,
1473                                     Handle<CapturesVector> namedCaptures,
1474                                     HandleLinearString replacement,
1475                                     size_t firstDollarIndex, size_t* sizep) {
1476   CheckedInt<uint32_t> replen = replacement->length();
1477 
1478   JS::AutoCheckCannotGC nogc;
1479   MOZ_ASSERT(firstDollarIndex < replacement->length());
1480   const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1481   const CharT* currentDollar = replacementBegin + firstDollarIndex;
1482   const CharT* replacementEnd = replacementBegin + replacement->length();
1483   uint32_t currentNamedCapture = 0;
1484   do {
1485     JSSubString sub;
1486     size_t skip;
1487     if (InterpretDollar(matched, string, position, tailPos, captures,
1488                         namedCaptures, replacement, replacementBegin,
1489                         currentDollar, replacementEnd, &sub, &skip,
1490                         &currentNamedCapture)) {
1491       if (sub.length > skip) {
1492         replen += sub.length - skip;
1493       } else {
1494         replen -= skip - sub.length;
1495       }
1496       currentDollar += skip;
1497     } else {
1498       currentDollar++;
1499     }
1500 
1501     currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1502   } while (currentDollar);
1503 
1504   if (!replen.isValid()) {
1505     ReportAllocationOverflow(cx);
1506     return false;
1507   }
1508 
1509   *sizep = replen.value();
1510   return true;
1511 }
1512 
FindReplaceLength(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1513 static bool FindReplaceLength(JSContext* cx, HandleLinearString matched,
1514                               HandleLinearString string, size_t position,
1515                               size_t tailPos, Handle<CapturesVector> captures,
1516                               Handle<CapturesVector> namedCaptures,
1517                               HandleLinearString replacement,
1518                               size_t firstDollarIndex, size_t* sizep) {
1519   return replacement->hasLatin1Chars()
1520              ? FindReplaceLengthString<Latin1Char>(
1521                    cx, matched, string, position, tailPos, captures,
1522                    namedCaptures, replacement, firstDollarIndex, sizep)
1523              : FindReplaceLengthString<char16_t>(
1524                    cx, matched, string, position, tailPos, captures,
1525                    namedCaptures, replacement, firstDollarIndex, sizep);
1526 }
1527 
1528 /*
1529  * Precondition: |sb| already has necessary growth space reserved (as
1530  * derived from FindReplaceLength), and has been inflated to TwoByte if
1531  * necessary.
1532  */
1533 template <typename CharT>
DoReplace(HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures,HandleLinearString replacement,size_t firstDollarIndex,StringBuffer & sb)1534 static void DoReplace(HandleLinearString matched, HandleLinearString string,
1535                       size_t position, size_t tailPos,
1536                       Handle<CapturesVector> captures,
1537                       Handle<CapturesVector> namedCaptures,
1538                       HandleLinearString replacement, size_t firstDollarIndex,
1539                       StringBuffer& sb) {
1540   JS::AutoCheckCannotGC nogc;
1541   const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1542   const CharT* currentChar = replacementBegin;
1543 
1544   MOZ_ASSERT(firstDollarIndex < replacement->length());
1545   const CharT* currentDollar = replacementBegin + firstDollarIndex;
1546   const CharT* replacementEnd = replacementBegin + replacement->length();
1547   uint32_t currentNamedCapture = 0;
1548   do {
1549     /* Move one of the constant portions of the replacement value. */
1550     size_t len = currentDollar - currentChar;
1551     sb.infallibleAppend(currentChar, len);
1552     currentChar = currentDollar;
1553 
1554     JSSubString sub;
1555     size_t skip;
1556     if (InterpretDollar(matched, string, position, tailPos, captures,
1557                         namedCaptures, replacement, replacementBegin,
1558                         currentDollar, replacementEnd, &sub, &skip,
1559                         &currentNamedCapture)) {
1560       sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
1561       currentChar += skip;
1562       currentDollar += skip;
1563     } else {
1564       currentDollar++;
1565     }
1566 
1567     currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1568   } while (currentDollar);
1569   sb.infallibleAppend(currentChar,
1570                       replacement->length() - (currentChar - replacementBegin));
1571 }
1572 
1573 /*
1574  * This function finds the list of named captures of the form
1575  * "$<name>" in a replacement string and converts them into jsids, for
1576  * use in InitNamedReplacements.
1577  */
1578 template <typename CharT>
CollectNames(JSContext * cx,HandleLinearString replacement,size_t firstDollarIndex,MutableHandle<GCVector<jsid>> names)1579 static bool CollectNames(JSContext* cx, HandleLinearString replacement,
1580                          size_t firstDollarIndex,
1581                          MutableHandle<GCVector<jsid>> names) {
1582   JS::AutoCheckCannotGC nogc;
1583   MOZ_ASSERT(firstDollarIndex < replacement->length());
1584 
1585   const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1586   const CharT* currentDollar = replacementBegin + firstDollarIndex;
1587   const CharT* replacementEnd = replacementBegin + replacement->length();
1588 
1589   // https://tc39.es/ecma262/#table-45, "$<" section
1590   while (currentDollar && currentDollar + 1 < replacementEnd) {
1591     if (currentDollar[1] == '<') {
1592       // Step 2.b
1593       const CharT* nameStart = currentDollar + 2;
1594       const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
1595 
1596       // Step 2.c
1597       if (!nameEnd) {
1598         return true;
1599       }
1600 
1601       // Step 2.d.i
1602       size_t nameLength = nameEnd - nameStart;
1603       JSAtom* atom = AtomizeChars(cx, nameStart, nameLength);
1604       if (!atom || !names.append(AtomToId(atom))) {
1605         return false;
1606       }
1607       currentDollar = nameEnd + 1;
1608     } else {
1609       currentDollar += 2;
1610     }
1611     currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1612   }
1613   return true;
1614 }
1615 
1616 /*
1617  * When replacing named captures, the spec requires us to perform
1618  * `Get(match.groups, name)` for each "$<name>". These `Get`s can be
1619  * script-visible; for example, RegExp can be extended with an `exec`
1620  * method that wraps `groups` in a proxy. To make sure that we do the
1621  * right thing, if a regexp has named captures, we find the named
1622  * capture replacements before beginning the actual replacement.
1623  * This guarantees that we will call GetProperty once and only once for
1624  * each "$<name>" in the replacement string, in the correct order.
1625  *
1626  * This function precomputes the results of step 2 of the '$<' case
1627  * here: https://tc39.es/proposal-regexp-named-groups/#table-45, so
1628  * that when we need to access the nth named capture in InterpretDollar,
1629  * we can just use the nth value stored in namedCaptures.
1630  */
InitNamedCaptures(JSContext * cx,HandleLinearString replacement,HandleObject groups,size_t firstDollarIndex,MutableHandle<CapturesVector> namedCaptures)1631 static bool InitNamedCaptures(JSContext* cx, HandleLinearString replacement,
1632                               HandleObject groups, size_t firstDollarIndex,
1633                               MutableHandle<CapturesVector> namedCaptures) {
1634   Rooted<GCVector<jsid>> names(cx, cx);
1635   if (replacement->hasLatin1Chars()) {
1636     if (!CollectNames<Latin1Char>(cx, replacement, firstDollarIndex, &names)) {
1637       return false;
1638     }
1639   } else {
1640     if (!CollectNames<char16_t>(cx, replacement, firstDollarIndex, &names)) {
1641       return false;
1642     }
1643   }
1644 
1645   // https://tc39.es/ecma262/#table-45, "$<" section
1646   RootedId id(cx);
1647   RootedValue capture(cx);
1648   for (uint32_t i = 0; i < names.length(); i++) {
1649     // Step 2.d.i
1650     id = names[i];
1651 
1652     // Step 2.d.ii
1653     if (!GetProperty(cx, groups, groups, id, &capture)) {
1654       return false;
1655     }
1656 
1657     // Step 2.d.iii
1658     if (capture.isUndefined()) {
1659       if (!namedCaptures.append(capture)) {
1660         return false;
1661       }
1662     } else {
1663       // Step 2.d.iv
1664       JSString* str = ToString<CanGC>(cx, capture);
1665       if (!str) {
1666         return false;
1667       }
1668       JSLinearString* linear = str->ensureLinear(cx);
1669       if (!linear) {
1670         return false;
1671       }
1672       if (!namedCaptures.append(StringValue(linear))) {
1673         return false;
1674       }
1675     }
1676   }
1677 
1678   return true;
1679 }
1680 
NeedTwoBytes(HandleLinearString string,HandleLinearString replacement,HandleLinearString matched,Handle<CapturesVector> captures,Handle<CapturesVector> namedCaptures)1681 static bool NeedTwoBytes(HandleLinearString string,
1682                          HandleLinearString replacement,
1683                          HandleLinearString matched,
1684                          Handle<CapturesVector> captures,
1685                          Handle<CapturesVector> namedCaptures) {
1686   if (string->hasTwoByteChars()) {
1687     return true;
1688   }
1689   if (replacement->hasTwoByteChars()) {
1690     return true;
1691   }
1692   if (matched->hasTwoByteChars()) {
1693     return true;
1694   }
1695 
1696   for (const Value& capture : captures) {
1697     if (capture.isUndefined()) {
1698       continue;
1699     }
1700     if (capture.toString()->hasTwoByteChars()) {
1701       return true;
1702     }
1703   }
1704 
1705   for (const Value& capture : namedCaptures) {
1706     if (capture.isUndefined()) {
1707       continue;
1708     }
1709     if (capture.toString()->hasTwoByteChars()) {
1710       return true;
1711     }
1712   }
1713 
1714   return false;
1715 }
1716 
1717 /* ES 2021 21.1.3.17.1 */
1718 // https://tc39.es/ecma262/#sec-getsubstitution
RegExpGetSubstitution(JSContext * cx,HandleArrayObject matchResult,HandleLinearString string,size_t position,HandleLinearString replacement,size_t firstDollarIndex,HandleValue groups,MutableHandleValue rval)1719 bool js::RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult,
1720                                HandleLinearString string, size_t position,
1721                                HandleLinearString replacement,
1722                                size_t firstDollarIndex, HandleValue groups,
1723                                MutableHandleValue rval) {
1724   MOZ_ASSERT(firstDollarIndex < replacement->length());
1725 
1726   // Step 1 (skipped).
1727 
1728   // Step 10 (reordered).
1729   uint32_t matchResultLength = matchResult->length();
1730   MOZ_ASSERT(matchResultLength > 0);
1731   MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength());
1732 
1733   const Value& matchedValue = matchResult->getDenseElement(0);
1734   RootedLinearString matched(cx, matchedValue.toString()->ensureLinear(cx));
1735   if (!matched) {
1736     return false;
1737   }
1738 
1739   // Step 2.
1740   size_t matchLength = matched->length();
1741 
1742   // Steps 3-5 (skipped).
1743 
1744   // Step 6.
1745   MOZ_ASSERT(position <= string->length());
1746 
1747   uint32_t nCaptures = matchResultLength - 1;
1748   Rooted<CapturesVector> captures(cx, CapturesVector(cx));
1749   if (!captures.reserve(nCaptures)) {
1750     return false;
1751   }
1752 
1753   // Step 7.
1754   for (uint32_t i = 1; i <= nCaptures; i++) {
1755     const Value& capture = matchResult->getDenseElement(i);
1756 
1757     if (capture.isUndefined()) {
1758       captures.infallibleAppend(capture);
1759       continue;
1760     }
1761 
1762     JSLinearString* captureLinear = capture.toString()->ensureLinear(cx);
1763     if (!captureLinear) {
1764       return false;
1765     }
1766     captures.infallibleAppend(StringValue(captureLinear));
1767   }
1768 
1769   Rooted<CapturesVector> namedCaptures(cx, cx);
1770   if (groups.isObject()) {
1771     RootedObject groupsObj(cx, &groups.toObject());
1772     if (!InitNamedCaptures(cx, replacement, groupsObj, firstDollarIndex,
1773                            &namedCaptures)) {
1774       return false;
1775     }
1776   } else {
1777     MOZ_ASSERT(groups.isUndefined());
1778   }
1779 
1780   // Step 8 (skipped).
1781 
1782   // Step 9.
1783   CheckedInt<uint32_t> checkedTailPos(0);
1784   checkedTailPos += position;
1785   checkedTailPos += matchLength;
1786   if (!checkedTailPos.isValid()) {
1787     ReportAllocationOverflow(cx);
1788     return false;
1789   }
1790   uint32_t tailPos = checkedTailPos.value();
1791 
1792   // Step 11.
1793   size_t reserveLength;
1794   if (!FindReplaceLength(cx, matched, string, position, tailPos, captures,
1795                          namedCaptures, replacement, firstDollarIndex,
1796                          &reserveLength)) {
1797     return false;
1798   }
1799 
1800   JSStringBuilder result(cx);
1801   if (NeedTwoBytes(string, replacement, matched, captures, namedCaptures)) {
1802     if (!result.ensureTwoByteChars()) {
1803       return false;
1804     }
1805   }
1806 
1807   if (!result.reserve(reserveLength)) {
1808     return false;
1809   }
1810 
1811   if (replacement->hasLatin1Chars()) {
1812     DoReplace<Latin1Char>(matched, string, position, tailPos, captures,
1813                           namedCaptures, replacement, firstDollarIndex, result);
1814   } else {
1815     DoReplace<char16_t>(matched, string, position, tailPos, captures,
1816                         namedCaptures, replacement, firstDollarIndex, result);
1817   }
1818 
1819   // Step 12.
1820   JSString* resultString = result.finishString();
1821   if (!resultString) {
1822     return false;
1823   }
1824 
1825   rval.setString(resultString);
1826   return true;
1827 }
1828 
GetFirstDollarIndex(JSContext * cx,unsigned argc,Value * vp)1829 bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) {
1830   CallArgs args = CallArgsFromVp(argc, vp);
1831   MOZ_ASSERT(args.length() == 1);
1832   JSString* str = args[0].toString();
1833 
1834   // Should be handled in different path.
1835   MOZ_ASSERT(str->length() != 0);
1836 
1837   int32_t index = -1;
1838   if (!GetFirstDollarIndexRaw(cx, str, &index)) {
1839     return false;
1840   }
1841 
1842   args.rval().setInt32(index);
1843   return true;
1844 }
1845 
// Return the index of the first '$' among the first |textLen| characters of
// |text|, or -1 if there is none.
template <typename TextChar>
static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text,
                                                     uint32_t textLen) {
  for (uint32_t index = 0; index < textLen; ++index) {
    if (text[index] == '$') {
      return static_cast<int>(index);
    }
  }
  return -1;
}
1857 
GetFirstDollarIndexRawFlat(JSLinearString * text)1858 int32_t js::GetFirstDollarIndexRawFlat(JSLinearString* text) {
1859   uint32_t len = text->length();
1860 
1861   JS::AutoCheckCannotGC nogc;
1862   if (text->hasLatin1Chars()) {
1863     return GetFirstDollarIndexImpl(text->latin1Chars(nogc), len);
1864   }
1865 
1866   return GetFirstDollarIndexImpl(text->twoByteChars(nogc), len);
1867 }
1868 
GetFirstDollarIndexRaw(JSContext * cx,JSString * str,int32_t * index)1869 bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) {
1870   JSLinearString* text = str->ensureLinear(cx);
1871   if (!text) {
1872     return false;
1873   }
1874 
1875   *index = GetFirstDollarIndexRawFlat(text);
1876   return true;
1877 }
1878 
RegExpPrototypeOptimizable(JSContext * cx,unsigned argc,Value * vp)1879 bool js::RegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) {
1880   // This can only be called from self-hosted code.
1881   CallArgs args = CallArgsFromVp(argc, vp);
1882   MOZ_ASSERT(args.length() == 1);
1883 
1884   args.rval().setBoolean(
1885       RegExpPrototypeOptimizableRaw(cx, &args[0].toObject()));
1886   return true;
1887 }
1888 
RegExpPrototypeOptimizableRaw(JSContext * cx,JSObject * proto)1889 bool js::RegExpPrototypeOptimizableRaw(JSContext* cx, JSObject* proto) {
1890   AutoUnsafeCallWithABI unsafe;
1891   AutoAssertNoPendingException aanpe(cx);
1892   if (!proto->is<NativeObject>()) {
1893     return false;
1894   }
1895 
1896   NativeObject* nproto = static_cast<NativeObject*>(proto);
1897 
1898   Shape* shape = cx->realm()->regExps.getOptimizableRegExpPrototypeShape();
1899   if (shape == nproto->shape()) {
1900     return true;
1901   }
1902 
1903   JSFunction* flagsGetter;
1904   if (!GetOwnGetterPure(cx, proto, NameToId(cx->names().flags), &flagsGetter)) {
1905     return false;
1906   }
1907 
1908   if (!flagsGetter) {
1909     return false;
1910   }
1911 
1912   if (!IsSelfHostedFunctionWithName(flagsGetter,
1913                                     cx->names().RegExpFlagsGetter)) {
1914     return false;
1915   }
1916 
1917   JSNative globalGetter;
1918   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().global),
1919                               &globalGetter)) {
1920     return false;
1921   }
1922 
1923   if (globalGetter != regexp_global) {
1924     return false;
1925   }
1926 
1927   JSNative hasIndicesGetter;
1928   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().hasIndices),
1929                               &hasIndicesGetter)) {
1930     return false;
1931   }
1932 
1933   if (hasIndicesGetter != regexp_hasIndices) {
1934     return false;
1935   }
1936 
1937   JSNative ignoreCaseGetter;
1938   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().ignoreCase),
1939                               &ignoreCaseGetter)) {
1940     return false;
1941   }
1942 
1943   if (ignoreCaseGetter != regexp_ignoreCase) {
1944     return false;
1945   }
1946 
1947   JSNative multilineGetter;
1948   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().multiline),
1949                               &multilineGetter)) {
1950     return false;
1951   }
1952 
1953   if (multilineGetter != regexp_multiline) {
1954     return false;
1955   }
1956 
1957   JSNative stickyGetter;
1958   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().sticky),
1959                               &stickyGetter)) {
1960     return false;
1961   }
1962 
1963   if (stickyGetter != regexp_sticky) {
1964     return false;
1965   }
1966 
1967   JSNative unicodeGetter;
1968   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().unicode),
1969                               &unicodeGetter)) {
1970     return false;
1971   }
1972 
1973   if (unicodeGetter != regexp_unicode) {
1974     return false;
1975   }
1976 
1977   JSNative dotAllGetter;
1978   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().dotAll),
1979                               &dotAllGetter)) {
1980     return false;
1981   }
1982 
1983   if (dotAllGetter != regexp_dotAll) {
1984     return false;
1985   }
1986 
1987   // Check if @@match, @@search, and exec are own data properties,
1988   // those values should be tested in selfhosted JS.
1989   bool has = false;
1990   if (!HasOwnDataPropertyPure(
1991           cx, proto, PropertyKey::Symbol(cx->wellKnownSymbols().match), &has)) {
1992     return false;
1993   }
1994   if (!has) {
1995     return false;
1996   }
1997 
1998   if (!HasOwnDataPropertyPure(
1999           cx, proto, PropertyKey::Symbol(cx->wellKnownSymbols().search),
2000           &has)) {
2001     return false;
2002   }
2003   if (!has) {
2004     return false;
2005   }
2006 
2007   if (!HasOwnDataPropertyPure(cx, proto, NameToId(cx->names().exec), &has)) {
2008     return false;
2009   }
2010   if (!has) {
2011     return false;
2012   }
2013 
2014   cx->realm()->regExps.setOptimizableRegExpPrototypeShape(nproto->shape());
2015   return true;
2016 }
2017 
RegExpInstanceOptimizable(JSContext * cx,unsigned argc,Value * vp)2018 bool js::RegExpInstanceOptimizable(JSContext* cx, unsigned argc, Value* vp) {
2019   // This can only be called from self-hosted code.
2020   CallArgs args = CallArgsFromVp(argc, vp);
2021   MOZ_ASSERT(args.length() == 2);
2022 
2023   args.rval().setBoolean(RegExpInstanceOptimizableRaw(cx, &args[0].toObject(),
2024                                                       &args[1].toObject()));
2025   return true;
2026 }
2027 
RegExpInstanceOptimizableRaw(JSContext * cx,JSObject * obj,JSObject * proto)2028 bool js::RegExpInstanceOptimizableRaw(JSContext* cx, JSObject* obj,
2029                                       JSObject* proto) {
2030   AutoUnsafeCallWithABI unsafe;
2031   AutoAssertNoPendingException aanpe(cx);
2032 
2033   RegExpObject* rx = &obj->as<RegExpObject>();
2034 
2035   Shape* shape = cx->realm()->regExps.getOptimizableRegExpInstanceShape();
2036   if (shape == rx->shape()) {
2037     return true;
2038   }
2039 
2040   if (!rx->hasStaticPrototype()) {
2041     return false;
2042   }
2043 
2044   if (rx->staticPrototype() != proto) {
2045     return false;
2046   }
2047 
2048   if (!RegExpObject::isInitialShape(rx)) {
2049     return false;
2050   }
2051 
2052   cx->realm()->regExps.setOptimizableRegExpInstanceShape(rx->shape());
2053   return true;
2054 }
2055 
2056 /*
2057  * Pattern match the script to check if it is is indexing into a particular
2058  * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
2059  * such cases, which are used by javascript packers (particularly the popular
2060  * Dean Edwards packer) to efficiently encode large scripts. We only handle the
2061  * code patterns generated by such packers here.
2062  */
intrinsic_GetElemBaseForLambda(JSContext * cx,unsigned argc,Value * vp)2063 bool js::intrinsic_GetElemBaseForLambda(JSContext* cx, unsigned argc,
2064                                         Value* vp) {
2065   // This can only be called from self-hosted code.
2066   CallArgs args = CallArgsFromVp(argc, vp);
2067   MOZ_ASSERT(args.length() == 1);
2068 
2069   JSObject& lambda = args[0].toObject();
2070   args.rval().setUndefined();
2071 
2072   if (!lambda.is<JSFunction>()) {
2073     return true;
2074   }
2075 
2076   RootedFunction fun(cx, &lambda.as<JSFunction>());
2077   if (!fun->isInterpreted() || fun->isClassConstructor()) {
2078     return true;
2079   }
2080 
2081   JSScript* script = JSFunction::getOrCreateScript(cx, fun);
2082   if (!script) {
2083     return false;
2084   }
2085 
2086   jsbytecode* pc = script->code();
2087 
2088   /*
2089    * JSOp::GetAliasedVar tells us exactly where to find the base object 'b'.
2090    * Rule out the (unlikely) possibility of a function with environment
2091    * objects since it would make our environment walk off.
2092    */
2093   if (JSOp(*pc) != JSOp::GetAliasedVar || fun->needsSomeEnvironmentObject()) {
2094     return true;
2095   }
2096   EnvironmentCoordinate ec(pc);
2097   EnvironmentObject* env = &fun->environment()->as<EnvironmentObject>();
2098   for (unsigned i = 0; i < ec.hops(); ++i) {
2099     env = &env->enclosingEnvironment().as<EnvironmentObject>();
2100   }
2101   Value b = env->aliasedBinding(ec);
2102   pc += JSOpLength_GetAliasedVar;
2103 
2104   /* Look for 'a' to be the lambda's first argument. */
2105   if (JSOp(*pc) != JSOp::GetArg || GET_ARGNO(pc) != 0) {
2106     return true;
2107   }
2108   pc += JSOpLength_GetArg;
2109 
2110   /* 'b[a]' */
2111   if (JSOp(*pc) != JSOp::GetElem) {
2112     return true;
2113   }
2114   pc += JSOpLength_GetElem;
2115 
2116   /* 'return b[a]' */
2117   if (JSOp(*pc) != JSOp::Return) {
2118     return true;
2119   }
2120 
2121   /* 'b' must behave like a normal object. */
2122   if (!b.isObject()) {
2123     return true;
2124   }
2125 
2126   JSObject& bobj = b.toObject();
2127   const JSClass* clasp = bobj.getClass();
2128   if (!clasp->isNativeObject() || clasp->getOpsLookupProperty() ||
2129       clasp->getOpsGetProperty()) {
2130     return true;
2131   }
2132 
2133   args.rval().setObject(bobj);
2134   return true;
2135 }
2136 
2137 /*
2138  * Emulates `b[a]` property access, that is detected in GetElemBaseForLambda.
2139  * It returns the property value only if the property is data property and the
2140  * property value is a string.  Otherwise it returns undefined.
2141  */
intrinsic_GetStringDataProperty(JSContext * cx,unsigned argc,Value * vp)2142 bool js::intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc,
2143                                          Value* vp) {
2144   CallArgs args = CallArgsFromVp(argc, vp);
2145   MOZ_ASSERT(args.length() == 2);
2146 
2147   RootedObject obj(cx, &args[0].toObject());
2148   if (!obj->is<NativeObject>()) {
2149     // The object is already checked to be native in GetElemBaseForLambda,
2150     // but it can be swapped to another class that is non-native.
2151     // Return undefined to mark failure to get the property.
2152     args.rval().setUndefined();
2153     return true;
2154   }
2155 
2156   JSAtom* atom = AtomizeString(cx, args[1].toString());
2157   if (!atom) {
2158     return false;
2159   }
2160 
2161   Value v;
2162   if (GetPropertyPure(cx, obj, AtomToId(atom), &v) && v.isString()) {
2163     args.rval().set(v);
2164   } else {
2165     args.rval().setUndefined();
2166   }
2167 
2168   return true;
2169 }
2170