1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "builtin/RegExp.h"
8 
9 #include "mozilla/CheckedInt.h"
10 #include "mozilla/TypeTraits.h"
11 
12 #include "frontend/TokenStream.h"
13 #include "irregexp/RegExpParser.h"
14 #include "jit/InlinableNatives.h"
15 #include "util/StringBuffer.h"
16 #include "util/Unicode.h"
17 #include "vm/JSContext.h"
18 #include "vm/RegExpStatics.h"
19 #include "vm/SelfHosting.h"
20 
21 #include "vm/JSObject-inl.h"
22 #include "vm/NativeObject-inl.h"
23 #include "vm/UnboxedObject-inl.h"
24 
25 using namespace js;
26 using namespace js::unicode;
27 
28 using mozilla::CheckedInt;
29 
30 /*
31  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
32  * steps 3, 16-25.
33  */
CreateRegExpMatchResult(JSContext * cx,HandleString input,const MatchPairs & matches,MutableHandleValue rval)34 bool js::CreateRegExpMatchResult(JSContext* cx, HandleString input,
35                                  const MatchPairs& matches,
36                                  MutableHandleValue rval) {
37   MOZ_ASSERT(input);
38 
39   /*
40    * Create the (slow) result array for a match.
41    *
42    * Array contents:
43    *  0:              matched string
44    *  1..pairCount-1: paren matches
45    *  input:          input string
46    *  index:          start index for the match
47    */
48 
49   /* Get the templateObject that defines the shape and type of the output object
50    */
51   JSObject* templateObject =
52       cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx);
53   if (!templateObject) return false;
54 
55   size_t numPairs = matches.length();
56   MOZ_ASSERT(numPairs > 0);
57 
58   /* Step 17. */
59   RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(
60                                 cx, numPairs, templateObject));
61   if (!arr) return false;
62 
63   /* Steps 22-24.
64    * Store a Value for each pair. */
65   for (size_t i = 0; i < numPairs; i++) {
66     const MatchPair& pair = matches[i];
67 
68     if (pair.isUndefined()) {
69       MOZ_ASSERT(i !=
70                  0); /* Since we had a match, first pair must be present. */
71       arr->setDenseInitializedLength(i + 1);
72       arr->initDenseElement(i, UndefinedValue());
73     } else {
74       JSLinearString* str =
75           NewDependentString(cx, input, pair.start, pair.length());
76       if (!str) return false;
77       arr->setDenseInitializedLength(i + 1);
78       arr->initDenseElement(i, StringValue(str));
79     }
80   }
81 
82   /* Step 20 (reordered).
83    * Set the |index| property. (TemplateObject positions it in slot 0) */
84   arr->setSlot(0, Int32Value(matches[0].start));
85 
86   /* Step 21 (reordered).
87    * Set the |input| property. (TemplateObject positions it in slot 1) */
88   arr->setSlot(1, StringValue(input));
89 
90 #ifdef DEBUG
91   RootedValue test(cx);
92   RootedId id(cx, NameToId(cx->names().index));
93   if (!NativeGetProperty(cx, arr, id, &test)) return false;
94   MOZ_ASSERT(test == arr->getSlot(0));
95   id = NameToId(cx->names().input);
96   if (!NativeGetProperty(cx, arr, id, &test)) return false;
97   MOZ_ASSERT(test == arr->getSlot(1));
98 #endif
99 
100   /* Step 25. */
101   rval.setObject(*arr);
102   return true;
103 }
104 
CreateRegExpSearchResult(const MatchPairs & matches)105 static int32_t CreateRegExpSearchResult(const MatchPairs& matches) {
106   /* Fit the start and limit of match into a int32_t. */
107   uint32_t position = matches[0].start;
108   uint32_t lastIndex = matches[0].limit;
109   MOZ_ASSERT(position < 0x8000);
110   MOZ_ASSERT(lastIndex < 0x8000);
111   return position | (lastIndex << 15);
112 }
113 
114 /*
115  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
116  * steps 3, 9-14, except 12.a.i, 12.c.i.1.
117  */
ExecuteRegExpImpl(JSContext * cx,RegExpStatics * res,MutableHandleRegExpShared re,HandleLinearString input,size_t searchIndex,VectorMatchPairs * matches,size_t * endIndex)118 static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
119                                          MutableHandleRegExpShared re,
120                                          HandleLinearString input,
121                                          size_t searchIndex,
122                                          VectorMatchPairs* matches,
123                                          size_t* endIndex) {
124   RegExpRunStatus status =
125       RegExpShared::execute(cx, re, input, searchIndex, matches, endIndex);
126 
127   /* Out of spec: Update RegExpStatics. */
128   if (status == RegExpRunStatus_Success && res) {
129     if (matches) {
130       if (!res->updateFromMatchPairs(cx, input, *matches))
131         return RegExpRunStatus_Error;
132     } else {
133       res->updateLazily(cx, input, re, searchIndex);
134     }
135   }
136   return status;
137 }
138 
139 /* Legacy ExecuteRegExp behavior is baked into the JSAPI. */
ExecuteRegExpLegacy(JSContext * cx,RegExpStatics * res,Handle<RegExpObject * > reobj,HandleLinearString input,size_t * lastIndex,bool test,MutableHandleValue rval)140 bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res,
141                              Handle<RegExpObject*> reobj,
142                              HandleLinearString input, size_t* lastIndex,
143                              bool test, MutableHandleValue rval) {
144   RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj));
145   if (!shared) return false;
146 
147   VectorMatchPairs matches;
148 
149   RegExpRunStatus status =
150       ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches, nullptr);
151   if (status == RegExpRunStatus_Error) return false;
152 
153   if (status == RegExpRunStatus_Success_NotFound) {
154     /* ExecuteRegExp() previously returned an array or null. */
155     rval.setNull();
156     return true;
157   }
158 
159   *lastIndex = matches[0].limit;
160 
161   if (test) {
162     /* Forbid an array, as an optimization. */
163     rval.setBoolean(true);
164     return true;
165   }
166 
167   return CreateRegExpMatchResult(cx, input, matches, rval);
168 }
169 
CheckPatternSyntaxSlow(JSContext * cx,HandleAtom pattern,RegExpFlag flags)170 static bool CheckPatternSyntaxSlow(JSContext* cx, HandleAtom pattern,
171                                    RegExpFlag flags) {
172   CompileOptions options(cx);
173   frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
174   return irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(),
175                                       pattern, flags & UnicodeFlag);
176 }
177 
CheckPatternSyntax(JSContext * cx,HandleAtom pattern,RegExpFlag flags)178 static RegExpShared* CheckPatternSyntax(JSContext* cx, HandleAtom pattern,
179                                         RegExpFlag flags) {
180   // If we already have a RegExpShared for this pattern/flags, we can
181   // avoid the much slower CheckPatternSyntaxSlow call.
182 
183   if (RegExpShared* shared = cx->zone()->regExps.maybeGet(pattern, flags)) {
184 #ifdef DEBUG
185     // Assert the pattern is valid.
186     if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
187       MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed());
188       return nullptr;
189     }
190 #endif
191     return shared;
192   }
193 
194   if (!CheckPatternSyntaxSlow(cx, pattern, flags)) return nullptr;
195 
196   // Allocate and return a new RegExpShared so we will hit the fast path
197   // next time.
198   return cx->zone()->regExps.get(cx, pattern, flags);
199 }
200 
201 /*
202  * ES 2016 draft Mar 25, 2016 21.2.3.2.2.
203  *
204  * Steps 14-15 set |obj|'s "lastIndex" property to zero.  Some of
205  * RegExpInitialize's callers have a fresh RegExp not yet exposed to script:
206  * in these cases zeroing "lastIndex" is infallible.  But others have a RegExp
207  * whose "lastIndex" property might have been made non-writable: here, zeroing
208  * "lastIndex" can fail.  We efficiently solve this problem by completely
209  * removing "lastIndex" zeroing from the provided function.
210  *
211  * CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES!
212  *
213  * Because this function only ever returns a user-provided |obj| in the spec,
214  * we omit it and just return the usual success/failure.
215  */
RegExpInitializeIgnoringLastIndex(JSContext * cx,Handle<RegExpObject * > obj,HandleValue patternValue,HandleValue flagsValue)216 static bool RegExpInitializeIgnoringLastIndex(JSContext* cx,
217                                               Handle<RegExpObject*> obj,
218                                               HandleValue patternValue,
219                                               HandleValue flagsValue) {
220   RootedAtom pattern(cx);
221   if (patternValue.isUndefined()) {
222     /* Step 1. */
223     pattern = cx->names().empty;
224   } else {
225     /* Step 2. */
226     pattern = ToAtom<CanGC>(cx, patternValue);
227     if (!pattern) return false;
228   }
229 
230   /* Step 3. */
231   RegExpFlag flags = RegExpFlag(0);
232   if (!flagsValue.isUndefined()) {
233     /* Step 4. */
234     RootedString flagStr(cx, ToString<CanGC>(cx, flagsValue));
235     if (!flagStr) return false;
236 
237     /* Step 5. */
238     if (!ParseRegExpFlags(cx, flagStr, &flags)) return false;
239   }
240 
241   /* Steps 7-8. */
242   RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags);
243   if (!shared) return false;
244 
245   /* Steps 9-12. */
246   obj->initIgnoringLastIndex(pattern, flags);
247 
248   obj->setShared(*shared);
249 
250   return true;
251 }
252 
253 /* ES 2016 draft Mar 25, 2016 21.2.3.2.3. */
RegExpCreate(JSContext * cx,HandleValue patternValue,HandleValue flagsValue,MutableHandleValue rval)254 bool js::RegExpCreate(JSContext* cx, HandleValue patternValue,
255                       HandleValue flagsValue, MutableHandleValue rval) {
256   /* Step 1. */
257   Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject));
258   if (!regexp) return false;
259 
260   /* Step 2. */
261   if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue, flagsValue))
262     return false;
263   regexp->zeroLastIndex(cx);
264 
265   rval.setObject(*regexp);
266   return true;
267 }
268 
IsRegExpObject(HandleValue v)269 MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) {
270   return v.isObject() && v.toObject().is<RegExpObject>();
271 }
272 
273 /* ES6 draft rc3 7.2.8. */
IsRegExp(JSContext * cx,HandleValue value,bool * result)274 bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) {
275   /* Step 1. */
276   if (!value.isObject()) {
277     *result = false;
278     return true;
279   }
280   RootedObject obj(cx, &value.toObject());
281 
282   /* Steps 2-3. */
283   RootedValue isRegExp(cx);
284   RootedId matchId(cx, SYMBOL_TO_JSID(cx->wellKnownSymbols().match));
285   if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) return false;
286 
287   /* Step 4. */
288   if (!isRegExp.isUndefined()) {
289     *result = ToBoolean(isRegExp);
290     return true;
291   }
292 
293   /* Steps 5-6. */
294   ESClass cls;
295   if (!GetClassOfValue(cx, value, &cls)) return false;
296 
297   *result = cls == ESClass::RegExp;
298   return true;
299 }
300 
301 /* ES6 B.2.5.1. */
// Body of the native "compile" method; regexp_compile dispatches here through
// CallNonGenericMethod, so |args.thisv()| is a RegExpObject (asserted below).
//
// Re-initializes |this| either from another RegExp (copying its source and
// flags) or from pattern/flags values, then zeroes "lastIndex" and returns
// |this| in args.rval().  Returns false on failure.
MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx,
                                           const CallArgs& args) {
  MOZ_ASSERT(IsRegExpObject(args.thisv()));

  Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());

  // Step 3.
  RootedValue patternValue(cx, args.get(0));
  ESClass cls;
  if (!GetClassOfValue(cx, patternValue, &cls)) return false;
  if (cls == ESClass::RegExp) {
    // Step 3a.
    // A RegExp pattern combined with a defined flags argument is an error.
    if (args.hasDefined(1)) {
      JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
                                JSMSG_NEWREGEXP_FLAGGED);
      return false;
    }

    // Beware!  |patternObj| might be a proxy into another compartment, so
    // don't assume |patternObj.is<RegExpObject>()|.  For the same reason,
    // don't reuse the RegExpShared below.
    RootedObject patternObj(cx, &patternValue.toObject());

    RootedAtom sourceAtom(cx);
    RegExpFlag flags;
    {
      // Step 3b.
      // |shared| is scoped to this block: only its source and flags are
      // copied out.
      RegExpShared* shared = RegExpToShared(cx, patternObj);
      if (!shared) return false;

      sourceAtom = shared->getSource();
      flags = shared->getFlags();
    }

    // Step 5, minus lastIndex zeroing.
    regexp->initIgnoringLastIndex(sourceAtom, flags);
  } else {
    // Step 4.
    RootedValue P(cx, patternValue);
    RootedValue F(cx, args.get(1));

    // Step 5, minus lastIndex zeroing.
    if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) return false;
  }

  // The final niggling bit of step 5.
  //
  // |regexp| is user-exposed, but if its "lastIndex" property hasn't been
  // made non-writable, we can still use a fast path to zero it.
  if (regexp->lookupPure(cx->names().lastIndex)->writable()) {
    regexp->zeroLastIndex(cx);
  } else {
    // Non-writable "lastIndex": go through the generic SetProperty path and
    // propagate its failure.
    RootedValue zero(cx, Int32Value(0));
    if (!SetProperty(cx, regexp, cx->names().lastIndex, zero)) return false;
  }

  args.rval().setObject(*regexp);
  return true;
}
361 
regexp_compile(JSContext * cx,unsigned argc,Value * vp)362 static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) {
363   CallArgs args = CallArgsFromVp(argc, vp);
364 
365   /* Steps 1-2. */
366   return CallNonGenericMethod<IsRegExpObject, regexp_compile_impl>(cx, args);
367 }
368 
369 /*
370  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1.
371  */
// Native implementation of the RegExp constructor, for both call and
// construct invocations (distinguished via args.isConstructing()).
//
// Three outcomes are possible:
//  - a plain call whose pattern is RegExp-like, with no flags, and whose
//    "constructor" property is this callee returns the pattern unchanged;
//  - a pattern whose class is RegExp has its source/flags copied, and its
//    RegExpShared is reused when the zone and the flags match;
//  - anything else goes through RegExpInitializeIgnoringLastIndex.
// Returns false on failure.
bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) {
  CallArgs args = CallArgsFromVp(argc, vp);

  // Step 1.
  bool patternIsRegExp;
  if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) return false;

  // We can delay step 3 and step 4a until later, during
  // GetPrototypeFromBuiltinConstructor calls. Accessing the new.target
  // and the callee from the stack is unobservable.
  if (!args.isConstructing()) {
    // Step 3.b.
    if (patternIsRegExp && !args.hasDefined(1)) {
      RootedObject patternObj(cx, &args[0].toObject());

      // Step 3.b.i.
      RootedValue patternConstructor(cx);
      if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor,
                       &patternConstructor))
        return false;

      // Step 3.b.ii.
      // The pattern's constructor is this very function: return the pattern
      // itself instead of creating a new object.
      if (patternConstructor.isObject() &&
          patternConstructor.toObject() == args.callee()) {
        args.rval().set(args[0]);
        return true;
      }
    }
  }

  RootedValue patternValue(cx, args.get(0));

  // Step 4.
  ESClass cls;
  if (!GetClassOfValue(cx, patternValue, &cls)) return false;
  if (cls == ESClass::RegExp) {
    // Beware!  |patternObj| might be a proxy into another compartment, so
    // don't assume |patternObj.is<RegExpObject>()|.
    RootedObject patternObj(cx, &patternValue.toObject());

    RootedAtom sourceAtom(cx);
    RegExpFlag flags;
    RootedRegExpShared shared(cx);
    {
      // Step 4.a.
      shared = RegExpToShared(cx, patternObj);
      if (!shared) return false;
      sourceAtom = shared->getSource();

      // Step 4.b.
      // Get original flags in all cases, to compare with passed flags.
      flags = shared->getFlags();

      // If the RegExpShared is in another Zone, don't reuse it.
      if (cx->zone() != shared->zone()) shared = nullptr;
    }

    // Step 7.
    RootedObject proto(cx);
    if (!GetPrototypeFromBuiltinConstructor(cx, args, &proto)) return false;

    Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
    if (!regexp) return false;

    // Step 8.
    if (args.hasDefined(1)) {
      // Step 4.c / 21.2.3.2.2 RegExpInitialize step 4.
      RegExpFlag flagsArg = RegExpFlag(0);
      RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
      if (!flagStr) return false;
      if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) return false;

      // Don't reuse the RegExpShared if we have different flags.
      if (flags != flagsArg) shared = nullptr;

      if (!(flags & UnicodeFlag) && flagsArg & UnicodeFlag) {
        // Have to check syntax again when adding 'u' flag.

        // ES 2017 draft rev 9b49a888e9dfe2667008a01b2754c3662059ae56
        // 21.2.3.2.2 step 7.
        shared = CheckPatternSyntax(cx, sourceAtom, flagsArg);
        if (!shared) return false;
      }
      flags = flagsArg;
    }

    regexp->initAndZeroLastIndex(sourceAtom, flags, cx);

    // |shared| is null here when reuse was rejected above; in that case no
    // RegExpShared is attached at this point.
    if (shared) regexp->setShared(*shared);

    args.rval().setObject(*regexp);
    return true;
  }

  RootedValue P(cx);
  RootedValue F(cx);

  // Step 5.
  if (patternIsRegExp) {
    RootedObject patternObj(cx, &patternValue.toObject());

    // Step 5.a.
    if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P))
      return false;

    // Step 5.b.
    // Fall back to the pattern's own "flags" property only when no flags
    // argument was supplied.
    F = args.get(1);
    if (F.isUndefined()) {
      if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F))
        return false;
    }
  } else {
    // Steps 6.a-b.
    P = patternValue;
    F = args.get(1);
  }

  // Step 7.
  RootedObject proto(cx);
  if (!GetPrototypeFromBuiltinConstructor(cx, args, &proto)) return false;

  Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
  if (!regexp) return false;

  // Step 8.
  // The initializer skips "lastIndex", so zero it explicitly afterwards.
  if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) return false;
  regexp->zeroLastIndex(cx);

  args.rval().setObject(*regexp);
  return true;
}
503 
504 /*
505  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1
506  * steps 4, 7-8.
507  */
regexp_construct_raw_flags(JSContext * cx,unsigned argc,Value * vp)508 bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) {
509   CallArgs args = CallArgsFromVp(argc, vp);
510   MOZ_ASSERT(args.length() == 2);
511   MOZ_ASSERT(!args.isConstructing());
512 
513   // Step 4.a.
514   RootedAtom sourceAtom(cx, AtomizeString(cx, args[0].toString()));
515   if (!sourceAtom) return false;
516 
517   // Step 4.c.
518   int32_t flags = int32_t(args[1].toNumber());
519 
520   // Step 7.
521   RegExpObject* regexp = RegExpAlloc(cx, GenericObject);
522   if (!regexp) return false;
523 
524   // Step 8.
525   regexp->initAndZeroLastIndex(sourceAtom, RegExpFlag(flags), cx);
526   args.rval().setObject(*regexp);
527   return true;
528 }
529 
IsRegExpPrototype(HandleValue v)530 MOZ_ALWAYS_INLINE bool IsRegExpPrototype(HandleValue v) {
531   if (IsRegExpObject(v) || !v.isObject()) return false;
532 
533   // Note: The prototype shares its JSClass with instances.
534   return StandardProtoKeyOrNull(&v.toObject()) == JSProto_RegExp;
535 }
536 
537 // ES 2017 draft 21.2.5.4.
regexp_global_impl(JSContext * cx,const CallArgs & args)538 MOZ_ALWAYS_INLINE bool regexp_global_impl(JSContext* cx, const CallArgs& args) {
539   MOZ_ASSERT(IsRegExpObject(args.thisv()));
540 
541   // Steps 4-6.
542   RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
543   args.rval().setBoolean(reObj->global());
544   return true;
545 }
546 
regexp_global(JSContext * cx,unsigned argc,JS::Value * vp)547 bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) {
548   CallArgs args = CallArgsFromVp(argc, vp);
549 
550   // Step 3.a.
551   if (IsRegExpPrototype(args.thisv())) {
552     args.rval().setUndefined();
553     return true;
554   }
555 
556   // Steps 1-3.
557   return CallNonGenericMethod<IsRegExpObject, regexp_global_impl>(cx, args);
558 }
559 
560 // ES 2017 draft 21.2.5.5.
regexp_ignoreCase_impl(JSContext * cx,const CallArgs & args)561 MOZ_ALWAYS_INLINE bool regexp_ignoreCase_impl(JSContext* cx,
562                                               const CallArgs& args) {
563   MOZ_ASSERT(IsRegExpObject(args.thisv()));
564 
565   // Steps 4-6.
566   RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
567   args.rval().setBoolean(reObj->ignoreCase());
568   return true;
569 }
570 
regexp_ignoreCase(JSContext * cx,unsigned argc,JS::Value * vp)571 bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) {
572   CallArgs args = CallArgsFromVp(argc, vp);
573 
574   // Step 3.a.
575   if (IsRegExpPrototype(args.thisv())) {
576     args.rval().setUndefined();
577     return true;
578   }
579 
580   // Steps 1-3.
581   return CallNonGenericMethod<IsRegExpObject, regexp_ignoreCase_impl>(cx, args);
582 }
583 
584 // ES 2017 draft 21.2.5.7.
regexp_multiline_impl(JSContext * cx,const CallArgs & args)585 MOZ_ALWAYS_INLINE bool regexp_multiline_impl(JSContext* cx,
586                                              const CallArgs& args) {
587   MOZ_ASSERT(IsRegExpObject(args.thisv()));
588 
589   // Steps 4-6.
590   RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
591   args.rval().setBoolean(reObj->multiline());
592   return true;
593 }
594 
regexp_multiline(JSContext * cx,unsigned argc,JS::Value * vp)595 bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) {
596   CallArgs args = CallArgsFromVp(argc, vp);
597 
598   // Step 3.a.
599   if (IsRegExpPrototype(args.thisv())) {
600     args.rval().setUndefined();
601     return true;
602   }
603 
604   // Steps 1-3.
605   return CallNonGenericMethod<IsRegExpObject, regexp_multiline_impl>(cx, args);
606 }
607 
608 // ES 2017 draft 21.2.5.10.
regexp_source_impl(JSContext * cx,const CallArgs & args)609 MOZ_ALWAYS_INLINE bool regexp_source_impl(JSContext* cx, const CallArgs& args) {
610   MOZ_ASSERT(IsRegExpObject(args.thisv()));
611 
612   // Step 5.
613   RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
614   RootedAtom src(cx, reObj->getSource());
615   if (!src) return false;
616 
617   // Step 7.
618   JSString* str = EscapeRegExpPattern(cx, src);
619   if (!str) return false;
620 
621   args.rval().setString(str);
622   return true;
623 }
624 
regexp_source(JSContext * cx,unsigned argc,JS::Value * vp)625 static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) {
626   CallArgs args = CallArgsFromVp(argc, vp);
627 
628   // Step 3.a.
629   if (IsRegExpPrototype(args.thisv())) {
630     args.rval().setString(cx->names().emptyRegExp);
631     return true;
632   }
633 
634   // Steps 1-4.
635   return CallNonGenericMethod<IsRegExpObject, regexp_source_impl>(cx, args);
636 }
637 
638 // ES 2017 draft 21.2.5.12.
regexp_sticky_impl(JSContext * cx,const CallArgs & args)639 MOZ_ALWAYS_INLINE bool regexp_sticky_impl(JSContext* cx, const CallArgs& args) {
640   MOZ_ASSERT(IsRegExpObject(args.thisv()));
641 
642   // Steps 4-6.
643   RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
644   args.rval().setBoolean(reObj->sticky());
645   return true;
646 }
647 
regexp_sticky(JSContext * cx,unsigned argc,JS::Value * vp)648 bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) {
649   CallArgs args = CallArgsFromVp(argc, vp);
650 
651   // Step 3.a.
652   if (IsRegExpPrototype(args.thisv())) {
653     args.rval().setUndefined();
654     return true;
655   }
656 
657   // Steps 1-3.
658   return CallNonGenericMethod<IsRegExpObject, regexp_sticky_impl>(cx, args);
659 }
660 
661 // ES 2017 draft 21.2.5.15.
regexp_unicode_impl(JSContext * cx,const CallArgs & args)662 MOZ_ALWAYS_INLINE bool regexp_unicode_impl(JSContext* cx,
663                                            const CallArgs& args) {
664   MOZ_ASSERT(IsRegExpObject(args.thisv()));
665 
666   // Steps 4-6.
667   RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
668   args.rval().setBoolean(reObj->unicode());
669   return true;
670 }
671 
regexp_unicode(JSContext * cx,unsigned argc,JS::Value * vp)672 bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) {
673   CallArgs args = CallArgsFromVp(argc, vp);
674 
675   // Step 3.a.
676   if (IsRegExpPrototype(args.thisv())) {
677     args.rval().setUndefined();
678     return true;
679   }
680 
681   // Steps 1-3.
682   return CallNonGenericMethod<IsRegExpObject, regexp_unicode_impl>(cx, args);
683 }
684 
// Getter-only accessor table.  "flags" is backed by the self-hosted function
// RegExpFlagsGetter; every other entry is backed by the native getter of the
// same name defined in this file.  All entries pass 0 as their flags argument.
const JSPropertySpec js::regexp_properties[] = {
    JS_SELF_HOSTED_GET("flags", "RegExpFlagsGetter", 0),
    JS_PSG("global", regexp_global, 0),
    JS_PSG("ignoreCase", regexp_ignoreCase, 0),
    JS_PSG("multiline", regexp_multiline, 0),
    JS_PSG("source", regexp_source, 0),
    JS_PSG("sticky", regexp_sticky, 0),
    JS_PSG("unicode", regexp_unicode, 0),
    JS_PS_END};
694 
// Method table.  Only "compile" is a native (regexp_compile); every other
// entry names a self-hosted function.  toSource and toString both map to the
// same self-hosted RegExpToString.  The symbol-keyed entries (match, replace,
// search, split) are registered with JS_SELF_HOSTED_SYM_FN.
const JSFunctionSpec js::regexp_methods[] = {
    JS_SELF_HOSTED_FN(js_toSource_str, "RegExpToString", 0, 0),
    JS_SELF_HOSTED_FN(js_toString_str, "RegExpToString", 0, 0),
    JS_FN("compile", regexp_compile, 2, 0),
    JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0),
    JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0),
    JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0),
    JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0),
    JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0),
    JS_FS_END};
706 
// Getter body for the numbered-paren static properties.  Expects |cx|, |res|
// and |args| to be in scope at the expansion site (DEFINE_STATIC_GETTER
// provides them).  Asks |res| for paren |parenNum|; an undefined result is
// replaced with the runtime's empty string.  Ends in a |return| without a
// trailing semicolon, which the expansion site supplies.
#define STATIC_PAREN_GETTER_CODE(parenNum)                        \
  if (!res->createParen(cx, parenNum, args.rval())) return false; \
  if (args.rval().isUndefined())                                  \
    args.rval().setString(cx->runtime()->emptyString);            \
  return true
712 
713 /*
714  * RegExp static properties.
715  *
716  * RegExp class static properties and their Perl counterparts:
717  *
718  *  RegExp.input                $_
719  *  RegExp.lastMatch            $&
720  *  RegExp.lastParen            $+
721  *  RegExp.leftContext          $`
722  *  RegExp.rightContext         $'
723  */
724 
// Defines a static native function |name| that unpacks the call arguments
// into |args|, fetches the current global's RegExpStatics into |res|
// (returning false if that fails), and then runs |code|.  |code| is expected
// to return from the function itself; nothing follows it in the body.
#define DEFINE_STATIC_GETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    CallArgs args = CallArgsFromVp(argc, vp);                              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    code;                                                                  \
  }
732 
// Getters that forward directly to the matching RegExpStatics::create*
// method, which writes its result into args.rval().
DEFINE_STATIC_GETTER(static_input_getter,
                     return res->createPendingInput(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastMatch_getter,
                     return res->createLastMatch(cx, args.rval()))
DEFINE_STATIC_GETTER(static_lastParen_getter,
                     return res->createLastParen(cx, args.rval()))
DEFINE_STATIC_GETTER(static_leftContext_getter,
                     return res->createLeftContext(cx, args.rval()))
DEFINE_STATIC_GETTER(static_rightContext_getter,
                     return res->createRightContext(cx, args.rval()))

// Numbered paren getters 1 through 9; each yields the empty string when the
// paren value comes back undefined (see STATIC_PAREN_GETTER_CODE).
DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1))
DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2))
DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3))
DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4))
DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))
753 
// Defines a static native function |name| that fetches the current global's
// RegExpStatics into |res| (returning false if that fails), runs |code|, and
// then returns true.  Unlike DEFINE_STATIC_GETTER, no |args| variable is
// declared for |code| to use.
// NOTE(review): no use of this macro is visible in this part of the file
// (static_input_setter is written out by hand) — confirm whether it is still
// needed.
#define DEFINE_STATIC_SETTER(name, code)                                   \
  static bool name(JSContext* cx, unsigned argc, Value* vp) {              \
    RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
    if (!res) return false;                                                \
    code;                                                                  \
    return true;                                                           \
  }
761 
static_input_setter(JSContext * cx,unsigned argc,Value * vp)762 static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
763   CallArgs args = CallArgsFromVp(argc, vp);
764   RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
765   if (!res) return false;
766 
767   RootedString str(cx, ToString<CanGC>(cx, args.get(0)));
768   if (!str) return false;
769 
770   res->setPendingInput(str);
771   args.rval().setString(str);
772   return true;
773 }
774 
// Static property table.  The long names ("input", "lastMatch", ...) and
// "$1".."$9" are permanent and enumerable; the punctuation aliases ("$_",
// "$&", "$+", "$`", "$'") reuse the same getters but are permanent only.
// Only "input" and its alias "$_" have a setter.  The @@species getter is
// self-hosted (RegExpSpecies).
const JSPropertySpec js::regexp_static_props[] = {
    JS_PSGS("input", static_input_getter, static_input_setter,
            JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastMatch", static_lastMatch_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("lastParen", static_lastParen_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("leftContext", static_leftContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("rightContext", static_rightContext_getter,
           JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
    JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
    JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
    JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
    JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
    JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),
    JS_SELF_HOSTED_SYM_GET(species, "RegExpSpecies", 0),
    JS_PS_END};
802 
803 template <typename CharT>
IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,size_t index)804 static bool IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,
805                                                   size_t index) {
806   JS::AutoCheckCannotGC nogc;
807   MOZ_ASSERT(index > 0 && index < input->length());
808   const CharT* inputChars = input->chars<CharT>(nogc);
809 
810   return unicode::IsTrailSurrogate(inputChars[index]) &&
811          unicode::IsLeadSurrogate(inputChars[index - 1]);
812 }
813 
IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,int32_t index)814 static bool IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,
815                                               int32_t index) {
816   if (index <= 0 || size_t(index) >= input->length()) return false;
817 
818   return input->hasLatin1Chars()
819              ? IsTrailSurrogateWithLeadSurrogateImpl<Latin1Char>(input, index)
820              : IsTrailSurrogateWithLeadSurrogateImpl<char16_t>(input, index);
821 }
822 
823 /*
824  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
825  * steps 3, 9-14, except 12.a.i, 12.c.i.1.
826  */
ExecuteRegExp(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,VectorMatchPairs * matches,size_t * endIndex)827 static RegExpRunStatus ExecuteRegExp(JSContext* cx, HandleObject regexp,
828                                      HandleString string, int32_t lastIndex,
829                                      VectorMatchPairs* matches,
830                                      size_t* endIndex) {
831   /*
832    * WARNING: Despite the presence of spec step comment numbers, this
833    *          algorithm isn't consistent with any ES6 version, draft or
834    *          otherwise.  YOU HAVE BEEN WARNED.
835    */
836 
837   /* Steps 1-2 performed by the caller. */
838   Handle<RegExpObject*> reobj = regexp.as<RegExpObject>();
839 
840   RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj));
841   if (!re) return RegExpRunStatus_Error;
842 
843   RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
844   if (!res) return RegExpRunStatus_Error;
845 
846   RootedLinearString input(cx, string->ensureLinear(cx));
847   if (!input) return RegExpRunStatus_Error;
848 
849   /* Handled by caller */
850   MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());
851 
852   /* Steps 4-8 performed by the caller. */
853 
854   /* Step 10. */
855   if (reobj->unicode()) {
856     /*
857      * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
858      * 21.2.2.2 step 2.
859      *   Let listIndex be the index into Input of the character that was
860      *   obtained from element index of str.
861      *
862      * In the spec, pattern match is performed with decoded Unicode code
863      * points, but our implementation performs it with UTF-16 encoded
864      * string.  In step 2, we should decrement lastIndex (index) if it
865      * points the trail surrogate that has corresponding lead surrogate.
866      *
867      *   var r = /\uD83D\uDC38/ug;
868      *   r.lastIndex = 1;
869      *   var str = "\uD83D\uDC38";
870      *   var result = r.exec(str); // pattern match starts from index 0
871      *   print(result.index);      // prints 0
872      *
873      * Note: this doesn't match the current spec text and result in
874      * different values for `result.index` under certain conditions.
875      * However, the spec will change to match our implementation's
876      * behavior. See https://github.com/tc39/ecma262/issues/128.
877      */
878     if (IsTrailSurrogateWithLeadSurrogate(input, lastIndex)) lastIndex--;
879   }
880 
881   /* Steps 3, 11-14, except 12.a.i, 12.c.i.1. */
882   RegExpRunStatus status =
883       ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches, endIndex);
884   if (status == RegExpRunStatus_Error) return RegExpRunStatus_Error;
885 
886   /* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */
887 
888   return status;
889 }
890 
891 /*
892  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
893  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
894  */
RegExpMatcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,MutableHandleValue rval)895 static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp,
896                               HandleString string, int32_t lastIndex,
897                               MutableHandleValue rval) {
898   /* Execute regular expression and gather matches. */
899   VectorMatchPairs matches;
900 
901   /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
902   RegExpRunStatus status =
903       ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
904   if (status == RegExpRunStatus_Error) return false;
905 
906   /* Steps 12.a, 12.c. */
907   if (status == RegExpRunStatus_Success_NotFound) {
908     rval.setNull();
909     return true;
910   }
911 
912   /* Steps 16-25 */
913   return CreateRegExpMatchResult(cx, string, matches, rval);
914 }
915 
916 /*
917  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
918  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
919  */
RegExpMatcher(JSContext * cx,unsigned argc,Value * vp)920 bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) {
921   CallArgs args = CallArgsFromVp(argc, vp);
922   MOZ_ASSERT(args.length() == 3);
923   MOZ_ASSERT(IsRegExpObject(args[0]));
924   MOZ_ASSERT(args[1].isString());
925   MOZ_ASSERT(args[2].isNumber());
926 
927   RootedObject regexp(cx, &args[0].toObject());
928   RootedString string(cx, args[1].toString());
929 
930   int32_t lastIndex;
931   MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
932 
933   /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
934   return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
935 }
936 
937 /*
938  * Separate interface for use by IonMonkey.
939  * This code cannot re-enter Ion code.
940  */
RegExpMatcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,MatchPairs * maybeMatches,MutableHandleValue output)941 bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp,
942                           HandleString input, int32_t lastIndex,
943                           MatchPairs* maybeMatches, MutableHandleValue output) {
944   MOZ_ASSERT(lastIndex >= 0);
945 
946   // The MatchPairs will always be passed in, but RegExp execution was
947   // successful only if the pairs have actually been filled in.
948   if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0)
949     return CreateRegExpMatchResult(cx, input, *maybeMatches, output);
950   return RegExpMatcherImpl(cx, regexp, input, lastIndex, output);
951 }
952 
953 /*
954  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
955  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
956  * This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub,
957  * changes to this code need to get reflected in there too.
958  */
RegExpSearcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,int32_t * result)959 static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
960                                HandleString string, int32_t lastIndex,
961                                int32_t* result) {
962   /* Execute regular expression and gather matches. */
963   VectorMatchPairs matches;
964 
965   /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
966   RegExpRunStatus status =
967       ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
968   if (status == RegExpRunStatus_Error) return false;
969 
970   /* Steps 12.a, 12.c. */
971   if (status == RegExpRunStatus_Success_NotFound) {
972     *result = -1;
973     return true;
974   }
975 
976   /* Steps 16-25 */
977   *result = CreateRegExpSearchResult(matches);
978   return true;
979 }
980 
981 /*
982  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
983  * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
984  */
RegExpSearcher(JSContext * cx,unsigned argc,Value * vp)985 bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) {
986   CallArgs args = CallArgsFromVp(argc, vp);
987   MOZ_ASSERT(args.length() == 3);
988   MOZ_ASSERT(IsRegExpObject(args[0]));
989   MOZ_ASSERT(args[1].isString());
990   MOZ_ASSERT(args[2].isNumber());
991 
992   RootedObject regexp(cx, &args[0].toObject());
993   RootedString string(cx, args[1].toString());
994 
995   int32_t lastIndex;
996   MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
997 
998   /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
999   int32_t result = 0;
1000   if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) return false;
1001 
1002   args.rval().setInt32(result);
1003   return true;
1004 }
1005 
1006 /*
1007  * Separate interface for use by IonMonkey.
1008  * This code cannot re-enter Ion code.
1009  */
RegExpSearcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,MatchPairs * maybeMatches,int32_t * result)1010 bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
1011                            HandleString input, int32_t lastIndex,
1012                            MatchPairs* maybeMatches, int32_t* result) {
1013   MOZ_ASSERT(lastIndex >= 0);
1014 
1015   // The MatchPairs will always be passed in, but RegExp execution was
1016   // successful only if the pairs have actually been filled in.
1017   if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0) {
1018     *result = CreateRegExpSearchResult(*maybeMatches);
1019     return true;
1020   }
1021   return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
1022 }
1023 
1024 /*
1025  * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1026  * steps 3, 9-14, except 12.a.i, 12.c.i.1.
1027  */
RegExpTester(JSContext * cx,unsigned argc,Value * vp)1028 bool js::RegExpTester(JSContext* cx, unsigned argc, Value* vp) {
1029   CallArgs args = CallArgsFromVp(argc, vp);
1030   MOZ_ASSERT(args.length() == 3);
1031   MOZ_ASSERT(IsRegExpObject(args[0]));
1032   MOZ_ASSERT(args[1].isString());
1033   MOZ_ASSERT(args[2].isNumber());
1034 
1035   RootedObject regexp(cx, &args[0].toObject());
1036   RootedString string(cx, args[1].toString());
1037 
1038   int32_t lastIndex;
1039   MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1040 
1041   /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1042   size_t endIndex = 0;
1043   RegExpRunStatus status =
1044       ExecuteRegExp(cx, regexp, string, lastIndex, nullptr, &endIndex);
1045 
1046   if (status == RegExpRunStatus_Error) return false;
1047 
1048   if (status == RegExpRunStatus_Success) {
1049     MOZ_ASSERT(endIndex <= INT32_MAX);
1050     args.rval().setInt32(int32_t(endIndex));
1051   } else {
1052     args.rval().setInt32(-1);
1053   }
1054   return true;
1055 }
1056 
1057 /*
1058  * Separate interface for use by IonMonkey.
1059  * This code cannot re-enter Ion code.
1060  */
RegExpTesterRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,int32_t * endIndex)1061 bool js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
1062                          int32_t lastIndex, int32_t* endIndex) {
1063   MOZ_ASSERT(lastIndex >= 0);
1064 
1065   size_t endIndexTmp = 0;
1066   RegExpRunStatus status =
1067       ExecuteRegExp(cx, regexp, input, lastIndex, nullptr, &endIndexTmp);
1068 
1069   if (status == RegExpRunStatus_Success) {
1070     MOZ_ASSERT(endIndexTmp <= INT32_MAX);
1071     *endIndex = int32_t(endIndexTmp);
1072     return true;
1073   }
1074   if (status == RegExpRunStatus_Success_NotFound) {
1075     *endIndex = -1;
1076     return true;
1077   }
1078 
1079   return false;
1080 }
1081 
// Vector of capture values used by the substitution helpers in this file.
// RegExpGetSubstitution fills it so that each element is either undefined or
// a string value holding a linear string.
using CapturesVector = GCVector<Value, 4>;
1083 
1084 struct JSSubString {
1085   JSLinearString* base;
1086   size_t offset;
1087   size_t length;
1088 
JSSubStringJSSubString1089   JSSubString() { mozilla::PodZero(this); }
1090 
initEmptyJSSubString1091   void initEmpty(JSLinearString* base) {
1092     this->base = base;
1093     offset = length = 0;
1094   }
initJSSubString1095   void init(JSLinearString* base, size_t offset, size_t length) {
1096     this->base = base;
1097     this->offset = offset;
1098     this->length = length;
1099   }
1100 };
1101 
GetParen(JSLinearString * matched,const JS::Value & capture,JSSubString * out)1102 static void GetParen(JSLinearString* matched, const JS::Value& capture,
1103                      JSSubString* out) {
1104   if (capture.isUndefined()) {
1105     out->initEmpty(matched);
1106     return;
1107   }
1108   JSLinearString& captureLinear = capture.toString()->asLinear();
1109   out->init(&captureLinear, 0, captureLinear.length());
1110 }
1111 
1112 template <typename CharT>
InterpretDollar(JSLinearString * matched,JSLinearString * string,size_t position,size_t tailPos,Handle<CapturesVector> captures,JSLinearString * replacement,const CharT * replacementBegin,const CharT * currentDollar,const CharT * replacementEnd,JSSubString * out,size_t * skip)1113 static bool InterpretDollar(JSLinearString* matched, JSLinearString* string,
1114                             size_t position, size_t tailPos,
1115                             Handle<CapturesVector> captures,
1116                             JSLinearString* replacement,
1117                             const CharT* replacementBegin,
1118                             const CharT* currentDollar,
1119                             const CharT* replacementEnd, JSSubString* out,
1120                             size_t* skip) {
1121   MOZ_ASSERT(*currentDollar == '$');
1122 
1123   /* If there is only a dollar, bail now. */
1124   if (currentDollar + 1 >= replacementEnd) return false;
1125 
1126   /* ES 2016 draft Mar 25, 2016 Table 46. */
1127   char16_t c = currentDollar[1];
1128   if (JS7_ISDEC(c)) {
1129     /* $n, $nn */
1130     unsigned num = JS7_UNDEC(c);
1131     if (num > captures.length()) {
1132       // The result is implementation-defined, do not substitute.
1133       return false;
1134     }
1135 
1136     const CharT* currentChar = currentDollar + 2;
1137     if (currentChar < replacementEnd) {
1138       c = *currentChar;
1139       if (JS7_ISDEC(c)) {
1140         unsigned tmpNum = 10 * num + JS7_UNDEC(c);
1141         // If num > captures.length(), the result is implementation-defined.
1142         // Consume next character only if num <= captures.length().
1143         if (tmpNum <= captures.length()) {
1144           currentChar++;
1145           num = tmpNum;
1146         }
1147       }
1148     }
1149 
1150     if (num == 0) {
1151       // The result is implementation-defined.
1152       // Do not substitute.
1153       return false;
1154     }
1155 
1156     *skip = currentChar - currentDollar;
1157 
1158     MOZ_ASSERT(num <= captures.length());
1159 
1160     GetParen(matched, captures[num - 1], out);
1161     return true;
1162   }
1163 
1164   *skip = 2;
1165   switch (c) {
1166     default:
1167       return false;
1168     case '$':
1169       out->init(replacement, currentDollar - replacementBegin, 1);
1170       break;
1171     case '&':
1172       out->init(matched, 0, matched->length());
1173       break;
1174     case '+':
1175       // SpiderMonkey extension
1176       if (captures.length() == 0)
1177         out->initEmpty(matched);
1178       else
1179         GetParen(matched, captures[captures.length() - 1], out);
1180       break;
1181     case '`':
1182       out->init(string, 0, position);
1183       break;
1184     case '\'':
1185       out->init(string, tailPos, string->length() - tailPos);
1186       break;
1187   }
1188   return true;
1189 }
1190 
1191 template <typename CharT>
FindReplaceLengthString(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1192 static bool FindReplaceLengthString(JSContext* cx, HandleLinearString matched,
1193                                     HandleLinearString string, size_t position,
1194                                     size_t tailPos,
1195                                     Handle<CapturesVector> captures,
1196                                     HandleLinearString replacement,
1197                                     size_t firstDollarIndex, size_t* sizep) {
1198   CheckedInt<uint32_t> replen = replacement->length();
1199 
1200   JS::AutoCheckCannotGC nogc;
1201   MOZ_ASSERT(firstDollarIndex < replacement->length());
1202   const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1203   const CharT* currentDollar = replacementBegin + firstDollarIndex;
1204   const CharT* replacementEnd = replacementBegin + replacement->length();
1205   do {
1206     JSSubString sub;
1207     size_t skip;
1208     if (InterpretDollar(matched, string, position, tailPos, captures,
1209                         replacement, replacementBegin, currentDollar,
1210                         replacementEnd, &sub, &skip)) {
1211       if (sub.length > skip)
1212         replen += sub.length - skip;
1213       else
1214         replen -= skip - sub.length;
1215       currentDollar += skip;
1216     } else {
1217       currentDollar++;
1218     }
1219 
1220     currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1221   } while (currentDollar);
1222 
1223   if (!replen.isValid()) {
1224     ReportAllocationOverflow(cx);
1225     return false;
1226   }
1227 
1228   *sizep = replen.value();
1229   return true;
1230 }
1231 
FindReplaceLength(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1232 static bool FindReplaceLength(JSContext* cx, HandleLinearString matched,
1233                               HandleLinearString string, size_t position,
1234                               size_t tailPos, Handle<CapturesVector> captures,
1235                               HandleLinearString replacement,
1236                               size_t firstDollarIndex, size_t* sizep) {
1237   return replacement->hasLatin1Chars()
1238              ? FindReplaceLengthString<Latin1Char>(
1239                    cx, matched, string, position, tailPos, captures,
1240                    replacement, firstDollarIndex, sizep)
1241              : FindReplaceLengthString<char16_t>(cx, matched, string, position,
1242                                                  tailPos, captures, replacement,
1243                                                  firstDollarIndex, sizep);
1244 }
1245 
1246 /*
1247  * Precondition: |sb| already has necessary growth space reserved (as
1248  * derived from FindReplaceLength), and has been inflated to TwoByte if
1249  * necessary.
1250  */
1251 template <typename CharT>
DoReplace(HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,HandleLinearString replacement,size_t firstDollarIndex,StringBuffer & sb)1252 static void DoReplace(HandleLinearString matched, HandleLinearString string,
1253                       size_t position, size_t tailPos,
1254                       Handle<CapturesVector> captures,
1255                       HandleLinearString replacement, size_t firstDollarIndex,
1256                       StringBuffer& sb) {
1257   JS::AutoCheckCannotGC nogc;
1258   const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1259   const CharT* currentChar = replacementBegin;
1260 
1261   MOZ_ASSERT(firstDollarIndex < replacement->length());
1262   const CharT* currentDollar = replacementBegin + firstDollarIndex;
1263   const CharT* replacementEnd = replacementBegin + replacement->length();
1264   do {
1265     /* Move one of the constant portions of the replacement value. */
1266     size_t len = currentDollar - currentChar;
1267     sb.infallibleAppend(currentChar, len);
1268     currentChar = currentDollar;
1269 
1270     JSSubString sub;
1271     size_t skip;
1272     if (InterpretDollar(matched, string, position, tailPos, captures,
1273                         replacement, replacementBegin, currentDollar,
1274                         replacementEnd, &sub, &skip)) {
1275       sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
1276       currentChar += skip;
1277       currentDollar += skip;
1278     } else {
1279       currentDollar++;
1280     }
1281 
1282     currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1283   } while (currentDollar);
1284   sb.infallibleAppend(currentChar,
1285                       replacement->length() - (currentChar - replacementBegin));
1286 }
1287 
NeedTwoBytes(HandleLinearString string,HandleLinearString replacement,HandleLinearString matched,Handle<CapturesVector> captures)1288 static bool NeedTwoBytes(HandleLinearString string,
1289                          HandleLinearString replacement,
1290                          HandleLinearString matched,
1291                          Handle<CapturesVector> captures) {
1292   if (string->hasTwoByteChars()) return true;
1293   if (replacement->hasTwoByteChars()) return true;
1294   if (matched->hasTwoByteChars()) return true;
1295 
1296   for (size_t i = 0, len = captures.length(); i < len; i++) {
1297     const Value& capture = captures[i];
1298     if (capture.isUndefined()) continue;
1299     if (capture.toString()->hasTwoByteChars()) return true;
1300   }
1301 
1302   return false;
1303 }
1304 
1305 /* ES 2016 draft Mar 25, 2016 21.1.3.14.1. */
RegExpGetSubstitution(JSContext * cx,HandleArrayObject matchResult,HandleLinearString string,size_t position,HandleLinearString replacement,size_t firstDollarIndex,MutableHandleValue rval)1306 bool js::RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult,
1307                                HandleLinearString string, size_t position,
1308                                HandleLinearString replacement,
1309                                size_t firstDollarIndex,
1310                                MutableHandleValue rval) {
1311   MOZ_ASSERT(firstDollarIndex < replacement->length());
1312 
1313   // Step 1 (skipped).
1314 
1315   // Step 10 (reordered).
1316   uint32_t matchResultLength = matchResult->length();
1317   MOZ_ASSERT(matchResultLength > 0);
1318   MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength());
1319 
1320   const Value& matchedValue = matchResult->getDenseElement(0);
1321   RootedLinearString matched(cx, matchedValue.toString()->ensureLinear(cx));
1322   if (!matched) return false;
1323 
1324   // Step 2.
1325   size_t matchLength = matched->length();
1326 
1327   // Steps 3-5 (skipped).
1328 
1329   // Step 6.
1330   MOZ_ASSERT(position <= string->length());
1331 
1332   uint32_t nCaptures = matchResultLength - 1;
1333   Rooted<CapturesVector> captures(cx, CapturesVector(cx));
1334   if (!captures.reserve(nCaptures)) return false;
1335 
1336   // Step 7.
1337   for (uint32_t i = 1; i <= nCaptures; i++) {
1338     const Value& capture = matchResult->getDenseElement(i);
1339 
1340     if (capture.isUndefined()) {
1341       captures.infallibleAppend(capture);
1342       continue;
1343     }
1344 
1345     JSLinearString* captureLinear = capture.toString()->ensureLinear(cx);
1346     if (!captureLinear) return false;
1347     captures.infallibleAppend(StringValue(captureLinear));
1348   }
1349 
1350   // Step 8 (skipped).
1351 
1352   // Step 9.
1353   CheckedInt<uint32_t> checkedTailPos(0);
1354   checkedTailPos += position;
1355   checkedTailPos += matchLength;
1356   if (!checkedTailPos.isValid()) {
1357     ReportAllocationOverflow(cx);
1358     return false;
1359   }
1360   uint32_t tailPos = checkedTailPos.value();
1361 
1362   // Step 11.
1363   size_t reserveLength;
1364   if (!FindReplaceLength(cx, matched, string, position, tailPos, captures,
1365                          replacement, firstDollarIndex, &reserveLength)) {
1366     return false;
1367   }
1368 
1369   StringBuffer result(cx);
1370   if (NeedTwoBytes(string, replacement, matched, captures)) {
1371     if (!result.ensureTwoByteChars()) return false;
1372   }
1373 
1374   if (!result.reserve(reserveLength)) return false;
1375 
1376   if (replacement->hasLatin1Chars()) {
1377     DoReplace<Latin1Char>(matched, string, position, tailPos, captures,
1378                           replacement, firstDollarIndex, result);
1379   } else {
1380     DoReplace<char16_t>(matched, string, position, tailPos, captures,
1381                         replacement, firstDollarIndex, result);
1382   }
1383 
1384   // Step 12.
1385   JSString* resultString = result.finishString();
1386   if (!resultString) return false;
1387 
1388   rval.setString(resultString);
1389   return true;
1390 }
1391 
GetFirstDollarIndex(JSContext * cx,unsigned argc,Value * vp)1392 bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) {
1393   CallArgs args = CallArgsFromVp(argc, vp);
1394   MOZ_ASSERT(args.length() == 1);
1395   JSString* str = args[0].toString();
1396 
1397   // Should be handled in different path.
1398   MOZ_ASSERT(str->length() != 0);
1399 
1400   int32_t index = -1;
1401   if (!GetFirstDollarIndexRaw(cx, str, &index)) return false;
1402 
1403   args.rval().setInt32(index);
1404   return true;
1405 }
1406 
1407 template <typename TextChar>
GetFirstDollarIndexImpl(const TextChar * text,uint32_t textLen)1408 static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text,
1409                                                      uint32_t textLen) {
1410   const TextChar* end = text + textLen;
1411   for (const TextChar* c = text; c != end; ++c) {
1412     if (*c == '$') return c - text;
1413   }
1414   return -1;
1415 }
1416 
GetFirstDollarIndexRawFlat(JSLinearString * text)1417 int32_t js::GetFirstDollarIndexRawFlat(JSLinearString* text) {
1418   uint32_t len = text->length();
1419 
1420   JS::AutoCheckCannotGC nogc;
1421   if (text->hasLatin1Chars())
1422     return GetFirstDollarIndexImpl(text->latin1Chars(nogc), len);
1423 
1424   return GetFirstDollarIndexImpl(text->twoByteChars(nogc), len);
1425 }
1426 
GetFirstDollarIndexRaw(JSContext * cx,JSString * str,int32_t * index)1427 bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) {
1428   JSLinearString* text = str->ensureLinear(cx);
1429   if (!text) return false;
1430 
1431   *index = GetFirstDollarIndexRawFlat(text);
1432   return true;
1433 }
1434 
RegExpPrototypeOptimizable(JSContext * cx,unsigned argc,Value * vp)1435 bool js::RegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) {
1436   // This can only be called from self-hosted code.
1437   CallArgs args = CallArgsFromVp(argc, vp);
1438   MOZ_ASSERT(args.length() == 1);
1439 
1440   args.rval().setBoolean(
1441       RegExpPrototypeOptimizableRaw(cx, &args[0].toObject()));
1442   return true;
1443 }
1444 
RegExpPrototypeOptimizableRaw(JSContext * cx,JSObject * proto)1445 bool js::RegExpPrototypeOptimizableRaw(JSContext* cx, JSObject* proto) {
1446   AutoUnsafeCallWithABI unsafe;
1447   AutoAssertNoPendingException aanpe(cx);
1448   if (!proto->isNative()) return false;
1449 
1450   NativeObject* nproto = static_cast<NativeObject*>(proto);
1451 
1452   Shape* shape =
1453       cx->compartment()->regExps.getOptimizableRegExpPrototypeShape();
1454   if (shape == nproto->lastProperty()) return true;
1455 
1456   JSFunction* flagsGetter;
1457   if (!GetOwnGetterPure(cx, proto, NameToId(cx->names().flags), &flagsGetter))
1458     return false;
1459 
1460   if (!flagsGetter) return false;
1461 
1462   if (!IsSelfHostedFunctionWithName(flagsGetter, cx->names().RegExpFlagsGetter))
1463     return false;
1464 
1465   JSNative globalGetter;
1466   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().global),
1467                               &globalGetter))
1468     return false;
1469 
1470   if (globalGetter != regexp_global) return false;
1471 
1472   JSNative ignoreCaseGetter;
1473   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().ignoreCase),
1474                               &ignoreCaseGetter))
1475     return false;
1476 
1477   if (ignoreCaseGetter != regexp_ignoreCase) return false;
1478 
1479   JSNative multilineGetter;
1480   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().multiline),
1481                               &multilineGetter))
1482     return false;
1483 
1484   if (multilineGetter != regexp_multiline) return false;
1485 
1486   JSNative stickyGetter;
1487   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().sticky),
1488                               &stickyGetter))
1489     return false;
1490 
1491   if (stickyGetter != regexp_sticky) return false;
1492 
1493   JSNative unicodeGetter;
1494   if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().unicode),
1495                               &unicodeGetter))
1496     return false;
1497 
1498   if (unicodeGetter != regexp_unicode) return false;
1499 
1500   // Check if @@match, @@search, and exec are own data properties,
1501   // those values should be tested in selfhosted JS.
1502   bool has = false;
1503   if (!HasOwnDataPropertyPure(
1504           cx, proto, SYMBOL_TO_JSID(cx->wellKnownSymbols().match), &has))
1505     return false;
1506   if (!has) return false;
1507 
1508   if (!HasOwnDataPropertyPure(
1509           cx, proto, SYMBOL_TO_JSID(cx->wellKnownSymbols().search), &has))
1510     return false;
1511   if (!has) return false;
1512 
1513   if (!HasOwnDataPropertyPure(cx, proto, NameToId(cx->names().exec), &has))
1514     return false;
1515   if (!has) return false;
1516 
1517   cx->compartment()->regExps.setOptimizableRegExpPrototypeShape(
1518       nproto->lastProperty());
1519   return true;
1520 }
1521 
RegExpInstanceOptimizable(JSContext * cx,unsigned argc,Value * vp)1522 bool js::RegExpInstanceOptimizable(JSContext* cx, unsigned argc, Value* vp) {
1523   // This can only be called from self-hosted code.
1524   CallArgs args = CallArgsFromVp(argc, vp);
1525   MOZ_ASSERT(args.length() == 2);
1526 
1527   args.rval().setBoolean(RegExpInstanceOptimizableRaw(cx, &args[0].toObject(),
1528                                                       &args[1].toObject()));
1529   return true;
1530 }
1531 
RegExpInstanceOptimizableRaw(JSContext * cx,JSObject * obj,JSObject * proto)1532 bool js::RegExpInstanceOptimizableRaw(JSContext* cx, JSObject* obj,
1533                                       JSObject* proto) {
1534   AutoUnsafeCallWithABI unsafe;
1535   AutoAssertNoPendingException aanpe(cx);
1536 
1537   RegExpObject* rx = &obj->as<RegExpObject>();
1538 
1539   Shape* shape = cx->compartment()->regExps.getOptimizableRegExpInstanceShape();
1540   if (shape == rx->lastProperty()) return true;
1541 
1542   if (!rx->hasStaticPrototype()) return false;
1543 
1544   if (rx->staticPrototype() != proto) return false;
1545 
1546   if (!RegExpObject::isInitialShape(rx)) return false;
1547 
1548   cx->compartment()->regExps.setOptimizableRegExpInstanceShape(
1549       rx->lastProperty());
1550   return true;
1551 }
1552 
/*
 * Pattern match the script to check if it is indexing into a particular
 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
 * such cases, which are used by javascript packers (particularly the popular
 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
 * code patterns generated by such packers here.
 *
 * Takes the lambda object as its only argument.  The return value is the
 * base object 'b' when the pattern matches, and undefined in every other
 * case.  The native itself fails (returns false) only if the function's
 * script cannot be obtained.
 */
bool js::intrinsic_GetElemBaseForLambda(JSContext* cx, unsigned argc,
                                        Value* vp) {
  // This can only be called from self-hosted code.
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);

  JSObject& lambda = args[0].toObject();
  // Default answer; every "pattern not matched" exit below relies on it.
  args.rval().setUndefined();

  if (!lambda.is<JSFunction>()) return true;

  RootedFunction fun(cx, &lambda.as<JSFunction>());
  if (!fun->isInterpreted() || fun->isClassConstructor()) return true;

  JSScript* script = JSFunction::getOrCreateScript(cx, fun);
  if (!script) return false;

  // Walk the bytecode from the start of the script, one expected op at a
  // time.
  jsbytecode* pc = script->code();

  /*
   * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
   * Rule out the (unlikely) possibility of a function with environment
   * objects since it would make our environment walk off.
   */
  if (JSOp(*pc) != JSOP_GETALIASEDVAR || fun->needsSomeEnvironmentObject())
    return true;
  EnvironmentCoordinate ec(pc);
  // Start at the function's environment and follow ec.hops() enclosing
  // links to reach the environment that holds the binding.
  EnvironmentObject* env = &fun->environment()->as<EnvironmentObject>();
  for (unsigned i = 0; i < ec.hops(); ++i)
    env = &env->enclosingEnvironment().as<EnvironmentObject>();
  Value b = env->aliasedBinding(ec);
  pc += JSOP_GETALIASEDVAR_LENGTH;

  /* Look for 'a' to be the lambda's first argument. */
  if (JSOp(*pc) != JSOP_GETARG || GET_ARGNO(pc) != 0) return true;
  pc += JSOP_GETARG_LENGTH;

  /* 'b[a]' */
  if (JSOp(*pc) != JSOP_GETELEM) return true;
  pc += JSOP_GETELEM_LENGTH;

  /* 'return b[a]' */
  if (JSOp(*pc) != JSOP_RETURN) return true;

  /* 'b' must behave like a normal object. */
  if (!b.isObject()) return true;

  JSObject& bobj = b.toObject();
  const Class* clasp = bobj.getClass();
  // Reject non-native classes and classes with custom lookup/get hooks.
  if (!clasp->isNative() || clasp->getOpsLookupProperty() ||
      clasp->getOpsGetProperty())
    return true;

  args.rval().setObject(bobj);
  return true;
}
1616 
1617 /*
 * Emulates the `b[a]` property access that is detected in
 * GetElemBaseForLambda. It returns the property value only if the property is
 * a data property and the property value is a string.  Otherwise it returns
 * undefined.
1621  */
intrinsic_GetStringDataProperty(JSContext * cx,unsigned argc,Value * vp)1622 bool js::intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc,
1623                                          Value* vp) {
1624   CallArgs args = CallArgsFromVp(argc, vp);
1625   MOZ_ASSERT(args.length() == 2);
1626 
1627   RootedObject obj(cx, &args[0].toObject());
1628   if (!obj->isNative()) {
1629     // The object is already checked to be native in GetElemBaseForLambda,
1630     // but it can be swapped to another class that is non-native.
1631     // Return undefined to mark failure to get the property.
1632     args.rval().setUndefined();
1633     return true;
1634   }
1635 
1636   JSAtom* atom = AtomizeString(cx, args[1].toString());
1637   if (!atom) return false;
1638 
1639   Value v;
1640   if (GetPropertyPure(cx, obj, AtomToId(atom), &v) && v.isString())
1641     args.rval().set(v);
1642   else
1643     args.rval().setUndefined();
1644 
1645   return true;
1646 }
1647