1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "builtin/RegExp.h"
8
9 #include "mozilla/CheckedInt.h"
10 #include "mozilla/TypeTraits.h"
11
12 #include "frontend/TokenStream.h"
13 #include "irregexp/RegExpParser.h"
14 #include "jit/InlinableNatives.h"
15 #include "util/StringBuffer.h"
16 #include "util/Unicode.h"
17 #include "vm/JSContext.h"
18 #include "vm/RegExpStatics.h"
19 #include "vm/SelfHosting.h"
20
21 #include "vm/JSObject-inl.h"
22 #include "vm/NativeObject-inl.h"
23 #include "vm/UnboxedObject-inl.h"
24
25 using namespace js;
26 using namespace js::unicode;
27
28 using mozilla::CheckedInt;
29
30 /*
31 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
32 * steps 3, 16-25.
33 */
CreateRegExpMatchResult(JSContext * cx,HandleString input,const MatchPairs & matches,MutableHandleValue rval)34 bool js::CreateRegExpMatchResult(JSContext* cx, HandleString input,
35 const MatchPairs& matches,
36 MutableHandleValue rval) {
37 MOZ_ASSERT(input);
38
39 /*
40 * Create the (slow) result array for a match.
41 *
42 * Array contents:
43 * 0: matched string
44 * 1..pairCount-1: paren matches
45 * input: input string
46 * index: start index for the match
47 */
48
49 /* Get the templateObject that defines the shape and type of the output object
50 */
51 JSObject* templateObject =
52 cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx);
53 if (!templateObject) return false;
54
55 size_t numPairs = matches.length();
56 MOZ_ASSERT(numPairs > 0);
57
58 /* Step 17. */
59 RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(
60 cx, numPairs, templateObject));
61 if (!arr) return false;
62
63 /* Steps 22-24.
64 * Store a Value for each pair. */
65 for (size_t i = 0; i < numPairs; i++) {
66 const MatchPair& pair = matches[i];
67
68 if (pair.isUndefined()) {
69 MOZ_ASSERT(i !=
70 0); /* Since we had a match, first pair must be present. */
71 arr->setDenseInitializedLength(i + 1);
72 arr->initDenseElement(i, UndefinedValue());
73 } else {
74 JSLinearString* str =
75 NewDependentString(cx, input, pair.start, pair.length());
76 if (!str) return false;
77 arr->setDenseInitializedLength(i + 1);
78 arr->initDenseElement(i, StringValue(str));
79 }
80 }
81
82 /* Step 20 (reordered).
83 * Set the |index| property. (TemplateObject positions it in slot 0) */
84 arr->setSlot(0, Int32Value(matches[0].start));
85
86 /* Step 21 (reordered).
87 * Set the |input| property. (TemplateObject positions it in slot 1) */
88 arr->setSlot(1, StringValue(input));
89
90 #ifdef DEBUG
91 RootedValue test(cx);
92 RootedId id(cx, NameToId(cx->names().index));
93 if (!NativeGetProperty(cx, arr, id, &test)) return false;
94 MOZ_ASSERT(test == arr->getSlot(0));
95 id = NameToId(cx->names().input);
96 if (!NativeGetProperty(cx, arr, id, &test)) return false;
97 MOZ_ASSERT(test == arr->getSlot(1));
98 #endif
99
100 /* Step 25. */
101 rval.setObject(*arr);
102 return true;
103 }
104
CreateRegExpSearchResult(const MatchPairs & matches)105 static int32_t CreateRegExpSearchResult(const MatchPairs& matches) {
106 /* Fit the start and limit of match into a int32_t. */
107 uint32_t position = matches[0].start;
108 uint32_t lastIndex = matches[0].limit;
109 MOZ_ASSERT(position < 0x8000);
110 MOZ_ASSERT(lastIndex < 0x8000);
111 return position | (lastIndex << 15);
112 }
113
114 /*
115 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
116 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
117 */
ExecuteRegExpImpl(JSContext * cx,RegExpStatics * res,MutableHandleRegExpShared re,HandleLinearString input,size_t searchIndex,VectorMatchPairs * matches,size_t * endIndex)118 static RegExpRunStatus ExecuteRegExpImpl(JSContext* cx, RegExpStatics* res,
119 MutableHandleRegExpShared re,
120 HandleLinearString input,
121 size_t searchIndex,
122 VectorMatchPairs* matches,
123 size_t* endIndex) {
124 RegExpRunStatus status =
125 RegExpShared::execute(cx, re, input, searchIndex, matches, endIndex);
126
127 /* Out of spec: Update RegExpStatics. */
128 if (status == RegExpRunStatus_Success && res) {
129 if (matches) {
130 if (!res->updateFromMatchPairs(cx, input, *matches))
131 return RegExpRunStatus_Error;
132 } else {
133 res->updateLazily(cx, input, re, searchIndex);
134 }
135 }
136 return status;
137 }
138
139 /* Legacy ExecuteRegExp behavior is baked into the JSAPI. */
ExecuteRegExpLegacy(JSContext * cx,RegExpStatics * res,Handle<RegExpObject * > reobj,HandleLinearString input,size_t * lastIndex,bool test,MutableHandleValue rval)140 bool js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res,
141 Handle<RegExpObject*> reobj,
142 HandleLinearString input, size_t* lastIndex,
143 bool test, MutableHandleValue rval) {
144 RootedRegExpShared shared(cx, RegExpObject::getShared(cx, reobj));
145 if (!shared) return false;
146
147 VectorMatchPairs matches;
148
149 RegExpRunStatus status =
150 ExecuteRegExpImpl(cx, res, &shared, input, *lastIndex, &matches, nullptr);
151 if (status == RegExpRunStatus_Error) return false;
152
153 if (status == RegExpRunStatus_Success_NotFound) {
154 /* ExecuteRegExp() previously returned an array or null. */
155 rval.setNull();
156 return true;
157 }
158
159 *lastIndex = matches[0].limit;
160
161 if (test) {
162 /* Forbid an array, as an optimization. */
163 rval.setBoolean(true);
164 return true;
165 }
166
167 return CreateRegExpMatchResult(cx, input, matches, rval);
168 }
169
CheckPatternSyntaxSlow(JSContext * cx,HandleAtom pattern,RegExpFlag flags)170 static bool CheckPatternSyntaxSlow(JSContext* cx, HandleAtom pattern,
171 RegExpFlag flags) {
172 CompileOptions options(cx);
173 frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
174 return irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(),
175 pattern, flags & UnicodeFlag);
176 }
177
CheckPatternSyntax(JSContext * cx,HandleAtom pattern,RegExpFlag flags)178 static RegExpShared* CheckPatternSyntax(JSContext* cx, HandleAtom pattern,
179 RegExpFlag flags) {
180 // If we already have a RegExpShared for this pattern/flags, we can
181 // avoid the much slower CheckPatternSyntaxSlow call.
182
183 if (RegExpShared* shared = cx->zone()->regExps.maybeGet(pattern, flags)) {
184 #ifdef DEBUG
185 // Assert the pattern is valid.
186 if (!CheckPatternSyntaxSlow(cx, pattern, flags)) {
187 MOZ_ASSERT(cx->isThrowingOutOfMemory() || cx->isThrowingOverRecursed());
188 return nullptr;
189 }
190 #endif
191 return shared;
192 }
193
194 if (!CheckPatternSyntaxSlow(cx, pattern, flags)) return nullptr;
195
196 // Allocate and return a new RegExpShared so we will hit the fast path
197 // next time.
198 return cx->zone()->regExps.get(cx, pattern, flags);
199 }
200
201 /*
202 * ES 2016 draft Mar 25, 2016 21.2.3.2.2.
203 *
204 * Steps 14-15 set |obj|'s "lastIndex" property to zero. Some of
205 * RegExpInitialize's callers have a fresh RegExp not yet exposed to script:
206 * in these cases zeroing "lastIndex" is infallible. But others have a RegExp
207 * whose "lastIndex" property might have been made non-writable: here, zeroing
208 * "lastIndex" can fail. We efficiently solve this problem by completely
209 * removing "lastIndex" zeroing from the provided function.
210 *
211 * CALLERS MUST HANDLE "lastIndex" ZEROING THEMSELVES!
212 *
213 * Because this function only ever returns a user-provided |obj| in the spec,
214 * we omit it and just return the usual success/failure.
215 */
RegExpInitializeIgnoringLastIndex(JSContext * cx,Handle<RegExpObject * > obj,HandleValue patternValue,HandleValue flagsValue)216 static bool RegExpInitializeIgnoringLastIndex(JSContext* cx,
217 Handle<RegExpObject*> obj,
218 HandleValue patternValue,
219 HandleValue flagsValue) {
220 RootedAtom pattern(cx);
221 if (patternValue.isUndefined()) {
222 /* Step 1. */
223 pattern = cx->names().empty;
224 } else {
225 /* Step 2. */
226 pattern = ToAtom<CanGC>(cx, patternValue);
227 if (!pattern) return false;
228 }
229
230 /* Step 3. */
231 RegExpFlag flags = RegExpFlag(0);
232 if (!flagsValue.isUndefined()) {
233 /* Step 4. */
234 RootedString flagStr(cx, ToString<CanGC>(cx, flagsValue));
235 if (!flagStr) return false;
236
237 /* Step 5. */
238 if (!ParseRegExpFlags(cx, flagStr, &flags)) return false;
239 }
240
241 /* Steps 7-8. */
242 RegExpShared* shared = CheckPatternSyntax(cx, pattern, flags);
243 if (!shared) return false;
244
245 /* Steps 9-12. */
246 obj->initIgnoringLastIndex(pattern, flags);
247
248 obj->setShared(*shared);
249
250 return true;
251 }
252
253 /* ES 2016 draft Mar 25, 2016 21.2.3.2.3. */
RegExpCreate(JSContext * cx,HandleValue patternValue,HandleValue flagsValue,MutableHandleValue rval)254 bool js::RegExpCreate(JSContext* cx, HandleValue patternValue,
255 HandleValue flagsValue, MutableHandleValue rval) {
256 /* Step 1. */
257 Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject));
258 if (!regexp) return false;
259
260 /* Step 2. */
261 if (!RegExpInitializeIgnoringLastIndex(cx, regexp, patternValue, flagsValue))
262 return false;
263 regexp->zeroLastIndex(cx);
264
265 rval.setObject(*regexp);
266 return true;
267 }
268
IsRegExpObject(HandleValue v)269 MOZ_ALWAYS_INLINE bool IsRegExpObject(HandleValue v) {
270 return v.isObject() && v.toObject().is<RegExpObject>();
271 }
272
273 /* ES6 draft rc3 7.2.8. */
IsRegExp(JSContext * cx,HandleValue value,bool * result)274 bool js::IsRegExp(JSContext* cx, HandleValue value, bool* result) {
275 /* Step 1. */
276 if (!value.isObject()) {
277 *result = false;
278 return true;
279 }
280 RootedObject obj(cx, &value.toObject());
281
282 /* Steps 2-3. */
283 RootedValue isRegExp(cx);
284 RootedId matchId(cx, SYMBOL_TO_JSID(cx->wellKnownSymbols().match));
285 if (!GetProperty(cx, obj, obj, matchId, &isRegExp)) return false;
286
287 /* Step 4. */
288 if (!isRegExp.isUndefined()) {
289 *result = ToBoolean(isRegExp);
290 return true;
291 }
292
293 /* Steps 5-6. */
294 ESClass cls;
295 if (!GetClassOfValue(cx, value, &cls)) return false;
296
297 *result = cls == ESClass::RegExp;
298 return true;
299 }
300
301 /* ES6 B.2.5.1. */
regexp_compile_impl(JSContext * cx,const CallArgs & args)302 MOZ_ALWAYS_INLINE bool regexp_compile_impl(JSContext* cx,
303 const CallArgs& args) {
304 MOZ_ASSERT(IsRegExpObject(args.thisv()));
305
306 Rooted<RegExpObject*> regexp(cx, &args.thisv().toObject().as<RegExpObject>());
307
308 // Step 3.
309 RootedValue patternValue(cx, args.get(0));
310 ESClass cls;
311 if (!GetClassOfValue(cx, patternValue, &cls)) return false;
312 if (cls == ESClass::RegExp) {
313 // Step 3a.
314 if (args.hasDefined(1)) {
315 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
316 JSMSG_NEWREGEXP_FLAGGED);
317 return false;
318 }
319
320 // Beware! |patternObj| might be a proxy into another compartment, so
321 // don't assume |patternObj.is<RegExpObject>()|. For the same reason,
322 // don't reuse the RegExpShared below.
323 RootedObject patternObj(cx, &patternValue.toObject());
324
325 RootedAtom sourceAtom(cx);
326 RegExpFlag flags;
327 {
328 // Step 3b.
329 RegExpShared* shared = RegExpToShared(cx, patternObj);
330 if (!shared) return false;
331
332 sourceAtom = shared->getSource();
333 flags = shared->getFlags();
334 }
335
336 // Step 5, minus lastIndex zeroing.
337 regexp->initIgnoringLastIndex(sourceAtom, flags);
338 } else {
339 // Step 4.
340 RootedValue P(cx, patternValue);
341 RootedValue F(cx, args.get(1));
342
343 // Step 5, minus lastIndex zeroing.
344 if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) return false;
345 }
346
347 // The final niggling bit of step 5.
348 //
349 // |regexp| is user-exposed, but if its "lastIndex" property hasn't been
350 // made non-writable, we can still use a fast path to zero it.
351 if (regexp->lookupPure(cx->names().lastIndex)->writable()) {
352 regexp->zeroLastIndex(cx);
353 } else {
354 RootedValue zero(cx, Int32Value(0));
355 if (!SetProperty(cx, regexp, cx->names().lastIndex, zero)) return false;
356 }
357
358 args.rval().setObject(*regexp);
359 return true;
360 }
361
regexp_compile(JSContext * cx,unsigned argc,Value * vp)362 static bool regexp_compile(JSContext* cx, unsigned argc, Value* vp) {
363 CallArgs args = CallArgsFromVp(argc, vp);
364
365 /* Steps 1-2. */
366 return CallNonGenericMethod<IsRegExpObject, regexp_compile_impl>(cx, args);
367 }
368
369 /*
370 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1.
371 */
regexp_construct(JSContext * cx,unsigned argc,Value * vp)372 bool js::regexp_construct(JSContext* cx, unsigned argc, Value* vp) {
373 CallArgs args = CallArgsFromVp(argc, vp);
374
375 // Steps 1.
376 bool patternIsRegExp;
377 if (!IsRegExp(cx, args.get(0), &patternIsRegExp)) return false;
378
379 // We can delay step 3 and step 4a until later, during
380 // GetPrototypeFromBuiltinConstructor calls. Accessing the new.target
381 // and the callee from the stack is unobservable.
382 if (!args.isConstructing()) {
383 // Step 3.b.
384 if (patternIsRegExp && !args.hasDefined(1)) {
385 RootedObject patternObj(cx, &args[0].toObject());
386
387 // Step 3.b.i.
388 RootedValue patternConstructor(cx);
389 if (!GetProperty(cx, patternObj, patternObj, cx->names().constructor,
390 &patternConstructor))
391 return false;
392
393 // Step 3.b.ii.
394 if (patternConstructor.isObject() &&
395 patternConstructor.toObject() == args.callee()) {
396 args.rval().set(args[0]);
397 return true;
398 }
399 }
400 }
401
402 RootedValue patternValue(cx, args.get(0));
403
404 // Step 4.
405 ESClass cls;
406 if (!GetClassOfValue(cx, patternValue, &cls)) return false;
407 if (cls == ESClass::RegExp) {
408 // Beware! |patternObj| might be a proxy into another compartment, so
409 // don't assume |patternObj.is<RegExpObject>()|.
410 RootedObject patternObj(cx, &patternValue.toObject());
411
412 RootedAtom sourceAtom(cx);
413 RegExpFlag flags;
414 RootedRegExpShared shared(cx);
415 {
416 // Step 4.a.
417 shared = RegExpToShared(cx, patternObj);
418 if (!shared) return false;
419 sourceAtom = shared->getSource();
420
421 // Step 4.b.
422 // Get original flags in all cases, to compare with passed flags.
423 flags = shared->getFlags();
424
425 // If the RegExpShared is in another Zone, don't reuse it.
426 if (cx->zone() != shared->zone()) shared = nullptr;
427 }
428
429 // Step 7.
430 RootedObject proto(cx);
431 if (!GetPrototypeFromBuiltinConstructor(cx, args, &proto)) return false;
432
433 Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
434 if (!regexp) return false;
435
436 // Step 8.
437 if (args.hasDefined(1)) {
438 // Step 4.c / 21.2.3.2.2 RegExpInitialize step 4.
439 RegExpFlag flagsArg = RegExpFlag(0);
440 RootedString flagStr(cx, ToString<CanGC>(cx, args[1]));
441 if (!flagStr) return false;
442 if (!ParseRegExpFlags(cx, flagStr, &flagsArg)) return false;
443
444 // Don't reuse the RegExpShared if we have different flags.
445 if (flags != flagsArg) shared = nullptr;
446
447 if (!(flags & UnicodeFlag) && flagsArg & UnicodeFlag) {
448 // Have to check syntax again when adding 'u' flag.
449
450 // ES 2017 draft rev 9b49a888e9dfe2667008a01b2754c3662059ae56
451 // 21.2.3.2.2 step 7.
452 shared = CheckPatternSyntax(cx, sourceAtom, flagsArg);
453 if (!shared) return false;
454 }
455 flags = flagsArg;
456 }
457
458 regexp->initAndZeroLastIndex(sourceAtom, flags, cx);
459
460 if (shared) regexp->setShared(*shared);
461
462 args.rval().setObject(*regexp);
463 return true;
464 }
465
466 RootedValue P(cx);
467 RootedValue F(cx);
468
469 // Step 5.
470 if (patternIsRegExp) {
471 RootedObject patternObj(cx, &patternValue.toObject());
472
473 // Step 5.a.
474 if (!GetProperty(cx, patternObj, patternObj, cx->names().source, &P))
475 return false;
476
477 // Step 5.b.
478 F = args.get(1);
479 if (F.isUndefined()) {
480 if (!GetProperty(cx, patternObj, patternObj, cx->names().flags, &F))
481 return false;
482 }
483 } else {
484 // Steps 6.a-b.
485 P = patternValue;
486 F = args.get(1);
487 }
488
489 // Step 7.
490 RootedObject proto(cx);
491 if (!GetPrototypeFromBuiltinConstructor(cx, args, &proto)) return false;
492
493 Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx, GenericObject, proto));
494 if (!regexp) return false;
495
496 // Step 8.
497 if (!RegExpInitializeIgnoringLastIndex(cx, regexp, P, F)) return false;
498 regexp->zeroLastIndex(cx);
499
500 args.rval().setObject(*regexp);
501 return true;
502 }
503
504 /*
505 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.3.1
506 * steps 4, 7-8.
507 */
regexp_construct_raw_flags(JSContext * cx,unsigned argc,Value * vp)508 bool js::regexp_construct_raw_flags(JSContext* cx, unsigned argc, Value* vp) {
509 CallArgs args = CallArgsFromVp(argc, vp);
510 MOZ_ASSERT(args.length() == 2);
511 MOZ_ASSERT(!args.isConstructing());
512
513 // Step 4.a.
514 RootedAtom sourceAtom(cx, AtomizeString(cx, args[0].toString()));
515 if (!sourceAtom) return false;
516
517 // Step 4.c.
518 int32_t flags = int32_t(args[1].toNumber());
519
520 // Step 7.
521 RegExpObject* regexp = RegExpAlloc(cx, GenericObject);
522 if (!regexp) return false;
523
524 // Step 8.
525 regexp->initAndZeroLastIndex(sourceAtom, RegExpFlag(flags), cx);
526 args.rval().setObject(*regexp);
527 return true;
528 }
529
IsRegExpPrototype(HandleValue v)530 MOZ_ALWAYS_INLINE bool IsRegExpPrototype(HandleValue v) {
531 if (IsRegExpObject(v) || !v.isObject()) return false;
532
533 // Note: The prototype shares its JSClass with instances.
534 return StandardProtoKeyOrNull(&v.toObject()) == JSProto_RegExp;
535 }
536
537 // ES 2017 draft 21.2.5.4.
regexp_global_impl(JSContext * cx,const CallArgs & args)538 MOZ_ALWAYS_INLINE bool regexp_global_impl(JSContext* cx, const CallArgs& args) {
539 MOZ_ASSERT(IsRegExpObject(args.thisv()));
540
541 // Steps 4-6.
542 RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
543 args.rval().setBoolean(reObj->global());
544 return true;
545 }
546
regexp_global(JSContext * cx,unsigned argc,JS::Value * vp)547 bool js::regexp_global(JSContext* cx, unsigned argc, JS::Value* vp) {
548 CallArgs args = CallArgsFromVp(argc, vp);
549
550 // Step 3.a.
551 if (IsRegExpPrototype(args.thisv())) {
552 args.rval().setUndefined();
553 return true;
554 }
555
556 // Steps 1-3.
557 return CallNonGenericMethod<IsRegExpObject, regexp_global_impl>(cx, args);
558 }
559
560 // ES 2017 draft 21.2.5.5.
regexp_ignoreCase_impl(JSContext * cx,const CallArgs & args)561 MOZ_ALWAYS_INLINE bool regexp_ignoreCase_impl(JSContext* cx,
562 const CallArgs& args) {
563 MOZ_ASSERT(IsRegExpObject(args.thisv()));
564
565 // Steps 4-6.
566 RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
567 args.rval().setBoolean(reObj->ignoreCase());
568 return true;
569 }
570
regexp_ignoreCase(JSContext * cx,unsigned argc,JS::Value * vp)571 bool js::regexp_ignoreCase(JSContext* cx, unsigned argc, JS::Value* vp) {
572 CallArgs args = CallArgsFromVp(argc, vp);
573
574 // Step 3.a.
575 if (IsRegExpPrototype(args.thisv())) {
576 args.rval().setUndefined();
577 return true;
578 }
579
580 // Steps 1-3.
581 return CallNonGenericMethod<IsRegExpObject, regexp_ignoreCase_impl>(cx, args);
582 }
583
584 // ES 2017 draft 21.2.5.7.
regexp_multiline_impl(JSContext * cx,const CallArgs & args)585 MOZ_ALWAYS_INLINE bool regexp_multiline_impl(JSContext* cx,
586 const CallArgs& args) {
587 MOZ_ASSERT(IsRegExpObject(args.thisv()));
588
589 // Steps 4-6.
590 RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
591 args.rval().setBoolean(reObj->multiline());
592 return true;
593 }
594
regexp_multiline(JSContext * cx,unsigned argc,JS::Value * vp)595 bool js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp) {
596 CallArgs args = CallArgsFromVp(argc, vp);
597
598 // Step 3.a.
599 if (IsRegExpPrototype(args.thisv())) {
600 args.rval().setUndefined();
601 return true;
602 }
603
604 // Steps 1-3.
605 return CallNonGenericMethod<IsRegExpObject, regexp_multiline_impl>(cx, args);
606 }
607
608 // ES 2017 draft 21.2.5.10.
regexp_source_impl(JSContext * cx,const CallArgs & args)609 MOZ_ALWAYS_INLINE bool regexp_source_impl(JSContext* cx, const CallArgs& args) {
610 MOZ_ASSERT(IsRegExpObject(args.thisv()));
611
612 // Step 5.
613 RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
614 RootedAtom src(cx, reObj->getSource());
615 if (!src) return false;
616
617 // Step 7.
618 JSString* str = EscapeRegExpPattern(cx, src);
619 if (!str) return false;
620
621 args.rval().setString(str);
622 return true;
623 }
624
regexp_source(JSContext * cx,unsigned argc,JS::Value * vp)625 static bool regexp_source(JSContext* cx, unsigned argc, JS::Value* vp) {
626 CallArgs args = CallArgsFromVp(argc, vp);
627
628 // Step 3.a.
629 if (IsRegExpPrototype(args.thisv())) {
630 args.rval().setString(cx->names().emptyRegExp);
631 return true;
632 }
633
634 // Steps 1-4.
635 return CallNonGenericMethod<IsRegExpObject, regexp_source_impl>(cx, args);
636 }
637
638 // ES 2017 draft 21.2.5.12.
regexp_sticky_impl(JSContext * cx,const CallArgs & args)639 MOZ_ALWAYS_INLINE bool regexp_sticky_impl(JSContext* cx, const CallArgs& args) {
640 MOZ_ASSERT(IsRegExpObject(args.thisv()));
641
642 // Steps 4-6.
643 RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
644 args.rval().setBoolean(reObj->sticky());
645 return true;
646 }
647
regexp_sticky(JSContext * cx,unsigned argc,JS::Value * vp)648 bool js::regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp) {
649 CallArgs args = CallArgsFromVp(argc, vp);
650
651 // Step 3.a.
652 if (IsRegExpPrototype(args.thisv())) {
653 args.rval().setUndefined();
654 return true;
655 }
656
657 // Steps 1-3.
658 return CallNonGenericMethod<IsRegExpObject, regexp_sticky_impl>(cx, args);
659 }
660
661 // ES 2017 draft 21.2.5.15.
regexp_unicode_impl(JSContext * cx,const CallArgs & args)662 MOZ_ALWAYS_INLINE bool regexp_unicode_impl(JSContext* cx,
663 const CallArgs& args) {
664 MOZ_ASSERT(IsRegExpObject(args.thisv()));
665
666 // Steps 4-6.
667 RegExpObject* reObj = &args.thisv().toObject().as<RegExpObject>();
668 args.rval().setBoolean(reObj->unicode());
669 return true;
670 }
671
regexp_unicode(JSContext * cx,unsigned argc,JS::Value * vp)672 bool js::regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp) {
673 CallArgs args = CallArgsFromVp(argc, vp);
674
675 // Step 3.a.
676 if (IsRegExpPrototype(args.thisv())) {
677 args.rval().setUndefined();
678 return true;
679 }
680
681 // Steps 1-3.
682 return CallNonGenericMethod<IsRegExpObject, regexp_unicode_impl>(cx, args);
683 }
684
685 const JSPropertySpec js::regexp_properties[] = {
686 JS_SELF_HOSTED_GET("flags", "RegExpFlagsGetter", 0),
687 JS_PSG("global", regexp_global, 0),
688 JS_PSG("ignoreCase", regexp_ignoreCase, 0),
689 JS_PSG("multiline", regexp_multiline, 0),
690 JS_PSG("source", regexp_source, 0),
691 JS_PSG("sticky", regexp_sticky, 0),
692 JS_PSG("unicode", regexp_unicode, 0),
693 JS_PS_END};
694
695 const JSFunctionSpec js::regexp_methods[] = {
696 JS_SELF_HOSTED_FN(js_toSource_str, "RegExpToString", 0, 0),
697 JS_SELF_HOSTED_FN(js_toString_str, "RegExpToString", 0, 0),
698 JS_FN("compile", regexp_compile, 2, 0),
699 JS_SELF_HOSTED_FN("exec", "RegExp_prototype_Exec", 1, 0),
700 JS_SELF_HOSTED_FN("test", "RegExpTest", 1, 0),
701 JS_SELF_HOSTED_SYM_FN(match, "RegExpMatch", 1, 0),
702 JS_SELF_HOSTED_SYM_FN(replace, "RegExpReplace", 2, 0),
703 JS_SELF_HOSTED_SYM_FN(search, "RegExpSearch", 1, 0),
704 JS_SELF_HOSTED_SYM_FN(split, "RegExpSplit", 2, 0),
705 JS_FS_END};
706
/*
 * Body of a static paren getter: fetches paren |parenNum| from |res| into the
 * return value and substitutes the empty string when the result is
 * undefined. Expects |cx|, |res| and |args| to be in scope at the expansion
 * site; the final statement is left without a semicolon for the caller to
 * supply.
 */
#define STATIC_PAREN_GETTER_CODE(parenNum)                     \
    if (!res->createParen(cx, parenNum, args.rval())) {        \
        return false;                                          \
    }                                                          \
    if (args.rval().isUndefined()) {                           \
        args.rval().setString(cx->runtime()->emptyString);     \
    }                                                          \
    return true
712
/*
 * RegExp static properties.
 *
 * RegExp class static properties and their Perl counterparts:
 *
 *  RegExp.input                $_
 *  RegExp.lastMatch            $&
 *  RegExp.lastParen            $+
 *  RegExp.leftContext          $`
 *  RegExp.rightContext         $'
 */

/*
 * Defines a static native named |name| that unpacks its call arguments into
 * |args|, fetches the global's RegExpStatics into |res| (failing if that is
 * null), and then runs |code|. |code| is responsible for returning.
 */
#define DEFINE_STATIC_GETTER(name, code)                                       \
    static bool name(JSContext* cx, unsigned argc, Value* vp) {                \
        CallArgs args = CallArgsFromVp(argc, vp);                              \
        RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
        if (!res) {                                                            \
            return false;                                                      \
        }                                                                      \
        code;                                                                  \
    }
732
733 DEFINE_STATIC_GETTER(static_input_getter,
734 return res->createPendingInput(cx, args.rval()))
735 DEFINE_STATIC_GETTER(static_lastMatch_getter,
736 return res->createLastMatch(cx, args.rval()))
737 DEFINE_STATIC_GETTER(static_lastParen_getter,
738 return res->createLastParen(cx, args.rval()))
739 DEFINE_STATIC_GETTER(static_leftContext_getter,
740 return res->createLeftContext(cx, args.rval()))
741 DEFINE_STATIC_GETTER(static_rightContext_getter,
742 return res->createRightContext(cx, args.rval()))
743
744 DEFINE_STATIC_GETTER(static_paren1_getter, STATIC_PAREN_GETTER_CODE(1))
745 DEFINE_STATIC_GETTER(static_paren2_getter, STATIC_PAREN_GETTER_CODE(2))
746 DEFINE_STATIC_GETTER(static_paren3_getter, STATIC_PAREN_GETTER_CODE(3))
747 DEFINE_STATIC_GETTER(static_paren4_getter, STATIC_PAREN_GETTER_CODE(4))
748 DEFINE_STATIC_GETTER(static_paren5_getter, STATIC_PAREN_GETTER_CODE(5))
749 DEFINE_STATIC_GETTER(static_paren6_getter, STATIC_PAREN_GETTER_CODE(6))
750 DEFINE_STATIC_GETTER(static_paren7_getter, STATIC_PAREN_GETTER_CODE(7))
751 DEFINE_STATIC_GETTER(static_paren8_getter, STATIC_PAREN_GETTER_CODE(8))
752 DEFINE_STATIC_GETTER(static_paren9_getter, STATIC_PAREN_GETTER_CODE(9))
753
/*
 * Defines a static native named |name| that fetches the global's
 * RegExpStatics into |res| (failing if that is null), runs |code|, and then
 * returns true. Unlike DEFINE_STATIC_GETTER it does not declare |args|.
 * No use of this macro appears in the portion of the file visible here.
 */
#define DEFINE_STATIC_SETTER(name, code)                                       \
    static bool name(JSContext* cx, unsigned argc, Value* vp) {                \
        RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global()); \
        if (!res) {                                                            \
            return false;                                                      \
        }                                                                      \
        code;                                                                  \
        return true;                                                           \
    }
761
static_input_setter(JSContext * cx,unsigned argc,Value * vp)762 static bool static_input_setter(JSContext* cx, unsigned argc, Value* vp) {
763 CallArgs args = CallArgsFromVp(argc, vp);
764 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
765 if (!res) return false;
766
767 RootedString str(cx, ToString<CanGC>(cx, args.get(0)));
768 if (!str) return false;
769
770 res->setPendingInput(str);
771 args.rval().setString(str);
772 return true;
773 }
774
775 const JSPropertySpec js::regexp_static_props[] = {
776 JS_PSGS("input", static_input_getter, static_input_setter,
777 JSPROP_PERMANENT | JSPROP_ENUMERATE),
778 JS_PSG("lastMatch", static_lastMatch_getter,
779 JSPROP_PERMANENT | JSPROP_ENUMERATE),
780 JS_PSG("lastParen", static_lastParen_getter,
781 JSPROP_PERMANENT | JSPROP_ENUMERATE),
782 JS_PSG("leftContext", static_leftContext_getter,
783 JSPROP_PERMANENT | JSPROP_ENUMERATE),
784 JS_PSG("rightContext", static_rightContext_getter,
785 JSPROP_PERMANENT | JSPROP_ENUMERATE),
786 JS_PSG("$1", static_paren1_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
787 JS_PSG("$2", static_paren2_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
788 JS_PSG("$3", static_paren3_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
789 JS_PSG("$4", static_paren4_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
790 JS_PSG("$5", static_paren5_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
791 JS_PSG("$6", static_paren6_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
792 JS_PSG("$7", static_paren7_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
793 JS_PSG("$8", static_paren8_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
794 JS_PSG("$9", static_paren9_getter, JSPROP_PERMANENT | JSPROP_ENUMERATE),
795 JS_PSGS("$_", static_input_getter, static_input_setter, JSPROP_PERMANENT),
796 JS_PSG("$&", static_lastMatch_getter, JSPROP_PERMANENT),
797 JS_PSG("$+", static_lastParen_getter, JSPROP_PERMANENT),
798 JS_PSG("$`", static_leftContext_getter, JSPROP_PERMANENT),
799 JS_PSG("$'", static_rightContext_getter, JSPROP_PERMANENT),
800 JS_SELF_HOSTED_SYM_GET(species, "RegExpSpecies", 0),
801 JS_PS_END};
802
803 template <typename CharT>
IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,size_t index)804 static bool IsTrailSurrogateWithLeadSurrogateImpl(HandleLinearString input,
805 size_t index) {
806 JS::AutoCheckCannotGC nogc;
807 MOZ_ASSERT(index > 0 && index < input->length());
808 const CharT* inputChars = input->chars<CharT>(nogc);
809
810 return unicode::IsTrailSurrogate(inputChars[index]) &&
811 unicode::IsLeadSurrogate(inputChars[index - 1]);
812 }
813
IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,int32_t index)814 static bool IsTrailSurrogateWithLeadSurrogate(HandleLinearString input,
815 int32_t index) {
816 if (index <= 0 || size_t(index) >= input->length()) return false;
817
818 return input->hasLatin1Chars()
819 ? IsTrailSurrogateWithLeadSurrogateImpl<Latin1Char>(input, index)
820 : IsTrailSurrogateWithLeadSurrogateImpl<char16_t>(input, index);
821 }
822
823 /*
824 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
825 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
826 */
ExecuteRegExp(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,VectorMatchPairs * matches,size_t * endIndex)827 static RegExpRunStatus ExecuteRegExp(JSContext* cx, HandleObject regexp,
828 HandleString string, int32_t lastIndex,
829 VectorMatchPairs* matches,
830 size_t* endIndex) {
831 /*
832 * WARNING: Despite the presence of spec step comment numbers, this
833 * algorithm isn't consistent with any ES6 version, draft or
834 * otherwise. YOU HAVE BEEN WARNED.
835 */
836
837 /* Steps 1-2 performed by the caller. */
838 Handle<RegExpObject*> reobj = regexp.as<RegExpObject>();
839
840 RootedRegExpShared re(cx, RegExpObject::getShared(cx, reobj));
841 if (!re) return RegExpRunStatus_Error;
842
843 RegExpStatics* res = GlobalObject::getRegExpStatics(cx, cx->global());
844 if (!res) return RegExpRunStatus_Error;
845
846 RootedLinearString input(cx, string->ensureLinear(cx));
847 if (!input) return RegExpRunStatus_Error;
848
849 /* Handled by caller */
850 MOZ_ASSERT(lastIndex >= 0 && size_t(lastIndex) <= input->length());
851
852 /* Steps 4-8 performed by the caller. */
853
854 /* Step 10. */
855 if (reobj->unicode()) {
856 /*
857 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad
858 * 21.2.2.2 step 2.
859 * Let listIndex be the index into Input of the character that was
860 * obtained from element index of str.
861 *
862 * In the spec, pattern match is performed with decoded Unicode code
863 * points, but our implementation performs it with UTF-16 encoded
864 * string. In step 2, we should decrement lastIndex (index) if it
865 * points the trail surrogate that has corresponding lead surrogate.
866 *
867 * var r = /\uD83D\uDC38/ug;
868 * r.lastIndex = 1;
869 * var str = "\uD83D\uDC38";
870 * var result = r.exec(str); // pattern match starts from index 0
871 * print(result.index); // prints 0
872 *
873 * Note: this doesn't match the current spec text and result in
874 * different values for `result.index` under certain conditions.
875 * However, the spec will change to match our implementation's
876 * behavior. See https://github.com/tc39/ecma262/issues/128.
877 */
878 if (IsTrailSurrogateWithLeadSurrogate(input, lastIndex)) lastIndex--;
879 }
880
881 /* Steps 3, 11-14, except 12.a.i, 12.c.i.1. */
882 RegExpRunStatus status =
883 ExecuteRegExpImpl(cx, res, &re, input, lastIndex, matches, endIndex);
884 if (status == RegExpRunStatus_Error) return RegExpRunStatus_Error;
885
886 /* Steps 12.a.i, 12.c.i.i, 15 are done by Self-hosted function. */
887
888 return status;
889 }
890
891 /*
892 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
893 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
894 */
RegExpMatcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,MutableHandleValue rval)895 static bool RegExpMatcherImpl(JSContext* cx, HandleObject regexp,
896 HandleString string, int32_t lastIndex,
897 MutableHandleValue rval) {
898 /* Execute regular expression and gather matches. */
899 VectorMatchPairs matches;
900
901 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
902 RegExpRunStatus status =
903 ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
904 if (status == RegExpRunStatus_Error) return false;
905
906 /* Steps 12.a, 12.c. */
907 if (status == RegExpRunStatus_Success_NotFound) {
908 rval.setNull();
909 return true;
910 }
911
912 /* Steps 16-25 */
913 return CreateRegExpMatchResult(cx, string, matches, rval);
914 }
915
916 /*
917 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
918 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
919 */
RegExpMatcher(JSContext * cx,unsigned argc,Value * vp)920 bool js::RegExpMatcher(JSContext* cx, unsigned argc, Value* vp) {
921 CallArgs args = CallArgsFromVp(argc, vp);
922 MOZ_ASSERT(args.length() == 3);
923 MOZ_ASSERT(IsRegExpObject(args[0]));
924 MOZ_ASSERT(args[1].isString());
925 MOZ_ASSERT(args[2].isNumber());
926
927 RootedObject regexp(cx, &args[0].toObject());
928 RootedString string(cx, args[1].toString());
929
930 int32_t lastIndex;
931 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
932
933 /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
934 return RegExpMatcherImpl(cx, regexp, string, lastIndex, args.rval());
935 }
936
937 /*
938 * Separate interface for use by IonMonkey.
939 * This code cannot re-enter Ion code.
940 */
RegExpMatcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,MatchPairs * maybeMatches,MutableHandleValue output)941 bool js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp,
942 HandleString input, int32_t lastIndex,
943 MatchPairs* maybeMatches, MutableHandleValue output) {
944 MOZ_ASSERT(lastIndex >= 0);
945
946 // The MatchPairs will always be passed in, but RegExp execution was
947 // successful only if the pairs have actually been filled in.
948 if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0)
949 return CreateRegExpMatchResult(cx, input, *maybeMatches, output);
950 return RegExpMatcherImpl(cx, regexp, input, lastIndex, output);
951 }
952
953 /*
954 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
955 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
956 * This code is inlined in CodeGenerator.cpp generateRegExpSearcherStub,
957 * changes to this code need to get reflected in there too.
958 */
RegExpSearcherImpl(JSContext * cx,HandleObject regexp,HandleString string,int32_t lastIndex,int32_t * result)959 static bool RegExpSearcherImpl(JSContext* cx, HandleObject regexp,
960 HandleString string, int32_t lastIndex,
961 int32_t* result) {
962 /* Execute regular expression and gather matches. */
963 VectorMatchPairs matches;
964
965 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
966 RegExpRunStatus status =
967 ExecuteRegExp(cx, regexp, string, lastIndex, &matches, nullptr);
968 if (status == RegExpRunStatus_Error) return false;
969
970 /* Steps 12.a, 12.c. */
971 if (status == RegExpRunStatus_Success_NotFound) {
972 *result = -1;
973 return true;
974 }
975
976 /* Steps 16-25 */
977 *result = CreateRegExpSearchResult(matches);
978 return true;
979 }
980
981 /*
982 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
983 * steps 3, 9-25, except 12.a.i, 12.c.i.1, 15.
984 */
RegExpSearcher(JSContext * cx,unsigned argc,Value * vp)985 bool js::RegExpSearcher(JSContext* cx, unsigned argc, Value* vp) {
986 CallArgs args = CallArgsFromVp(argc, vp);
987 MOZ_ASSERT(args.length() == 3);
988 MOZ_ASSERT(IsRegExpObject(args[0]));
989 MOZ_ASSERT(args[1].isString());
990 MOZ_ASSERT(args[2].isNumber());
991
992 RootedObject regexp(cx, &args[0].toObject());
993 RootedString string(cx, args[1].toString());
994
995 int32_t lastIndex;
996 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
997
998 /* Steps 3, 9-25, except 12.a.i, 12.c.i.1, 15. */
999 int32_t result = 0;
1000 if (!RegExpSearcherImpl(cx, regexp, string, lastIndex, &result)) return false;
1001
1002 args.rval().setInt32(result);
1003 return true;
1004 }
1005
1006 /*
1007 * Separate interface for use by IonMonkey.
1008 * This code cannot re-enter Ion code.
1009 */
RegExpSearcherRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,MatchPairs * maybeMatches,int32_t * result)1010 bool js::RegExpSearcherRaw(JSContext* cx, HandleObject regexp,
1011 HandleString input, int32_t lastIndex,
1012 MatchPairs* maybeMatches, int32_t* result) {
1013 MOZ_ASSERT(lastIndex >= 0);
1014
1015 // The MatchPairs will always be passed in, but RegExp execution was
1016 // successful only if the pairs have actually been filled in.
1017 if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0) {
1018 *result = CreateRegExpSearchResult(*maybeMatches);
1019 return true;
1020 }
1021 return RegExpSearcherImpl(cx, regexp, input, lastIndex, result);
1022 }
1023
1024 /*
1025 * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
1026 * steps 3, 9-14, except 12.a.i, 12.c.i.1.
1027 */
RegExpTester(JSContext * cx,unsigned argc,Value * vp)1028 bool js::RegExpTester(JSContext* cx, unsigned argc, Value* vp) {
1029 CallArgs args = CallArgsFromVp(argc, vp);
1030 MOZ_ASSERT(args.length() == 3);
1031 MOZ_ASSERT(IsRegExpObject(args[0]));
1032 MOZ_ASSERT(args[1].isString());
1033 MOZ_ASSERT(args[2].isNumber());
1034
1035 RootedObject regexp(cx, &args[0].toObject());
1036 RootedString string(cx, args[1].toString());
1037
1038 int32_t lastIndex;
1039 MOZ_ALWAYS_TRUE(ToInt32(cx, args[2], &lastIndex));
1040
1041 /* Steps 3, 9-14, except 12.a.i, 12.c.i.1. */
1042 size_t endIndex = 0;
1043 RegExpRunStatus status =
1044 ExecuteRegExp(cx, regexp, string, lastIndex, nullptr, &endIndex);
1045
1046 if (status == RegExpRunStatus_Error) return false;
1047
1048 if (status == RegExpRunStatus_Success) {
1049 MOZ_ASSERT(endIndex <= INT32_MAX);
1050 args.rval().setInt32(int32_t(endIndex));
1051 } else {
1052 args.rval().setInt32(-1);
1053 }
1054 return true;
1055 }
1056
1057 /*
1058 * Separate interface for use by IonMonkey.
1059 * This code cannot re-enter Ion code.
1060 */
RegExpTesterRaw(JSContext * cx,HandleObject regexp,HandleString input,int32_t lastIndex,int32_t * endIndex)1061 bool js::RegExpTesterRaw(JSContext* cx, HandleObject regexp, HandleString input,
1062 int32_t lastIndex, int32_t* endIndex) {
1063 MOZ_ASSERT(lastIndex >= 0);
1064
1065 size_t endIndexTmp = 0;
1066 RegExpRunStatus status =
1067 ExecuteRegExp(cx, regexp, input, lastIndex, nullptr, &endIndexTmp);
1068
1069 if (status == RegExpRunStatus_Success) {
1070 MOZ_ASSERT(endIndexTmp <= INT32_MAX);
1071 *endIndex = int32_t(endIndexTmp);
1072 return true;
1073 }
1074 if (status == RegExpRunStatus_Success_NotFound) {
1075 *endIndex = -1;
1076 return true;
1077 }
1078
1079 return false;
1080 }
1081
// Capture values of one match, as consumed by the substitution helpers in
// this file. Each element is either undefined or a string Value.
using CapturesVector = GCVector<Value, 4>;
1083
1084 struct JSSubString {
1085 JSLinearString* base;
1086 size_t offset;
1087 size_t length;
1088
JSSubStringJSSubString1089 JSSubString() { mozilla::PodZero(this); }
1090
initEmptyJSSubString1091 void initEmpty(JSLinearString* base) {
1092 this->base = base;
1093 offset = length = 0;
1094 }
initJSSubString1095 void init(JSLinearString* base, size_t offset, size_t length) {
1096 this->base = base;
1097 this->offset = offset;
1098 this->length = length;
1099 }
1100 };
1101
GetParen(JSLinearString * matched,const JS::Value & capture,JSSubString * out)1102 static void GetParen(JSLinearString* matched, const JS::Value& capture,
1103 JSSubString* out) {
1104 if (capture.isUndefined()) {
1105 out->initEmpty(matched);
1106 return;
1107 }
1108 JSLinearString& captureLinear = capture.toString()->asLinear();
1109 out->init(&captureLinear, 0, captureLinear.length());
1110 }
1111
1112 template <typename CharT>
InterpretDollar(JSLinearString * matched,JSLinearString * string,size_t position,size_t tailPos,Handle<CapturesVector> captures,JSLinearString * replacement,const CharT * replacementBegin,const CharT * currentDollar,const CharT * replacementEnd,JSSubString * out,size_t * skip)1113 static bool InterpretDollar(JSLinearString* matched, JSLinearString* string,
1114 size_t position, size_t tailPos,
1115 Handle<CapturesVector> captures,
1116 JSLinearString* replacement,
1117 const CharT* replacementBegin,
1118 const CharT* currentDollar,
1119 const CharT* replacementEnd, JSSubString* out,
1120 size_t* skip) {
1121 MOZ_ASSERT(*currentDollar == '$');
1122
1123 /* If there is only a dollar, bail now. */
1124 if (currentDollar + 1 >= replacementEnd) return false;
1125
1126 /* ES 2016 draft Mar 25, 2016 Table 46. */
1127 char16_t c = currentDollar[1];
1128 if (JS7_ISDEC(c)) {
1129 /* $n, $nn */
1130 unsigned num = JS7_UNDEC(c);
1131 if (num > captures.length()) {
1132 // The result is implementation-defined, do not substitute.
1133 return false;
1134 }
1135
1136 const CharT* currentChar = currentDollar + 2;
1137 if (currentChar < replacementEnd) {
1138 c = *currentChar;
1139 if (JS7_ISDEC(c)) {
1140 unsigned tmpNum = 10 * num + JS7_UNDEC(c);
1141 // If num > captures.length(), the result is implementation-defined.
1142 // Consume next character only if num <= captures.length().
1143 if (tmpNum <= captures.length()) {
1144 currentChar++;
1145 num = tmpNum;
1146 }
1147 }
1148 }
1149
1150 if (num == 0) {
1151 // The result is implementation-defined.
1152 // Do not substitute.
1153 return false;
1154 }
1155
1156 *skip = currentChar - currentDollar;
1157
1158 MOZ_ASSERT(num <= captures.length());
1159
1160 GetParen(matched, captures[num - 1], out);
1161 return true;
1162 }
1163
1164 *skip = 2;
1165 switch (c) {
1166 default:
1167 return false;
1168 case '$':
1169 out->init(replacement, currentDollar - replacementBegin, 1);
1170 break;
1171 case '&':
1172 out->init(matched, 0, matched->length());
1173 break;
1174 case '+':
1175 // SpiderMonkey extension
1176 if (captures.length() == 0)
1177 out->initEmpty(matched);
1178 else
1179 GetParen(matched, captures[captures.length() - 1], out);
1180 break;
1181 case '`':
1182 out->init(string, 0, position);
1183 break;
1184 case '\'':
1185 out->init(string, tailPos, string->length() - tailPos);
1186 break;
1187 }
1188 return true;
1189 }
1190
1191 template <typename CharT>
FindReplaceLengthString(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1192 static bool FindReplaceLengthString(JSContext* cx, HandleLinearString matched,
1193 HandleLinearString string, size_t position,
1194 size_t tailPos,
1195 Handle<CapturesVector> captures,
1196 HandleLinearString replacement,
1197 size_t firstDollarIndex, size_t* sizep) {
1198 CheckedInt<uint32_t> replen = replacement->length();
1199
1200 JS::AutoCheckCannotGC nogc;
1201 MOZ_ASSERT(firstDollarIndex < replacement->length());
1202 const CharT* replacementBegin = replacement->chars<CharT>(nogc);
1203 const CharT* currentDollar = replacementBegin + firstDollarIndex;
1204 const CharT* replacementEnd = replacementBegin + replacement->length();
1205 do {
1206 JSSubString sub;
1207 size_t skip;
1208 if (InterpretDollar(matched, string, position, tailPos, captures,
1209 replacement, replacementBegin, currentDollar,
1210 replacementEnd, &sub, &skip)) {
1211 if (sub.length > skip)
1212 replen += sub.length - skip;
1213 else
1214 replen -= skip - sub.length;
1215 currentDollar += skip;
1216 } else {
1217 currentDollar++;
1218 }
1219
1220 currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
1221 } while (currentDollar);
1222
1223 if (!replen.isValid()) {
1224 ReportAllocationOverflow(cx);
1225 return false;
1226 }
1227
1228 *sizep = replen.value();
1229 return true;
1230 }
1231
FindReplaceLength(JSContext * cx,HandleLinearString matched,HandleLinearString string,size_t position,size_t tailPos,Handle<CapturesVector> captures,HandleLinearString replacement,size_t firstDollarIndex,size_t * sizep)1232 static bool FindReplaceLength(JSContext* cx, HandleLinearString matched,
1233 HandleLinearString string, size_t position,
1234 size_t tailPos, Handle<CapturesVector> captures,
1235 HandleLinearString replacement,
1236 size_t firstDollarIndex, size_t* sizep) {
1237 return replacement->hasLatin1Chars()
1238 ? FindReplaceLengthString<Latin1Char>(
1239 cx, matched, string, position, tailPos, captures,
1240 replacement, firstDollarIndex, sizep)
1241 : FindReplaceLengthString<char16_t>(cx, matched, string, position,
1242 tailPos, captures, replacement,
1243 firstDollarIndex, sizep);
1244 }
1245
/*
 * Expand |replacement| for a single match and append the expansion to |sb|.
 *
 * Literal runs of the replacement are copied unchanged. Each '$' sequence
 * accepted by InterpretDollar is replaced by the range it selects; a '$'
 * that InterpretDollar rejects stays in the output as literal text.
 * Scanning starts at |firstDollarIndex|, which must index into
 * |replacement|.
 *
 * Precondition: |sb| already has necessary growth space reserved (as
 * derived from FindReplaceLength), and has been inflated to TwoByte if
 * necessary.
 */
template <typename CharT>
static void DoReplace(HandleLinearString matched, HandleLinearString string,
                      size_t position, size_t tailPos,
                      Handle<CapturesVector> captures,
                      HandleLinearString replacement, size_t firstDollarIndex,
                      StringBuffer& sb) {
  JS::AutoCheckCannotGC nogc;
  const CharT* replacementBegin = replacement->chars<CharT>(nogc);
  // Start of the template text that has not been appended to |sb| yet.
  const CharT* currentChar = replacementBegin;

  MOZ_ASSERT(firstDollarIndex < replacement->length());
  // The '$' currently being examined.
  const CharT* currentDollar = replacementBegin + firstDollarIndex;
  const CharT* replacementEnd = replacementBegin + replacement->length();
  do {
    /* Move one of the constant portions of the replacement value. */
    size_t len = currentDollar - currentChar;
    sb.infallibleAppend(currentChar, len);
    currentChar = currentDollar;

    JSSubString sub;
    size_t skip;
    if (InterpretDollar(matched, string, position, tailPos, captures,
                        replacement, replacementBegin, currentDollar,
                        replacementEnd, &sub, &skip)) {
      // Substitution: emit the selected range and move both cursors past
      // the whole '$' sequence.
      sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
      currentChar += skip;
      currentDollar += skip;
    } else {
      // Not a substitution: only the search cursor advances, so this '$'
      // remains part of the pending literal run and is copied later.
      currentDollar++;
    }

    // Look for the next '$'; the loop ends once this yields null.
    currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
  } while (currentDollar);
  // Append whatever template text remains after the last processed '$'.
  sb.infallibleAppend(currentChar,
                      replacement->length() - (currentChar - replacementBegin));
}
1287
NeedTwoBytes(HandleLinearString string,HandleLinearString replacement,HandleLinearString matched,Handle<CapturesVector> captures)1288 static bool NeedTwoBytes(HandleLinearString string,
1289 HandleLinearString replacement,
1290 HandleLinearString matched,
1291 Handle<CapturesVector> captures) {
1292 if (string->hasTwoByteChars()) return true;
1293 if (replacement->hasTwoByteChars()) return true;
1294 if (matched->hasTwoByteChars()) return true;
1295
1296 for (size_t i = 0, len = captures.length(); i < len; i++) {
1297 const Value& capture = captures[i];
1298 if (capture.isUndefined()) continue;
1299 if (capture.toString()->hasTwoByteChars()) return true;
1300 }
1301
1302 return false;
1303 }
1304
/*
 * ES 2016 draft Mar 25, 2016 21.1.3.14.1.
 *
 * Build the substitution string for one match and store it in |rval|.
 *
 * matchResult       dense array whose element 0 is the matched string and
 *                   whose remaining elements are the captures (each either
 *                   undefined or a string).
 * string            the subject string.
 * position          index of the match in |string|; asserted to be
 *                   <= string->length().
 * replacement       the replacement template.
 * firstDollarIndex  index of a '$' in |replacement| from which expansion
 *                   starts; asserted to be < replacement->length().
 *
 * Returns false if linearizing a string, reserving space, computing the
 * result length or creating the result string fails.
 */
bool js::RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult,
                               HandleLinearString string, size_t position,
                               HandleLinearString replacement,
                               size_t firstDollarIndex,
                               MutableHandleValue rval) {
  MOZ_ASSERT(firstDollarIndex < replacement->length());

  // Step 1 (skipped).

  // Step 10 (reordered).
  uint32_t matchResultLength = matchResult->length();
  MOZ_ASSERT(matchResultLength > 0);
  MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength());

  const Value& matchedValue = matchResult->getDenseElement(0);
  RootedLinearString matched(cx, matchedValue.toString()->ensureLinear(cx));
  if (!matched) return false;

  // Step 2.
  size_t matchLength = matched->length();

  // Steps 3-5 (skipped).

  // Step 6.
  MOZ_ASSERT(position <= string->length());

  // Everything after element 0 is a capture.
  uint32_t nCaptures = matchResultLength - 1;
  Rooted<CapturesVector> captures(cx, CapturesVector(cx));
  // Reserve up front so the appends in the loop below cannot fail.
  if (!captures.reserve(nCaptures)) return false;

  // Step 7.
  for (uint32_t i = 1; i <= nCaptures; i++) {
    const Value& capture = matchResult->getDenseElement(i);

    if (capture.isUndefined()) {
      captures.infallibleAppend(capture);
      continue;
    }

    // Store captures as linear strings; GetParen relies on asLinear().
    JSLinearString* captureLinear = capture.toString()->ensureLinear(cx);
    if (!captureLinear) return false;
    captures.infallibleAppend(StringValue(captureLinear));
  }

  // Step 8 (skipped).

  // Step 9.
  // tailPos = position + matchLength, computed with overflow checking.
  CheckedInt<uint32_t> checkedTailPos(0);
  checkedTailPos += position;
  checkedTailPos += matchLength;
  if (!checkedTailPos.isValid()) {
    ReportAllocationOverflow(cx);
    return false;
  }
  uint32_t tailPos = checkedTailPos.value();

  // Step 11.
  // First pass: compute the exact result length so that DoReplace can use
  // infallible appends.
  size_t reserveLength;
  if (!FindReplaceLength(cx, matched, string, position, tailPos, captures,
                         replacement, firstDollarIndex, &reserveLength)) {
    return false;
  }

  StringBuffer result(cx);
  // Switch the buffer to two-byte storage before reserving if any
  // contributing string needs it (DoReplace's precondition).
  if (NeedTwoBytes(string, replacement, matched, captures)) {
    if (!result.ensureTwoByteChars()) return false;
  }

  if (!result.reserve(reserveLength)) return false;

  // Second pass: perform the expansion, templated on the replacement's
  // character type.
  if (replacement->hasLatin1Chars()) {
    DoReplace<Latin1Char>(matched, string, position, tailPos, captures,
                          replacement, firstDollarIndex, result);
  } else {
    DoReplace<char16_t>(matched, string, position, tailPos, captures,
                        replacement, firstDollarIndex, result);
  }

  // Step 12.
  JSString* resultString = result.finishString();
  if (!resultString) return false;

  rval.setString(resultString);
  return true;
}
1391
GetFirstDollarIndex(JSContext * cx,unsigned argc,Value * vp)1392 bool js::GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp) {
1393 CallArgs args = CallArgsFromVp(argc, vp);
1394 MOZ_ASSERT(args.length() == 1);
1395 JSString* str = args[0].toString();
1396
1397 // Should be handled in different path.
1398 MOZ_ASSERT(str->length() != 0);
1399
1400 int32_t index = -1;
1401 if (!GetFirstDollarIndexRaw(cx, str, &index)) return false;
1402
1403 args.rval().setInt32(index);
1404 return true;
1405 }
1406
1407 template <typename TextChar>
GetFirstDollarIndexImpl(const TextChar * text,uint32_t textLen)1408 static MOZ_ALWAYS_INLINE int GetFirstDollarIndexImpl(const TextChar* text,
1409 uint32_t textLen) {
1410 const TextChar* end = text + textLen;
1411 for (const TextChar* c = text; c != end; ++c) {
1412 if (*c == '$') return c - text;
1413 }
1414 return -1;
1415 }
1416
GetFirstDollarIndexRawFlat(JSLinearString * text)1417 int32_t js::GetFirstDollarIndexRawFlat(JSLinearString* text) {
1418 uint32_t len = text->length();
1419
1420 JS::AutoCheckCannotGC nogc;
1421 if (text->hasLatin1Chars())
1422 return GetFirstDollarIndexImpl(text->latin1Chars(nogc), len);
1423
1424 return GetFirstDollarIndexImpl(text->twoByteChars(nogc), len);
1425 }
1426
GetFirstDollarIndexRaw(JSContext * cx,JSString * str,int32_t * index)1427 bool js::GetFirstDollarIndexRaw(JSContext* cx, JSString* str, int32_t* index) {
1428 JSLinearString* text = str->ensureLinear(cx);
1429 if (!text) return false;
1430
1431 *index = GetFirstDollarIndexRawFlat(text);
1432 return true;
1433 }
1434
RegExpPrototypeOptimizable(JSContext * cx,unsigned argc,Value * vp)1435 bool js::RegExpPrototypeOptimizable(JSContext* cx, unsigned argc, Value* vp) {
1436 // This can only be called from self-hosted code.
1437 CallArgs args = CallArgsFromVp(argc, vp);
1438 MOZ_ASSERT(args.length() == 1);
1439
1440 args.rval().setBoolean(
1441 RegExpPrototypeOptimizableRaw(cx, &args[0].toObject()));
1442 return true;
1443 }
1444
RegExpPrototypeOptimizableRaw(JSContext * cx,JSObject * proto)1445 bool js::RegExpPrototypeOptimizableRaw(JSContext* cx, JSObject* proto) {
1446 AutoUnsafeCallWithABI unsafe;
1447 AutoAssertNoPendingException aanpe(cx);
1448 if (!proto->isNative()) return false;
1449
1450 NativeObject* nproto = static_cast<NativeObject*>(proto);
1451
1452 Shape* shape =
1453 cx->compartment()->regExps.getOptimizableRegExpPrototypeShape();
1454 if (shape == nproto->lastProperty()) return true;
1455
1456 JSFunction* flagsGetter;
1457 if (!GetOwnGetterPure(cx, proto, NameToId(cx->names().flags), &flagsGetter))
1458 return false;
1459
1460 if (!flagsGetter) return false;
1461
1462 if (!IsSelfHostedFunctionWithName(flagsGetter, cx->names().RegExpFlagsGetter))
1463 return false;
1464
1465 JSNative globalGetter;
1466 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().global),
1467 &globalGetter))
1468 return false;
1469
1470 if (globalGetter != regexp_global) return false;
1471
1472 JSNative ignoreCaseGetter;
1473 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().ignoreCase),
1474 &ignoreCaseGetter))
1475 return false;
1476
1477 if (ignoreCaseGetter != regexp_ignoreCase) return false;
1478
1479 JSNative multilineGetter;
1480 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().multiline),
1481 &multilineGetter))
1482 return false;
1483
1484 if (multilineGetter != regexp_multiline) return false;
1485
1486 JSNative stickyGetter;
1487 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().sticky),
1488 &stickyGetter))
1489 return false;
1490
1491 if (stickyGetter != regexp_sticky) return false;
1492
1493 JSNative unicodeGetter;
1494 if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().unicode),
1495 &unicodeGetter))
1496 return false;
1497
1498 if (unicodeGetter != regexp_unicode) return false;
1499
1500 // Check if @@match, @@search, and exec are own data properties,
1501 // those values should be tested in selfhosted JS.
1502 bool has = false;
1503 if (!HasOwnDataPropertyPure(
1504 cx, proto, SYMBOL_TO_JSID(cx->wellKnownSymbols().match), &has))
1505 return false;
1506 if (!has) return false;
1507
1508 if (!HasOwnDataPropertyPure(
1509 cx, proto, SYMBOL_TO_JSID(cx->wellKnownSymbols().search), &has))
1510 return false;
1511 if (!has) return false;
1512
1513 if (!HasOwnDataPropertyPure(cx, proto, NameToId(cx->names().exec), &has))
1514 return false;
1515 if (!has) return false;
1516
1517 cx->compartment()->regExps.setOptimizableRegExpPrototypeShape(
1518 nproto->lastProperty());
1519 return true;
1520 }
1521
RegExpInstanceOptimizable(JSContext * cx,unsigned argc,Value * vp)1522 bool js::RegExpInstanceOptimizable(JSContext* cx, unsigned argc, Value* vp) {
1523 // This can only be called from self-hosted code.
1524 CallArgs args = CallArgsFromVp(argc, vp);
1525 MOZ_ASSERT(args.length() == 2);
1526
1527 args.rval().setBoolean(RegExpInstanceOptimizableRaw(cx, &args[0].toObject(),
1528 &args[1].toObject()));
1529 return true;
1530 }
1531
RegExpInstanceOptimizableRaw(JSContext * cx,JSObject * obj,JSObject * proto)1532 bool js::RegExpInstanceOptimizableRaw(JSContext* cx, JSObject* obj,
1533 JSObject* proto) {
1534 AutoUnsafeCallWithABI unsafe;
1535 AutoAssertNoPendingException aanpe(cx);
1536
1537 RegExpObject* rx = &obj->as<RegExpObject>();
1538
1539 Shape* shape = cx->compartment()->regExps.getOptimizableRegExpInstanceShape();
1540 if (shape == rx->lastProperty()) return true;
1541
1542 if (!rx->hasStaticPrototype()) return false;
1543
1544 if (rx->staticPrototype() != proto) return false;
1545
1546 if (!RegExpObject::isInitialShape(rx)) return false;
1547
1548 cx->compartment()->regExps.setOptimizableRegExpInstanceShape(
1549 rx->lastProperty());
1550 return true;
1551 }
1552
/*
 * Pattern match the script to check if it is indexing into a particular
 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
 * such cases, which are used by javascript packers (particularly the popular
 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
 * code patterns generated by such packers here.
 *
 * Takes one object argument (the lambda). Returns the base object 'b' when
 * the pattern matches and 'b' is a suitable native object; returns undefined
 * in every other non-error case. Fails only if the function's script cannot
 * be obtained.
 */
bool js::intrinsic_GetElemBaseForLambda(JSContext* cx, unsigned argc,
                                        Value* vp) {
  // This can only be called from self-hosted code.
  CallArgs args = CallArgsFromVp(argc, vp);
  MOZ_ASSERT(args.length() == 1);

  JSObject& lambda = args[0].toObject();
  // Undefined is the answer for every "pattern not matched" exit below.
  args.rval().setUndefined();

  if (!lambda.is<JSFunction>()) return true;

  RootedFunction fun(cx, &lambda.as<JSFunction>());
  if (!fun->isInterpreted() || fun->isClassConstructor()) return true;

  JSScript* script = JSFunction::getOrCreateScript(cx, fun);
  if (!script) return false;

  // Walk the bytecode from its first instruction, one expected op at a time.
  jsbytecode* pc = script->code();

  /*
   * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
   * Rule out the (unlikely) possibility of a function with environment
   * objects since it would make our environment walk off.
   */
  if (JSOp(*pc) != JSOP_GETALIASEDVAR || fun->needsSomeEnvironmentObject())
    return true;
  EnvironmentCoordinate ec(pc);
  EnvironmentObject* env = &fun->environment()->as<EnvironmentObject>();
  // Follow the enclosing-environment chain for the number of hops recorded
  // in the coordinate.
  for (unsigned i = 0; i < ec.hops(); ++i)
    env = &env->enclosingEnvironment().as<EnvironmentObject>();
  Value b = env->aliasedBinding(ec);
  pc += JSOP_GETALIASEDVAR_LENGTH;

  /* Look for 'a' to be the lambda's first argument. */
  if (JSOp(*pc) != JSOP_GETARG || GET_ARGNO(pc) != 0) return true;
  pc += JSOP_GETARG_LENGTH;

  /* 'b[a]' */
  if (JSOp(*pc) != JSOP_GETELEM) return true;
  pc += JSOP_GETELEM_LENGTH;

  /* 'return b[a]' */
  if (JSOp(*pc) != JSOP_RETURN) return true;

  /* 'b' must behave like a normal object. */
  if (!b.isObject()) return true;

  JSObject& bobj = b.toObject();
  const Class* clasp = bobj.getClass();
  // Reject non-native classes and classes with custom lookup/get hooks.
  if (!clasp->isNative() || clasp->getOpsLookupProperty() ||
      clasp->getOpsGetProperty())
    return true;

  args.rval().setObject(bobj);
  return true;
}
1616
1617 /*
1618 * Emulates `b[a]` property access, that is detected in GetElemBaseForLambda.
1619 * It returns the property value only if the property is data property and the
1620 * property value is a string. Otherwise it returns undefined.
1621 */
intrinsic_GetStringDataProperty(JSContext * cx,unsigned argc,Value * vp)1622 bool js::intrinsic_GetStringDataProperty(JSContext* cx, unsigned argc,
1623 Value* vp) {
1624 CallArgs args = CallArgsFromVp(argc, vp);
1625 MOZ_ASSERT(args.length() == 2);
1626
1627 RootedObject obj(cx, &args[0].toObject());
1628 if (!obj->isNative()) {
1629 // The object is already checked to be native in GetElemBaseForLambda,
1630 // but it can be swapped to another class that is non-native.
1631 // Return undefined to mark failure to get the property.
1632 args.rval().setUndefined();
1633 return true;
1634 }
1635
1636 JSAtom* atom = AtomizeString(cx, args[1].toString());
1637 if (!atom) return false;
1638
1639 Value v;
1640 if (GetPropertyPure(cx, obj, AtomToId(atom), &v) && v.isString())
1641 args.rval().set(v);
1642 else
1643 args.rval().setUndefined();
1644
1645 return true;
1646 }
1647